diff --git a/ivy.xml b/ivy.xml
index 96c1de844..ee24bc367 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -76,7 +76,7 @@
-
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index f8e87aa58..2ceb4ab46 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -480,7 +480,7 @@ public class GenomeAnalysisEngine {
}
} else if (walker instanceof ReadPairWalker) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
- throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
+ throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers are exceptions in that they cannot be run on coordinate-sorted BAMs but instead require query name-sorted files. You will need to resort your input BAM file in query name order to use this walker.");
if(intervals != null && !intervals.isEmpty())
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
index b39fdd79d..a14d999ea 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
@@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@@ -110,6 +111,17 @@ public class Sample implements Comparable { // implements java.io.Serial
return infoDB.getSample(paternalID);
}
+ public ArrayList getParents(){
+ ArrayList parents = new ArrayList(2);
+ Sample parent = getMother();
+ if(parent != null)
+ parents.add(parent);
+ parent = getFather();
+ if(parent != null)
+ parents.add(parent);
+ return parents;
+ }
+
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
index 8098de5b1..ab38b69cd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
@@ -49,5 +49,5 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
public List getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Filtered Depth")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
index 85977bf8e..1956dac6c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
- public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
+ public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
@@ -77,13 +77,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public enum InfoFieldKey {
EFFECT_KEY ("SNPEFF_EFFECT", -1),
IMPACT_KEY ("SNPEFF_IMPACT", 0),
- CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
- AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
- GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
- GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
- TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
- EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
- FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
+ FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", 1),
+ CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 2),
+ AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 3),
+ GENE_NAME_KEY ("SNPEFF_GENE_NAME", 4),
+ GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 5),
+ TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 7),
+ EXON_ID_KEY ("SNPEFF_EXON_ID", 8);
// Actual text of the key
private final String keyName;
@@ -110,70 +110,53 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// are validated against this list.
public enum EffectType {
// High-impact effects:
- FRAME_SHIFT (EffectFunctionalClass.NONE, false),
- STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
- START_LOST (EffectFunctionalClass.NONE, false),
- SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
- SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
- EXON_DELETED (EffectFunctionalClass.NONE, false),
- STOP_LOST (EffectFunctionalClass.NONE, false),
+ SPLICE_SITE_ACCEPTOR,
+ SPLICE_SITE_DONOR,
+ START_LOST,
+ EXON_DELETED,
+ FRAME_SHIFT,
+ STOP_GAINED,
+ STOP_LOST,
// Moderate-impact effects:
- NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
- CODON_CHANGE (EffectFunctionalClass.NONE, false),
- CODON_INSERTION (EffectFunctionalClass.NONE, false),
- CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
- CODON_DELETION (EffectFunctionalClass.NONE, false),
- CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
- UTR_5_DELETED (EffectFunctionalClass.NONE, false),
- UTR_3_DELETED (EffectFunctionalClass.NONE, false),
+ NON_SYNONYMOUS_CODING,
+ CODON_CHANGE,
+ CODON_INSERTION,
+ CODON_CHANGE_PLUS_CODON_INSERTION,
+ CODON_DELETION,
+ CODON_CHANGE_PLUS_CODON_DELETION,
+ UTR_5_DELETED,
+ UTR_3_DELETED,
// Low-impact effects:
- SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
- SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
- NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
- SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
- NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
- START_GAINED (EffectFunctionalClass.NONE, false),
+ SYNONYMOUS_START,
+ NON_SYNONYMOUS_START,
+ START_GAINED,
+ SYNONYMOUS_CODING,
+ SYNONYMOUS_STOP,
+ NON_SYNONYMOUS_STOP,
// Modifiers:
- NONE (EffectFunctionalClass.NONE, true),
- CHROMOSOME (EffectFunctionalClass.NONE, true),
- INTERGENIC (EffectFunctionalClass.NONE, true),
- UPSTREAM (EffectFunctionalClass.NONE, true),
- UTR_5_PRIME (EffectFunctionalClass.NONE, true),
- CDS (EffectFunctionalClass.NONE, true),
- GENE (EffectFunctionalClass.NONE, true),
- TRANSCRIPT (EffectFunctionalClass.NONE, true),
- EXON (EffectFunctionalClass.NONE, true),
- INTRON (EffectFunctionalClass.NONE, true),
- UTR_3_PRIME (EffectFunctionalClass.NONE, true),
- DOWNSTREAM (EffectFunctionalClass.NONE, true),
- INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
- INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
- REGULATION (EffectFunctionalClass.NONE, true),
- CUSTOM (EffectFunctionalClass.NONE, true),
- WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
-
- private final EffectFunctionalClass functionalClass;
- private final boolean isModifier;
-
- EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
- this.functionalClass = functionalClass;
- this.isModifier = isModifier;
- }
-
- public EffectFunctionalClass getFunctionalClass() {
- return functionalClass;
- }
-
- public boolean isModifier() {
- return isModifier;
- }
+ NONE,
+ CHROMOSOME,
+ CUSTOM,
+ CDS,
+ GENE,
+ TRANSCRIPT,
+ EXON,
+ INTRON_CONSERVED,
+ UTR_5_PRIME,
+ UTR_3_PRIME,
+ DOWNSTREAM,
+ INTRAGENIC,
+ INTERGENIC,
+ INTERGENIC_CONSERVED,
+ UPSTREAM,
+ REGULATION,
+ INTRON
}
- // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
- // classifying some of the LOW impact effects as MODIFIERs.
+ // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact, or as a MODIFIER.
public enum EffectImpact {
MODIFIER (0),
LOW (1),
@@ -202,7 +185,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
UNKNOWN
}
- // We assign a functional class to each SnpEff effect.
+ // SnpEff assigns a functional class to each effect.
public enum EffectFunctionalClass {
NONE (0),
SILENT (1),
@@ -379,13 +362,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public List getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
+ InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
- InfoFieldKey.EXON_ID_KEY.getKeyName(),
- InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
+ InfoFieldKey.EXON_ID_KEY.getKeyName()
);
}
@@ -393,13 +376,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
+ new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
+ new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
@@ -409,6 +392,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
+ private EffectFunctionalClass functionalClass;
private String codonChange;
private String aminoAcidChange;
private String geneName;
@@ -420,16 +404,21 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private String parseError = null;
private boolean isWellFormed = true;
- private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
- private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
- private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
+ private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 9;
+ private static final int NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR = 10;
+ private static final int NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR = 11;
- // Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
- // errors come after warnings, not vice versa:
- private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
- private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
+ // If there is either a warning OR an error, it will be in the last field. If there is both
+ // a warning AND an error, the warning will be in the second-to-last field, and the error will
+ // be in the last field.
+ private static final int SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR - 1;
+ private static final int SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 2;
+ private static final int SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 1;
- private static final int SNPEFF_CODING_FIELD_INDEX = 5;
+ // Position of the field indicating whether the effect is coding or non-coding. This field is used
+ // in selecting the most significant effect, but is not included in the annotations we return
+ // since it can be deduced from the SNPEFF_GENE_BIOTYPE field.
+ private static final int SNPEFF_CODING_FIELD_INDEX = 6;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
@@ -447,11 +436,14 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
- if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
- parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
+ if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR ) {
+ parseError(String.format("SnpEff issued the following warning or error: \"%s\"",
+ effectMetadata[SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR]));
}
- else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
- parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
+ else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR ) {
+ parseError(String.format("SnpEff issued the following warning: \"%s\", and the following error: \"%s\"",
+ effectMetadata[SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR],
+ effectMetadata[SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
@@ -461,23 +453,33 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return;
}
- if ( effect != null && effect.isModifier() ) {
- impact = EffectImpact.MODIFIER;
+ // The impact field will never be empty, and should always contain one of the enumerated values:
+ try {
+ impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
- else {
+ catch ( IllegalArgumentException e ) {
+ parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
+ }
+
+ // The functional class field will be empty when the effect has no functional class associated with it:
+ if ( effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()].trim().length() > 0 ) {
try {
- impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
+ functionalClass = EffectFunctionalClass.valueOf(effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
- parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
+ parseError(String.format("Unrecognized value for effect functional class: %s", effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]));
}
}
+ else {
+ functionalClass = EffectFunctionalClass.NONE;
+ }
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
+ // The coding field will be empty when SnpEff has no coding info for the effect:
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
@@ -534,7 +536,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return true;
}
else if ( impact.isSameImpactAs(other.impact) ) {
- return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
+ return functionalClass.isHigherPriorityThan(other.functionalClass);
}
return false;
@@ -545,13 +547,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
+ addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
- addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
return annotations;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index 489e963e8..74c55dbfe 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
-import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@@ -83,8 +81,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
- *
- * @param useBAQedPileup
+ * @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
@@ -93,13 +90,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map GLs,
- Allele alternateAlleleToUse, boolean useBAQedPileup);
+ Allele alternateAlleleToUse,
+ boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if ( BaseUtils.isRegularBase( p.getBase() ) )
- count++;
+ count += p.getRepresentativeCount();
}
return count;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index bdd4e2c65..369c2d0c6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -258,7 +258,7 @@ public class UnifiedGenotyper extends LocusWalker result = new HashSet();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
- result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
+ result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
return result;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
index 3eedc2a28..847165e3e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@@ -7,38 +7,83 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.text.XReadLines;
+import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.PrintStream;
import java.util.*;
/**
- * Phases a trio VCF (child phased by transmission, implied phase carried over to parents). Given genotypes for a trio,
- * this walker modifies the genotypes (if necessary) to reflect the most likely configuration given the genotype
- * likelihoods and inheritance constraints, phases child by transmission and carries over implied phase to the parents
- * (their alleles in their genotypes are ordered as transmitted|untransmitted). Computes probability that the
- * determined phase is correct given that the genotype configuration is correct (useful if you want to use this to
- * compare phasing accuracy, but want to break that comparison down by phasing confidence in the truth set). Optionally
- * filters out sites where the phasing is indeterminate (site has no-calls), ambiguous (everyone is heterozygous), or
- * the genotypes exhibit a Mendelian violation. This walker assumes there are only three samples in the VCF file to
- * begin.
+ * Computes the most likely genotype combination and phases trios and parent/child pairs
+ *
+ *
+ * PhaseByTransmission is a GATK tool that 1) computes the most likely genotype combination and phases trios and parent/child pairs given their genotype likelihoods and a mutation prior and 2) phases
+ * all sites where parent/child transmission can be inferred unambiguously. It reports the genotype combination (and hence phasing) probability.
+ * Ambiguous sites are:
+ *
+ * - Sites where all individuals are heterozygous
+ * - Sites where there is a Mendelian violation
+ *
+ * Missing genotypes are handled as follows:
+ *
+ * - In parent/child pairs: If an individual genotype is missing at one site, the other one is phased if it is homozygous. No phasing probability is emitted.
+ * - In trios: If the child is missing, parents are treated as separate individuals and phased if homozygous. No phasing probability is emitted.
+ * - In trios: If one of the parents is missing, it is handled like a parent/child pair. Phasing is done unless both the parent and child are heterozygous, and a phasing probability is emitted.
+ * - In trios: If two individuals are missing, the remaining individual is phased if it is homozygous. No phasing probability is emitted.
+ *
+ *
+ * Input
+ *
+ *
+ * - A VCF variant set containing trio(s) and/or parent/child pair(s).
+ * - A PED pedigree file containing the description of the individuals relationships.
+ *
+ *
+ *
+ * Options
+ *
+ *
+ * - MendelianViolationsFile: An optional argument for reporting. If a file is specified, all sites that remain in mendelian violation after being assigned the most likely genotype
+ * combination will be reported there. Information reported: chromosome, position, filter, allele count in VCF, family, transmission probability,
+ * and each individual genotype, depth, allelic depth and likelihoods.
+ * - DeNovoPrior: Mutation prior; default is 1e-8
+ *
+ *
+ *
+ * Output
+ *
+ * A VCF with genotypes recalibrated as most likely under the familial constraint and phased by descent where non-ambiguous.
+ *
+ *
+ * Examples
+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T PhaseByTransmission \
+ * -V input.vcf \
+ * -ped input.ped \
+ * -o output.vcf
+ *
+ *
*/
-public class PhaseByTransmission extends RodWalker {
+public class PhaseByTransmission extends RodWalker, HashMap> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
- @Argument(shortName="f", fullName="familySpec", required=true, doc="Patterns for the family structure (usage: mom+dad=child). Specify several trios by supplying this argument many times and/or a file containing many patterns.")
- public ArrayList familySpecs = null;
+ @Argument(shortName = "mvf",required = false,fullName = "MendelianViolationsFile", doc="File to output the mendelian violation details.")
+ private PrintStream mvFile = null;
+
+ @Argument(shortName = "prior",required = false,fullName = "DeNovoPrior", doc="Prior for de novo mutations. Default: 1e-8")
+ private double deNovoPrior=1e-8;
@Output
protected VCFWriter vcfWriter = null;
@@ -46,241 +91,633 @@ public class PhaseByTransmission extends RodWalker {
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
private final String SOURCE_NAME = "PhaseByTransmission";
- private final Double MENDELIAN_VIOLATION_PRIOR = 1e-8;
+ public final double NO_TRANSMISSION_PROB = -1.0;
- private class Trio {
- private String mother;
- private String father;
- private String child;
+ private ArrayList trios = new ArrayList();
- public Trio(String mother, String father, String child) {
- this.mother = mother;
- this.father = father;
- this.child = child;
- }
+ //Matrix of priors for all genotype combinations
+ private EnumMap>> mvCountMatrix;
- public Trio(String familySpec) {
- String[] pieces = familySpec.split("[\\+\\=]");
+ //Matrix of allele transmission
+ private EnumMap>> transmissionMatrix;
- this.mother = pieces[0];
- this.father = pieces[1];
- this.child = pieces[2];
- }
+ //Metrics counters hash keys
+ private final Byte NUM_TRIO_GENOTYPES_CALLED = 0;
+ private final Byte NUM_TRIO_GENOTYPES_NOCALL = 1;
+ private final Byte NUM_TRIO_GENOTYPES_PHASED = 2;
+ private final Byte NUM_TRIO_HET_HET_HET = 3;
+ private final Byte NUM_TRIO_VIOLATIONS = 4;
+ private final Byte NUM_TRIO_DOUBLE_VIOLATIONS = 10;
+ private final Byte NUM_PAIR_GENOTYPES_CALLED = 5;
+ private final Byte NUM_PAIR_GENOTYPES_NOCALL = 6;
+ private final Byte NUM_PAIR_GENOTYPES_PHASED = 7;
+ private final Byte NUM_PAIR_HET_HET = 8;
+ private final Byte NUM_PAIR_VIOLATIONS = 9;
+ private final Byte NUM_GENOTYPES_MODIFIED = 11;
- public String getMother() { return mother; }
- public String getFather() { return father; }
- public String getChild() { return child; }
+ //Random number generator
+ private Random rand = new Random();
+
+ private enum FamilyMember {
+ MOTHER,
+ FATHER,
+ CHILD
}
- private ArrayList trios = new ArrayList();
+ //Stores a conceptual trio or parent/child pair genotype combination along with its phasing.
+ //This combination can then be "applied" to a given trio or pair using the getPhasedGenotypes method.
+ private class TrioPhase {
- public ArrayList getFamilySpecsFromCommandLineInput(ArrayList familySpecs) {
- if (familySpecs != null) {
- // Let's first go through the list and see if we were given any files. We'll add every entry in the file to our
- // spec list set, and treat the entries as if they had been specified on the command line.
- ArrayList specs = new ArrayList();
- for (String familySpec : familySpecs) {
- File specFile = new File(familySpec);
+ //Create 2 fake alleles
+ //The actual bases will never be used but the Genotypes created using the alleles will be.
+ private final Allele REF = Allele.create("A",true);
+ private final Allele VAR = Allele.create("A",false);
+ private final Allele NO_CALL = Allele.create(".",false);
+ private final String DUMMY_NAME = "DummySample";
- try {
- XReadLines reader = new XReadLines(specFile);
+ private EnumMap trioPhasedGenotypes = new EnumMap(FamilyMember.class);
- List lines = reader.readLines();
- for (String line : lines) {
- specs.add(new Trio(line));
- }
- } catch (FileNotFoundException e) {
- specs.add(new Trio(familySpec)); // not a file, so must be a family spec
+ private ArrayList getAlleles(Genotype.Type genotype){
+ ArrayList alleles = new ArrayList(2);
+ if(genotype == Genotype.Type.HOM_REF){
+ alleles.add(REF);
+ alleles.add(REF);
+ }
+ else if(genotype == Genotype.Type.HET){
+ alleles.add(REF);
+ alleles.add(VAR);
+ }
+ else if(genotype == Genotype.Type.HOM_VAR){
+ alleles.add(VAR);
+ alleles.add(VAR);
+ }
+ else{
+ return null;
+ }
+ return alleles;
+ }
+
+ private boolean isPhasable(Genotype.Type genotype){
+ return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR;
+ }
+
+ //Create a new Genotype based on information from a single individual
+ //Homozygous genotypes will be set as phased, heterozygous won't be
+ private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){
+ if(genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HOM_VAR){
+ trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME, getAlleles(genotype), Genotype.NO_NEG_LOG_10PERROR, null, null, true));
+ }
+ else
+ trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME,getAlleles(genotype),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ }
+
+ //Find the phase for a parent/child pair
+ private void phasePairAlleles(Genotype.Type parentGenotype, Genotype.Type childGenotype, FamilyMember parent){
+
+ //Special case for Het/Het as it is ambiguous
+ if(parentGenotype == Genotype.Type.HET && childGenotype == Genotype.Type.HET){
+ trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, getAlleles(parentGenotype), Genotype.NO_NEG_LOG_10PERROR, null, null, false));
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ return;
+ }
+
+ ArrayList parentAlleles = getAlleles(parentGenotype);
+ ArrayList childAlleles = getAlleles(childGenotype);
+ ArrayList parentPhasedAlleles = new ArrayList(2);
+ ArrayList childPhasedAlleles = new ArrayList(2);
+
+ //If there is a possible phasing between the mother and child => phase
+ int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
+ if(childTransmittedAlleleIndex > -1){
+ trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_NEG_LOG_10PERROR, null, null, true));
+ childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
+ childPhasedAlleles.add(childAlleles.get(0));
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_NEG_LOG_10PERROR, null, null, true));
+ }
+ else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
+ parentPhasedAlleles.add(parentAlleles.get(1));
+ parentPhasedAlleles.add(parentAlleles.get(0));
+ trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_NEG_LOG_10PERROR, null, null, true));
+ childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
+ childPhasedAlleles.add(childAlleles.get(0));
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_NEG_LOG_10PERROR, null, null, true));
+ }
+ //This is a Mendelian Violation => Do not phase
+ else{
+ trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME,getAlleles(parentGenotype),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ }
+ }
+
+ //Phases a family by transmission
+ private void phaseFamilyAlleles(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
+
+ Set> possiblePhasedChildGenotypes = new HashSet>();
+ ArrayList motherAlleles = getAlleles(mother);
+ ArrayList fatherAlleles = getAlleles(father);
+ ArrayList childAlleles = getAlleles(child);
+
+ //Build all possible child genotypes for the given parent's genotypes
+ for (Allele momAllele : motherAlleles) {
+ for (Allele fatherAllele : fatherAlleles) {
+ ArrayList possiblePhasedChildAlleles = new ArrayList(2);
+ possiblePhasedChildAlleles.add(momAllele);
+ possiblePhasedChildAlleles.add(fatherAllele);
+ possiblePhasedChildGenotypes.add(possiblePhasedChildAlleles);
}
}
- return specs;
+ for (ArrayList childPhasedAllelesAlleles : possiblePhasedChildGenotypes) {
+ int firstAlleleIndex = childPhasedAllelesAlleles.indexOf(childAlleles.get(0));
+ int secondAlleleIndex = childPhasedAllelesAlleles.lastIndexOf(childAlleles.get(1));
+ //If a possible combination has been found, create the genotypes
+ if (firstAlleleIndex != secondAlleleIndex && firstAlleleIndex > -1 && secondAlleleIndex > -1) {
+ //Create mother's genotype
+ ArrayList motherPhasedAlleles = new ArrayList(2);
+ motherPhasedAlleles.add(childPhasedAllelesAlleles.get(0));
+ if(motherAlleles.get(0) != motherPhasedAlleles.get(0))
+ motherPhasedAlleles.add(motherAlleles.get(0));
+ else
+ motherPhasedAlleles.add(motherAlleles.get(1));
+ trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,motherPhasedAlleles,Genotype.NO_NEG_LOG_10PERROR,null,null,true));
+
+ //Create father's genotype
+ ArrayList fatherPhasedAlleles = new ArrayList(2);
+ fatherPhasedAlleles.add(childPhasedAllelesAlleles.get(1));
+ if(fatherAlleles.get(0) != fatherPhasedAlleles.get(0))
+ fatherPhasedAlleles.add(fatherAlleles.get(0));
+ else
+ fatherPhasedAlleles.add(fatherAlleles.get(1));
+ trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,fatherPhasedAlleles,Genotype.NO_NEG_LOG_10PERROR,null,null,true));
+
+ //Create child's genotype
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,childPhasedAllelesAlleles,Genotype.NO_NEG_LOG_10PERROR,null,null,true));
+
+ //Once a phased combination is found, exit
+ return;
+ }
+ }
+
+ //If this is reached then no phasing could be found
+ trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,getAlleles(mother),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,getAlleles(father),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
+ trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(child),Genotype.NO_NEG_LOG_10PERROR,null,null,false));
}
- return new ArrayList();
+ /* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
+ If one or more genotypes are set as NO_CALL or UNAVAILABLE, it will phase them like a pair
+ or single individual.
+ */
+ public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
+
+ //Take care of cases where one or more family members are no call
+ if(!isPhasable(child)){
+ phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
+ phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
+ phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
+ }
+ else if(!isPhasable(mother)){
+ phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
+ if(!isPhasable(father)){
+ phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
+ phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
+ }
+ else
+ phasePairAlleles(father, child, FamilyMember.FATHER);
+ }
+ else if(!isPhasable(father)){
+ phasePairAlleles(mother, child, FamilyMember.MOTHER);
+ phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
+ }
+ //Special case for Het/Het/Het as it is ambiguous
+ else if(mother == Genotype.Type.HET && father == Genotype.Type.HET && child == Genotype.Type.HET){
+ phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
+ phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
+ phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
+ }
+ //All family members have genotypes and at least one of them is not Het
+ else{
+ phaseFamilyAlleles(mother, father, child);
+ }
+ }
+
+ /**
+ * Applies the trio genotype combination to the given trio.
+ * @param ref: Reference allele
+ * @param alt: Alternate allele
+ * @param motherGenotype: Genotype of the mother to phase using this trio genotype combination
+ * @param fatherGenotype: Genotype of the father to phase using this trio genotype combination
+ * @param childGenotype: Genotype of the child to phase using this trio genotype combination
+ * @param transmissionProb: Probability for this trio genotype combination to be correct (pass NO_TRANSMISSION_PROB if unavailable)
+ * @param phasedGenotypes: An ArrayList to which the newly phased genotypes are added in the following order: Mother, Father, Child
+ */
+ public void getPhasedGenotypes(Allele ref, Allele alt, Genotype motherGenotype, Genotype fatherGenotype, Genotype childGenotype, double transmissionProb,ArrayList phasedGenotypes){
+ phasedGenotypes.add(getPhasedGenotype(ref,alt,motherGenotype,transmissionProb,this.trioPhasedGenotypes.get(FamilyMember.MOTHER)));
+ phasedGenotypes.add(getPhasedGenotype(ref,alt,fatherGenotype,transmissionProb,this.trioPhasedGenotypes.get(FamilyMember.FATHER)));
+ phasedGenotypes.add(getPhasedGenotype(ref,alt,childGenotype,transmissionProb,this.trioPhasedGenotypes.get(FamilyMember.CHILD)));
+ }
+
+ private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){
+
+ int phredScoreTransmission = -1;
+ if(transmissionProb != NO_TRANSMISSION_PROB)
+ phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb));
+
+ //Handle null, missing and unavailable genotypes
+ //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
+ //genotype so it is safe to return the original genotype in this case.
+ //In addition, if the phasing confidence is 0, then return the unphased, original genotypes.
+ if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType()))
+ return genotype;
+
+ //Add the transmission probability
+ Map genotypeAttributes = new HashMap();
+ genotypeAttributes.putAll(genotype.getAttributes());
+ if(transmissionProb>NO_TRANSMISSION_PROB)
+ genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
+
+ ArrayList phasedAlleles = new ArrayList(2);
+ for(Allele allele : phasedGenotype.getAlleles()){
+ if(allele.isReference())
+ phasedAlleles.add(refAllele);
+ else if(allele.isNonReference())
+ phasedAlleles.add(altAllele);
+ //At this point there should not be any other alleles left
+ else
+ throw new UserException(String.format("BUG: Unexpected allele: %s. Please report.",allele.toString()));
+
+ }
+
+ //Compute the new Log10Error if the genotype is different from the original genotype
+ double negLog10Error;
+ if(genotype.getType() == phasedGenotype.getType())
+ negLog10Error = genotype.getNegLog10PError();
+ else
+ negLog10Error = genotype.getLikelihoods().getNegLog10GQ(phasedGenotype.getType());
+
+ return new Genotype(genotype.getSampleName(), phasedAlleles, negLog10Error, null, genotypeAttributes, phasedGenotype.isPhased());
+ }
+
+
}
/**
- * Parse the familial relationship specification, and initialize VCF writer
+ * Parse the familial relationship specification, build the transmission matrices and initialize VCF writer
*/
public void initialize() {
- trios = getFamilySpecsFromCommandLineInput(familySpecs);
-
ArrayList rodNames = new ArrayList();
rodNames.add(variantCollection.variants.getName());
-
Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
+ //Get the trios from the families passed as ped
+ setTrios();
+ if(trios.size()<1)
+ throw new UserException.BadInput("No PED file passed or no trios found in PED file. Aborted.");
+
+
Set headerLines = new HashSet();
headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit()));
- headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Float, "Probability that the phase is correct given that the genotypes are correct"));
+ headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
headerLines.add(new VCFHeaderLine("source", SOURCE_NAME));
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
+
+ buildMatrices();
+
+ if(mvFile != null)
+ mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL");
+
}
- private double computeTransmissionLikelihoodOfGenotypeConfiguration(Genotype mom, Genotype dad, Genotype child) {
- double[] momLikelihoods = MathUtils.normalizeFromLog10(mom.getLikelihoods().getAsVector());
- double[] dadLikelihoods = MathUtils.normalizeFromLog10(dad.getLikelihoods().getAsVector());
- double[] childLikelihoods = MathUtils.normalizeFromLog10(child.getLikelihoods().getAsVector());
+ /**
+ * Select trios and parent/child pairs only
+ */
+ private void setTrios(){
+
+ Map> families = this.getSampleDB().getFamilies();
+ Set family;
+ ArrayList parents;
+ for(String familyID : families.keySet()){
+ family = families.get(familyID);
+ if(family.size()<2 || family.size()>3){
+ logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID,family.size()));
+ }
+ else{
+ for(Sample familyMember : family){
+ parents = familyMember.getParents();
+ if(parents.size()>0){
+ if(family.containsAll(parents))
+ this.trios.add(familyMember);
+ else
+ logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID));
+ break;
+ }
+ }
+ }
+
+ }
+
- int momIndex = mom.getType().ordinal() - 1;
- int dadIndex = dad.getType().ordinal() - 1;
- int childIndex = child.getType().ordinal() - 1;
- return momLikelihoods[momIndex]*dadLikelihoods[dadIndex]*childLikelihoods[childIndex];
}
- private ArrayList createAllThreeGenotypes(Allele refAllele, Allele altAllele, Genotype g) {
- List homRefAlleles = new ArrayList();
- homRefAlleles.add(refAllele);
- homRefAlleles.add(refAllele);
- Genotype homRef = new Genotype(g.getSampleName(), homRefAlleles, g.getNegLog10PError(), null, g.getAttributes(), false);
-
- List hetAlleles = new ArrayList();
- hetAlleles.add(refAllele);
- hetAlleles.add(altAllele);
- Genotype het = new Genotype(g.getSampleName(), hetAlleles, g.getNegLog10PError(), null, g.getAttributes(), false);
-
- List homVarAlleles = new ArrayList();
- homVarAlleles.add(altAllele);
- homVarAlleles.add(altAllele);
- Genotype homVar = new Genotype(g.getSampleName(), homVarAlleles, g.getNegLog10PError(), null, g.getAttributes(), false);
-
- ArrayList genotypes = new ArrayList();
- genotypes.add(homRef);
- genotypes.add(het);
- genotypes.add(homVar);
-
- return genotypes;
+ //Create the transmission matrices
+ private void buildMatrices(){
+ mvCountMatrix = new EnumMap>>(Genotype.Type.class);
+ transmissionMatrix = new EnumMap>>(Genotype.Type.class);
+ for(Genotype.Type mother : Genotype.Type.values()){
+ mvCountMatrix.put(mother,new EnumMap>(Genotype.Type.class));
+ transmissionMatrix.put(mother,new EnumMap>(Genotype.Type.class));
+ for(Genotype.Type father : Genotype.Type.values()){
+ mvCountMatrix.get(mother).put(father,new EnumMap(Genotype.Type.class));
+ transmissionMatrix.get(mother).put(father,new EnumMap(Genotype.Type.class));
+ for(Genotype.Type child : Genotype.Type.values()){
+ mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
+ transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child));
+ }
+ }
+ }
}
- private int getNumberOfMatchingAlleles(Allele alleleToMatch, Genotype g) {
- List alleles = g.getAlleles();
- int matchingAlleles = 0;
+ //Returns the number of Mendelian Violations for a given genotype combination.
+ //If one of the parents' genotypes is missing, the combination is evaluated as a parent/child pair
+ //If the child genotype or both parents' genotypes are missing, 0 is returned.
+ private int getCombinationMVCount(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
- for (Allele a : alleles) {
- if (!alleleToMatch.equals(a)) {
- matchingAlleles++;
+ //Child is no call => No MV
+ if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE)
+ return 0;
+ //Add parents with genotypes for the evaluation
+ ArrayList parents = new ArrayList();
+ if (!(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE))
+ parents.add(mother);
+ if (!(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE))
+ parents.add(father);
+
+ //Both parents no calls => No MV
+ if (parents.isEmpty())
+ return 0;
+
+ //If at least one parent had a genotype, then count the number of ref and alt alleles that can be passed
+ int parentsNumRefAlleles = 0;
+ int parentsNumAltAlleles = 0;
+
+ for(Genotype.Type parent : parents){
+ if(parent == Genotype.Type.HOM_REF){
+ parentsNumRefAlleles++;
+ }
+ else if(parent == Genotype.Type.HET){
+ parentsNumRefAlleles++;
+ parentsNumAltAlleles++;
+ }
+ else if(parent == Genotype.Type.HOM_VAR){
+ parentsNumAltAlleles++;
}
}
- return matchingAlleles;
- }
-
- private boolean isMendelianViolation(Allele refAllele, Allele altAllele, Genotype mom, Genotype dad, Genotype child) {
- int numMomRefAlleles = getNumberOfMatchingAlleles(refAllele, mom) > 0 ? 1 : 0;
- int numMomAltAlleles = getNumberOfMatchingAlleles(altAllele, mom) > 0 ? 1 : 0;
-
- int numDadRefAlleles = getNumberOfMatchingAlleles(refAllele, dad) > 0 ? 1 : 0;
- int numDadAltAlleles = getNumberOfMatchingAlleles(altAllele, dad) > 0 ? 1 : 0;
-
- int numChildRefAlleles = getNumberOfMatchingAlleles(refAllele, child);
- int numChildAltAlleles = getNumberOfMatchingAlleles(altAllele, child);
-
- return (numMomRefAlleles + numDadRefAlleles < numChildRefAlleles || numMomAltAlleles + numDadAltAlleles < numChildAltAlleles);
- }
-
- private ArrayList getPhasedGenotypes(Genotype mom, Genotype dad, Genotype child) {
- Set possiblePhasedChildGenotypes = new HashSet();
-
- for (Allele momAllele : mom.getAlleles()) {
- for (Allele dadAllele : dad.getAlleles()) {
- ArrayList possiblePhasedChildAlleles = new ArrayList();
- possiblePhasedChildAlleles.add(momAllele);
- possiblePhasedChildAlleles.add(dadAllele);
-
- Genotype possiblePhasedChildGenotype = new Genotype(child.getSampleName(), possiblePhasedChildAlleles, child.getNegLog10PError(), child.getFilters(), child.getAttributes(), true);
-
- possiblePhasedChildGenotypes.add(possiblePhasedChildGenotype);
- }
+ //Case Child is HomRef
+ if(child == Genotype.Type.HOM_REF){
+ if(parentsNumRefAlleles == parents.size())
+ return 0;
+ else return (parents.size()-parentsNumRefAlleles);
}
- ArrayList finalGenotypes = new ArrayList();
-
- for (Genotype phasedChildGenotype : possiblePhasedChildGenotypes) {
- if (child.sameGenotype(phasedChildGenotype, true)) {
- Allele momTransmittedAllele = phasedChildGenotype.getAllele(0);
- Allele momUntransmittedAllele = mom.getAllele(0) != momTransmittedAllele ? mom.getAllele(0) : mom.getAllele(1);
-
- ArrayList phasedMomAlleles = new ArrayList();
- phasedMomAlleles.add(momTransmittedAllele);
- phasedMomAlleles.add(momUntransmittedAllele);
-
- Genotype phasedMomGenotype = new Genotype(mom.getSampleName(), phasedMomAlleles, mom.getNegLog10PError(), mom.getFilters(), mom.getAttributes(), true);
-
- Allele dadTransmittedAllele = phasedChildGenotype.getAllele(1);
- Allele dadUntransmittedAllele = dad.getAllele(0) != dadTransmittedAllele ? dad.getAllele(0) : dad.getAllele(1);
-
- ArrayList phasedDadAlleles = new ArrayList();
- phasedDadAlleles.add(dadTransmittedAllele);
- phasedDadAlleles.add(dadUntransmittedAllele);
-
- Genotype phasedDadGenotype = new Genotype(dad.getSampleName(), phasedDadAlleles, dad.getNegLog10PError(), dad.getFilters(), dad.getAttributes(), true);
-
- finalGenotypes.add(phasedMomGenotype);
- finalGenotypes.add(phasedDadGenotype);
- finalGenotypes.add(phasedChildGenotype);
-
- return finalGenotypes;
- }
+ //Case child is HomVar
+ if(child == Genotype.Type.HOM_VAR){
+ if(parentsNumAltAlleles == parents.size())
+ return 0;
+ else return parents.size()-parentsNumAltAlleles;
}
- finalGenotypes.add(mom);
- finalGenotypes.add(dad);
- finalGenotypes.add(child);
+ //Case child is Het
+ if(child == Genotype.Type.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
+ return 0;
- return finalGenotypes;
+ //MV
+ return 1;
}
- private ArrayList phaseTrioGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child) {
- ArrayList finalGenotypes = new ArrayList();
- finalGenotypes.add(mother);
- finalGenotypes.add(father);
- finalGenotypes.add(child);
+ //Given two trio genotypes combinations, returns the number of different genotypes between the two combinations.
+ private int countFamilyGenotypeDiff(Genotype.Type motherOriginal,Genotype.Type fatherOriginal,Genotype.Type childOriginal,Genotype.Type motherNew,Genotype.Type fatherNew,Genotype.Type childNew){
+ int count = 0;
+ if(motherOriginal!=motherNew)
+ count++;
+ if(fatherOriginal!=fatherNew)
+ count++;
+ if(childOriginal!=childNew)
+ count++;
+ return count;
+ }
- if (mother.isCalled() && father.isCalled() && child.isCalled()) {
- ArrayList possibleMotherGenotypes = createAllThreeGenotypes(ref, alt, mother);
- ArrayList possibleFatherGenotypes = createAllThreeGenotypes(ref, alt, father);
- ArrayList possibleChildGenotypes = createAllThreeGenotypes(ref, alt, child);
+ //Get a Map of genotype likelihoods.
+ //In case of null, unavailable or no call, all likelihoods are 1/3.
+ private EnumMap getLikelihoodsAsMapSafeNull(Genotype genotype){
+ if(genotype == null || !genotype.isCalled()){
+ EnumMap likelihoods = new EnumMap(Genotype.Type.class);
+ likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
+ likelihoods.put(Genotype.Type.HET,1.0/3.0);
+ likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
+ return likelihoods;
+ }
+ return genotype.getLikelihoods().getAsMap(true);
+ }
- double bestConfigurationLikelihood = 0.0;
- double bestPrior = 0.0;
- Genotype bestMotherGenotype = mother;
- Genotype bestFatherGenotype = father;
- Genotype bestChildGenotype = child;
+ //Returns the Genotype.Type; returns UNAVAILABLE if given null
+ private Genotype.Type getTypeSafeNull(Genotype genotype){
+ if(genotype == null)
+ return Genotype.Type.UNAVAILABLE;
+ return genotype.getType();
+ }
- double norm = 0.0;
- for (Genotype motherGenotype : possibleMotherGenotypes) {
- for (Genotype fatherGenotype : possibleFatherGenotypes) {
- for (Genotype childGenotype : possibleChildGenotypes) {
- double prior = isMendelianViolation(ref, alt, motherGenotype, fatherGenotype, childGenotype) ? MENDELIAN_VIOLATION_PRIOR : 1.0 - 12*MENDELIAN_VIOLATION_PRIOR;
- double configurationLikelihood = computeTransmissionLikelihoodOfGenotypeConfiguration(motherGenotype, fatherGenotype, childGenotype);
- norm += prior*configurationLikelihood;
+ /**
+ * Phases the genotypes of the given trio. If one of the parents is null, it is considered a parent/child pair.
+ * @param ref: Reference allele
+ * @param alt: Alternative allele
+ * @param mother: Mother's genotype
+ * @param father: Father's genotype
+ * @param child: Child's genotype
+ * @param finalGenotypes: An ArrayList to which the genotypes phased by transmission are added in the following order: Mother, Father, Child
+ * @return The number of Mendelian violations in the selected (most likely) genotype configuration
+ */
+ private int phaseTrioGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child,ArrayList finalGenotypes) {
- if (prior*configurationLikelihood > bestPrior*bestConfigurationLikelihood) {
- bestConfigurationLikelihood = configurationLikelihood;
- bestPrior = prior;
- bestMotherGenotype = motherGenotype;
- bestFatherGenotype = fatherGenotype;
- bestChildGenotype = childGenotype;
+ //Check whether it is a pair or trio
+ //Always assign the first parent as the parent having genotype information in pairs
+ //Always assign the mother as the first parent in trios
+ int parentsCalled = 0;
+ Map firstParentLikelihoods;
+ Map secondParentLikelihoods;
+ ArrayList bestFirstParentGenotype = new ArrayList();
+ ArrayList bestSecondParentGenotype = new ArrayList();
+ ArrayList bestChildGenotype = new ArrayList();
+ Genotype.Type pairSecondParentGenotype = null;
+ if(mother == null || !mother.isCalled()){
+ firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
+ secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
+ bestFirstParentGenotype.add(getTypeSafeNull(father));
+ bestSecondParentGenotype.add(getTypeSafeNull(mother));
+ pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
+ if(father != null && father.isCalled())
+ parentsCalled = 1;
+ }
+ else{
+ firstParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
+ secondParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
+ bestFirstParentGenotype.add(getTypeSafeNull(mother));
+ bestSecondParentGenotype.add(getTypeSafeNull(father));
+ if(father == null || !father.isCalled()){
+ parentsCalled = 1;
+ pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
+ }else{
+ parentsCalled = 2;
+ }
+ }
+ Map childLikelihoods = getLikelihoodsAsMapSafeNull(child);
+ bestChildGenotype.add(getTypeSafeNull(child));
+
+ //Prior vars
+ double bestConfigurationLikelihood = 0.0;
+ double norm = 0.0;
+ int configuration_index =0;
+ ArrayList bestMVCount = new ArrayList();
+ bestMVCount.add(0);
+
+ //Get the most likely combination
+ //Only check for most likely combination if at least a parent and the child have genotypes
+ if(child.isCalled() && parentsCalled > 0){
+ int mvCount;
+ int cumulativeMVCount = 0;
+ double configurationLikelihood = 0;
+ for(Map.Entry childGenotype : childLikelihoods.entrySet()){
+ for(Map.Entry firstParentGenotype : firstParentLikelihoods.entrySet()){
+ for(Map.Entry secondParentGenotype : secondParentLikelihoods.entrySet()){
+ mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
+ //For parent/child pairs, sum over the possible genotype configurations of the missing parent
+ if(parentsCalled<2){
+ cumulativeMVCount += mvCount;
+ configurationLikelihood += mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
}
+ //Evaluate configurations of trios
+ else{
+ configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
+ norm += configurationLikelihood;
+ //Keep this combination if
+ //It has a better likelihood
+ //Or it has the same likelihood but requires less changes from original genotypes
+ if (configurationLikelihood > bestConfigurationLikelihood){
+ bestConfigurationLikelihood = configurationLikelihood;
+ bestMVCount.clear();
+ bestMVCount.add(mvCount);
+ bestFirstParentGenotype.clear();
+ bestFirstParentGenotype.add(firstParentGenotype.getKey());
+ bestSecondParentGenotype.clear();
+ bestSecondParentGenotype.add(secondParentGenotype.getKey());
+ bestChildGenotype.clear();
+ bestChildGenotype.add(childGenotype.getKey());
+ }
+ else if(configurationLikelihood == bestConfigurationLikelihood) {
+ bestFirstParentGenotype.add(firstParentGenotype.getKey());
+ bestSecondParentGenotype.add(secondParentGenotype.getKey());
+ bestChildGenotype.add(childGenotype.getKey());
+ bestMVCount.add(mvCount);
+ }
+ }
+ }
+ //Evaluate configurations of parent/child pairs
+ if(parentsCalled<2){
+ norm += configurationLikelihood;
+ //Keep this combination if
+ //It has a better likelihood
+ //Or it has the same likelihood but requires less changes from original genotypes
+ if (configurationLikelihood > bestConfigurationLikelihood){
+ bestConfigurationLikelihood = configurationLikelihood;
+ bestMVCount.clear();
+ bestMVCount.add(cumulativeMVCount/3);
+ bestChildGenotype.clear();
+ bestFirstParentGenotype.clear();
+ bestSecondParentGenotype.clear();
+ bestChildGenotype.add(childGenotype.getKey());
+ bestFirstParentGenotype.add(firstParentGenotype.getKey());
+ bestSecondParentGenotype.add(pairSecondParentGenotype);
+ }
+ else if(configurationLikelihood == bestConfigurationLikelihood) {
+ bestFirstParentGenotype.add(firstParentGenotype.getKey());
+ bestSecondParentGenotype.add(pairSecondParentGenotype);
+ bestChildGenotype.add(childGenotype.getKey());
+ bestMVCount.add(cumulativeMVCount/3);
+ }
+ configurationLikelihood = 0;
}
}
}
- if (!(bestMotherGenotype.isHet() && bestFatherGenotype.isHet() && bestChildGenotype.isHet())) {
- Map attributes = new HashMap();
- attributes.putAll(bestChildGenotype.getAttributes());
- attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm);
- bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes);
+ //normalize the best configuration probability
+ bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
- finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype);
+ //In case of multiple equally likely combinations, take a random one
+ if(bestFirstParentGenotype.size()>1){
+ configuration_index = rand.nextInt(bestFirstParentGenotype.size()-1);
}
+
+ }
+ else{
+ bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
}
- return finalGenotypes;
+ TrioPhase phasedTrioGenotypes;
+ if(parentsCalled < 2 && mother == null || !mother.isCalled())
+ phasedTrioGenotypes = transmissionMatrix.get(bestSecondParentGenotype.get(configuration_index)).get(bestFirstParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
+ else
+ phasedTrioGenotypes = transmissionMatrix.get(bestFirstParentGenotype.get(configuration_index)).get(bestSecondParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
+
+ //Return the phased genotypes
+ phasedTrioGenotypes.getPhasedGenotypes(ref,alt,mother,father,child,bestConfigurationLikelihood,finalGenotypes);
+ return bestMVCount.get(configuration_index);
+
+ }
+
+
+ private void updatePairMetricsCounters(Genotype parent, Genotype child, int mvCount, HashMap counters){
+
+ //Increment metrics counters
+ if(parent.isCalled() && child.isCalled()){
+ counters.put(NUM_PAIR_GENOTYPES_CALLED,counters.get(NUM_PAIR_GENOTYPES_CALLED)+1);
+ if(parent.isPhased())
+ counters.put(NUM_PAIR_GENOTYPES_PHASED,counters.get(NUM_PAIR_GENOTYPES_PHASED)+1);
+ else{
+ counters.put(NUM_PAIR_VIOLATIONS,counters.get(NUM_PAIR_VIOLATIONS)+mvCount);
+ if(parent.isHet() && child.isHet())
+ counters.put(NUM_PAIR_HET_HET,counters.get(NUM_PAIR_HET_HET)+1);
+ }
+ }else{
+ counters.put(NUM_PAIR_GENOTYPES_NOCALL,counters.get(NUM_PAIR_GENOTYPES_NOCALL)+1);
+ }
+
+ }
+
+ private void updateTrioMetricsCounters(Genotype mother, Genotype father, Genotype child, int mvCount, HashMap counters){
+
+ //Increment metrics counters
+ if(mother.isCalled() && father.isCalled() && child.isCalled()){
+ counters.put(NUM_TRIO_GENOTYPES_CALLED,counters.get(NUM_TRIO_GENOTYPES_CALLED)+1);
+ if(mother.isPhased())
+ counters.put(NUM_TRIO_GENOTYPES_PHASED,counters.get(NUM_TRIO_GENOTYPES_PHASED)+1);
+
+ else{
+ if(mvCount > 0){
+ if(mvCount >1)
+ counters.put(NUM_TRIO_DOUBLE_VIOLATIONS,counters.get(NUM_TRIO_DOUBLE_VIOLATIONS)+1);
+ else
+ counters.put(NUM_TRIO_VIOLATIONS,counters.get(NUM_TRIO_VIOLATIONS)+1);
+ }
+ else if(mother.isHet() && father.isHet() && child.isHet())
+ counters.put(NUM_TRIO_HET_HET_HET,counters.get(NUM_TRIO_HET_HET_HET)+1);
+
+ }
+ }else{
+ counters.put(NUM_TRIO_GENOTYPES_NOCALL,counters.get(NUM_TRIO_GENOTYPES_NOCALL)+1);
+ }
}
/**
@@ -292,55 +729,156 @@ public class PhaseByTransmission extends RodWalker {
* @return null
*/
@Override
- public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+ public HashMap map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+
+ HashMap metricsCounters = new HashMap(10);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_CALLED,0);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_NOCALL,0);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_PHASED,0);
+ metricsCounters.put(NUM_TRIO_HET_HET_HET,0);
+ metricsCounters.put(NUM_TRIO_VIOLATIONS,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_CALLED,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_NOCALL,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0);
+ metricsCounters.put(NUM_PAIR_HET_HET,0);
+ metricsCounters.put(NUM_PAIR_VIOLATIONS,0);
+ metricsCounters.put(NUM_TRIO_DOUBLE_VIOLATIONS,0);
+ metricsCounters.put(NUM_GENOTYPES_MODIFIED,0);
+
+ String mvfLine;
+
if (tracker != null) {
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
Map genotypeMap = vc.getGenotypes();
- for (Trio trio : trios) {
- Genotype mother = vc.getGenotype(trio.getMother());
- Genotype father = vc.getGenotype(trio.getFather());
- Genotype child = vc.getGenotype(trio.getChild());
+ int mvCount;
- ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child);
+ for (Sample sample : trios) {
+ Genotype mother = vc.getGenotype(sample.getMaternalID());
+ Genotype father = vc.getGenotype(sample.getPaternalID());
+ Genotype child = vc.getGenotype(sample.getID());
+
+ //Keep only trios and parent/child pairs
+ if(mother == null && father == null || child == null)
+ continue;
+
+ ArrayList trioGenotypes = new ArrayList(3);
+ mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes);
Genotype phasedMother = trioGenotypes.get(0);
Genotype phasedFather = trioGenotypes.get(1);
Genotype phasedChild = trioGenotypes.get(2);
- genotypeMap.put(phasedMother.getSampleName(), phasedMother);
- genotypeMap.put(phasedFather.getSampleName(), phasedFather);
- genotypeMap.put(phasedChild.getSampleName(), phasedChild);
+ //Fill the genotype map with the new genotypes and increment metrics counters
+ genotypeMap.put(phasedChild.getSampleName(),phasedChild);
+ if(mother != null){
+ genotypeMap.put(phasedMother.getSampleName(), phasedMother);
+ if(father != null){
+ genotypeMap.put(phasedFather.getSampleName(), phasedFather);
+ updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
+ mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
+ if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
+ metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
+ }
+ else{
+ updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters);
+ if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType()))
+ metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
+ mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
+ }
+ }
+ else{
+ genotypeMap.put(phasedFather.getSampleName(),phasedFather);
+ updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
+ if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
+ metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
+ mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
+ }
+
+            //Report the mendelian violation to the violations file, if one was provided
+ //TODO: ADAPT FOR PAIRS TOO!!
+ if(mvCount>0 && mvFile != null)
+ mvFile.println(mvfLine);
+
}
+
VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap);
vcfWriter.add(newvc);
}
-
- return null;
+ return metricsCounters;
}
/**
- * Provide an initial value for reduce computations.
+ * Initializes the reporting counters.
*
- * @return Initial value of reduce.
+ * @return All counters initialized to 0
*/
@Override
- public Integer reduceInit() {
- return null;
+ public HashMap reduceInit() {
+ HashMap metricsCounters = new HashMap(10);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_CALLED,0);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_NOCALL,0);
+ metricsCounters.put(NUM_TRIO_GENOTYPES_PHASED,0);
+ metricsCounters.put(NUM_TRIO_HET_HET_HET,0);
+ metricsCounters.put(NUM_TRIO_VIOLATIONS,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_CALLED,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_NOCALL,0);
+ metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0);
+ metricsCounters.put(NUM_PAIR_HET_HET,0);
+ metricsCounters.put(NUM_PAIR_VIOLATIONS,0);
+ metricsCounters.put(NUM_TRIO_DOUBLE_VIOLATIONS,0);
+ metricsCounters.put(NUM_GENOTYPES_MODIFIED,0);
+
+ return metricsCounters;
}
/**
- * Reduces a single map with the accumulator provided as the ReduceType.
+     * Adds the per-site counter values from the phased site to the cumulative reporting counters.
*
- * @param value result of the map.
- * @param sum accumulator for the reduce.
+ * @param value Site values
+ * @param sum accumulator for the reporting counters
* @return accumulator with result of the map taken into account.
*/
@Override
- public Integer reduce(Integer value, Integer sum) {
- return null;
+ public HashMap reduce(HashMap value, HashMap sum) {
+ sum.put(NUM_TRIO_GENOTYPES_CALLED,value.get(NUM_TRIO_GENOTYPES_CALLED)+sum.get(NUM_TRIO_GENOTYPES_CALLED));
+ sum.put(NUM_TRIO_GENOTYPES_NOCALL,value.get(NUM_TRIO_GENOTYPES_NOCALL)+sum.get(NUM_TRIO_GENOTYPES_NOCALL));
+ sum.put(NUM_TRIO_GENOTYPES_PHASED,value.get(NUM_TRIO_GENOTYPES_PHASED)+sum.get(NUM_TRIO_GENOTYPES_PHASED));
+ sum.put(NUM_TRIO_HET_HET_HET,value.get(NUM_TRIO_HET_HET_HET)+sum.get(NUM_TRIO_HET_HET_HET));
+ sum.put(NUM_TRIO_VIOLATIONS,value.get(NUM_TRIO_VIOLATIONS)+sum.get(NUM_TRIO_VIOLATIONS));
+ sum.put(NUM_PAIR_GENOTYPES_CALLED,value.get(NUM_PAIR_GENOTYPES_CALLED)+sum.get(NUM_PAIR_GENOTYPES_CALLED));
+ sum.put(NUM_PAIR_GENOTYPES_NOCALL,value.get(NUM_PAIR_GENOTYPES_NOCALL)+sum.get(NUM_PAIR_GENOTYPES_NOCALL));
+ sum.put(NUM_PAIR_GENOTYPES_PHASED,value.get(NUM_PAIR_GENOTYPES_PHASED)+sum.get(NUM_PAIR_GENOTYPES_PHASED));
+ sum.put(NUM_PAIR_HET_HET,value.get(NUM_PAIR_HET_HET)+sum.get(NUM_PAIR_HET_HET));
+ sum.put(NUM_PAIR_VIOLATIONS,value.get(NUM_PAIR_VIOLATIONS)+sum.get(NUM_PAIR_VIOLATIONS));
+ sum.put(NUM_TRIO_DOUBLE_VIOLATIONS,value.get(NUM_TRIO_DOUBLE_VIOLATIONS)+sum.get(NUM_TRIO_DOUBLE_VIOLATIONS));
+ sum.put(NUM_GENOTYPES_MODIFIED,value.get(NUM_GENOTYPES_MODIFIED)+sum.get(NUM_GENOTYPES_MODIFIED));
+
+ return sum;
+ }
+
+
+ /**
+ * Reports statistics on the phasing by transmission process.
+ * @param result Accumulator with all counters.
+ */
+ @Override
+ public void onTraversalDone(HashMap result) {
+ logger.info("Number of complete trio-genotypes: " + result.get(NUM_TRIO_GENOTYPES_CALLED));
+ logger.info("Number of trio-genotypes containing no call(s): " + result.get(NUM_TRIO_GENOTYPES_NOCALL));
+ logger.info("Number of trio-genotypes phased: " + result.get(NUM_TRIO_GENOTYPES_PHASED));
+ logger.info("Number of resulting Het/Het/Het trios: " + result.get(NUM_TRIO_HET_HET_HET));
+ logger.info("Number of remaining single mendelian violations in trios: " + result.get(NUM_TRIO_VIOLATIONS));
+ logger.info("Number of remaining double mendelian violations in trios: " + result.get(NUM_TRIO_DOUBLE_VIOLATIONS));
+ logger.info("Number of complete pair-genotypes: " + result.get(NUM_PAIR_GENOTYPES_CALLED));
+ logger.info("Number of pair-genotypes containing no call(s): " + result.get(NUM_PAIR_GENOTYPES_NOCALL));
+ logger.info("Number of pair-genotypes phased: " + result.get(NUM_PAIR_GENOTYPES_PHASED));
+ logger.info("Number of resulting Het/Het pairs: " + result.get(NUM_PAIR_HET_HET));
+ logger.info("Number of remaining mendelian violations in pairs: " + result.get(NUM_PAIR_VIOLATIONS));
+ logger.info("Number of genotypes updated: " + result.get(NUM_GENOTYPES_MODIFIED));
+
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java
deleted file mode 100644
index e770418c1..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2010.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.qc;
-
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
-import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
-
-import java.io.PrintStream;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * Counts the number of read pairs encountered in a file sorted in
- * query name order. Breaks counts down by total pairs and number
- * of paired reads.
- *
- *
- * Input
- *
- * One or more bam files.
- *
- *
- * Output
- *
- * Number of pairs seen.
- *
- *
- * Examples
- *
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- * -R ref.fasta \
- * -T CountPairs \
- * -o output.txt \
- * -I input.bam
- *
- *
- * @author mhanna
- */
-public class CountPairsWalker extends ReadPairWalker {
- @Output
- private PrintStream out;
-
- /**
- * How many reads are the first in a pair, based on flag 0x0040 from the SAM spec.
- */
- private long firstOfPair = 0;
-
- /**
- * How many reads are the second in a pair, based on flag 0x0080 from the SAM spec.
- */
- private long secondOfPair = 0;
-
- /**
- * A breakdown of the total number of reads seen with exactly the same read name.
- */
- private List pairCountsByType = new ExpandingArrayList();
-
- /**
- * Maps a read pair to a given reduce of type MapType. Semantics determined by subclasser.
- * @param reads Collection of reads having the same name.
- * @return Semantics defined by implementer.
- */
- @Override
- public Integer map(Collection reads) {
- if(pairCountsByType.get(reads.size()) != null)
- pairCountsByType.set(reads.size(),pairCountsByType.get(reads.size())+1);
- else
- pairCountsByType.set(reads.size(),1L);
-
- for(SAMRecord read: reads) {
- if(read.getFirstOfPairFlag()) firstOfPair++;
- if(read.getSecondOfPairFlag()) secondOfPair++;
- }
-
- return 1;
- }
-
- /**
- * No pairs at the beginning of a traversal.
- * @return 0 always.
- */
- @Override
- public Long reduceInit() {
- return 0L;
- }
-
- /**
- * Combine number of pairs seen in this iteration (always 1) with total number of pairs
- * seen in previous iterations.
- * @param value Pairs in this iteration (1), from the map function.
- * @param sum Count of all pairs in prior iterations.
- * @return All pairs encountered in previous iterations + all pairs encountered in this iteration (sum + 1).
- */
- @Override
- public Long reduce(Integer value, Long sum) {
- return value + sum;
- }
-
- /**
- * Print summary statistics over the entire traversal.
- * @param sum A count of all read pairs viewed.
- */
- @Override
- public void onTraversalDone(Long sum) {
- out.printf("Total number of pairs : %d%n",sum);
- out.printf("Total number of first reads in pair : %d%n",firstOfPair);
- out.printf("Total number of second reads in pair: %d%n",secondOfPair);
- for(int i = 1; i < pairCountsByType.size(); i++) {
- if(pairCountsByType.get(i) == null)
- continue;
- out.printf("Pairs of size %d: %d%n",i,pairCountsByType.get(i));
- }
- }
-
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index 609593acc..b92b1cee0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -24,10 +24,8 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
-import org.apache.poi.hpsf.Variant;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
-import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
@@ -275,8 +273,8 @@ public class SelectVariants extends RodWalker {
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
/**
- * Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so use it only for a reasonable
- * number of variants. Use --select_random_fraction for larger numbers of variants.
+ * Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so make sure you supply the program with enough memory
+     * given your input set. This option will NOT work well for large callsets; use --select_random_fraction for sets with a large number of variants.
*/
@Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
private int numRandom = 0;
@@ -532,7 +530,7 @@ public class SelectVariants extends RodWalker {
}
}
if (SELECT_RANDOM_NUMBER) {
- randomlyAddVariant(++variantNumber, sub, ref.getBase());
+ randomlyAddVariant(++variantNumber, sub);
}
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
@@ -705,7 +703,7 @@ public class SelectVariants extends RodWalker {
return sub;
}
- private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
+ private void randomlyAddVariant(int rank, VariantContext vc) {
if (nVariantsAdded < numRandom)
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index e10bcbaa0..8cba183da 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -554,4 +554,54 @@ public class GenomeLocParser {
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
}
+ /**
+     * Creates a loc to the left (ending at the loc start - 1) of up to maxBasePairs size.
+ * @param loc The original loc
+ * @param maxBasePairs The maximum number of basePairs
+ * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
+ */
+ @Requires({"loc != null", "maxBasePairs > 0"})
+ public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
+ if (GenomeLoc.isUnmapped(loc))
+ return null;
+ String contigName = loc.getContig();
+ SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ int contigIndex = contig.getSequenceIndex();
+
+ int start = loc.getStart() - maxBasePairs;
+ int stop = loc.getStart() - 1;
+
+ if (start < 1)
+ start = 1;
+ if (stop < 1)
+ return null;
+
+ return createGenomeLoc(contigName, contigIndex, start, stop, true);
+ }
+
+ /**
+ * Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
+ * @param loc The original loc
+ * @param maxBasePairs The maximum number of basePairs
+ * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
+ */
+ @Requires({"loc != null", "maxBasePairs > 0"})
+ public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
+ if (GenomeLoc.isUnmapped(loc))
+ return null;
+ String contigName = loc.getContig();
+ SAMSequenceRecord contig = contigInfo.getSequence(contigName);
+ int contigIndex = contig.getSequenceIndex();
+ int contigLength = contig.getSequenceLength();
+
+ int start = loc.getStop() + 1;
+ int stop = loc.getStop() + maxBasePairs;
+
+ if (start > contigLength)
+ return null;
+ if (stop > contigLength)
+ stop = contigLength;
+
+ return createGenomeLoc(contigName, contigIndex, start, stop, true);
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
index f0e164c87..159b145a0 100644
--- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
@@ -233,8 +233,12 @@ public class IntervalUtils {
*
* Returns a null string if there are no differences, otherwise returns a string describing the difference
* (useful for UnitTests). Assumes both lists are sorted
+ *
+ * @param masterArg sorted master genome locs
+ * @param testArg sorted test genome locs
+     * @return null string if there are no differences, otherwise a string describing the difference
*/
- public static final String equateIntervals(List masterArg, List testArg) {
+ public static String equateIntervals(List masterArg, List testArg) {
LinkedList master = new LinkedList(masterArg);
LinkedList test = new LinkedList(testArg);
@@ -317,23 +321,6 @@ public class IntervalUtils {
return lengths;
}
- /**
- * Counts the number of interval files an interval list can be split into using scatterIntervalArguments.
- * @param locs The genome locs.
- * @return The maximum number of parts the intervals can be split into.
- */
- public static int countContigIntervals(List locs) {
- int maxFiles = 0;
- String contig = null;
- for (GenomeLoc loc: locs) {
- if (contig == null || !contig.equals(loc.getContig())) {
- maxFiles++;
- contig = loc.getContig();
- }
- }
- return maxFiles;
- }
-
/**
* Splits an interval list into multiple files.
* @param fileHeader The sam file header.
@@ -373,7 +360,6 @@ public class IntervalUtils {
* @return A list of lists of genome locs, split according to splits
*/
public static List> splitIntervalsToSubLists(List locs, List splits) {
- int locIndex = 1;
int start = 0;
List> sublists = new ArrayList>(splits.size());
for (Integer stop: splits) {
@@ -465,7 +451,7 @@ public class IntervalUtils {
@Requires({"remaining != null", "!remaining.isEmpty()", "idealSplitSize > 0"})
@Ensures({"result != null"})
- final static SplitLocusRecursive splitLocusIntervals1(LinkedList remaining, long idealSplitSize) {
+ static SplitLocusRecursive splitLocusIntervals1(LinkedList remaining, long idealSplitSize) {
final List split = new ArrayList();
long size = 0;
@@ -579,10 +565,101 @@ public class IntervalUtils {
}
}
- public static final long intervalSize(final List locs) {
+ public static long intervalSize(final List locs) {
long size = 0;
for ( final GenomeLoc loc : locs )
size += loc.size();
return size;
}
+
+ public static void writeFlankingIntervals(File reference, File inputIntervals, File flankingIntervals, int basePairs) {
+ ReferenceDataSource referenceDataSource = new ReferenceDataSource(reference);
+ GenomeLocParser parser = new GenomeLocParser(referenceDataSource.getReference());
+ List originalList = intervalFileToList(parser, inputIntervals.getAbsolutePath());
+
+ if (originalList.isEmpty())
+ throw new UserException.MalformedFile(inputIntervals, "File contains no intervals");
+
+ List flankingList = getFlankingIntervals(parser, originalList, basePairs);
+
+ if (flankingList.isEmpty())
+ throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");
+
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ samFileHeader.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
+ IntervalList intervalList = new IntervalList(samFileHeader);
+ int i = 0;
+ for (GenomeLoc loc: flankingList)
+ intervalList.add(toInterval(loc, ++i));
+ intervalList.write(flankingIntervals);
+ }
+
+ /**
+     * Returns a list of intervals between the passed-in locs. Does not extend UNMAPPED locs.
+ * @param parser A genome loc parser for creating the new intervals
+ * @param locs Original genome locs
+ * @param basePairs Number of base pairs on each side of loc
+ * @return The list of intervals between the locs
+ */
+ public static List getFlankingIntervals(final GenomeLocParser parser, final List locs, final int basePairs) {
+ List sorted = sortAndMergeIntervals(parser, locs, IntervalMergingRule.ALL).toList();
+
+ if (sorted.size() == 0)
+ return Collections.emptyList();
+
+ LinkedHashMap> locsByContig = splitByContig(sorted);
+ List expanded = new ArrayList();
+ for (String contig: locsByContig.keySet()) {
+ List contigLocs = locsByContig.get(contig);
+ int contigLocsSize = contigLocs.size();
+
+ GenomeLoc startLoc, stopLoc;
+
+ // Create loc at start of the list
+ startLoc = parser.createGenomeLocAtStart(contigLocs.get(0), basePairs);
+ if (startLoc != null)
+ expanded.add(startLoc);
+
+ // Create locs between each loc[i] and loc[i+1]
+ for (int i = 0; i < contigLocsSize - 1; i++) {
+ stopLoc = parser.createGenomeLocAtStop(contigLocs.get(i), basePairs);
+ startLoc = parser.createGenomeLocAtStart(contigLocs.get(i + 1), basePairs);
+ if (stopLoc.getStop() + 1 >= startLoc.getStart()) {
+ // NOTE: This is different than GenomeLoc.merge()
+ // merge() returns a loc which covers the entire range of stop and start,
+ // possibly returning positions inside loc(i) or loc(i+1)
+ // We want to make sure that the start of the stopLoc is used, and the stop of the startLoc
+ GenomeLoc merged = parser.createGenomeLoc(
+ stopLoc.getContig(), stopLoc.getStart(), startLoc.getStop());
+ expanded.add(merged);
+ } else {
+ expanded.add(stopLoc);
+ expanded.add(startLoc);
+ }
+ }
+
+ // Create loc at the end of the list
+ stopLoc = parser.createGenomeLocAtStop(contigLocs.get(contigLocsSize - 1), basePairs);
+ if (stopLoc != null)
+ expanded.add(stopLoc);
+ }
+ return expanded;
+ }
+
+ private static LinkedHashMap> splitByContig(List sorted) {
+ LinkedHashMap> splits = new LinkedHashMap>();
+ GenomeLoc last = null;
+ List contigLocs = null;
+ for (GenomeLoc loc: sorted) {
+ if (GenomeLoc.isUnmapped(loc))
+ continue;
+ if (last == null || !last.onSameContig(loc)) {
+ contigLocs = new ArrayList();
+ splits.put(loc.getContig(), contigLocs);
+ }
+ contigLocs.add(loc);
+ last = loc;
+ }
+ return splits;
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 3fe1060dd..6d7c8dad9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
}
public final byte getReducedCount(final int i) {
- return getReducedReadCounts()[i];
+ byte firstCount = getReducedReadCounts()[0];
+ byte offsetCount = getReducedReadCounts()[i];
+ return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
index dba16cf86..8c8e4f257 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java
@@ -25,7 +25,13 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
+import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
+import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.jgrapht.util.MathUtil;
+
+import java.util.EnumMap;
+import java.util.Map;
public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false;
@@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
+ //Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
+ //Returns null in case of missing likelihoods
+ public EnumMap getAsMap(boolean normalizeFromLog10){
+ //Make sure that the log10likelihoods are set
+ double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
+ if(likelihoods == null)
+ return null;
+ EnumMap likelihoodsMap = new EnumMap(Genotype.Type.class);
+ likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
+ likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
+ likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
+ return likelihoodsMap;
+ }
+
+ //Return the neg log10 Genotype Quality (GQ) for the given genotype
+ //Returns Double.NEGATIVE_INFINITY in case of missing genotype
+ public double getNegLog10GQ(Genotype.Type genotype){
+
+ double qual = Double.NEGATIVE_INFINITY;
+ EnumMap likelihoods = getAsMap(false);
+ if(likelihoods == null)
+ return qual;
+ for(Map.Entry likelihood : likelihoods.entrySet()){
+ if(likelihood.getKey() == genotype)
+ continue;
+ if(likelihood.getValue() > qual)
+ qual = likelihood.getValue();
+
+ }
+
+ //Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
+ qual = likelihoods.get(genotype) - qual;
+
+ //Quality of other genotypes 1-P(G)
+ if (qual < 0) {
+ double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
+ double chosenGenotype = normalized[genotype.ordinal()-1];
+ qual = -1.0 * Math.log10(1.0 - chosenGenotype);
+ }
+ return qual;
+ }
+
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index f52a7087b..204b4b841 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) {
- this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
+ this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true);
}
/**
@@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) {
- this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
+ this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true);
}
/**
@@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) {
- this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true);
+ this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true);
}
/**
@@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) {
- this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false);
+ this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false, true);
}
/**
@@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles alleles
*/
public VariantContext(String source, String contig, long start, long stop, Collection alleles) {
- this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
+ this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true);
}
/**
@@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param other the VariantContext to copy
*/
public VariantContext(VariantContext other) {
- this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
+ this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true);
}
/**
@@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
+ * @param performValidation if true, call validate() as the final step in construction
*/
private VariantContext(String source, String contig, long start, long stop,
Collection alleles, Map genotypes,
double negLog10PError, Set filters, Map attributes,
- Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
+ Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
+ boolean performValidation ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
this.contig = contig;
this.start = start;
@@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
}
- validate();
+ if ( performValidation ) {
+ validate();
+ }
}
// ---------------------------------------------------------------------------------------------------------
//
// Partial-cloning routines (because Variant Context is immutable).
+ //
+ // IMPORTANT: These routines assume that the VariantContext on which they're called is already valid.
+ // Due to this assumption, they explicitly tell the constructor NOT to perform validation by
+ // calling validate(), and instead perform validation only on the data that's changed.
+ //
// Note that we don't call vc.getGenotypes() because that triggers the lazy loading.
// Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it.
//
// ---------------------------------------------------------------------------------------------------------
public static VariantContext modifyGenotypes(VariantContext vc, Map genotypes) {
- return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
+ VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false);
+ modifiedVC.validateGenotypes();
+ return modifiedVC;
}
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
- return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
+ VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
+
+ // Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(),
+ // since those validation routines rely on the values of start and end:
+ modifiedVC.validateAlleles();
+ modifiedVC.validateReferencePadding();
+
+ return modifiedVC;
}
public static VariantContext modifyFilters(VariantContext vc, Set filters) {
- return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
+ return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
}
public static VariantContext modifyAttributes(VariantContext vc, Map attributes) {
- return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
+ return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false);
}
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
- return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true);
+ VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false);
+ modifiedVC.validateReferencePadding();
+ return modifiedVC;
}
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) {
- return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
+ return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false);
}
// ---------------------------------------------------------------------------------------------------------
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java
index 462abeba1..5c8fa32a8 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java
@@ -33,7 +33,7 @@ public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
- String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+ String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
@@ -42,7 +42,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
- String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+ String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@@ -51,7 +51,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
- String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+ String[] effectMetadata = { "MEDIUM", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@@ -60,27 +60,27 @@ public class SnpEffUnitTest {
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
- String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
+ String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
- public void testParseSnpEffWarningEffect() {
+ public void testParseSnpEffOneWarningOrErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
- String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
+ String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_OR_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
- Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
+ Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning or error: \"SNPEFF_WARNING_OR_ERROR_TEXT\"") );
}
@Test
- public void testParseSnpEffErrorEffect() {
+ public void testParseSnpEffBothWarningAndErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
- String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
+ String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_TEXT", "SNPEFF_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
- Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
+ Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: \"SNPEFF_WARNING_TEXT\", and the following error: \"SNPEFF_ERROR_TEXT\"") );
}
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index bde4c4a8f..3bfb81dd0 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
+ Arrays.asList("9beb795536e95954f810835c6058f2ad"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("64b6804cb1e27826e3a47089349be581"));
+ Arrays.asList("2977bb30c8b84a5f4094fe6090658561"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@@ -64,7 +64,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
+ Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@@ -72,7 +72,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
- Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
+ Arrays.asList("0948cd1dba7d61f283cc4cf2a7757d92"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@@ -80,7 +80,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("b49fe03aa4b675db80a9db38a3552c95"));
+ Arrays.asList("33062eccd6eb73bc49440365430454c4"));
executeTest("test exclude annotations", spec);
}
@@ -88,7 +88,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
- Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
+ Arrays.asList("062155edec46a8c52243475fbf3a2943"));
executeTest("test overwriting header", spec);
}
@@ -96,7 +96,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
- Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
+ Arrays.asList("06635f2dd91b539bfbce9bf7914d8e43"));
executeTest("not passing it any reads", spec);
}
@@ -104,7 +104,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
- Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
+ Arrays.asList("820eeba1f6e3a0758a69d937c524a38e"));
executeTest("getting DB tag with dbSNP", spec);
}
@@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
- Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
+ Arrays.asList("31cc2ce157dd20771418c08d6b3be1fa"));
executeTest("getting DB tag with HM3", spec);
}
@@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
- Arrays.asList("ae30a1ac7bfbc3d22a327f8b689cad31"));
+ Arrays.asList("074865f8f8c0ca7bfd58681f396c49e9"));
executeTest("using expression", spec);
}
@@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
- Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
+ Arrays.asList("97b26db8135d083566fb585a677fbe8a"));
executeTest("using expression with ID", spec);
}
@@ -148,9 +148,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
- "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
+ "snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
1,
- Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
+ Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
);
executeTest("Testing SnpEff annotations", spec);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index b80f214b1..0197f94e5 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -5,10 +5,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
-import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
// ********************************************************************************** //
@@ -30,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
- Arrays.asList("b27939251539439a382538e507e03507"));
+ Arrays.asList("364a6112e6034571a896d4c68b6ff02a"));
executeTest("test MultiSample Pilot1", spec);
}
@@ -38,12 +36,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
- Arrays.asList("8de2602679ffc92388da0b6cb4325ef6"));
+ Arrays.asList("6cce55734ecd1a56c33f0ecba67d1258"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
- Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e"));
+ Arrays.asList("b2a00130a4b6f9a3c31288a236f5d159"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@@ -51,7 +49,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
- Arrays.asList("6762b72ae60155ad71738d7c76b80e4b"));
+ Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
executeTest("test SingleSample Pilot2", spec);
}
@@ -61,7 +59,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
- private final static String COMPRESSED_OUTPUT_MD5 = "bc71dba7bbdb23e7d5cc60461fdd897b";
+ private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
@Test
public void testCompressedOutput() {
@@ -82,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
- String md5 = "b9504e446b9313559c3ed97add7e8dc1";
+ String md5 = "95614280c565ad90f8c000376fef822c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@@ -113,8 +111,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testCallingParameters() {
HashMap e = new HashMap();
- e.put( "--min_base_quality_score 26", "bb3f294eab3e2cf52c70e63b23aac5ee" );
- e.put( "--computeSLOD", "eb34979efaadba1e34bd82bcacf5c722" );
+ e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
+ e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
for ( Map.Entry entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -127,9 +125,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameter() {
HashMap e = new HashMap();
- e.put( "-sites_only", "d40114aa201aa33ff5f174f15b6b73af" );
- e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "3c681b053fd2280f3c42041d24243752" );
- e.put( "--output_mode EMIT_ALL_SITES", "eafa6d71c5ecd64dfee5d7a3f60e392e" );
+ e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
+ e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "94e53320f14c5ff29d62f68d36b46fcd" );
+ e.put( "--output_mode EMIT_ALL_SITES", "73ad1cc41786b12c5f0e6f3e9ec2b728" );
for ( Map.Entry entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -143,12 +141,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
- Arrays.asList("c71ca370947739cb7d87b59452be7a07"));
+ Arrays.asList("902327e8a45fe585c8dfd1a7c4fcf60f"));
executeTest("test confidence 1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
- Arrays.asList("1c0a599d475cc7d5e745df6e9b6c0d29"));
+ Arrays.asList("9ef66764a0ecfe89c3e4e1a93a69e349"));
executeTest("test confidence 2", spec2);
}
@@ -160,8 +158,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap e = new HashMap();
- e.put( 0.01, "f84da90c310367bd51f2ab6e346fa3d8" );
- e.put( 1.0 / 1850, "5791e7fef40d4412b6d8f84e0a809c6c" );
+ e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
+ e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
for ( Map.Entry entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -185,7 +183,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("9cc9538ac83770e12bd0830d285bfbd0"));
+ Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
executeTest(String.format("test multiple technologies"), spec);
}
@@ -204,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
- Arrays.asList("eaf8043edb46dfbe9f97ae03baa797ed"));
+ Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
executeTest(String.format("test calling with BAQ"), spec);
}
@@ -223,7 +221,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
+ Arrays.asList("a64d2e65b5927260e4ce0d948760cc5c"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@@ -238,7 +236,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("5fe98ee853586dc9db58f0bc97daea63"));
+ Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@@ -251,7 +249,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
+ Arrays.asList("69107157632714150fc068d412e31939"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@@ -261,7 +259,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8"));
+ Arrays.asList("84f065e115004e4c9d9d62f1bf6b4f44"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@@ -271,7 +269,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("a20799237accd52c1b8c2ac096309c8f"));
+ Arrays.asList("7ea8372e4074f90ec942f964e1863351"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@@ -281,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
- Arrays.asList("18ef8181157b4ac3eb8492f538467f92"));
+ Arrays.asList("94afa8c011b47af7323c821bf19106b4"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@@ -290,7 +288,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
- Arrays.asList("ad884e511a751b05e64db5314314365a"));
+ Arrays.asList("1e02f57fafaa41db71c531eb25e148e1"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
index c663c1dd7..2cd76e7a5 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
@@ -6,23 +6,131 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
- private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
- private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
+ private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
+ private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
+ private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
+ private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
+ private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
+ private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
+ //Tests using PbT on all genotypes with default parameters
+ //And all reporting options
@Test
- public void testBasicFunctionality() {
+ public void testTrueNegativeMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
- "--variant " + fundamentalTestVCF,
- "-f NA12892+NA12891=NA12878",
+ "--variant " + TNTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
+ "-mvf %s",
+ "-o %s"
+ ),
+ 2,
+ Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
+ );
+ executeTest("testTrueNegativeMV", spec);
+ }
+
+ @Test
+ public void testTruePositiveMV() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T PhaseByTransmission",
+ "-NO_HEADER",
+ "-R " + b37KGReference,
+ "--variant " + TPTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
+ "-mvf %s",
+ "-o %s"
+ ),
+ 2,
+ Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
+ );
+ executeTest("testTruePositiveMV", spec);
+ }
+
+ @Test
+ public void testFalsePositiveMV() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T PhaseByTransmission",
+ "-NO_HEADER",
+ "-R " + b37KGReference,
+ "--variant " + FPTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
+ "-mvf %s",
+ "-o %s"
+ ),
+ 2,
+ Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
+ );
+ executeTest("testFalsePositiveMV", spec);
+ }
+
+ @Test
+ public void testSpecialCases() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T PhaseByTransmission",
+ "-NO_HEADER",
+ "-R " + b37KGReference,
+ "--variant " + SpecialTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
+ "-mvf %s",
+ "-o %s"
+ ),
+ 2,
+ Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
+ );
+ executeTest("testSpecialCases", spec);
+ }
+
+ //Test using a different prior
+ //Here the FP file is used but as the prior is lowered, 3 turn to TP
+ @Test
+ public void testPriorOption() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T PhaseByTransmission",
+ "-NO_HEADER",
+ "-R " + b37KGReference,
+ "--variant " + FPTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
+ "-prior 1e-4",
+ "-mvf %s",
+ "-o %s"
+ ),
+ 2,
+ Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
+ );
+ executeTest("testPriorOption", spec);
+ }
+
+ //Test when running without MV reporting option
+ //This is the exact same test file as FP but should not generate a .mvf file
+ @Test
+ public void testMVFileOption() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T PhaseByTransmission",
+ "-NO_HEADER",
+ "-R " + b37KGReference,
+ "--variant " + FPTest,
+ "-ped "+ goodFamilyFile,
+ "-L 1:10109-10315",
"-o %s"
),
1,
- Arrays.asList("")
+ Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
);
- executeTest("testBasicFunctionality", spec);
+ executeTest("testMVFileOption", spec);
}
+
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index 3dceb9bd2..102d4715e 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -21,16 +21,16 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
- "--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
+ "--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
- "-L " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
+ "-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-o %s"
),
1,
- Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
+ Arrays.asList("a36414421621b377d6146d58d2fcecd0")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
index f1f849bf5..e9f138a0e 100644
--- a/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -11,6 +10,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
@@ -36,7 +36,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigIndexValid() {
- SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
}
@@ -67,7 +66,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigInfoKnownContig() {
- SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
}
@@ -191,4 +189,104 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
}
+
+ private static class FlankingGenomeLocTestData extends TestDataProvider {
+ final GenomeLocParser parser;
+ final int basePairs;
+ final GenomeLoc original, flankStart, flankStop;
+
+ private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
+ super(FlankingGenomeLocTestData.class, name);
+ this.parser = parser;
+ this.basePairs = basePairs;
+ this.original = parse(parser, original);
+ this.flankStart = flankStart == null ? null : parse(parser, flankStart);
+ this.flankStop = flankStop == null ? null : parse(parser, flankStop);
+ }
+
+ private static GenomeLoc parse(GenomeLocParser parser, String str) {
+ return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
+ }
+ }
+
+ @DataProvider(name = "flankingGenomeLocs")
+ public Object[][] getFlankingGenomeLocs() {
+ int contigLength = 10000;
+ SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
+ GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
+
+ new FlankingGenomeLocTestData("atStartBase1", parser, 1,
+ "chr1:1", null, "chr1:2");
+
+ new FlankingGenomeLocTestData("atStartBase50", parser, 50,
+ "chr1:1", null, "chr1:2-51");
+
+ new FlankingGenomeLocTestData("atStartRange50", parser, 50,
+ "chr1:1-10", null, "chr1:11-60");
+
+ new FlankingGenomeLocTestData("atEndBase1", parser, 1,
+ "chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
+
+ new FlankingGenomeLocTestData("atEndBase50", parser, 50,
+ "chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);
+
+ new FlankingGenomeLocTestData("atEndRange50", parser, 50,
+ String.format("chr1:%d-%d", contigLength - 10, contigLength),
+ String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
+ null);
+
+ new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
+ "chr1:2", "chr1:1", "chr1:3");
+
+ new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
+ "chr1:21-30", "chr1:1-20", "chr1:31-80");
+
+ new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
+ "chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
+
+ new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
+ String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
+ String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
+ String.format("chr1:%d-%d", contigLength - 20, contigLength));
+
+ new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
+ "chr1:3", "chr1:2", "chr1:4");
+
+ new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
+ "chr1:101-200", "chr1:51-100", "chr1:201-250");
+
+ new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
+ "chr1:" + (contigLength - 3),
+ "chr1:" + (contigLength - 4),
+ "chr1:" + (contigLength - 2));
+
+ new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
+ String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
+ String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
+ String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));
+
+ new FlankingGenomeLocTestData("unmapped", parser, 50,
+ "unmapped", null, null);
+
+ new FlankingGenomeLocTestData("fullContig", parser, 50,
+ "chr1", null, null);
+
+ return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
+ }
+
+ @Test(dataProvider = "flankingGenomeLocs")
+ public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
+ GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
+ String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
+ data.toString(), data.original, actual, data.flankStart);
+ assertEquals(actual, data.flankStart, description);
+ }
+
+ @Test(dataProvider = "flankingGenomeLocs")
+ public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
+ GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
+ String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
+ data.toString(), data.original, actual, data.flankStop);
+ assertEquals(actual, data.flankStop, description);
+ }
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
index 46134cd24..53368c339 100755
--- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java
@@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
- final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
+ final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
+ final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest
public void init() {
@@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
- reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
+ reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {
diff --git a/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
index 3f5d05e66..7a2696b7b 100755
--- a/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
@@ -41,11 +41,6 @@ public class SimpleTimerUnitTest extends BaseTest {
double t6 = t.getElapsedTime();
Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
-
- t.stop().start();
- Assert.assertTrue(t.isRunning(), "second started timer isn't running");
- Assert.assertTrue(t.getElapsedTime() >= 0.0, "elapsed time should have been reset");
- Assert.assertTrue(t.getElapsedTime() < t6, "elapsed time isn't less than time before start call"); // we should have effective no elapsed time
}
private final static void idleLoop() {
diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
index f625af23c..ecb5a6d33 100644
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
@@ -30,8 +30,10 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
+import org.testng.annotations.*;
+
+import java.util.LinkedList;
+import java.util.List;
/**
* Created by IntelliJ IDEA.
@@ -44,180 +46,214 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
+ //int debug = 0;
+
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
- @BeforeClass
+
+ public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) {
+ Assert.assertEquals(read.getReadBases(), readBases);
+ Assert.assertEquals(read.getBaseQualityString(), baseQuals);
+ Assert.assertEquals(read.getCigarString(), cigar);
+ }
+
+ public class testParameter {
+ int inputStart;
+ int inputStop;
+ int substringStart;
+ int substringStop;
+ String cigar;
+
+ public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
+ inputStart = InputStart;
+ inputStop = InputStop;
+ substringStart = SubstringStart;
+ substringStop = SubstringStop;
+ cigar = Cigar;
+ }
+ }
+
+ // What the test read looks like
+ // Ref: 1 2 3 4 5 6 7 8
+ // Read: 0 1 2 3 - - - -
+ // -----------------------------
+ // Bases: A C T G - - - -
+ // Quals: ! + 5 ? - - - -
+
+ @BeforeMethod
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
- read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
+ //logger.warn(read.getCigarString());
}
- @Test ( enabled = false )
+ @Test ( enabled = true )
public void testHardClipBothEndsByReferenceCoordinates() {
- logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
+ logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
+ //int debug = 1;
//Clip whole read
- Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
+ Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
+
//clip 1 base
- expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
+ expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
}
- @Test ( enabled = false )
+ @Test ( enabled = true )
public void testHardClipByReadCoordinates() {
+
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
- //clip 1 base at start
- expected = readClipper.hardClipByReadCoordinates(0,0);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
- Assert.assertEquals(expected.getCigarString(), "1H3M");
+ List testList = new LinkedList();
+ testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
+ testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
+ testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
+ testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
- //clip 1 base at end
- expected = readClipper.hardClipByReadCoordinates(3,3);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
- Assert.assertEquals(expected.getCigarString(), "3M1H");
-
- //clip 2 bases at start
- expected = readClipper.hardClipByReadCoordinates(0,1);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
- Assert.assertEquals(expected.getCigarString(), "2H2M");
-
- //clip 2 bases at end
- expected = readClipper.hardClipByReadCoordinates(2,3);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
- Assert.assertEquals(expected.getCigarString(), "2M2H");
+ for ( testParameter p : testList ) {
+ init();
+ //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
}
- @Test ( enabled = false )
+ @Test ( enabled = true )
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
-
+ //logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
- //clip 1 base at start
- expected = readClipper.hardClipByReferenceCoordinates(-1,1);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
- Assert.assertEquals(expected.getCigarString(), "1H3M");
+ List testList = new LinkedList();
+ testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
+ testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
+ testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
+ testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
- //clip 1 base at end
- expected = readClipper.hardClipByReferenceCoordinates(3,-1);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
- Assert.assertEquals(expected.getCigarString(), "3M1H");
-
- //clip 2 bases at start
- expected = readClipper.hardClipByReferenceCoordinates(-1,2);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
- Assert.assertEquals(expected.getCigarString(), "2H2M");
-
- //clip 2 bases at end
- expected = readClipper.hardClipByReferenceCoordinates(2,-1);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
- Assert.assertEquals(expected.getCigarString(), "2M2H");
+ for ( testParameter p : testList ) {
+ init();
+ //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
}
- @Test ( enabled = false )
+ @Test ( enabled = true )
public void testHardClipByReferenceCoordinatesLeftTail() {
+ init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
- //clip 1 base at start
- expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
- Assert.assertEquals(expected.getCigarString(), "1H3M");
+ List testList = new LinkedList();
+ testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
+ testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
- //clip 2 bases at start
- expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
- Assert.assertEquals(expected.getCigarString(), "2H2M");
+ for ( testParameter p : testList ) {
+ init();
+ //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
}
- @Test ( enabled = false )
+ @Test ( enabled = true )
public void testHardClipByReferenceCoordinatesRightTail() {
+ init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
- //clip 1 base at end
- expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
- Assert.assertEquals(expected.getCigarString(), "3M1H");
+ List testList = new LinkedList();
+ testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
+ testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
- //clip 2 bases at end
- expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
- Assert.assertEquals(expected.getCigarString(), "2M2H");
+ for ( testParameter p : testList ) {
+ init();
+ //logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
}
- @Test ( enabled = false )
+ @Test ( enabled = true ) // TODO This function is returning null reads
public void testHardClipLowQualEnds() {
- logger.warn("Executing testHardClipByReferenceCoordinates");
+ logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
- //clip 1 base at start
- expected = readClipper.hardClipLowQualEnds((byte)34);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
- Assert.assertEquals(expected.getCigarString(), "1H3M");
-
- //clip 2 bases at start
- expected = readClipper.hardClipLowQualEnds((byte)44);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
- Assert.assertEquals(expected.getCigarString(), "2H2M");
+ List testList = new LinkedList();
+ testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
+ testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
+ for ( testParameter p : testList ) {
+ init();
+ //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
+ /* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
- //clip 1 base at end
- expected = readClipper.hardClipLowQualEnds((byte)34);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
- Assert.assertEquals(expected.getCigarString(), "3M1H");
+ testList = new LinkedList();
+ testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
+ testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
- //clip 2 bases at end
- expected = readClipper.hardClipLowQualEnds((byte)44);
- Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
- Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
- Assert.assertEquals(expected.getCigarString(), "2M2H");
-
- // revert Qual sequence
- readClipper.getRead().setBaseQualityString(QUALS);
+ for ( testParameter p : testList ) {
+ init();
+ readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
+ //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
+ testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
+ BASES.substring(p.substringStart,p.substringStop).getBytes(),
+ QUALS.substring(p.substringStart,p.substringStop),
+ p.cigar );
+ }
+ */
}
-}
+
+ public class CigarReadMaker {
+
+ }
+
+ @Test ( enabled = false )
+ public void testHardClipSoftClippedBases() {
+
+ // Generate a list of cigars to test
+ // We will use testParameter in the following way
+ // Right tail, left tail,
+ }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
index 9c3b905c2..03d33d2c5 100644
--- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
@@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.ReferenceSequenceFile;
-import net.sf.picard.util.IntervalUtil;
import net.sf.samtools.SAMFileHeader;
+import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@@ -762,4 +762,225 @@ public class IntervalUtilsUnitTest extends BaseTest {
List merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
Assert.assertEquals(merged.size(), 1);
}
+
+ /*
+ Split into tests that can be written to files and tested by writeFlankingIntervals,
+ and lists that cannot but are still handled by getFlankingIntervals.
+ */
+ private static abstract class FlankingIntervalsTestData extends TestDataProvider {
+ final public File referenceFile;
+ final public GenomeLocParser parser;
+ final int basePairs;
+ final List original;
+ final List expected;
+
+ protected FlankingIntervalsTestData(Class> clazz, String name, File referenceFile, GenomeLocParser parser,
+ int basePairs, List original, List expected) {
+ super(clazz, name);
+ this.referenceFile = referenceFile;
+ this.parser = parser;
+ this.basePairs = basePairs;
+ this.original = parse(parser, original);
+ this.expected = parse(parser, expected);
+ }
+
+ private static List parse(GenomeLocParser parser, List locs) {
+ List parsed = new ArrayList();
+ for (String loc: locs)
+ parsed.add("unmapped".equals(loc) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(loc));
+ return parsed;
+ }
+ }
+
+ private static class FlankingIntervalsFile extends FlankingIntervalsTestData {
+ public FlankingIntervalsFile(String name, File referenceFile, GenomeLocParser parser,
+ int basePairs, List original, List expected) {
+ super(FlankingIntervalsFile.class, name, referenceFile, parser, basePairs, original, expected);
+ }
+ }
+
+ private static class FlankingIntervalsList extends FlankingIntervalsTestData {
+ public FlankingIntervalsList(String name, File referenceFile, GenomeLocParser parser,
+ int basePairs, List original, List expected) {
+ super(FlankingIntervalsList.class, name, referenceFile, parser, basePairs, original, expected);
+ }
+ }
+
+ /* Intervals where the original and the flanks can be written to files. */
+ @DataProvider(name = "flankingIntervalsFiles")
+ public Object[][] getFlankingIntervalsFiles() {
+ File hg19ReferenceFile = new File(BaseTest.hg19Reference);
+ int hg19Length1 = hg19GenomeLocParser.getContigInfo("1").getSequenceLength();
+
+ new FlankingIntervalsFile("atStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:1"),
+ Arrays.asList("1:2"));
+
+ new FlankingIntervalsFile("atStartBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:1"),
+ Arrays.asList("1:2-51"));
+
+ new FlankingIntervalsFile("atStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:1-10"),
+ Arrays.asList("1:11-60"));
+
+ new FlankingIntervalsFile("atEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:" + hg19Length1),
+ Arrays.asList("1:" + (hg19Length1 - 1)));
+
+ new FlankingIntervalsFile("atEndBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:" + hg19Length1),
+ Arrays.asList(String.format("1:%d-%d", hg19Length1 - 50, hg19Length1 - 1)));
+
+ new FlankingIntervalsFile("atEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList(String.format("1:%d-%d", hg19Length1 - 10, hg19Length1)),
+ Arrays.asList(String.format("1:%d-%d", hg19Length1 - 60, hg19Length1 - 11)));
+
+ new FlankingIntervalsFile("nearStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:2"),
+ Arrays.asList("1:1", "1:3"));
+
+ new FlankingIntervalsFile("nearStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:21-30"),
+ Arrays.asList("1:1-20", "1:31-80"));
+
+ new FlankingIntervalsFile("nearEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:" + (hg19Length1 - 1)),
+ Arrays.asList("1:" + (hg19Length1 - 2), "1:" + hg19Length1));
+
+ new FlankingIntervalsFile("nearEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList(String.format("1:%d-%d", hg19Length1 - 30, hg19Length1 - 21)),
+ Arrays.asList(
+ String.format("1:%d-%d", hg19Length1 - 80, hg19Length1 - 31),
+ String.format("1:%d-%d", hg19Length1 - 20, hg19Length1)));
+
+ new FlankingIntervalsFile("beyondStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:3"),
+ Arrays.asList("1:2", "1:4"));
+
+ new FlankingIntervalsFile("beyondStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200"),
+ Arrays.asList("1:51-100", "1:201-250"));
+
+ new FlankingIntervalsFile("beyondEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
+ Arrays.asList("1:" + (hg19Length1 - 3)),
+ Arrays.asList("1:" + (hg19Length1 - 4), "1:" + (hg19Length1 - 2)));
+
+ new FlankingIntervalsFile("beyondEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList(String.format("1:%d-%d", hg19Length1 - 200, hg19Length1 - 101)),
+ Arrays.asList(
+ String.format("1:%d-%d", hg19Length1 - 250, hg19Length1 - 201),
+ String.format("1:%d-%d", hg19Length1 - 100, hg19Length1 - 51)));
+
+ new FlankingIntervalsFile("betweenFar50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:401-500"),
+ Arrays.asList("1:51-100", "1:201-250", "1:351-400", "1:501-550"));
+
+ new FlankingIntervalsFile("betweenSpan50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:301-400"),
+ Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
+
+ new FlankingIntervalsFile("betweenOverlap50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:271-400"),
+ Arrays.asList("1:51-100", "1:201-270", "1:401-450"));
+
+ new FlankingIntervalsFile("betweenShort50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:221-400"),
+ Arrays.asList("1:51-100", "1:201-220", "1:401-450"));
+
+ new FlankingIntervalsFile("betweenNone50", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:121-400"),
+ Arrays.asList("1:51-100", "1:401-450"));
+
+ new FlankingIntervalsFile("twoContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "2:301-400"),
+ Arrays.asList("1:51-100", "1:201-250", "2:251-300", "2:401-450"));
+
+ // Explicit testing a problematic agilent target pair
+ new FlankingIntervalsFile("badAgilent", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("2:74756257-74756411", "2:74756487-74756628"),
+ // wrong! ("2:74756206-74756256", "2:74756412-74756462", "2:74756436-74756486", "2:74756629-74756679")
+ Arrays.asList("2:74756207-74756256", "2:74756412-74756486", "2:74756629-74756678"));
+
+ return TestDataProvider.getTests(FlankingIntervalsFile.class);
+ }
+
+ /* Intervals where either the original and/or the flanks cannot be written to a file. */
+ @DataProvider(name = "flankingIntervalsLists")
+ public Object[][] getFlankingIntervalsLists() {
+ File hg19ReferenceFile = new File(BaseTest.hg19Reference);
+ List empty = Collections.emptyList();
+
+ new FlankingIntervalsList("empty", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ empty,
+ empty);
+
+ new FlankingIntervalsList("unmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("unmapped"),
+ empty);
+
+ new FlankingIntervalsList("fullContig", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1"),
+ empty);
+
+ new FlankingIntervalsList("fullContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1", "2", "3"),
+ empty);
+
+ new FlankingIntervalsList("betweenWithUnmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
+ Arrays.asList("1:101-200", "1:301-400", "unmapped"),
+ Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
+
+ return TestDataProvider.getTests(FlankingIntervalsList.class);
+ }
+
+ @Test(dataProvider = "flankingIntervalsFiles")
+ public void testWriteFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
+ File originalFile = createTempFile("original.", ".intervals");
+ File flankingFile = createTempFile("flanking.", ".intervals");
+ try {
+ List lines = new ArrayList();
+ for (GenomeLoc loc: data.original)
+ lines.add(loc.toString());
+ FileUtils.writeLines(originalFile, lines);
+
+ IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
+
+ List actual = IntervalUtils.intervalFileToList(data.parser, flankingFile.getAbsolutePath());
+
+ String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
+ data.toString(), data.original, actual, data.expected);
+ Assert.assertEquals(actual, data.expected, description);
+ } finally {
+ FileUtils.deleteQuietly(originalFile);
+ FileUtils.deleteQuietly(flankingFile);
+ }
+ }
+
+ @Test(dataProvider = "flankingIntervalsLists", expectedExceptions = UserException.class)
+ public void testWritingBadFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
+ File originalFile = createTempFile("original.", ".intervals");
+ File flankingFile = createTempFile("flanking.", ".intervals");
+ try {
+ List lines = new ArrayList();
+ for (GenomeLoc loc: data.original)
+ lines.add(loc.toString());
+ FileUtils.writeLines(originalFile, lines);
+
+ // Should throw a user exception on bad input if either the original
+ // intervals are empty or if the flanking intervals are empty
+ IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
+ } finally {
+ FileUtils.deleteQuietly(originalFile);
+ FileUtils.deleteQuietly(flankingFile);
+ }
+ }
+
+ @Test(dataProvider = "flankingIntervalsLists")
+ public void testGetFlankingIntervals(FlankingIntervalsTestData data) {
+ List actual = IntervalUtils.getFlankingIntervals(data.parser, data.original, data.basePairs);
+ String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
+ data.toString(), data.original, actual, data.expected);
+ Assert.assertEquals(actual, data.expected, description);
+ }
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java
index 9243588ab..f3d0dedcd 100755
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java
@@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
+import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import java.util.EnumMap;
+
/**
* Basic unit test for Genotype likelihoods objects
@@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
gl.getAsVector();
}
+ @Test
+ public void testGetAsMap(){
+ GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
+ //Log scale
+ EnumMap glMap = gl.getAsMap(false);
+ Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
+ Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
+ Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
+
+ //Linear scale
+ glMap = gl.getAsMap(true);
+ double [] vl = MathUtils.normalizeFromLog10(v);
+ Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
+ Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
+ Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
+
+ //Test missing likelihoods
+ gl = new GenotypeLikelihoods(".");
+ glMap = gl.getAsMap(false);
+ Assert.assertNull(glMap);
+
+ }
+
+ @Test
+ public void testGetNegLog10GQ(){
+ GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
+
+ //GQ for the best guess genotype
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),3.9);
+
+ double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
+
+ //GQ for the other genotypes
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
+
+ //Test missing likelihoods
+ gl = new GenotypeLikelihoods(".");
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
+ Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
+
+ }
+
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/WriteFlankingIntervalsFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/WriteFlankingIntervalsFunction.scala
new file mode 100644
index 000000000..d90db0de4
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/WriteFlankingIntervalsFunction.scala
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import org.broadinstitute.sting.queue.function.InProcessFunction
+import org.broadinstitute.sting.commandline.{Output, Argument, Input}
+import java.io.File
+import org.broadinstitute.sting.utils.interval.IntervalUtils
+
+class WriteFlankingIntervalsFunction extends InProcessFunction {
+ @Input(doc="The reference sequence")
+ var reference : File = _
+
+ @Input(doc="The interval list to flank")
+ var inputIntervals : File = _
+
+ @Output(doc="The output intervals file to write to")
+ var outputIntervals: File = _
+
+  @Argument(doc="Number of base pairs to flank the input intervals")
+ var flankSize : Int = _
+
+ def run() {
+ IntervalUtils.writeFlankingIntervals(reference, inputIntervals, outputIntervals, flankSize)
+ }
+}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala
deleted file mode 100755
index 77eb3ccbc..000000000
--- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala
+++ /dev/null
@@ -1,135 +0,0 @@
-package org.broadinstitute.sting.queue.library.ipf.intervals
-
-import org.broadinstitute.sting.queue.function.InProcessFunction
-import org.broadinstitute.sting.commandline._
-import java.io.{PrintStream, File}
-import collection.JavaConversions._
-import org.broadinstitute.sting.utils.text.XReadLines
-import net.sf.picard.reference.FastaSequenceFile
-import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
-import collection.immutable.TreeSet
-
-// todo -- this is unsafe. Need to use a reference dictionary to ensure no off-contig targets are created
-class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ipType: String, opType: String) extends InProcessFunction {
- @Input(doc="The interval list to expand") val inList : File = in
- @Input(doc="The reference sequence") val refDict : File = ref
- @Argument(doc="Number of basepair to start the expanded interval") val startInt : Int = start
- @Argument(doc="Number of baispair to stop the expanded interval") val sizeInt : Int = size
- @Output(doc="The output intervals file to write to") val outList : File = out
- @Argument(doc="The output format for the intervals") val outTypeStr = opType
- @Argument(doc="The input format for the intervals") val inTypeStr = ipType
-
- var output : PrintStream = _
- var parser : GenomeLocParser = _
- var xrl : XReadLines = _
- val outType = IntervalFormatType.convert(outTypeStr)
- val inType = IntervalFormatType.convert(inTypeStr)
-
- var offsetIn : Int = 0
- var offsetOut : Int = 0
-
- var first : Boolean = true
- var lastTwo : (GenomeLoc,GenomeLoc) = _
-
- var intervalCache : TreeSet[GenomeLoc] = _
- val LINES_TO_CACHE : Int = 1000
-
- def run = {
- output = new PrintStream(outList)
- intervalCache = new TreeSet[GenomeLoc]()(new Ordering[GenomeLoc]{
- def compare(o1: GenomeLoc, o2: GenomeLoc) : Int = { o1.compareTo(o2) }
- })
- parser = new GenomeLocParser(new FastaSequenceFile(ref,true))
- xrl = new XReadLines(inList)
- offsetIn = if (isBed(inType)) 1 else 0
- offsetOut = if( isBed(outType)) 1 else 0
- var line : String = xrl.next
- while ( line.startsWith("@") ) {
- line = xrl.next
- }
- var prevLoc: GenomeLoc = null
- var curLoc: GenomeLoc = null
- var nextLoc : GenomeLoc = parseGenomeInterval(line)
- var linesProcessed : Int = 1
- while ( prevLoc != null || curLoc != null || nextLoc != null ) {
- prevLoc = curLoc
- curLoc = nextLoc
- nextLoc = if ( xrl.hasNext ) parseGenomeInterval(xrl.next) else null
- if ( curLoc != null ) {
- val left: GenomeLoc = refine(expandLeft(curLoc),prevLoc)
- val right: GenomeLoc = refine(expandRight(curLoc),nextLoc)
- if ( left != null ) {
- intervalCache += left
- }
- if ( right != null ) {
- intervalCache += right
- }
- }
- linesProcessed += 1
- if ( linesProcessed % LINES_TO_CACHE == 0 ) {
- val toPrint = intervalCache.filter( u => (u.isBefore(prevLoc) && u.distance(prevLoc) > startInt+sizeInt))
- intervalCache = intervalCache -- toPrint
- toPrint.foreach(u => output.print("%s%n".format(repr(u))))
- }
- //System.out.printf("%s".format(if ( curLoc == null ) "null" else repr(curLoc)))
- }
-
- intervalCache.foreach(u => output.print("%s%n".format(repr(u))))
-
- output.close()
- }
-
- def expandLeft(g: GenomeLoc) : GenomeLoc = {
- parser.createGenomeLoc(g.getContig,g.getStart-startInt-sizeInt,g.getStart-startInt)
- }
-
- def expandRight(g: GenomeLoc) : GenomeLoc = {
- parser.createGenomeLoc(g.getContig,g.getStop+startInt,g.getStop+startInt+sizeInt)
- }
-
- def refine(newG: GenomeLoc, borderG: GenomeLoc) : GenomeLoc = {
- if ( borderG == null || ! newG.overlapsP(borderG) ) {
- return newG
- } else {
- if ( newG.getStart < borderG.getStart ) {
- if ( borderG.getStart - startInt > newG.getStart ) {
- return parser.createGenomeLoc(newG.getContig,newG.getStart,borderG.getStart-startInt)
- }
- } else {
- if ( borderG.getStop + startInt < newG.getStop ){
- return parser.createGenomeLoc(newG.getContig,borderG.getStop+startInt,newG.getStop)
- }
- }
- }
-
- null
- }
-
- def repr(loc : GenomeLoc) : String = {
- if ( loc == null ) return "null"
- if ( outType == IntervalFormatType.INTERVALS ) {
- return "%s:%d-%d".format(loc.getContig,loc.getStart,loc.getStop)
- } else {
- return "%s\t%d\t%d".format(loc.getContig,loc.getStart-offsetOut,loc.getStop+offsetOut)
- }
- }
-
- def isBed(t: IntervalFormatType.IntervalFormatType) : Boolean = {
- t == IntervalFormatType.BED
- }
-
- def parseGenomeInterval( s : String ) : GenomeLoc = {
- val sp = s.split("\\s+")
- // todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted
- if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
- }
-
- object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") {
- type IntervalFormatType = Value
- val INTERVALS,BED,TDF = Value
-
- def convert(s : String) : IntervalFormatType = {
- if ( s.equals("INTERVALS") ) INTERVALS else { if (s.equals("BED") ) BED else TDF}
- }
- }
-}
\ No newline at end of file
diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/IntersectIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/IntersectIntervals.scala
deleted file mode 100755
index e929477a1..000000000
--- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/IntersectIntervals.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-package org.broadinstitute.sting.queue.library.ipf.intervals
-
-import org.broadinstitute.sting.queue.function.InProcessFunction
-import collection.JavaConversions._
-import org.broadinstitute.sting.commandline._
-import java.io.{PrintStream, File}
-import net.sf.samtools.{SAMSequenceRecord, SAMFileHeader, SAMSequenceDictionary}
-import org.broadinstitute.sting.utils.text.XReadLines
-import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
-
-class IntersectIntervals(iVals: List[File], outFile: File, bed: Boolean) extends InProcessFunction {
- @Input(doc="List of interval files to find the intersection of") val intervals : List[File] = iVals
- @Output(doc="Output interval file to which to write") val output : File = outFile
- @Argument(doc="Assume the input interval lists are sorted in the proper order") var assumeSorted = false
- @Argument(doc="Is the tdf in bed file (0-based clopen: 0 5 for {1,2,3,4}?") var isBed = bed
-
-
- var outStream : PrintStream = _
- var contigs : List[String] = Nil
- var dict : SAMSequenceDictionary = _
- var parser : GenomeLocParser = _
-
- def run = {
- outStream = new PrintStream(output)
- dict = new SAMSequenceDictionary
- // note: memory hog
- val sources : List[(List[(String,Int,Int)],Int)] = intervals.map(g => asScalaIterator(new XReadLines(g)).map(u => parse(u)).toList).zipWithIndex
- sources.map(u => u._1).flatten.map(u => u._1).distinct.foreach(u => dict.addSequence(new SAMSequenceRecord(u,Integer.MAX_VALUE)))
- parser = new GenomeLocParser(dict)
- sources.map( (u: (List[(String,Int,Int)],Int)) => u._1.map(g => (newGenomeLoc(g),u._2))).flatten.sortWith( (a,b) => (a._1 compareTo b._1) < 0 ).foldLeft[List[List[(GenomeLoc,Int)]]](Nil)( (a,b) => overlapFold(a,b)).map(u => mapIntersect(u)).filter(h => h != null && h.size > 0).foreach(h => writeOut(h))
- outStream.close()
- }
-
- def writeOut(g : GenomeLoc) : Unit = {
- outStream.print("%s%n".format(g.toString))
- }
-
- def parse(s : String) : (String,Int,Int) = {
- if ( s.contains(":") ) {
- val split1 = s.split(":")
- val split2 = split1(1).split("-")
- return (split1(0),split2(0).toInt,split2(1).toInt)
- } else {
- val split = s.split("\\s+")
- return (split(0),split(1).toInt + (if(isBed) 1 else 0) ,split(2).toInt - (if(isBed) 1 else 0) )
- }
- }
-
- def newGenomeLoc(coords : (String,Int,Int) ) : GenomeLoc = {
- parser.createGenomeLoc(coords._1,coords._2,coords._3)
- }
-
- def overlapFold( a: List[List[(GenomeLoc,Int)]], b: (GenomeLoc,Int) ) : List[List[(GenomeLoc,Int)]] = {
- if ( a.last.forall(u => u._1.overlapsP(b._1)) ) {
- a.init :+ (a.last :+ b)
- } else {
- a :+ ( a.last.dropWhile(u => ! u._1.overlapsP(b._1)) :+ b)
- }
- }
-
- def mapIntersect( u: List[(GenomeLoc,Int)]) : GenomeLoc = {
- if ( u.map(h => h._2).distinct.sum != range(1,intervals.size).sum ) { // if all sources not accounted for
- null
- }
- u.map(h => h._1).reduceLeft[GenomeLoc]( (a,b) => a.intersect(b) )
- }
-
- def range(a: Int, b: Int) : Range = new Range(a,b+1,1)
-
-}
\ No newline at end of file
diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar b/settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.jar
old mode 100755
new mode 100644
similarity index 88%
rename from settings/repository/net.sf.snpeff/snpeff-2.0.2.jar
rename to settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.jar
index bfd06f97f..ee5d02367
Binary files a/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar and b/settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.jar differ
diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml b/settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.xml
similarity index 77%
rename from settings/repository/net.sf.snpeff/snpeff-2.0.2.xml
rename to settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.xml
index f0568def4..5417641d3 100644
--- a/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml
+++ b/settings/repository/net.sf.snpeff/snpeff-2.0.4rc3.xml
@@ -1,3 +1,3 @@
-
+