diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java index 0e1ccd313..eb59bac9e 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java @@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum { * two allelic variants, 'A' and 'T' in addition to the (known) reference variant 'C'). * @return */ - boolean isBiallelic() ; + boolean isBiallelic(); + + /** returns the length of the variant. For SNPs this is just 1. + */ + int length(); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java index 6a10a26d7..416f85e7e 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java @@ -161,7 +161,7 @@ public interface Genotype extends Comparable { */ // char getAltSnpFWD() throws IllegalStateException; - + int length(); /** Return actual number of observed alleles (chromosomes) in the genotype. diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java index 6da9736e1..a979626d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java @@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes { public int nHetGenotypes() { return -1; } public int nHomVarGenotypes() { return -1; } public List getGenotypes() { return null; } + public int length() { return 1; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java index 7718e5c31..f883a134f 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java @@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes { public List getGenotype() throws IllegalStateException { throw new IllegalStateException(); } public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } + public int length() { return 1; } // SNPCallFromGenotypes interface public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index 630d788d5..a0d88f002 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator getGenotype() throws IllegalStateException { throw new IllegalStateException(); } public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } + public int length() { return 1; } // SNPCallFromGenotypes interface public int nIndividuals() { return -1; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java index 020c9f39b..3195dd6c6 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.*; -public class SimpleIndelROD extends TabularROD implements Genotype { +public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant { + + private boolean KGENOMES_FORMAT = false, checkedFormat = false; public SimpleIndelROD(String name) { super(name); @@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype { } public List getFWDAlleles() { + if ( is1KGFormat() ) + return Arrays.asList(this.get("4")); + String str = this.get("3"); return Arrays.asList(str.substring(0, str.indexOf(":"))); } public String getFWDRefBases() { return ""; } + public String getAltBasesFWD() { return getFWDAlleles().get(0); } + public String getRefBasesFWD() { return ""; } + public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); } + public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); } public char getRef() { return 'N'; } + public List getGenotype() { return getFWDAlleles(); } + public boolean isGenotype() { return false; } public boolean isPointGenotype() { return false; } public boolean isIndelGenotype() { return true; } public boolean isSNP() { return false; } public boolean isReference() { return false; } - public boolean isInsertion() { return this.get("3").charAt(0) == '+'; } - public boolean isDeletion() { return this.get("3").charAt(0) == '-'; } - public boolean isIndel() { return false; } + public boolean isInsertion() { + if ( is1KGFormat() ) + return this.get("3").equals("I"); + return this.get("3").charAt(0) == '+'; + } + public boolean isDeletion() { + if ( is1KGFormat() ) + return this.get("3").equals("D"); + return this.get("3").charAt(0) == '-'; + } + public boolean isIndel() { return true; } public double getVariantConfidence() { return 0.0; } + public double getVariationConfidence() { return 0.0; } public double getConsensusConfidence() { return 0.0; } public boolean isBiallelic() { return true; } public boolean isHom() { return false; } public boolean isHet() { return false; } + public double getHeterozygosity() { return 0.0; } + public double getMAF() { return 0.0; } + public int getPloidy() { return 2; } + public int length() { + if ( is1KGFormat() ) + return Integer.parseInt(this.get("2")); + return getFWDAlleles().get(0).length(); + } public String toString() { StringBuffer sb = new StringBuffer(); @@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype { sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1)); return sb.toString(); } + + private boolean is1KGFormat() { + if ( !checkedFormat ) { + checkedFormat = true; + KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I"); + } + return KGENOMES_FORMAT; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java index 6fc327af5..de148028b 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java @@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria // TODO Auto-generated method stub return observed.indexOf('/')==observed.lastIndexOf('/'); } + + public int length() { return (int)(loc.getStop() - loc.getStart() + 1); } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java index 6b6985570..687fd04b8 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java @@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes { public List getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); } public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } + public int length() { return 1; } // SNPCallFromGenotypes interface public int nIndividuals() { return -1; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java index 77b76dda3..5e8ecfacd 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java @@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } + public int length() { return 1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java index cb0335a2d..852f6db4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java @@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa return true; } + public int length() { return 1; } + public char getReferenceBase() { return refBase; } public int getPileupDepth() { return depth; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java new file mode 100755 index 000000000..2e20abe23 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.LocusContext; + +import java.util.List; +import java.util.ArrayList; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ +public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { + long insertions = 0; + long deletions = 0; + int maxSize = 0; + long[][] sizes = new long[2][100]; + + public IndelMetricsAnalysis() { + super("indel_metrics"); + for (int i = 0; i < 100; i++) + sizes[0][i] = sizes[1][i] = 0; + } + + public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) { + if ( eval != null && eval.isIndel() ) { + if ( eval.isInsertion() ) + insertions++; + else if ( eval.isDeletion() ) + deletions++; + else + throw new RuntimeException("Variant is indel, but isn't insertion or deletion!"); + + if ( eval.length() < 100 ) { + sizes[eval.isDeletion() ? 0 : 1][eval.length()]++; + if ( eval.length() > maxSize ) + maxSize = eval.length(); + } + } + + return null; + } + + public List done() { + List s = new ArrayList(); + s.add(String.format("total_deletions %d", deletions)); + s.add(String.format("total_insertions %d", insertions)); + s.add(String.format("")); + + s.add("Size Distribution"); + s.add("size\tdeletions\tinsertions"); + for ( int i = 1; i <= maxSize; i++ ) + s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i])); + + return s; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index cf6b1e76d..74252c3cb 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker { ArrayList analyses = new ArrayList(); // - // Add new analyzes here! + // Add new analyses here! // analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); @@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker { analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold)); analyses.add(new ClusterCounterAnalysis()); analyses.add(new CallableBasesAnalysis()); + analyses.add(new IndelMetricsAnalysis()); // - // Filter out analyzes inappropriate for our evaluation type Population or Genotype + // Filter out analyses inappropriate for our evaluation type Population or Genotype // Iterator iter = analyses.iterator(); while ( iter.hasNext() ) { diff --git a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java index dec6e1b0c..f68a45ef2 100644 --- a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java +++ b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java @@ -187,6 +187,11 @@ public class GenotypeUtils { return false; } + @Override + public int length() { + return (int)(location.getStop() - location.getStart() + 1); + } + @Override public int compareTo(ReferenceOrderedDatum o) { return location.compareTo(o.getLocation());