diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java
index 0e1ccd313..eb59bac9e 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/AllelicVariant.java
@@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
* two allelic variants, 'A' and 'T' in addition to the (known) reference variant 'C').
* @return
*/
- boolean isBiallelic() ;
+ boolean isBiallelic();
+
+ /** returns the length of the variant. For SNPs this is just 1.
+ */
+ int length();
}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
index 6a10a26d7..416f85e7e 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
@@ -161,7 +161,7 @@ public interface Genotype extends Comparable {
*/
// char getAltSnpFWD() throws IllegalStateException;
-
+ int length();
/** Return actual number of observed alleles (chromosomes) in the genotype.
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java
index 6da9736e1..a979626d4 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/KGenomesSNPROD.java
@@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int nHetGenotypes() { return -1; }
public int nHomVarGenotypes() { return -1; }
public List getGenotypes() { return null; }
+ public int length() { return 1; }
}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java
index 7718e5c31..f883a134f 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/PooledEMSNPROD.java
@@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public List getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
+ public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
index 630d788d5..a0d88f002 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
@@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
+ public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java
index 020c9f39b..3195dd6c6 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java
@@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
-public class SimpleIndelROD extends TabularROD implements Genotype {
+public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant {
+
+ private boolean KGENOMES_FORMAT = false, checkedFormat = false;
public SimpleIndelROD(String name) {
super(name);
@@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
}
public List getFWDAlleles() {
+ if ( is1KGFormat() )
+ return Arrays.asList(this.get("4"));
+
String str = this.get("3");
return Arrays.asList(str.substring(0, str.indexOf(":")));
}
public String getFWDRefBases() { return ""; }
+ public String getAltBasesFWD() { return getFWDAlleles().get(0); }
+ public String getRefBasesFWD() { return ""; }
+ public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
+ public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
public char getRef() { return 'N'; }
+ public List getGenotype() { return getFWDAlleles(); }
+ public boolean isGenotype() { return false; }
public boolean isPointGenotype() { return false; }
public boolean isIndelGenotype() { return true; }
public boolean isSNP() { return false; }
public boolean isReference() { return false; }
- public boolean isInsertion() { return this.get("3").charAt(0) == '+'; }
- public boolean isDeletion() { return this.get("3").charAt(0) == '-'; }
- public boolean isIndel() { return false; }
+ public boolean isInsertion() {
+ if ( is1KGFormat() )
+ return this.get("3").equals("I");
+ return this.get("3").charAt(0) == '+';
+ }
+ public boolean isDeletion() {
+ if ( is1KGFormat() )
+ return this.get("3").equals("D");
+ return this.get("3").charAt(0) == '-';
+ }
+ public boolean isIndel() { return true; }
public double getVariantConfidence() { return 0.0; }
+ public double getVariationConfidence() { return 0.0; }
public double getConsensusConfidence() { return 0.0; }
public boolean isBiallelic() { return true; }
public boolean isHom() { return false; }
public boolean isHet() { return false; }
+ public double getHeterozygosity() { return 0.0; }
+ public double getMAF() { return 0.0; }
+ public int getPloidy() { return 2; }
+ public int length() {
+ if ( is1KGFormat() )
+ return Integer.parseInt(this.get("2"));
+ return getFWDAlleles().get(0).length();
+ }
public String toString() {
StringBuffer sb = new StringBuffer();
@@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
return sb.toString();
}
+
+ private boolean is1KGFormat() {
+ if ( !checkedFormat ) {
+ checkedFormat = true;
+ KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
+ }
+ return KGENOMES_FORMAT;
+ }
}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java
index 6fc327af5..de148028b 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodDbSNP.java
@@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
// TODO Auto-generated method stub
return observed.indexOf('/')==observed.lastIndexOf('/');
}
+
+ public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java
index 6b6985570..687fd04b8 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodFLT.java
@@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes {
public List getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
+ public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java
index 77b76dda3..5e8ecfacd 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java
@@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
+ public int length() { return 1; }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
index cb0335a2d..852f6db4d 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
@@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
return true;
}
+ public int length() { return 1; }
+
public char getReferenceBase() { return refBase; }
public int getPileupDepth() { return depth; }
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java
new file mode 100755
index 000000000..2e20abe23
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/IndelMetricsAnalysis.java
@@ -0,0 +1,64 @@
+package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.LocusContext;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
+ long insertions = 0;
+ long deletions = 0;
+ int maxSize = 0;
+ long[][] sizes = new long[2][100];
+
+ public IndelMetricsAnalysis() {
+ super("indel_metrics");
+ for (int i = 0; i < 100; i++)
+ sizes[0][i] = sizes[1][i] = 0;
+ }
+
+ public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
+ if ( eval != null && eval.isIndel() ) {
+ if ( eval.isInsertion() )
+ insertions++;
+ else if ( eval.isDeletion() )
+ deletions++;
+ else
+ throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
+
+ if ( eval.length() < 100 ) {
+ sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
+ if ( eval.length() > maxSize )
+ maxSize = eval.length();
+ }
+ }
+
+ return null;
+ }
+
+ public List done() {
+ List s = new ArrayList();
+ s.add(String.format("total_deletions %d", deletions));
+ s.add(String.format("total_insertions %d", insertions));
+ s.add(String.format(""));
+
+ s.add("Size Distribution");
+ s.add("size\tdeletions\tinsertions");
+ for ( int i = 1; i <= maxSize; i++ )
+ s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
+
+ return s;
+ }
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
index cf6b1e76d..74252c3cb 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java
@@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker {
ArrayList analyses = new ArrayList();
//
- // Add new analyzes here!
+ // Add new analyses here!
//
analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName));
@@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker {
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
analyses.add(new ClusterCounterAnalysis());
analyses.add(new CallableBasesAnalysis());
+ analyses.add(new IndelMetricsAnalysis());
//
- // Filter out analyzes inappropriate for our evaluation type Population or Genotype
+ // Filter out analyses inappropriate for our evaluation type Population or Genotype
//
Iterator iter = analyses.iterator();
while ( iter.hasNext() ) {
diff --git a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java
index dec6e1b0c..f68a45ef2 100644
--- a/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/GenotypeUtils.java
@@ -187,6 +187,11 @@ public class GenotypeUtils {
return false;
}
+ @Override
+ public int length() {
+ return (int)(location.getStop() - location.getStart() + 1);
+ }
+
@Override
public int compareTo(ReferenceOrderedDatum o) {
return location.compareTo(o.getLocation());