-add basic indel metrics to variant eval

-variants need a length method (can't assume it's a SNP)!


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1324 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-28 03:25:03 +00:00
parent 1d6d99ed9c
commit 3c4410f104
15 changed files with 138 additions and 12 deletions

View File

@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C'). * two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
* @return * @return
*/ */
boolean isBiallelic() ; boolean isBiallelic();
/** returns the length of the variant. For SNPs this is just 1.
*/
int length();
} }

View File

@ -161,7 +161,7 @@ public interface Genotype extends Comparable<ReferenceOrderedDatum> {
*/ */
// char getAltSnpFWD() throws IllegalStateException; // char getAltSnpFWD() throws IllegalStateException;
int length();
/** Return actual number of observed alleles (chromosomes) in the genotype. /** Return actual number of observed alleles (chromosomes) in the genotype.

View File

@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int nHetGenotypes() { return -1; } public int nHetGenotypes() { return -1; }
public int nHomVarGenotypes() { return -1; } public int nHomVarGenotypes() { return -1; }
public List<Genotype> getGenotypes() { return null; } public List<Genotype> getGenotypes() { return null; }
public int length() { return 1; }
} }

View File

@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); } public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface // SNPCallFromGenotypes interface
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); } public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }

View File

@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return false; return false;
} }
public int length() { return 1; }
@Override @Override
public int compareTo(ReferenceOrderedDatum that) { public int compareTo(ReferenceOrderedDatum that) {
return this.mLoc.compareTo(that.getLocation()); return this.mLoc.compareTo(that.getLocation());

View File

@ -490,10 +490,15 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
return nNonref < 2; return nNonref < 2;
} }
@Override @Override
public double getConsensusConfidence() { public double getConsensusConfidence() {
return consensusScore; return consensusScore;
} }
@Override
public int length() {
return eventLength;
}
@Override @Override
public boolean isIndelGenotype() { public boolean isIndelGenotype() {

View File

@ -31,6 +31,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); } public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface // SNPCallFromGenotypes interface
public int nIndividuals() { return -1; } public int nIndividuals() { return -1; }

View File

@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*; import java.util.*;
public class SimpleIndelROD extends TabularROD implements Genotype { public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant {
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
public SimpleIndelROD(String name) { public SimpleIndelROD(String name) {
super(name); super(name);
@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
} }
public List<String> getFWDAlleles() { public List<String> getFWDAlleles() {
if ( is1KGFormat() )
return Arrays.asList(this.get("4"));
String str = this.get("3"); String str = this.get("3");
return Arrays.asList(str.substring(0, str.indexOf(":"))); return Arrays.asList(str.substring(0, str.indexOf(":")));
} }
public String getFWDRefBases() { return ""; } public String getFWDRefBases() { return ""; }
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
public String getRefBasesFWD() { return ""; }
public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
public char getRef() { return 'N'; } public char getRef() { return 'N'; }
public List<String> getGenotype() { return getFWDAlleles(); }
public boolean isGenotype() { return false; }
public boolean isPointGenotype() { return false; } public boolean isPointGenotype() { return false; }
public boolean isIndelGenotype() { return true; } public boolean isIndelGenotype() { return true; }
public boolean isSNP() { return false; } public boolean isSNP() { return false; }
public boolean isReference() { return false; } public boolean isReference() { return false; }
public boolean isInsertion() { return this.get("3").charAt(0) == '+'; } public boolean isInsertion() {
public boolean isDeletion() { return this.get("3").charAt(0) == '-'; } if ( is1KGFormat() )
public boolean isIndel() { return false; } return this.get("3").equals("I");
return this.get("3").charAt(0) == '+';
}
public boolean isDeletion() {
if ( is1KGFormat() )
return this.get("3").equals("D");
return this.get("3").charAt(0) == '-';
}
public boolean isIndel() { return true; }
public double getVariantConfidence() { return 0.0; } public double getVariantConfidence() { return 0.0; }
public double getVariationConfidence() { return 0.0; }
public double getConsensusConfidence() { return 0.0; } public double getConsensusConfidence() { return 0.0; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public boolean isHom() { return false; } public boolean isHom() { return false; }
public boolean isHet() { return false; } public boolean isHet() { return false; }
public double getHeterozygosity() { return 0.0; }
public double getMAF() { return 0.0; }
public int getPloidy() { return 2; }
public int length() {
if ( is1KGFormat() )
return Integer.parseInt(this.get("2"));
return getFWDAlleles().get(0).length();
}
public String toString() { public String toString() {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1)); sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
return sb.toString(); return sb.toString();
} }
private boolean is1KGFormat() {
if ( !checkedFormat ) {
checkedFormat = true;
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
}
return KGENOMES_FORMAT;
}
} }

View File

@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
// TODO Auto-generated method stub // TODO Auto-generated method stub
return observed.indexOf('/')==observed.lastIndexOf('/'); return observed.indexOf('/')==observed.lastIndexOf('/');
} }
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
} }

View File

@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); } public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface // SNPCallFromGenotypes interface
public int nIndividuals() { return -1; } public int nIndividuals() { return -1; }

View File

@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
public int length() { return 1; }
} }

View File

@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
return true; return true;
} }
public int length() { return 1; }
public char getReferenceBase() { return refBase; } public char getReferenceBase() { return refBase; }
public int getPileupDepth() { return depth; } public int getPileupDepth() { return depth; }

View File

@ -0,0 +1,64 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.LocusContext;
import java.util.List;
import java.util.ArrayList;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long insertions = 0;
long deletions = 0;
int maxSize = 0;
long[][] sizes = new long[2][100];
public IndelMetricsAnalysis() {
super("indel_metrics");
for (int i = 0; i < 100; i++)
sizes[0][i] = sizes[1][i] = 0;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
if ( eval != null && eval.isIndel() ) {
if ( eval.isInsertion() )
insertions++;
else if ( eval.isDeletion() )
deletions++;
else
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
if ( eval.length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
if ( eval.length() > maxSize )
maxSize = eval.length();
}
}
return null;
}
public List<String> done() {
List<String> s = new ArrayList<String>();
s.add(String.format("total_deletions %d", deletions));
s.add(String.format("total_insertions %d", insertions));
s.add(String.format(""));
s.add("Size Distribution");
s.add("size\tdeletions\tinsertions");
for ( int i = 1; i <= maxSize; i++ )
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
return s;
}
}

View File

@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>(); ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
// //
// Add new analyzes here! // Add new analyses here!
// //
analyses.add(new VariantCounter()); analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName)); analyses.add(new VariantDBCoverage(knownSNPDBName));
@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold)); analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
analyses.add(new ClusterCounterAnalysis()); analyses.add(new ClusterCounterAnalysis());
analyses.add(new CallableBasesAnalysis()); analyses.add(new CallableBasesAnalysis());
analyses.add(new IndelMetricsAnalysis());
// //
// Filter out analyzes inappropriate for our evaluation type Population or Genotype // Filter out analyses inappropriate for our evaluation type Population or Genotype
// //
Iterator<VariantAnalysis> iter = analyses.iterator(); Iterator<VariantAnalysis> iter = analyses.iterator();
while ( iter.hasNext() ) { while ( iter.hasNext() ) {

View File

@ -187,6 +187,11 @@ public class GenotypeUtils {
return false; return false;
} }
@Override
public int length() {
return (int)(location.getStop() - location.getStart() + 1);
}
@Override @Override
public int compareTo(ReferenceOrderedDatum o) { public int compareTo(ReferenceOrderedDatum o) {
return location.compareTo(o.getLocation()); return location.compareTo(o.getLocation());