-add basic indel metrics to variant eval

-variants need a length method (can't assume it's a SNP)!


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1324 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-28 03:25:03 +00:00
parent 1d6d99ed9c
commit 3c4410f104
15 changed files with 138 additions and 12 deletions

View File

@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
* @return
*/
boolean isBiallelic() ;
boolean isBiallelic();
/** returns the length of the variant. For SNPs this is just 1.
*/
int length();
}

View File

@ -161,7 +161,7 @@ public interface Genotype extends Comparable<ReferenceOrderedDatum> {
*/
// char getAltSnpFWD() throws IllegalStateException;
int length();
/** Return actual number of observed alleles (chromosomes) in the genotype.

View File

@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
public int nHetGenotypes() { return -1; }
public int nHomVarGenotypes() { return -1; }
public List<Genotype> getGenotypes() { return null; }
public int length() { return 1; }
}

View File

@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }

View File

@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
return false;
}
public int length() { return 1; }
@Override
public int compareTo(ReferenceOrderedDatum that) {
return this.mLoc.compareTo(that.getLocation());

View File

@ -490,10 +490,15 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
return nNonref < 2;
}
@Override
public double getConsensusConfidence() {
return consensusScore;
}
@Override
public double getConsensusConfidence() {
return consensusScore;
}
@Override
public int length() {
return eventLength;
}
@Override
public boolean isIndelGenotype() {

View File

@ -31,6 +31,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }

View File

@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
public class SimpleIndelROD extends TabularROD implements Genotype {
public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant {
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
public SimpleIndelROD(String name) {
super(name);
@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
}
public List<String> getFWDAlleles() {
if ( is1KGFormat() )
return Arrays.asList(this.get("4"));
String str = this.get("3");
return Arrays.asList(str.substring(0, str.indexOf(":")));
}
public String getFWDRefBases() { return ""; }
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
public String getRefBasesFWD() { return ""; }
public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
public char getRef() { return 'N'; }
public List<String> getGenotype() { return getFWDAlleles(); }
public boolean isGenotype() { return false; }
public boolean isPointGenotype() { return false; }
public boolean isIndelGenotype() { return true; }
public boolean isSNP() { return false; }
public boolean isReference() { return false; }
public boolean isInsertion() { return this.get("3").charAt(0) == '+'; }
public boolean isDeletion() { return this.get("3").charAt(0) == '-'; }
public boolean isIndel() { return false; }
public boolean isInsertion() {
if ( is1KGFormat() )
return this.get("3").equals("I");
return this.get("3").charAt(0) == '+';
}
public boolean isDeletion() {
if ( is1KGFormat() )
return this.get("3").equals("D");
return this.get("3").charAt(0) == '-';
}
public boolean isIndel() { return true; }
public double getVariantConfidence() { return 0.0; }
public double getVariationConfidence() { return 0.0; }
public double getConsensusConfidence() { return 0.0; }
public boolean isBiallelic() { return true; }
public boolean isHom() { return false; }
public boolean isHet() { return false; }
public double getHeterozygosity() { return 0.0; }
public double getMAF() { return 0.0; }
public int getPloidy() { return 2; }
public int length() {
if ( is1KGFormat() )
return Integer.parseInt(this.get("2"));
return getFWDAlleles().get(0).length();
}
public String toString() {
StringBuffer sb = new StringBuffer();
@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
return sb.toString();
}
private boolean is1KGFormat() {
if ( !checkedFormat ) {
checkedFormat = true;
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
}
return KGENOMES_FORMAT;
}
}

View File

@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
// TODO Auto-generated method stub
return observed.indexOf('/')==observed.lastIndexOf('/');
}
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
}

View File

@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes {
public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
// SNPCallFromGenotypes interface
public int nIndividuals() { return -1; }

View File

@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
public int length() { return 1; }
}

View File

@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
return true;
}
public int length() { return 1; }
public char getReferenceBase() { return refBase; }
public int getPileupDepth() { return depth; }

View File

@ -0,0 +1,64 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.LocusContext;
import java.util.List;
import java.util.ArrayList;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long insertions = 0;
long deletions = 0;
int maxSize = 0;
long[][] sizes = new long[2][100];
public IndelMetricsAnalysis() {
super("indel_metrics");
for (int i = 0; i < 100; i++)
sizes[0][i] = sizes[1][i] = 0;
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
if ( eval != null && eval.isIndel() ) {
if ( eval.isInsertion() )
insertions++;
else if ( eval.isDeletion() )
deletions++;
else
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
if ( eval.length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
if ( eval.length() > maxSize )
maxSize = eval.length();
}
}
return null;
}
public List<String> done() {
List<String> s = new ArrayList<String>();
s.add(String.format("total_deletions %d", deletions));
s.add(String.format("total_insertions %d", insertions));
s.add(String.format(""));
s.add("Size Distribution");
s.add("size\tdeletions\tinsertions");
for ( int i = 1; i <= maxSize; i++ )
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
return s;
}
}

View File

@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
//
// Add new analyzes here!
// Add new analyses here!
//
analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName));
@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
analyses.add(new ClusterCounterAnalysis());
analyses.add(new CallableBasesAnalysis());
analyses.add(new IndelMetricsAnalysis());
//
// Filter out analyzes inappropriate for our evaluation type Population or Genotype
// Filter out analyses inappropriate for our evaluation type Population or Genotype
//
Iterator<VariantAnalysis> iter = analyses.iterator();
while ( iter.hasNext() ) {

View File

@ -187,6 +187,11 @@ public class GenotypeUtils {
return false;
}
@Override
public int length() {
return (int)(location.getStop() - location.getStart() + 1);
}
@Override
public int compareTo(ReferenceOrderedDatum o) {
return location.compareTo(o.getLocation());