-add basic indel metrics to variant eval
-variants need a length method (can't assume it's a SNP)! git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1324 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1d6d99ed9c
commit
3c4410f104
|
|
@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
|
||||||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
boolean isBiallelic() ;
|
boolean isBiallelic();
|
||||||
|
|
||||||
|
/** returns the length of the variant. For SNPs this is just 1.
|
||||||
|
*/
|
||||||
|
int length();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ public interface Genotype extends Comparable<ReferenceOrderedDatum> {
|
||||||
*/
|
*/
|
||||||
// char getAltSnpFWD() throws IllegalStateException;
|
// char getAltSnpFWD() throws IllegalStateException;
|
||||||
|
|
||||||
|
int length();
|
||||||
|
|
||||||
|
|
||||||
/** Return actual number of observed alleles (chromosomes) in the genotype.
|
/** Return actual number of observed alleles (chromosomes) in the genotype.
|
||||||
|
|
|
||||||
|
|
@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
||||||
public int nHetGenotypes() { return -1; }
|
public int nHetGenotypes() { return -1; }
|
||||||
public int nHomVarGenotypes() { return -1; }
|
public int nHomVarGenotypes() { return -1; }
|
||||||
public List<Genotype> getGenotypes() { return null; }
|
public List<Genotype> getGenotypes() { return null; }
|
||||||
|
public int length() { return 1; }
|
||||||
}
|
}
|
||||||
|
|
@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
||||||
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
|
public int length() { return 1; }
|
||||||
|
|
||||||
// SNPCallFromGenotypes interface
|
// SNPCallFromGenotypes interface
|
||||||
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }
|
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }
|
||||||
|
|
|
||||||
|
|
@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int length() { return 1; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(ReferenceOrderedDatum that) {
|
public int compareTo(ReferenceOrderedDatum that) {
|
||||||
return this.mLoc.compareTo(that.getLocation());
|
return this.mLoc.compareTo(that.getLocation());
|
||||||
|
|
|
||||||
|
|
@ -490,10 +490,15 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
||||||
return nNonref < 2;
|
return nNonref < 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public double getConsensusConfidence() {
|
public double getConsensusConfidence() {
|
||||||
return consensusScore;
|
return consensusScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int length() {
|
||||||
|
return eventLength;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isIndelGenotype() {
|
public boolean isIndelGenotype() {
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
||||||
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
|
public int length() { return 1; }
|
||||||
|
|
||||||
// SNPCallFromGenotypes interface
|
// SNPCallFromGenotypes interface
|
||||||
public int nIndividuals() { return -1; }
|
public int nIndividuals() { return -1; }
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class SimpleIndelROD extends TabularROD implements Genotype {
|
public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant {
|
||||||
|
|
||||||
|
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
|
||||||
|
|
||||||
public SimpleIndelROD(String name) {
|
public SimpleIndelROD(String name) {
|
||||||
super(name);
|
super(name);
|
||||||
|
|
@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getFWDAlleles() {
|
public List<String> getFWDAlleles() {
|
||||||
|
if ( is1KGFormat() )
|
||||||
|
return Arrays.asList(this.get("4"));
|
||||||
|
|
||||||
String str = this.get("3");
|
String str = this.get("3");
|
||||||
return Arrays.asList(str.substring(0, str.indexOf(":")));
|
return Arrays.asList(str.substring(0, str.indexOf(":")));
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getFWDRefBases() { return ""; }
|
public String getFWDRefBases() { return ""; }
|
||||||
|
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
|
||||||
|
public String getRefBasesFWD() { return ""; }
|
||||||
|
public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||||
|
public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||||
public char getRef() { return 'N'; }
|
public char getRef() { return 'N'; }
|
||||||
|
public List<String> getGenotype() { return getFWDAlleles(); }
|
||||||
|
public boolean isGenotype() { return false; }
|
||||||
public boolean isPointGenotype() { return false; }
|
public boolean isPointGenotype() { return false; }
|
||||||
public boolean isIndelGenotype() { return true; }
|
public boolean isIndelGenotype() { return true; }
|
||||||
public boolean isSNP() { return false; }
|
public boolean isSNP() { return false; }
|
||||||
public boolean isReference() { return false; }
|
public boolean isReference() { return false; }
|
||||||
public boolean isInsertion() { return this.get("3").charAt(0) == '+'; }
|
public boolean isInsertion() {
|
||||||
public boolean isDeletion() { return this.get("3").charAt(0) == '-'; }
|
if ( is1KGFormat() )
|
||||||
public boolean isIndel() { return false; }
|
return this.get("3").equals("I");
|
||||||
|
return this.get("3").charAt(0) == '+';
|
||||||
|
}
|
||||||
|
public boolean isDeletion() {
|
||||||
|
if ( is1KGFormat() )
|
||||||
|
return this.get("3").equals("D");
|
||||||
|
return this.get("3").charAt(0) == '-';
|
||||||
|
}
|
||||||
|
public boolean isIndel() { return true; }
|
||||||
public double getVariantConfidence() { return 0.0; }
|
public double getVariantConfidence() { return 0.0; }
|
||||||
|
public double getVariationConfidence() { return 0.0; }
|
||||||
public double getConsensusConfidence() { return 0.0; }
|
public double getConsensusConfidence() { return 0.0; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
public boolean isHom() { return false; }
|
public boolean isHom() { return false; }
|
||||||
public boolean isHet() { return false; }
|
public boolean isHet() { return false; }
|
||||||
|
public double getHeterozygosity() { return 0.0; }
|
||||||
|
public double getMAF() { return 0.0; }
|
||||||
|
public int getPloidy() { return 2; }
|
||||||
|
public int length() {
|
||||||
|
if ( is1KGFormat() )
|
||||||
|
return Integer.parseInt(this.get("2"));
|
||||||
|
return getFWDAlleles().get(0).length();
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
|
|
@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
|
||||||
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
|
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean is1KGFormat() {
|
||||||
|
if ( !checkedFormat ) {
|
||||||
|
checkedFormat = true;
|
||||||
|
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
|
||||||
|
}
|
||||||
|
return KGENOMES_FORMAT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
||||||
// TODO Auto-generated method stub
|
// TODO Auto-generated method stub
|
||||||
return observed.indexOf('/')==observed.lastIndexOf('/');
|
return observed.indexOf('/')==observed.lastIndexOf('/');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes {
|
||||||
public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
|
public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
|
public int length() { return 1; }
|
||||||
|
|
||||||
// SNPCallFromGenotypes interface
|
// SNPCallFromGenotypes interface
|
||||||
public int nIndividuals() { return -1; }
|
public int nIndividuals() { return -1; }
|
||||||
|
|
|
||||||
|
|
@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
|
||||||
|
|
||||||
public int getPloidy() throws IllegalStateException { return 2; }
|
public int getPloidy() throws IllegalStateException { return 2; }
|
||||||
public boolean isBiallelic() { return true; }
|
public boolean isBiallelic() { return true; }
|
||||||
|
public int length() { return 1; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int length() { return 1; }
|
||||||
|
|
||||||
public char getReferenceBase() { return refBase; }
|
public char getReferenceBase() { return refBase; }
|
||||||
|
|
||||||
public int getPileupDepth() { return depth; }
|
public int getPileupDepth() { return depth; }
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
*
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||||
|
long insertions = 0;
|
||||||
|
long deletions = 0;
|
||||||
|
int maxSize = 0;
|
||||||
|
long[][] sizes = new long[2][100];
|
||||||
|
|
||||||
|
public IndelMetricsAnalysis() {
|
||||||
|
super("indel_metrics");
|
||||||
|
for (int i = 0; i < 100; i++)
|
||||||
|
sizes[0][i] = sizes[1][i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
if ( eval != null && eval.isIndel() ) {
|
||||||
|
if ( eval.isInsertion() )
|
||||||
|
insertions++;
|
||||||
|
else if ( eval.isDeletion() )
|
||||||
|
deletions++;
|
||||||
|
else
|
||||||
|
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
|
||||||
|
|
||||||
|
if ( eval.length() < 100 ) {
|
||||||
|
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
|
||||||
|
if ( eval.length() > maxSize )
|
||||||
|
maxSize = eval.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> done() {
|
||||||
|
List<String> s = new ArrayList<String>();
|
||||||
|
s.add(String.format("total_deletions %d", deletions));
|
||||||
|
s.add(String.format("total_insertions %d", insertions));
|
||||||
|
s.add(String.format(""));
|
||||||
|
|
||||||
|
s.add("Size Distribution");
|
||||||
|
s.add("size\tdeletions\tinsertions");
|
||||||
|
for ( int i = 1; i <= maxSize; i++ )
|
||||||
|
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
|
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
|
||||||
|
|
||||||
//
|
//
|
||||||
// Add new analyzes here!
|
// Add new analyses here!
|
||||||
//
|
//
|
||||||
analyses.add(new VariantCounter());
|
analyses.add(new VariantCounter());
|
||||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||||
|
|
@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
|
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
|
||||||
analyses.add(new ClusterCounterAnalysis());
|
analyses.add(new ClusterCounterAnalysis());
|
||||||
analyses.add(new CallableBasesAnalysis());
|
analyses.add(new CallableBasesAnalysis());
|
||||||
|
analyses.add(new IndelMetricsAnalysis());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Filter out analyzes inappropriate for our evaluation type Population or Genotype
|
// Filter out analyses inappropriate for our evaluation type Population or Genotype
|
||||||
//
|
//
|
||||||
Iterator<VariantAnalysis> iter = analyses.iterator();
|
Iterator<VariantAnalysis> iter = analyses.iterator();
|
||||||
while ( iter.hasNext() ) {
|
while ( iter.hasNext() ) {
|
||||||
|
|
|
||||||
|
|
@ -187,6 +187,11 @@ public class GenotypeUtils {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int length() {
|
||||||
|
return (int)(location.getStop() - location.getStart() + 1);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(ReferenceOrderedDatum o) {
|
public int compareTo(ReferenceOrderedDatum o) {
|
||||||
return location.compareTo(o.getLocation());
|
return location.compareTo(o.getLocation());
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue