-add basic indel metrics to variant eval
-variants need a length method (can't assume it's a SNP)! git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1324 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1d6d99ed9c
commit
3c4410f104
|
|
@ -139,5 +139,9 @@ public interface AllelicVariant extends ReferenceOrderedDatum {
|
|||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
||||
* @return
|
||||
*/
|
||||
boolean isBiallelic() ;
|
||||
boolean isBiallelic();
|
||||
|
||||
/** returns the length of the variant. For SNPs this is just 1.
|
||||
*/
|
||||
int length();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ public interface Genotype extends Comparable<ReferenceOrderedDatum> {
|
|||
*/
|
||||
// char getAltSnpFWD() throws IllegalStateException;
|
||||
|
||||
|
||||
int length();
|
||||
|
||||
|
||||
/** Return actual number of observed alleles (chromosomes) in the genotype.
|
||||
|
|
|
|||
|
|
@ -45,4 +45,5 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
public int nHetGenotypes() { return -1; }
|
||||
public int nHomVarGenotypes() { return -1; }
|
||||
public List<Genotype> getGenotypes() { return null; }
|
||||
public int length() { return 1; }
|
||||
}
|
||||
|
|
@ -45,6 +45,7 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
public int nIndividuals() { return Integer.parseInt(this.get("EM_N")); }
|
||||
|
|
|
|||
|
|
@ -362,6 +362,8 @@ public class RodGLF implements ReferenceOrderedDatum, AllelicVariant, Iterator<R
|
|||
return false;
|
||||
}
|
||||
|
||||
public int length() { return 1; }
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum that) {
|
||||
return this.mLoc.compareTo(that.getLocation());
|
||||
|
|
|
|||
|
|
@ -490,10 +490,15 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
return nNonref < 2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return consensusScore;
|
||||
}
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return consensusScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIndelGenotype() {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
public List<String> getGenotype() throws IllegalStateException { throw new IllegalStateException(); }
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
public int nIndividuals() { return -1; }
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class SimpleIndelROD extends TabularROD implements Genotype {
|
||||
public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant {
|
||||
|
||||
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
|
||||
|
||||
public SimpleIndelROD(String name) {
|
||||
super(name);
|
||||
|
|
@ -16,24 +18,50 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
|
|||
}
|
||||
|
||||
public List<String> getFWDAlleles() {
|
||||
if ( is1KGFormat() )
|
||||
return Arrays.asList(this.get("4"));
|
||||
|
||||
String str = this.get("3");
|
||||
return Arrays.asList(str.substring(0, str.indexOf(":")));
|
||||
}
|
||||
|
||||
public String getFWDRefBases() { return ""; }
|
||||
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
|
||||
public String getRefBasesFWD() { return ""; }
|
||||
public char getRefSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||
public char getAltSnpFWD() { throw new IllegalStateException("I'm an indel, not a SNP"); }
|
||||
public char getRef() { return 'N'; }
|
||||
public List<String> getGenotype() { return getFWDAlleles(); }
|
||||
public boolean isGenotype() { return false; }
|
||||
public boolean isPointGenotype() { return false; }
|
||||
public boolean isIndelGenotype() { return true; }
|
||||
public boolean isSNP() { return false; }
|
||||
public boolean isReference() { return false; }
|
||||
public boolean isInsertion() { return this.get("3").charAt(0) == '+'; }
|
||||
public boolean isDeletion() { return this.get("3").charAt(0) == '-'; }
|
||||
public boolean isIndel() { return false; }
|
||||
public boolean isInsertion() {
|
||||
if ( is1KGFormat() )
|
||||
return this.get("3").equals("I");
|
||||
return this.get("3").charAt(0) == '+';
|
||||
}
|
||||
public boolean isDeletion() {
|
||||
if ( is1KGFormat() )
|
||||
return this.get("3").equals("D");
|
||||
return this.get("3").charAt(0) == '-';
|
||||
}
|
||||
public boolean isIndel() { return true; }
|
||||
public double getVariantConfidence() { return 0.0; }
|
||||
public double getVariationConfidence() { return 0.0; }
|
||||
public double getConsensusConfidence() { return 0.0; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public boolean isHom() { return false; }
|
||||
public boolean isHet() { return false; }
|
||||
public double getHeterozygosity() { return 0.0; }
|
||||
public double getMAF() { return 0.0; }
|
||||
public int getPloidy() { return 2; }
|
||||
public int length() {
|
||||
if ( is1KGFormat() )
|
||||
return Integer.parseInt(this.get("2"));
|
||||
return getFWDAlleles().get(0).length();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
|
|
@ -42,4 +70,12 @@ public class SimpleIndelROD extends TabularROD implements Genotype {
|
|||
sb.append((indel.length()-1) + "\t" + (isInsertion() ? "I" : "D") + "\t" + indel.substring(1));
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private boolean is1KGFormat() {
|
||||
if ( !checkedFormat ) {
|
||||
checkedFormat = true;
|
||||
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
|
||||
}
|
||||
return KGENOMES_FORMAT;
|
||||
}
|
||||
}
|
||||
|
|
@ -246,4 +246,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
// TODO Auto-generated method stub
|
||||
return observed.indexOf('/')==observed.lastIndexOf('/');
|
||||
}
|
||||
|
||||
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ public class rodFLT extends TabularROD implements SNPCallFromGenotypes {
|
|||
public List<String> getGenotype() throws IllegalStateException { return Arrays.asList(getAltBasesFWD()); }
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
public int nIndividuals() { return -1; }
|
||||
|
|
|
|||
|
|
@ -174,4 +174,5 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
|
|||
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,6 +169,8 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
return true;
|
||||
}
|
||||
|
||||
public int length() { return 1; }
|
||||
|
||||
public char getReferenceBase() { return refBase; }
|
||||
|
||||
public int getPileupDepth() { return depth; }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||
long insertions = 0;
|
||||
long deletions = 0;
|
||||
int maxSize = 0;
|
||||
long[][] sizes = new long[2][100];
|
||||
|
||||
public IndelMetricsAnalysis() {
|
||||
super("indel_metrics");
|
||||
for (int i = 0; i < 100; i++)
|
||||
sizes[0][i] = sizes[1][i] = 0;
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
if ( eval != null && eval.isIndel() ) {
|
||||
if ( eval.isInsertion() )
|
||||
insertions++;
|
||||
else if ( eval.isDeletion() )
|
||||
deletions++;
|
||||
else
|
||||
throw new RuntimeException("Variant is indel, but isn't insertion or deletion!");
|
||||
|
||||
if ( eval.length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.length()]++;
|
||||
if ( eval.length() > maxSize )
|
||||
maxSize = eval.length();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
s.add(String.format("total_deletions %d", deletions));
|
||||
s.add(String.format("total_insertions %d", insertions));
|
||||
s.add(String.format(""));
|
||||
|
||||
s.add("Size Distribution");
|
||||
s.add("size\tdeletions\tinsertions");
|
||||
for ( int i = 1; i <= maxSize; i++ )
|
||||
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
|
||||
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
|
@ -83,7 +83,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
|
||||
|
||||
//
|
||||
// Add new analyzes here!
|
||||
// Add new analyses here!
|
||||
//
|
||||
analyses.add(new VariantCounter());
|
||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||
|
|
@ -93,9 +93,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
|
||||
analyses.add(new ClusterCounterAnalysis());
|
||||
analyses.add(new CallableBasesAnalysis());
|
||||
analyses.add(new IndelMetricsAnalysis());
|
||||
|
||||
//
|
||||
// Filter out analyzes inappropriate for our evaluation type Population or Genotype
|
||||
// Filter out analyses inappropriate for our evaluation type Population or Genotype
|
||||
//
|
||||
Iterator<VariantAnalysis> iter = analyses.iterator();
|
||||
while ( iter.hasNext() ) {
|
||||
|
|
|
|||
|
|
@ -187,6 +187,11 @@ public class GenotypeUtils {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return (int)(location.getStop() - location.getStart() + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum o) {
|
||||
return location.compareTo(o.getLocation());
|
||||
|
|
|
|||
Loading…
Reference in New Issue