refactoring: a couple of general purpose (hopefully useful?) methods/classes extracted into a standalone utils class
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@802 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4b718688d5
commit
02fc4f145f
|
|
@ -19,7 +19,9 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.playground.utils.GenotypingCallStats;
|
import org.broadinstitute.sting.playground.utils.GenotypingCallStats;
|
||||||
import org.broadinstitute.sting.playground.utils.TrioConcordanceRecord;
|
import org.broadinstitute.sting.playground.utils.TrioConcordanceRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenotypeUtils;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.GenotypeUtils.VariantType;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
//@Requires(value=DataSource.REFERENCE,referenceMetaData={@RMD(name="mother",type=rodSAMPileup.class),
|
//@Requires(value=DataSource.REFERENCE,referenceMetaData={@RMD(name="mother",type=rodSAMPileup.class),
|
||||||
|
|
@ -34,18 +36,16 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
|
||||||
@Argument(fullName="log_concordant", shortName="LC",doc="If set, all trio-concordant sites will be logged at level INFO") public boolean LOG_CONCORDANT;
|
@Argument(fullName="log_concordant", shortName="LC",doc="If set, all trio-concordant sites will be logged at level INFO") public boolean LOG_CONCORDANT;
|
||||||
@Argument(fullName="log_discordant", shortName="LD",doc="If set, all trio-discordant sites will be logged at level INFO") public boolean LOG_DISCORDANT;
|
@Argument(fullName="log_discordant", shortName="LD",doc="If set, all trio-discordant sites will be logged at level INFO") public boolean LOG_DISCORDANT;
|
||||||
@Argument(fullName="default_reference_calls",shortName="DRC",
|
@Argument(fullName="default_reference_calls",shortName="DRC",
|
||||||
doc="If set to INDEL or POINT, any position where the specified genotype is NOT explicitly specified, while the other (point or indel, respectively) is provided, is considered to be a confident 'reference' (no-indel or no-snp) call")
|
doc="If set, any position where the specified genotype is NOT explicitly specified, while the other is provided, is considered to be an implicit confident 'reference' (no-indel or no-snp) call")
|
||||||
public String defCalls;
|
public boolean defCalls;
|
||||||
@Argument(fullName="variant_type",
|
@Argument(fullName="variant_type",
|
||||||
shortName="VT",
|
shortName="VT",
|
||||||
doc="Assess concordance for the variants of the specified type, INDEL or POINT. If genotype track(s) provide both types, the requested one will be selected",
|
doc="Assess concordance for the variants of the specified type, INDEL or POINT. If genotype track(s) provide both types, the requested one will be selected",
|
||||||
required=true)
|
required=true)
|
||||||
public String VARIANT_TYPE;
|
public GenotypeUtils.VariantType VARIANT_TYPE;
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(MendelianInheritanceWalker.class);
|
private static Logger logger = Logger.getLogger(MendelianInheritanceWalker.class);
|
||||||
private final static String star = new String("*");
|
private final static String star = new String("*");
|
||||||
private int variant_type = 1; //0 - point, 1 - indel
|
|
||||||
private int default_calls = 0; // 0 - none, 1 - indels, 2 - point, 3 - both
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TrioConcordanceRecord map(RefMetaDataTracker rodData, char ref, LocusContext context) {
|
public TrioConcordanceRecord map(RefMetaDataTracker rodData, char ref, LocusContext context) {
|
||||||
|
|
@ -56,54 +56,25 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
|
||||||
ReferenceOrderedDatum rodDad = rodData.lookup("father", null);
|
ReferenceOrderedDatum rodDad = rodData.lookup("father", null);
|
||||||
ReferenceOrderedDatum rodKid = rodData.lookup("daughter", null);
|
ReferenceOrderedDatum rodKid = rodData.lookup("daughter", null);
|
||||||
|
|
||||||
Genotype mom = extractGenotype(rodMom,variant_type);
|
Genotype mom = GenotypeUtils.extractGenotype(rodMom,VARIANT_TYPE,defCalls);
|
||||||
Genotype dad = extractGenotype(rodDad,variant_type);
|
Genotype dad = GenotypeUtils.extractGenotype(rodDad,VARIANT_TYPE,defCalls);
|
||||||
Genotype kid = extractGenotype(rodKid,variant_type);
|
Genotype kid = GenotypeUtils.extractGenotype(rodKid,VARIANT_TYPE,defCalls);
|
||||||
|
|
||||||
return assessGenotypesInTrio(mom, dad, kid);
|
return assessGenotypesInTrio(mom, dad, kid);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/*
|
||||||
|
* @Override(non-Javadoc)
|
||||||
|
* @see org.broadinstitute.sting.gatk.walkers.Walker#initialize()
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
super.initialize();
|
super.initialize();
|
||||||
VARIANT_TYPE = VARIANT_TYPE.toUpperCase();
|
|
||||||
if ( VARIANT_TYPE.equals("POINT")) variant_type = 0;
|
|
||||||
else if ( VARIANT_TYPE.equals("INDEL")) variant_type = 1;
|
|
||||||
else throw new StingException("Unknown value specified for VARIANT_TYPE. Allowed: POINT, INDEL; passed: "+VARIANT_TYPE);
|
|
||||||
if ( defCalls == null ) return;
|
if ( defCalls == null ) return;
|
||||||
defCalls = defCalls.toUpperCase();
|
defCalls = defCalls.toUpperCase();
|
||||||
if ( defCalls.equals("INDEL")) default_calls = 1;
|
if ( defCalls.equals("INDEL")) default_calls = 1;
|
||||||
else throw new StingException("POINT or BOTH default calls are not implemented yet");
|
else throw new StingException("POINT or BOTH default calls are not implemented yet");
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
private Genotype extractGenotype(ReferenceOrderedDatum gl, int variant_type) {
|
|
||||||
|
|
||||||
if ( gl == null ) return null;
|
|
||||||
|
|
||||||
if ( gl instanceof GenotypeList ) {
|
|
||||||
|
|
||||||
if ( variant_type == 0 ) return ((GenotypeList)gl).getPointGenotype();
|
|
||||||
else if ( variant_type == 1 ) {
|
|
||||||
if ( ((GenotypeList)gl).hasIndelGenotype() ) return ((GenotypeList)gl).getIndelGenotype();
|
|
||||||
else {
|
|
||||||
if ( ( default_calls == 1|| default_calls == 3 ) && ((GenotypeList)gl).hasPointGenotype() ) return new DefaultIndelGenotype(gl.getLocation());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if ( gl instanceof Genotype ) {
|
|
||||||
switch ( variant_type ) {
|
|
||||||
case 0:
|
|
||||||
if ( ((Genotype)gl).isIndelGenotype() ) return null;
|
|
||||||
else return (Genotype)gl;
|
|
||||||
case 1:
|
|
||||||
if ( ((Genotype)gl).isPointGenotype() ) return null;
|
|
||||||
else return (Genotype)gl;
|
|
||||||
default: throw new StingException("Unknown variant type specified");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else throw new StingException("track "+gl.getName()+" is not a Genotype or GenotypeList");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Takes a single genotype object and returns properly filled new assessment object (covered/assessed/ref/variant set to 0/1
|
/** Takes a single genotype object and returns properly filled new assessment object (covered/assessed/ref/variant set to 0/1
|
||||||
* according to what the genotype says)
|
* according to what the genotype says)
|
||||||
|
|
@ -254,100 +225,6 @@ public class MendelianInheritanceWalker extends RefWalker<TrioConcordanceRecord
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class DefaultIndelGenotype implements Genotype {
|
|
||||||
private GenomeLoc location;
|
|
||||||
private static final List<String> alleles = Arrays.asList("","") ;
|
|
||||||
|
|
||||||
DefaultIndelGenotype(GenomeLoc l) {
|
|
||||||
location = l;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double getConsensusConfidence() {
|
|
||||||
return 1000;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<String> getFWDAlleles() {
|
|
||||||
return alleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getFWDRefBases() {
|
|
||||||
return alleles.get(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public GenomeLoc getLocation() {
|
|
||||||
return location;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public char getRef() {
|
|
||||||
return '*';
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double getVariantConfidence() {
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isBiallelic() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isDeletion() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isHet() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isHom() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isIndel() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isIndelGenotype() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isInsertion() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isPointGenotype() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isReference() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isSNP() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareTo(ReferenceOrderedDatum o) {
|
|
||||||
return location.compareTo(o.getLocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
protected String shortLine(Genotype av) {
|
protected String shortLine(Genotype av) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,196 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
|
||||||
|
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
|
||||||
|
*
|
||||||
|
* @author asivache
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class GenotypeUtils {
|
||||||
|
|
||||||
|
public static enum VariantType {
|
||||||
|
POINT, INDEL;
|
||||||
|
VariantType() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
|
||||||
|
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
|
||||||
|
* will be cast to Genotype and returned if it implements Genotype and represents the specified variant type. If the last argument is false, then
|
||||||
|
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call
|
||||||
|
* of the other type was made, or b) rod is a Genotype and its type is different from variant_type, then it will be assumed that the implicit ref/ref call
|
||||||
|
* of the specified type also exists at this genomic position, and new object representing such default call will be returned. If rod argument
|
||||||
|
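* is null, then this method safely (and silently) returns null. Note that implicit POINT reference calls are not implemented yet: when one would be required, a StingException is thrown instead.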
|
||||||
|
*
|
||||||
|
* @param rod GenotypeList or Genotype to extract requested call from/upgrade to requested call
|
||||||
|
* @param variant_type type of the variant to extract (POINT mutations or INDEL)
|
||||||
|
* @param assume_implicit_ref_calls true if presence of only a call of different type means that ref/ref call of the requested type is implicitly present
|
||||||
|
* @return Genotyping call of the requested type or null if no explicit or implicit call was made.
|
||||||
|
*/
|
||||||
|
public static Genotype extractGenotype(ReferenceOrderedDatum rod, VariantType variant_type, boolean assume_implicit_ref_calls) {
|
||||||
|
|
||||||
|
if ( rod == null ) return null;
|
||||||
|
|
||||||
|
if ( rod instanceof GenotypeList ) {
|
||||||
|
|
||||||
|
GenotypeList rod_gl = (GenotypeList)rod;
|
||||||
|
switch ( variant_type ) {
|
||||||
|
case POINT :
|
||||||
|
if ( rod_gl.hasPointGenotype() ) return rod_gl.getPointGenotype();
|
||||||
|
else {
|
||||||
|
if ( assume_implicit_ref_calls && rod_gl.hasIndelGenotype() ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
|
||||||
|
else return null;
|
||||||
|
}
|
||||||
|
case INDEL:
|
||||||
|
if ( rod_gl.hasIndelGenotype() ) return rod_gl.getIndelGenotype();
|
||||||
|
else {
|
||||||
|
if ( assume_implicit_ref_calls && rod_gl.hasPointGenotype() ) return new DefaultIndelGenotype(rod_gl.getLocation());
|
||||||
|
else return null;
|
||||||
|
}
|
||||||
|
default: throw new StingException("Unrecognized variant type: "+variant_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if ( rod instanceof Genotype ) {
|
||||||
|
|
||||||
|
Genotype rod_g = (Genotype)rod;
|
||||||
|
switch ( variant_type ) {
|
||||||
|
case POINT:
|
||||||
|
if ( rod_g.isIndelGenotype() ) {
|
||||||
|
if ( assume_implicit_ref_calls ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
|
||||||
|
else return null;
|
||||||
|
} else return rod_g;
|
||||||
|
case INDEL:
|
||||||
|
if ( rod_g.isPointGenotype() ) {
|
||||||
|
if ( assume_implicit_ref_calls ) return new DefaultIndelGenotype(rod_g.getLocation());
|
||||||
|
else return null;
|
||||||
|
} else return rod_g;
|
||||||
|
default: throw new StingException("Unrecognized variant type: "+variant_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
|
||||||
|
* call. All the interface methods are implemented and return consistent values. Use this class when working with
|
||||||
|
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed
|
||||||
|
* (SAM pileup is one such example).
|
||||||
|
* @author asivache
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public static class DefaultIndelGenotype implements Genotype {
|
||||||
|
private GenomeLoc location;
|
||||||
|
private static final List<String> alleles = Arrays.asList("","") ;
|
||||||
|
private int confidence ;
|
||||||
|
|
||||||
|
/** Creates default indel genotype (ref/ref = no indel) at the specified position
|
||||||
|
* with default consensus confidence of 1000 (variant confidence is 0).
|
||||||
|
* @param l reference position to associate the genotyping call with
|
||||||
|
*/
|
||||||
|
public DefaultIndelGenotype(GenomeLoc l) {
|
||||||
|
this(l,1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates ref/ref (i.e. absence of indel) indel genotype at the specified position
|
||||||
|
* with the specified consensus confidence (variant confidence is 0).
|
||||||
|
* @param l reference position to associate the genotyping call with
|
||||||
|
*/
|
||||||
|
public DefaultIndelGenotype(GenomeLoc l, int confidence) {
|
||||||
|
location = l;
|
||||||
|
this.confidence = confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double getConsensusConfidence() {
|
||||||
|
return confidence;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getFWDAlleles() {
|
||||||
|
return alleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFWDRefBases() {
|
||||||
|
return alleles.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GenomeLoc getLocation() {
|
||||||
|
return location;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public char getRef() {
|
||||||
|
return '*';
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double getVariantConfidence() {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isBiallelic() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isDeletion() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isHet() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isHom() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isIndel() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isIndelGenotype() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isInsertion() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isPointGenotype() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isSNP() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(ReferenceOrderedDatum o) {
|
||||||
|
return location.compareTo(o.getLocation());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue