2009-07-17 22:36:12 +08:00
|
|
|
package org.broadinstitute.sting.gatk.refdata;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
|
|
|
|
2009-09-24 02:24:05 +08:00
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.List;
|
2009-07-17 22:36:12 +08:00
|
|
|
|
2010-01-07 13:51:10 +08:00
|
|
|
public class SimpleIndelROD extends TabularROD implements VariationRod {
|
2009-07-28 11:25:03 +08:00
|
|
|
|
|
|
|
|
// Cached result of the format check done in is1KGFormat(): checkedFormat records whether the
// check has already run, KGENOMES_FORMAT holds its outcome.
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
|
2009-07-17 22:36:12 +08:00
|
|
|
|
|
|
|
|
/**
 * Creates an indel record.
 *
 * @param name the name handed straight to the TabularROD constructor
 */
public SimpleIndelROD(final String name) {
    super(name);
}
|
|
|
|
|
|
|
|
|
|
public GenomeLoc getLocation() {
|
2009-09-01 11:34:39 +08:00
|
|
|
long pos = Long.parseLong(this.get("1"));
|
|
|
|
|
return GenomeLocParser.createGenomeLoc(this.get("0"), pos, (isDeletion() ? pos+length() : pos+1));
|
2009-07-17 22:36:12 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-24 02:24:05 +08:00
|
|
|
/**
|
|
|
|
|
* get the reference base(s) at this position
|
|
|
|
|
*
|
|
|
|
|
* @return the reference base or bases, as a string
|
|
|
|
|
*/
|
|
|
|
|
public String getReference() {
|
|
|
|
|
return String.valueOf(getRef());
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
public List<String> getFWDAlleles() {
|
2009-07-28 11:25:03 +08:00
|
|
|
if ( is1KGFormat() )
|
|
|
|
|
return Arrays.asList(this.get("4"));
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
String str = this.get("3");
|
2009-08-12 04:28:24 +08:00
|
|
|
return Arrays.asList(str.substring(1, str.indexOf(":")));
|
2009-07-17 22:36:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** @return always the empty string */
public String getFWDRefBases() {
    return "";
}
|
2009-07-28 11:25:03 +08:00
|
|
|
/** @return the first entry of getFWDAlleles() */
public String getAltBasesFWD() {
    final List<String> alleles = getFWDAlleles();
    return alleles.get(0);
}
|
|
|
|
|
/** @return always the empty string */
public String getRefBasesFWD() {
    return "";
}
|
|
|
|
|
/**
 * Not supported for indel records.
 *
 * @throws IllegalStateException always
 */
public char getRefSnpFWD() {
    throw new IllegalStateException("I'm an indel, not a SNP");
}
|
|
|
|
|
/**
 * Not supported for indel records.
 *
 * @throws IllegalStateException always
 */
public char getAltSnpFWD() {
    throw new IllegalStateException("I'm an indel, not a SNP");
}
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always the character 'N' */
public char getRef() {
    return 'N';
}
|
2009-07-28 11:25:03 +08:00
|
|
|
/** @return the same list that getFWDAlleles() produces */
public List<String> getGenotype() {
    final List<String> alleles = getFWDAlleles();
    return alleles;
}
|
|
|
|
|
/** @return always false */
public boolean isGenotype() {
    return false;
}
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always false */
public boolean isPointGenotype() {
    return false;
}
|
|
|
|
|
/** @return always true */
public boolean isIndelGenotype() {
    return true;
}
|
2009-09-24 02:24:05 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get the frequency of this variant
|
|
|
|
|
*
|
|
|
|
|
* @return VariantFrequency with the stored frequency
|
|
|
|
|
*/
|
|
|
|
|
public double getNonRefAlleleFrequency() {
|
|
|
|
|
return 0.0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** @return the VARIANT_TYPE of the current variant */
|
|
|
|
|
public VARIANT_TYPE getType() {
|
2009-11-25 05:07:55 +08:00
|
|
|
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
|
2009-09-24 02:24:05 +08:00
|
|
|
}
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always false */
public boolean isSNP() {
    return false;
}
|
|
|
|
|
/** @return always false */
public boolean isReference() {
    return false;
}
|
2009-09-24 02:24:05 +08:00
|
|
|
|
2009-07-28 11:25:03 +08:00
|
|
|
public boolean isInsertion() {
|
|
|
|
|
if ( is1KGFormat() )
|
|
|
|
|
return this.get("3").equals("I");
|
|
|
|
|
return this.get("3").charAt(0) == '+';
|
|
|
|
|
}
|
|
|
|
|
public boolean isDeletion() {
|
|
|
|
|
if ( is1KGFormat() )
|
|
|
|
|
return this.get("3").equals("D");
|
|
|
|
|
return this.get("3").charAt(0) == '-';
|
|
|
|
|
}
|
|
|
|
|
/** @return always true */
public boolean isIndel() {
    return true;
}
|
2009-09-24 02:24:05 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
|
|
|
|
* of
|
|
|
|
|
*
|
|
|
|
|
* @return a char, representing the alternate base
|
|
|
|
|
*/
|
|
|
|
|
public char getAlternativeBaseForSNP() {
|
|
|
|
|
return getAltSnpFWD();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
|
|
|
|
*
|
|
|
|
|
* @return a char, representing the alternate base
|
|
|
|
|
*/
|
|
|
|
|
public char getReferenceForSNP() {
|
|
|
|
|
return getRefSnpFWD();
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always 0.0 */
public double getVariantConfidence() {
    return 0.0;
}
|
2009-07-28 11:25:03 +08:00
|
|
|
/** @return always 0.0 */
public double getVariationConfidence() {
    return 0.0;
}
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always 0.0 */
public double getConsensusConfidence() {
    return 0.0;
}
|
|
|
|
|
/** @return always true */
public boolean isBiallelic() {
    return true;
}
|
2009-09-24 02:24:05 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get the -1 * (log 10 of the error value)
|
|
|
|
|
*
|
|
|
|
|
* @return the log based error estimate
|
|
|
|
|
*/
|
|
|
|
|
public double getNegLog10PError() {
|
|
|
|
|
return getVariationConfidence();
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-23 14:31:15 +08:00
|
|
|
/**
|
|
|
|
|
* gets the alternate alleles. This method should return all the alleles present at the location,
|
|
|
|
|
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
|
|
|
|
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
|
|
|
|
* frequency).
|
|
|
|
|
*
|
|
|
|
|
* @return an alternate allele list
|
|
|
|
|
*/
|
|
|
|
|
public List<String> getAlternateAlleleList() {
|
|
|
|
|
List<String> ret = getAlleleList();
|
|
|
|
|
for (String val : ret) {
|
|
|
|
|
if (val.equals(this.getReference())) ret.remove(val);
|
|
|
|
|
}
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* gets the alleles. This method should return all the alleles present at the location,
|
|
|
|
|
* including the reference base. The first allele should always be the reference allele, followed
|
|
|
|
|
* by an unordered list of alternate alleles.
|
|
|
|
|
*
|
|
|
|
|
* @return an alternate allele list
|
|
|
|
|
*/
|
|
|
|
|
public List<String> getAlleleList() {
|
|
|
|
|
return this.getFWDAlleles();
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
/** @return always false */
public boolean isHom() {
    return false;
}
|
|
|
|
|
/** @return always false */
public boolean isHet() {
    return false;
}
|
2009-07-28 11:25:03 +08:00
|
|
|
/** @return always 0.0 */
public double getHeterozygosity() {
    return 0.0;
}
|
|
|
|
|
/** @return always 0.0 */
public double getMAF() {
    return 0.0;
}
|
|
|
|
|
/** @return always 2 */
public int getPloidy() {
    return 2;
}
|
|
|
|
|
public int length() {
|
|
|
|
|
if ( is1KGFormat() )
|
|
|
|
|
return Integer.parseInt(this.get("2"));
|
|
|
|
|
return getFWDAlleles().get(0).length();
|
|
|
|
|
}
|
2009-07-17 22:36:12 +08:00
|
|
|
|
2009-08-18 23:26:10 +08:00
|
|
|
public boolean allowIncompleteRecords() {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-07 09:12:09 +08:00
|
|
|
public String getSamplesString() {
|
|
|
|
|
return (is1KGFormat() && this.get("5") != null ? this.get("5") : "");
|
|
|
|
|
}
|
|
|
|
|
|
2009-07-17 22:36:12 +08:00
|
|
|
public String toString() {
|
|
|
|
|
StringBuffer sb = new StringBuffer();
|
2009-07-17 23:59:18 +08:00
|
|
|
sb.append(getLocation().getContig() + "\t" + getLocation().getStart() + "\t");
|
2009-08-18 11:04:12 +08:00
|
|
|
sb.append(length() + "\t" + (isInsertion() ? "I" : "D") + "\t" + getFWDAlleles().get(0));
|
2009-09-07 09:12:09 +08:00
|
|
|
String samples = getSamplesString();
|
|
|
|
|
if ( samples.length() > 0 )
|
|
|
|
|
sb.append("\t" + samples);
|
2009-07-17 22:36:12 +08:00
|
|
|
return sb.toString();
|
|
|
|
|
}
|
2009-07-28 11:25:03 +08:00
|
|
|
|
|
|
|
|
private boolean is1KGFormat() {
|
|
|
|
|
if ( !checkedFormat ) {
|
|
|
|
|
checkedFormat = true;
|
|
|
|
|
KGENOMES_FORMAT = this.get("3").equals("D") || this.get("3").equals("I");
|
|
|
|
|
}
|
|
|
|
|
return KGENOMES_FORMAT;
|
|
|
|
|
}
|
2009-09-25 12:35:52 +08:00
|
|
|
}
|