diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java
index 5e34aaa5a..cc35f4ad1 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java
@@ -17,7 +17,7 @@ public class Allele {
 
     // the types of variants we currently allow
     public enum AlleleType {
-        REFERENCE, SNP, INSERTION, DELETION, INVERSION
+        REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE
     }
 
     public Allele(AlleleType type, String bases) {
@@ -26,7 +26,7 @@ public class Allele {
             throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
         if ( type == AlleleType.DELETION && bases.length() > 0 )
             throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
-        if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP) && bases.length() > 1 )
+        if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
             throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
         this.bases = bases.toUpperCase();
     }
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
index ace9cda35..cbdb96e3a 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/Genotype.java
@@ -11,6 +11,10 @@ import java.util.*;
  */
 public class Genotype {
 
+    public enum StandardAttributes {
+        DELETION_LENGTH, INVERSION_LENGTH
+    }
+
     private List alleles;
 
     private double negLog10PError;
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SequenomRodWithGenomeLoc.java b/java/src/org/broadinstitute/sting/gatk/refdata/SequenomRodWithGenomeLoc.java
index 69fae36c9..57a4230c3 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/SequenomRodWithGenomeLoc.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/SequenomRodWithGenomeLoc.java
@@ -157,6 +157,11 @@ class SequenomVariantInfo implements Comparable {
     private ArrayList genotypes;
     private ArrayList sampleNames;
 
+    // samples whose homozygous deletion was seen before the site's deletion length was known
+    private ArrayList<String> sampleHolder = new ArrayList<String>();
+    // deletion length at this site; -1 until a heterozygous deletion has been seen
+    private int siteDeletionLength = -1;
+
     public GenomeLoc getLocation() {
         return loc;
     }
@@ -188,16 +193,118 @@ class SequenomVariantInfo implements Comparable {
     public void addGenotypeEntry(String genotypeString, String sampleName) {
         String[] alleleStrs = genotypeString.split(" ");
 
-        ArrayList alleles = new ArrayList(2); // most, if not all, will be bi-allelic
-        for ( String alStr : alleleStrs ) {
-            Allele.AlleleType type = alStr.indexOf("-") > -1 ? Allele.AlleleType.DELETION : alStr.length() > 1 ? Allele.AlleleType.INSERTION : Allele.AlleleType.SNP;
-            alleles.add(new Allele(type,alStr));
+        // identify if we're dealing with a deletion
+        if ( genotypeString.contains("-") ) {
+            this.addDeletion(alleleStrs, sampleName);
+        } else {
+            // simple SNP or indel (easier to handle)
+            this.addIndelOrSNP(alleleStrs,sampleName);
         }
+    }
 
-        this.genotypes.add( new Genotype(alleles, sampleName, 20.0));
+    /**
+     * Records a genotype that has no deletion allele. If any allele string is longer than
+     * one base, the longer alleles are insertions and the single-base alleles are reference;
+     * otherwise every allele is a point allele whose reference/variant status is unknown.
+     */
+    private void addIndelOrSNP(String[] alleleStrings, String sampleName) {
+        boolean isInsertion = false;
+        for ( String alStr : alleleStrings ) {
+            if ( alStr.length() > 1 )
+                isInsertion = true;
+        }
+
+        ArrayList<Allele> alleles = new ArrayList<Allele>(alleleStrings.length);
+        for ( String alStr : alleleStrings ) {
+            Allele.AlleleType type;
+            if ( ! isInsertion )
+                type = Allele.AlleleType.UNKNOWN_POINT_ALLELE;
+            else if ( alStr.length() > 1 )
+                type = Allele.AlleleType.INSERTION;
+            else
+                type = Allele.AlleleType.REFERENCE;
+            alleles.add(new Allele(type,alStr));
+        }
+
+        // store the genotype; without this, SNP and insertion calls would be silently dropped
+        this.genotypes.add(new Genotype(alleles, sampleName, 20.0));
+        this.sampleNames.add(sampleName);
+    }
+
+    /**
+     * Records a genotype in which at least one of the two allele strings contains "-".
+     * An allele string without "-" is taken to be the undeleted reference sequence.
+     */
+    private void addDeletion(String[] alleleStrings, String sampleName) {
+        if ( alleleStrings.length != 2 ) {
+            throw new StingException("Deletion genotypes must have exactly two alleles; sample " + sampleName + " has " + alleleStrings.length);
+        }
+        boolean firstDeleted = alleleStrings[0].contains("-");
+        boolean secondDeleted = alleleStrings[1].contains("-");
+
+        if ( firstDeleted && secondDeleted ) {
+            // homozygous deletion
+            this.addHomDeletion(sampleName);
+        } else {
+            // heterozygous deletion
+            this.addHetDeletion(firstDeleted ? alleleStrings[1] : alleleStrings[0], sampleName);
+        }
+    }
+
+    /**
+     * Records a heterozygous deletion. The length of refStr is used as the deletion length
+     * for the site, and any homozygous deletions held back so far are recorded first.
+     */
+    private void addHetDeletion(String refStr, String sampleName) {
+        Allele del = new Allele(Allele.AlleleType.DELETION,"");
+        Allele ref = new Allele(Allele.AlleleType.REFERENCE,refStr.substring(0,1));
+        // set the site length unconditionally so that this genotype and every later
+        // homozygous deletion is tagged with it rather than with the -1 placeholder
+        siteDeletionLength = refStr.length();
+        this.setDeletionLength(del,siteDeletionLength);
+        this.addHeldDeletions();
+        Genotype indel = new Genotype(Arrays.asList(ref,del), sampleName, 20.0);
+        this.setIndelGenotypeLength(indel,siteDeletionLength);
+        this.genotypes.add(indel);
         this.sampleNames.add(sampleName);
     }
 
+    /**
+     * Records a homozygous deletion, or holds the sample back until a heterozygous
+     * deletion at this site has revealed the deletion length.
+     */
+    private void addHomDeletion(String sampleName) {
+        if ( siteDeletionLength == -1 ) {
+            // NOTE: held samples are only recorded once a heterozygous deletion is added
+            sampleHolder.add(sampleName);
+            return;
+        }
+        Allele allele1 = new Allele(Allele.AlleleType.DELETION,"");
+        Allele allele2 = new Allele(Allele.AlleleType.DELETION,"");
+        this.setDeletionLength(allele1,siteDeletionLength);
+        this.setDeletionLength(allele2,siteDeletionLength);
+        Genotype indel = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
+        this.setIndelGenotypeLength(indel, siteDeletionLength);
+        this.genotypes.add(indel);
+        this.sampleNames.add(sampleName);
+    }
+
+    /** Stores the deletion length on the genotype as its DELETION_LENGTH attribute. */
+    private void setIndelGenotypeLength(Genotype g, int length) {
+        g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
+    }
+
+    /** Records every held homozygous deletion; the site deletion length must be known. */
+    private void addHeldDeletions() {
+        if ( siteDeletionLength == -1 ) {
+            throw new StingException("Held deletions cannot be recorded before the site deletion length is known");
+        }
+        for ( String heldSample : sampleHolder ) {
+            this.addHomDeletion(heldSample);
+        }
+        sampleHolder.clear();
+    }
+
     public int compareTo(Object obj) {
         if ( ! ( obj instanceof SequenomVariantInfo ) ) {
             return 1;
@@ -205,4 +312,10 @@ class SequenomVariantInfo implements Comparable {
 
         return loc.compareTo(((SequenomVariantInfo) obj).getLocation());
     }
+
+    private void setDeletionLength(Allele al, int length) {
+        // Todo -- once alleles support deletion lengths add that information
+        // Todo -- into the object; for now this can just return
+        return;
+    }
 }
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java
index 6234e59f5..239f986f1 100644
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java
@@ -40,6 +40,8 @@ public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation {
             totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalNonref_totalSNP);
         }
 
+        if ( totalNonref_totalSNP.equals(new Pair(0,0)) )
+            return null;
         double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
         return String.format("%f", p );
     }
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java
index dd3219ba2..14ba6a050 100644
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java
@@ -43,6 +43,8 @@ public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotatio
             totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
         }
 
+        if ( totalAndSNPSupporting.equals(new Pair(0,0)) )
+            return null;
         double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
         return String.format("%f", p );
     }
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java
index 33216fdf0..5df9c2b47 100644
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java
@@ -39,6 +39,8 @@ public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotatio
             totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
         }
 
+        if ( totalAndSNPSupporting.equals(new Pair(0,0)) )
+            return null;
         double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
         return String.format("%f", p );
     }