- Annotations return null when given pileups with no second-base information

- SequenomRodWithGenomeLoc -- better handling of indels

Eric; I made two small changes to the new Genotype interface that we should talk about (they basically have to do with allele/genotype representation):

Allele - added a new UNKNOWN_POINT_ALLELE to AlleleType. If I see a Sequenom genotype AG, one's got to be ref and one's got to be SNP, but until I have
         an actual reference base in hand, I don't know which is which. That's what this entry is for.

Genotype - added an enum class StandardAttributes for dealing with things like deletion/inversion length. This is probably not the way we want to
         represent indels, so we should talk about this. Plus now that there's a direct link between my ROD and the genotype; when we do decide
         how to deal with indels, we'll be forced to alter the SequenomRodWithGenomeLoc accordingly.




git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2642 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-01-20 16:45:17 +00:00
parent cf46e3c85f
commit ab289872e4
6 changed files with 116 additions and 7 deletions

View File

@ -17,7 +17,7 @@ public class Allele {
// the types of variants we currently allow
public enum AlleleType {
REFERENCE, SNP, INSERTION, DELETION, INVERSION
REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE
}
public Allele(AlleleType type, String bases) {
@ -26,7 +26,7 @@ public class Allele {
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
if ( type == AlleleType.DELETION && bases.length() > 0 )
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP) && bases.length() > 1 )
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
this.bases = bases.toUpperCase();
}

View File

@ -11,6 +11,10 @@ import java.util.*;
*/
public class Genotype {
/**
 * Keys for the standard attributes that can be attached to a genotype.
 */
public enum StandardAttributes {
    /** Attribute key under which the length of a deletion is stored. */
    DELETION_LENGTH,

    /** Attribute key for the length of an inversion. */
    INVERSION_LENGTH
}
private List<Allele> alleles;
private double negLog10PError;

View File

@ -157,6 +157,10 @@ class SequenomVariantInfo implements Comparable {
private ArrayList<Genotype> genotypes;
private ArrayList<String> sampleNames;
private ArrayList<Allele> deletionHolder = new ArrayList<Allele>();
private ArrayList<String> sampleHolder = new ArrayList<String>();
private int siteDeletionLength = -1;
/**
 * Returns the genomic location held by this variant record.
 *
 * @return the value of the {@code loc} field
 */
public GenomeLoc getLocation() {
    return this.loc;
}
@ -188,16 +192,105 @@ class SequenomVariantInfo implements Comparable {
public void addGenotypeEntry(String genotypeString, String sampleName) {
String[] alleleStrs = genotypeString.split(" ");
ArrayList<Allele> alleles = new ArrayList<Allele>(2); // most, if not all, will be bi-allelic
for ( String alStr : alleleStrs ) {
Allele.AlleleType type = alStr.indexOf("-") > -1 ? Allele.AlleleType.DELETION : alStr.length() > 1 ? Allele.AlleleType.INSERTION : Allele.AlleleType.SNP;
alleles.add(new Allele(type,alStr));
// identify if we're dealing with a deletion
if ( genotypeString.contains("-") ) {
this.addDeletion(alleleStrs, sampleName);
} else {
// simple SNP or indel (easier to handle)
this.addIndelOrSNP(alleleStrs,sampleName);
}
}
this.genotypes.add( new Genotype(alleles, sampleName, 20.0));
/**
 * Records a genotype that contains no deletion allele: either an insertion
 * (at least one allele string is longer than one base) or a point genotype.
 *
 * For an insertion genotype, each multi-base string becomes an INSERTION
 * allele and each single-base string a REFERENCE allele. For a point
 * genotype the reference base is not available here, so both alleles are
 * stored as UNKNOWN_POINT_ALLELE.
 *
 * @param alleleStrings the allele strings of the call; the first two entries
 *                      are inspected to decide between insertion and point genotype
 * @param sampleName    name of the sample the genotype was called for
 */
private void addIndelOrSNP(String[] alleleStrings, String sampleName) {
    ArrayList<Allele> alleles = new ArrayList<Allele>(2);
    if ( alleleStrings[0].length() > 1 || alleleStrings[1].length() > 1 ) {
        // insertion: the long string is the inserted sequence, a short one is the reference base
        for ( String alStr : alleleStrings ) {
            if ( alStr.length() > 1 ) {
                alleles.add(new Allele(Allele.AlleleType.INSERTION, alStr));
            } else {
                alleles.add(new Allele(Allele.AlleleType.REFERENCE, alStr));
            }
        }
    } else {
        // point genotype: cannot tell reference from SNP without the reference base
        for ( String alStr : alleleStrings ) {
            alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE, alStr));
        }
    }

    // Register the genotype and its sample, as the deletion helpers do. Previously the
    // alleles built above were discarded, so SNP and insertion calls never reached
    // the genotype list. 20.0 is the same constant used for every other genotype here.
    this.genotypes.add(new Genotype(alleles, sampleName, 20.0));
    this.sampleNames.add(sampleName);
}
/**
 * Dispatches a genotype that contains at least one deletion allele (an
 * allele string containing "-") to the homozygous or heterozygous handler.
 *
 * @param alleleStrings the allele strings of the call; the first two entries are used
 * @param sampleName    name of the sample the genotype was called for
 */
private void addDeletion(String[] alleleStrings, String sampleName) {
    final String first = alleleStrings[0];
    final String second = alleleStrings[1];

    // Placeholders only: the handlers build their own Allele objects.
    final Allele firstAllele = null;
    final Allele secondAllele = null;

    final boolean firstIsDeletion = first.contains("-");

    if ( firstIsDeletion && second.contains("-") ) {
        // both alleles deleted -> homozygous deletion
        this.addHomDeletion(firstAllele, secondAllele, sampleName);
        return;
    }

    // heterozygous deletion: the handler expects the deleted allele first
    if ( firstIsDeletion ) {
        this.addHetDeletion(firstAllele, secondAllele, first, second, sampleName);
    } else {
        this.addHetDeletion(secondAllele, firstAllele, second, first, sampleName); // arguments swapped
    }
}
/**
 * Records a heterozygous deletion genotype (one reference allele, one
 * deletion allele) and uses the reference string to establish the deletion
 * length for the site.
 *
 * Homozygous deletions seen before the site length was known are held back
 * by {@code addHomDeletion}; once the length is known they are replayed here.
 *
 * @param del        ignored; a fresh DELETION allele is created
 * @param ref        ignored; a fresh REFERENCE allele is created from {@code refStr}
 * @param delStr     allele string of the deleted allele (not read by this method)
 * @param refStr     allele string of the non-deleted allele; its first base becomes the
 *                   reference allele and its length is taken as the deletion length
 * @param sampleName name of the sample the genotype was called for
 */
private void addHetDeletion(Allele del, Allele ref, String delStr, String refStr, String sampleName) {
    final Allele deletionAllele = new Allele(Allele.AlleleType.DELETION, "");
    final Allele referenceAllele = new Allele(Allele.AlleleType.REFERENCE, refStr.substring(0, 1));
    this.setDeletionLength(deletionAllele, refStr.length());

    // Always record the site's deletion length. It used to be set only when held
    // deletions existed, so a het deletion arriving first stored -1 on its genotype
    // and left every later homozygous deletion held indefinitely.
    siteDeletionLength = refStr.length();

    if ( ! deletionHolder.isEmpty() ) {
        // the length is now known, so release the homozygous deletions that were waiting for it
        this.addHeldDeletions();
    }

    Genotype indel = new Genotype(Arrays.asList(referenceAllele, deletionAllele), sampleName, 20.0);
    this.setIndelGenotypeLength(indel, siteDeletionLength);
    this.genotypes.add(indel);
    this.sampleNames.add(sampleName);
}
/**
 * Records a homozygous deletion genotype, or holds it back when the
 * deletion length for the site has not been determined yet
 * ({@code siteDeletionLength == -1}).
 *
 * @param allele1    ignored; a fresh DELETION allele is created
 * @param allele2    ignored; a fresh DELETION allele is created
 * @param sampleName name of the sample the genotype was called for
 */
private void addHomDeletion(Allele allele1, Allele allele2, String sampleName) {
    final Allele firstDeletion = new Allele(Allele.AlleleType.DELETION, "");
    final Allele secondDeletion = new Allele(Allele.AlleleType.DELETION, "");

    if ( siteDeletionLength == -1 ) {
        // length unknown: park the alleles and the sample until it is known
        deletionHolder.add(firstDeletion);
        deletionHolder.add(secondDeletion);
        sampleHolder.add(sampleName);
        return;
    }

    this.setDeletionLength(firstDeletion, siteDeletionLength);
    this.setDeletionLength(secondDeletion, siteDeletionLength);

    Genotype indel = new Genotype(Arrays.asList(firstDeletion, secondDeletion), sampleName, 20.0);
    this.setIndelGenotypeLength(indel, siteDeletionLength);
    this.genotypes.add(indel);
    this.sampleNames.add(sampleName);
}
/**
 * Stores the given length on a genotype under the DELETION_LENGTH attribute key.
 *
 * @param g      genotype to annotate
 * @param length deletion length to store
 */
private void setIndelGenotypeLength(Genotype g, int length) {
    final Genotype.StandardAttributes lengthKey = Genotype.StandardAttributes.DELETION_LENGTH;
    g.setAttribute(lengthKey, length);
}
/**
 * Replays every held homozygous deletion through {@code addHomDeletion}
 * and then empties both holders.
 *
 * Held alleles are stored in pairs, with one sample name per pair.
 *
 * @throws StingException if the holder changes size during the replay,
 *         i.e. a replayed deletion was held again instead of being recorded
 */
private void addHeldDeletions() {
    final int heldAlleleCount = deletionHolder.size();

    int sampleIndex = 0;
    for ( int alleleIndex = 0; alleleIndex < heldAlleleCount; alleleIndex += 2, sampleIndex++ ) {
        final Allele firstHeld = deletionHolder.get(alleleIndex);
        final Allele secondHeld = deletionHolder.get(alleleIndex + 1);
        this.addHomDeletion(firstHeld, secondHeld, sampleHolder.get(sampleIndex));

        // A size change means addHomDeletion held the entry again; stop rather than loop forever.
        if ( deletionHolder.size() != heldAlleleCount ) {
            throw new StingException("Halting algorithm -- possible infinite loop");
        }
    }

    deletionHolder.clear();
    sampleHolder.clear();
}
public int compareTo(Object obj) {
if ( ! ( obj instanceof SequenomVariantInfo ) ) {
return 1;
@ -205,4 +298,10 @@ class SequenomVariantInfo implements Comparable {
return loc.compareTo(((SequenomVariantInfo) obj).getLocation());
}
/**
 * Placeholder for attaching a deletion length to an allele; currently does nothing.
 *
 * @param al     allele that would receive the length
 * @param length deletion length
 */
private void setDeletionLength(Allele al, int length) {
    // Todo -- once alleles support deletion lengths add that information
    // Todo -- into the object; for now this is intentionally a no-op
}
}

View File

@ -40,6 +40,8 @@ public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation {
totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalNonref_totalSNP);
}
if ( totalNonref_totalSNP.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
return String.format("%f", p );
}

View File

@ -43,6 +43,8 @@ public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotatio
totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
return String.format("%f", p );
}

View File

@ -39,6 +39,8 @@ public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotatio
totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
}
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
return null;
double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
return String.format("%f", p );
}