Changes:
- Annotations return null when given pileups with no second-base information
- SequenomRodWithGenomeLoc -- better handling of indels
Eric, I made two small changes to the new Genotype interface that we should talk about (they basically have to do with allele/genotype representation):
Allele - added a new UNKNOWN_POINT_ALLELE to AlleleType. If I see a sequenom genotype AG, one's got to be ref, one's got to be SNP, but until I have
an actual reference base in hand, I don't know which is which. That's what this entry is for.
Genotype - added an enum class StandardAttributes for dealing with things like deletion/inversion length. This is probably not the way we want to
represent indels, so we should talk about this. Plus, now that there's a direct link between my ROD and the genotype, when we do decide
how to deal with indels, we'll be forced to alter the SequenomRodWithGenomeLoc accordingly.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2642 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cf46e3c85f
commit
ab289872e4
|
|
@ -17,7 +17,7 @@ public class Allele {
|
|||
|
||||
// the types of variants we currently allow
|
||||
public enum AlleleType {
|
||||
REFERENCE, SNP, INSERTION, DELETION, INVERSION
|
||||
REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE
|
||||
}
|
||||
|
||||
public Allele(AlleleType type, String bases) {
|
||||
|
|
@ -26,7 +26,7 @@ public class Allele {
|
|||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
|
||||
if ( type == AlleleType.DELETION && bases.length() > 0 )
|
||||
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
|
||||
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP) && bases.length() > 1 )
|
||||
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
|
||||
throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
|
||||
this.bases = bases.toUpperCase();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ import java.util.*;
|
|||
*/
|
||||
public class Genotype {
|
||||
|
||||
public enum StandardAttributes {
|
||||
DELETION_LENGTH, INVERSION_LENGTH
|
||||
}
|
||||
|
||||
private List<Allele> alleles;
|
||||
|
||||
private double negLog10PError;
|
||||
|
|
|
|||
|
|
@ -157,6 +157,10 @@ class SequenomVariantInfo implements Comparable {
|
|||
private ArrayList<Genotype> genotypes;
|
||||
private ArrayList<String> sampleNames;
|
||||
|
||||
private ArrayList<Allele> deletionHolder = new ArrayList<Allele>();
|
||||
private ArrayList<String> sampleHolder = new ArrayList<String>();
|
||||
private int siteDeletionLength = -1;
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
|
@ -188,16 +192,105 @@ class SequenomVariantInfo implements Comparable {
|
|||
|
||||
public void addGenotypeEntry(String genotypeString, String sampleName) {
|
||||
String[] alleleStrs = genotypeString.split(" ");
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2); // most, if not all, will be bi-allelic
|
||||
for ( String alStr : alleleStrs ) {
|
||||
Allele.AlleleType type = alStr.indexOf("-") > -1 ? Allele.AlleleType.DELETION : alStr.length() > 1 ? Allele.AlleleType.INSERTION : Allele.AlleleType.SNP;
|
||||
alleles.add(new Allele(type,alStr));
|
||||
// identify if we're dealing with a deletion
|
||||
if ( genotypeString.contains("-") ) {
|
||||
this.addDeletion(alleleStrs, sampleName);
|
||||
} else {
|
||||
// simple SNP or indel (easier to handle)
|
||||
this.addIndelOrSNP(alleleStrs,sampleName);
|
||||
}
|
||||
}
|
||||
|
||||
this.genotypes.add( new Genotype(alleles, sampleName, 20.0));
|
||||
private void addIndelOrSNP(String[] alleleStrings, String sampleName) {
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
|
||||
if ( alleleStrings[0].length() > 1 || alleleStrings[1].length() > 1 ) {
|
||||
// insertion
|
||||
for ( String alStr : alleleStrings ) {
|
||||
if ( alStr.length() > 1 ) {
|
||||
alleles.add(new Allele(Allele.AlleleType.INSERTION,alStr));
|
||||
} else {
|
||||
alleles.add(new Allele(Allele.AlleleType.REFERENCE, alStr));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// SNP
|
||||
for ( String alStr : alleleStrings ) {
|
||||
alleles.add(new Allele(Allele.AlleleType.UNKNOWN_POINT_ALLELE,alStr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addDeletion(String[] alleleStrings, String sampleName) {
|
||||
String alleleStr1 = alleleStrings[0];
|
||||
String alleleStr2 = alleleStrings[1];
|
||||
Allele allele1 = null;
|
||||
Allele allele2 = null;
|
||||
|
||||
if ( alleleStr1.contains("-") && alleleStr2.contains("-") ) {
|
||||
// homozygous deletion
|
||||
this.addHomDeletion(allele1,allele2, sampleName);
|
||||
} else {
|
||||
// heterozygous deletion
|
||||
if ( alleleStr1.contains("-") ) {
|
||||
this.addHetDeletion(allele1,allele2, alleleStr1, alleleStr2, sampleName);
|
||||
} else {
|
||||
this.addHetDeletion(allele2,allele1, alleleStr2, alleleStr1, sampleName); // note the order change
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addHetDeletion(Allele del, Allele ref, String delStr, String refStr, String sampleName) {
|
||||
del = new Allele(Allele.AlleleType.DELETION,"");
|
||||
ref = new Allele(Allele.AlleleType.REFERENCE,refStr.substring(0,1));
|
||||
this.setDeletionLength(del,refStr.length());
|
||||
if ( ! deletionHolder.isEmpty() ) {
|
||||
siteDeletionLength = refStr.length();
|
||||
this.addHeldDeletions();
|
||||
}
|
||||
Genotype indel = new Genotype(Arrays.asList(ref,del), sampleName, 20.0);
|
||||
this.setIndelGenotypeLength(indel,siteDeletionLength);
|
||||
this.genotypes.add(indel);
|
||||
this.sampleNames.add(sampleName);
|
||||
}
|
||||
|
||||
private void addHomDeletion(Allele allele1, Allele allele2, String sampleName) {
|
||||
allele1 = new Allele(Allele.AlleleType.DELETION,"");
|
||||
allele2 = new Allele(Allele.AlleleType.DELETION,"");
|
||||
if ( siteDeletionLength != -1 ) {
|
||||
this.setDeletionLength(allele1,siteDeletionLength);
|
||||
this.setDeletionLength(allele2,siteDeletionLength);
|
||||
Genotype indel = new Genotype(Arrays.asList(allele1,allele2), sampleName, 20.0);
|
||||
this.setIndelGenotypeLength(indel, siteDeletionLength);
|
||||
this.genotypes.add(indel);
|
||||
this.sampleNames.add(sampleName);
|
||||
} else {
|
||||
deletionHolder.add(allele1);
|
||||
deletionHolder.add(allele2);
|
||||
sampleHolder.add(sampleName);
|
||||
}
|
||||
}
|
||||
|
||||
private void setIndelGenotypeLength(Genotype g, int length) {
|
||||
g.setAttribute(Genotype.StandardAttributes.DELETION_LENGTH,length);
|
||||
}
|
||||
|
||||
private void addHeldDeletions() {
|
||||
Allele del1;
|
||||
Allele del2;
|
||||
int startingSize = deletionHolder.size();
|
||||
for ( int i = 0; i < startingSize ; i+=2 ) {
|
||||
del1 = deletionHolder.get(i);
|
||||
del2 = deletionHolder.get(i+1);
|
||||
this.addHomDeletion(del1,del2,sampleHolder.get(i/2));
|
||||
if ( deletionHolder.size() != startingSize ) {
|
||||
throw new StingException("Halting algorithm -- possible infinite loop");
|
||||
}
|
||||
}
|
||||
deletionHolder.clear();
|
||||
sampleHolder.clear();
|
||||
}
|
||||
|
||||
public int compareTo(Object obj) {
|
||||
if ( ! ( obj instanceof SequenomVariantInfo ) ) {
|
||||
return 1;
|
||||
|
|
@ -205,4 +298,10 @@ class SequenomVariantInfo implements Comparable {
|
|||
|
||||
return loc.compareTo(((SequenomVariantInfo) obj).getLocation());
|
||||
}
|
||||
|
||||
private void setDeletionLength(Allele al, int length) {
|
||||
// Todo -- once alleles support deletion lengths add that information
|
||||
// Todo -- into the object; for now this can just return
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -40,6 +40,8 @@ public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation {
|
|||
totalNonref_totalSNP = getNonrefAndSNP(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalNonref_totalSNP);
|
||||
|
||||
}
|
||||
if ( totalNonref_totalSNP.equals(new Pair<Integer,Integer>(0,0)) )
|
||||
return null;
|
||||
double p = getProportionOfNonrefBasesThatAreSNP(totalNonref_totalSNP);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,8 @@ public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotatio
|
|||
totalAndSNPSupporting = getTotalRefAndSNPSupportCounts(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
|
||||
|
||||
}
|
||||
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
|
||||
return null;
|
||||
double p = getProportionOfRefSecondaryBasesSupportingSNP(totalAndSNPSupporting);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotatio
|
|||
totalAndSNPSupporting = getTotalSNPandRefSupporting(pileup, ref.getBase(), var.getAlternativeBaseForSNP(), totalAndSNPSupporting);
|
||||
|
||||
}
|
||||
if ( totalAndSNPSupporting.equals(new Pair<Integer,Integer>(0,0)) )
|
||||
return null;
|
||||
double p = getProportionOfSNPSecondaryBasesSupportingRef(totalAndSNPSupporting);
|
||||
return String.format("%f", p );
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue