reimplemented; now implements Genotype interface instead of AllelicVariant

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@481 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-21 21:06:42 +00:00
parent 5f37ba8f26
commit 55ca272919
1 changed files with 47 additions and 34 deletions

View File

@ -25,7 +25,7 @@ import org.broadinstitute.sting.utils.Pileup;
* Time: 2:58:33 PM * Time: 2:58:33 PM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVariant, Pileup { public class rodSAMPileup extends ReferenceOrderedDatum implements Genotype, Pileup {
private static final int NO_VARIANT = -1; private static final int NO_VARIANT = -1;
private static final int SNP_VARIANT = 0; private static final int SNP_VARIANT = 0;
private static final int INSERTION_VARIANT = 1; private static final int INSERTION_VARIANT = 1;
@ -39,13 +39,13 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
private static final String baseT = new String("T"); private static final String baseT = new String("T");
private static final String emptyStr = new String(); // we will use this for "reference" allele in insertions private static final String emptyStr = new String(); // we will use this for "reference" allele in insertions
protected GenomeLoc loc; // genome location of SNP protected GenomeLoc loc; // genomic location of this genotyped site
// Reference sequence chromosome or scaffold // Reference sequence chromosome or scaffold
// Start and stop positions in chrom // Start and stop positions in chrom
protected char refBaseChar; // what we have set for the reference base (is set to a '*' for indel!) protected char refBaseChar; // what we have set for the reference base (is set to a '*' for indel!)
protected String refBases; // the reference base according to NCBI, in the dbSNP file protected String refBases; // the reference base sequence according to NCBI
protected String observedString; // store the actual string representation of observed alleles protected String observedString; // store the actual string representation of observed alleles
protected String pileupQuals; // the read base qualities protected String pileupQuals; // the read base qualities
@ -53,7 +53,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
protected List<String> observedAlleles = null; // The sequences of the observed alleles from rs-fasta files protected List<String> observedAlleles = null; // The sequences of the observed alleles from rs-fasta files
protected int varType = NO_VARIANT; protected int varType = NO_VARIANT;
protected int ploidy = 2; // how many allelic variants we get? protected int ploidy = 2; // how many allelic variants we observe?
protected int nNonref = 0; // number of non-reference alleles protected int nNonref = 0; // number of non-reference alleles
protected int eventLength = 0; protected int eventLength = 0;
@ -92,8 +92,8 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
if ( refBaseChar == '*' ) { if ( refBaseChar == '*' ) {
parseIndels(parts[3]) ; parseIndels(parts[3]) ;
if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength); if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength-1);
else loc = new GenomeLoc(contig, start, start); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!! else loc = new GenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
} }
else { else {
// if the variant is a SNP or a reference base (i.e. no variant at all) // if the variant is a SNP or a reference base (i.e. no variant at all)
@ -134,7 +134,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
System.out.printf(" Exception caught during parsing BasicPileup line: %s%n", Utils.join(" <=> ", parts)); System.out.printf(" Exception caught during parsing BasicPileup line: %s%n", Utils.join(" <=> ", parts));
throw e; throw e;
} }
if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString()); // if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: multi-allelic variant : ("+refBaseChar+") -->"+toMediumString());
} }
@ -152,7 +152,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
continue; // we will fill reference allele later continue; // we will fill reference allele later
} }
String varBases = obs[i].substring(1).toUpperCase(); String varBases = obs[i].toUpperCase();
switch ( obs[i].charAt(0) ) { switch ( obs[i].charAt(0) ) {
case '+': case '+':
@ -161,7 +161,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
refBases = emptyStr; refBases = emptyStr;
break; break;
case '-' : case '-' :
if ( varType != -1 && varType != DELETION_VARIANT ) varType = INDEL_VARIANT; if ( varType != NO_VARIANT && varType != DELETION_VARIANT ) varType = INDEL_VARIANT;
else varType = DELETION_VARIANT; else varType = DELETION_VARIANT;
refBases = varBases; // remember what was deleted, this will be saved as "reference allele" refBases = varBases; // remember what was deleted, this will be saved as "reference allele"
break; break;
@ -186,9 +186,8 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
} else { } else {
// we observe two non-ref alleles; they better be the same variant, otherwise the site is not bi-allelic and at the moment we // we observe two non-ref alleles; they better be the same variant, otherwise the site is not bi-allelic and at the moment we
// fail to set data in a consistent way.. (the check for INDEL_VARIANT ensures that recorded variants are indeed both insertions // fail to set data in a consistent way.. (the check for INDEL_VARIANT ensures that recorded variants are indeed both insertions
// or both deletions as compared to +ACC/-ACC which would still have the same bases (no matter how crazy and improbable // or both deletions as compared to +ACC/-ACC which would still have the same bases
// such event would be) if ( observedAlleles.get(0).equals(observedAlleles.get(1)) ) nNonref = 1;
if ( observedAlleles.get(0).equals(observedAlleles.get(1)) && varType != INDEL_VARIANT ) nNonref = 1;
else nNonref = 2; else nNonref = 2;
} }
// DONE with indels // DONE with indels
@ -272,19 +271,21 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
return refBases; return refBases;
} }
/** /**
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException * Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP. * if variant is not a SNP.
* *
* @return reference base on the forward strand * @return reference base on the forward strand
*/ */
public char getRefSnpFWD() throws IllegalStateException { /* public char getRefSnpFWD() throws IllegalStateException {
if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP"); if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP");
return refBaseChar; return refBaseChar;
} }
*/
@Override @Override
public List<String> getGenotype() { public List<String> getFWDAlleles() {
return observedAlleles; return observedAlleles;
} }
@ -299,6 +300,18 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
public boolean isIndel() { return isInsertion() || isDeletion() || varType == INDEL_VARIANT; } public boolean isIndel() { return isInsertion() || isDeletion() || varType == INDEL_VARIANT; }
public boolean isReference() { return varType == NO_VARIANT; } public boolean isReference() { return varType == NO_VARIANT; }
public boolean isHom() {
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
if ( ! isIndel() ) return ( observedAlleles.get(0) == observedAlleles.get(1) );
return ( isInsertion() || isDeletion() ) && observedAlleles.get(0).equals(observedAlleles.get(1) );
}
public boolean isHet() {
// implementation-dependent: here we use the fact that for ref and snps we actually use fixed static strings to remember the genotype
if ( ! isIndel() ) return ( observedAlleles.get(0) != observedAlleles.get(1) );
return isIndel() || ( ! observedAlleles.get(0).equals(observedAlleles.get(1) ) );
}
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
// //
// formatting // formatting
@ -336,7 +349,7 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
return String.format("REPL not implemented yet"); return String.format("REPL not implemented yet");
} }
/*
@Override @Override
public String getAltBasesFWD() { public String getAltBasesFWD() {
if ( ! isSNP() && ! isIndel() ) return emptyStr; if ( ! isSNP() && ! isIndel() ) return emptyStr;
@ -360,7 +373,9 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
System.out.printf("WARNING: unexpected variant type in pileup %s at %s%n",name,getLocation().toString()); System.out.printf("WARNING: unexpected variant type in pileup %s at %s%n",name,getLocation().toString());
return null; return null;
} }
*/
/*
@Override @Override
public char getAltSnpFWD() throws IllegalStateException { public char getAltSnpFWD() throws IllegalStateException {
if ( ! isSNP() ) throw new IllegalStateException("Variant is not a SNP"); if ( ! isSNP() ) throw new IllegalStateException("Variant is not a SNP");
@ -373,35 +388,33 @@ public class rodSAMPileup extends ReferenceOrderedDatum implements AllelicVarian
public double getConsensusConfidence() { public double getConsensusConfidence() {
return consensusScore; return consensusScore;
} }
*/
/*
@Override
public double getMAF() {
if ( nNonref > 1 ) System.out.println("SAM pileup: WARNING: can not determine minor allele freq for multiallelic site");
if ( isSNP() || isIndel() ) {
if ( observedAlleles.get(0).equals(observedAlleles.get(1)) ) return 1.0;
else return 0.5;
}
return 0;
}
@Override @Override
public int getPloidy() throws IllegalStateException { public int getPloidy() throws IllegalStateException {
return 2; // ??? return 2; // ???
} }
*/
@Override @Override
public double getVariationConfidence() { public double getVariantConfidence() {
return variantScore; return variantScore;
} }
@Override
public boolean isGenotype() {
return true;
}
@Override @Override
public boolean isBiallelic() { public boolean isBiallelic() {
return nNonref < 2; return nNonref < 2;
} }
@Override
public double getConsensusConfidence() {
return consensusScore;
}
@Override
public String getFWDRefBases() {
return refBases;
}
} }