updates to the paper genotyper based on Mark's comments. There's still more work to do, including more testing.

Also a 250% improvement in the getBases() and getQuals() of BasicPileup, which was nearly all of the runtime for the genotyper (using primitives instead of objects when possible).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-11-19 23:06:49 +00:00
parent 22aaf8c5e0
commit 33dcfc858d
2 changed files with 43 additions and 32 deletions

View File

@ -26,9 +26,6 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
// the possible diploid genotype strings // the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
// the epsilon value we're using to model our error rate
private static double EPSILON = 1e-4;
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true) @Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
private File LOCATION = new File("genotyping.output"); private File LOCATION = new File("genotyping.output");
@ -46,22 +43,20 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets()); ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(), double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY, DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
for (GENOTYPE genotype : GENOTYPE.values()) for (GENOTYPE genotype : GENOTYPE.values())
for (byte pileupBase : pileup.getBases()) { for (int index = 0; index < pileup.getBases().length; index++) {
// todo -- epsilon isn't a constant variable, it's the de-phred error probabilities of the base if (pileup.getQuals()[index] > 0) {
// you need to grab the qual score associated with this base and calcluate double epsilon = Math.pow(10, pileup.getQuals()[index] / -10.0);
// epsilon = pow(10, qual / -10.0) byte pileupBase = pileup.getBases()[index];
// Also, only do the calculations below for bases with qual > 0 for (char genotypeBase : genotype.toString().toCharArray())
for (char genotypeBase : genotype.toString().toCharArray()) if (genotypeBase == pileupBase)
// todo -- all of these calculations should be in log10 space (like the priors are) likelihoods[genotype.ordinal()] += Math.log10(0.5 * ((1 - epsilon) + epsilon / 3));
if (genotypeBase == pileupBase) else
// todo -- potential int flow problems there. Needs parens likelihoods[genotype.ordinal()] += Math.log10(epsilon / 3);
likelihoods[genotype.ordinal()] += 1 / 2 * (1 - EPSILON) + EPSILON / 3; }
else
likelihoods[genotype.ordinal()] += EPSILON / 3;
} }
Integer sortedList[] = Utils.SortPermutation(likelihoods); Integer sortedList[] = Utils.SortPermutation(likelihoods);
@ -72,8 +67,8 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString()) // create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
// and calculate the LOD score from best - ref (likelihoods[sortedList[9]] - likelihoods[sortedList[8]) // and calculate the LOD score from best - ref (likelihoods[sortedList[9]] - likelihoods[sortedList[8])
return new SimpleCall(context.getLocation(), return new SimpleCall(context.getLocation(),
GENOTYPE.values()[sortedList[9]].toString(), GENOTYPE.values()[sortedList[9]].toString(),
likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]); likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]);
} }
/** /**
@ -111,7 +106,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
/** /**
* when we finish traversing, close the result list * when we finish traversing, close the result list
* @param result the final reduce result * @param result the final reduce result
*/ */
public void onTraversalDone(SimpleCallList result) { public void onTraversalDone(SimpleCallList result) {
result.close(); result.close();

View File

@ -85,19 +85,19 @@ abstract public class BasicPileup implements Pileup {
// byte[] methods // byte[] methods
// //
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) { public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets )); return getBasesAsArray(reads,offsets,false);
} }
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions )); return getBasesAsArray(reads,offsets,includeDeletions);
} }
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) { public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets )); return getQualsAsArray( reads, offsets,false);
} }
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions )); return getQualsAsArray( reads, offsets, includeDeletions);
} }
// //
@ -107,18 +107,27 @@ abstract public class BasicPileup implements Pileup {
return getBasesAsArrayList( reads, offsets, false ); return getBasesAsArrayList( reads, offsets, false );
} }
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size()); byte array[] = new byte[reads.size()];
int index = 0;
for ( int i = 0; i < reads.size(); i++ ) { for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
int offset = offsets.get(i); int offset = offsets.get(i);
if ( offset == -1 ) { if ( offset == -1 ) {
if ( includeDeletions ) if ( includeDeletions )
bases.add((byte)DELETION_CHAR); array[index++] = ((byte)DELETION_CHAR);
} else { } else {
bases.add(read.getReadBases()[offset]); array[index++] = read.getReadBases()[offset];
} }
} }
return array;
}
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for (byte value : getBasesAsArray(reads, offsets, includeDeletions))
bases.add(value);
return bases; return bases;
} }
@ -128,6 +137,14 @@ abstract public class BasicPileup implements Pileup {
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) { public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size()); ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for (byte value : getQualsAsArray(reads, offsets, includeDeletions))
quals.add(value);
return quals;
}
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
byte array[] = new byte[reads.size()];
int index = 0;
for ( int i = 0; i < reads.size(); i++ ) { for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
int offset = offsets.get(i); int offset = offsets.get(i);
@ -135,13 +152,12 @@ abstract public class BasicPileup implements Pileup {
// skip deletion sites // skip deletion sites
if ( offset == -1 ) { if ( offset == -1 ) {
if ( includeDeletions ) // we need the qual vector to be the same length as base vector! if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
quals.add((byte)0); array[index++] = ((byte)0);
} else { } else {
byte qual = read.getBaseQualities()[offset]; array[index++] = read.getBaseQualities()[offset];
quals.add(qual);
} }
} }
return quals; return array;
} }
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) { public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {