updates to the paper genotyper based on Mark's comments. There's still more work to do, including more testing.
Also a 250% improvement in the getBases() and getQuals() of BasicPileup, which was nearly all of the runtime for the genotyper (using primitives instead of objects when possible). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
22aaf8c5e0
commit
33dcfc858d
|
|
@ -26,9 +26,6 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
|||
// the possible diploid genotype strings
|
||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
||||
// the epsilon value we're using to model our error rate
|
||||
private static double EPSILON = 1e-4;
|
||||
|
||||
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
||||
private File LOCATION = new File("genotyping.output");
|
||||
|
||||
|
|
@ -46,22 +43,20 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
|||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
|
||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
|
||||
for (GENOTYPE genotype : GENOTYPE.values())
|
||||
for (byte pileupBase : pileup.getBases()) {
|
||||
// todo -- epsilon isn't a constant variable, it's the de-phred error probabilities of the base
|
||||
// you need to grab the qual score associated with this base and calcluate
|
||||
// epsilon = pow(10, qual / -10.0)
|
||||
// Also, only do the calculations below for bases with qual > 0
|
||||
for (char genotypeBase : genotype.toString().toCharArray())
|
||||
// todo -- all of these calculations should be in log10 space (like the priors are)
|
||||
if (genotypeBase == pileupBase)
|
||||
// todo -- potential int flow problems there. Needs parens
|
||||
likelihoods[genotype.ordinal()] += 1 / 2 * (1 - EPSILON) + EPSILON / 3;
|
||||
else
|
||||
likelihoods[genotype.ordinal()] += EPSILON / 3;
|
||||
for (int index = 0; index < pileup.getBases().length; index++) {
|
||||
if (pileup.getQuals()[index] > 0) {
|
||||
double epsilon = Math.pow(10, pileup.getQuals()[index] / -10.0);
|
||||
byte pileupBase = pileup.getBases()[index];
|
||||
for (char genotypeBase : genotype.toString().toCharArray())
|
||||
if (genotypeBase == pileupBase)
|
||||
likelihoods[genotype.ordinal()] += Math.log10(0.5 * ((1 - epsilon) + epsilon / 3));
|
||||
else
|
||||
likelihoods[genotype.ordinal()] += Math.log10(epsilon / 3);
|
||||
}
|
||||
}
|
||||
|
||||
Integer sortedList[] = Utils.SortPermutation(likelihoods);
|
||||
|
|
@ -72,8 +67,8 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
|||
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
||||
// and calculate the LOD score from best - ref (likelihoods[sortedList[9]] - likelihoods[sortedList[8])
|
||||
return new SimpleCall(context.getLocation(),
|
||||
GENOTYPE.values()[sortedList[9]].toString(),
|
||||
likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]);
|
||||
GENOTYPE.values()[sortedList[9]].toString(),
|
||||
likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -111,7 +106,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
|||
|
||||
/**
|
||||
* when we finish traversing, close the result list
|
||||
* @param result the final reduce result
|
||||
* @param result the final reduce result
|
||||
*/
|
||||
public void onTraversalDone(SimpleCallList result) {
|
||||
result.close();
|
||||
|
|
|
|||
|
|
@ -85,19 +85,19 @@ abstract public class BasicPileup implements Pileup {
|
|||
// byte[] methods
|
||||
//
|
||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets ));
|
||||
return getBasesAsArray(reads,offsets,false);
|
||||
}
|
||||
|
||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions ));
|
||||
return getBasesAsArray(reads,offsets,includeDeletions);
|
||||
}
|
||||
|
||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets ));
|
||||
return getQualsAsArray( reads, offsets,false);
|
||||
}
|
||||
|
||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions ));
|
||||
return getQualsAsArray( reads, offsets, includeDeletions);
|
||||
}
|
||||
|
||||
//
|
||||
|
|
@ -107,18 +107,27 @@ abstract public class BasicPileup implements Pileup {
|
|||
return getBasesAsArrayList( reads, offsets, false );
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
byte array[] = new byte[reads.size()];
|
||||
int index = 0;
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
if ( offset == -1 ) {
|
||||
if ( includeDeletions )
|
||||
bases.add((byte)DELETION_CHAR);
|
||||
array[index++] = ((byte)DELETION_CHAR);
|
||||
} else {
|
||||
bases.add(read.getReadBases()[offset]);
|
||||
array[index++] = read.getReadBases()[offset];
|
||||
}
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getBasesAsArray(reads, offsets, includeDeletions))
|
||||
bases.add(value);
|
||||
return bases;
|
||||
}
|
||||
|
||||
|
|
@ -128,6 +137,14 @@ abstract public class BasicPileup implements Pileup {
|
|||
|
||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getQualsAsArray(reads, offsets, includeDeletions))
|
||||
quals.add(value);
|
||||
return quals;
|
||||
}
|
||||
|
||||
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||
byte array[] = new byte[reads.size()];
|
||||
int index = 0;
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
|
@ -135,13 +152,12 @@ abstract public class BasicPileup implements Pileup {
|
|||
// skip deletion sites
|
||||
if ( offset == -1 ) {
|
||||
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
|
||||
quals.add((byte)0);
|
||||
array[index++] = ((byte)0);
|
||||
} else {
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
quals.add(qual);
|
||||
array[index++] = read.getBaseQualities()[offset];
|
||||
}
|
||||
}
|
||||
return quals;
|
||||
return array;
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue