Updates to the paper genotyper based on Mark's comments. There is still more work to do, including more testing.
Also a 250% speedup of the getBases() and getQuals() methods of BasicPileup, which accounted for nearly all of the genotyper's runtime (achieved by using primitives instead of objects where possible). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2097 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
22aaf8c5e0
commit
33dcfc858d
|
|
@ -26,9 +26,6 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
// the possible diploid genotype strings
|
// the possible diploid genotype strings
|
||||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
|
|
||||||
// the epsilon value we're using to model our error rate
|
|
||||||
private static double EPSILON = 1e-4;
|
|
||||||
|
|
||||||
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
@Argument(fullName = "call_location", shortName = "cl", doc = "File to which calls should be written", required = true)
|
||||||
private File LOCATION = new File("genotyping.output");
|
private File LOCATION = new File("genotyping.output");
|
||||||
|
|
||||||
|
|
@ -46,22 +43,20 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
|
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
|
||||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
|
|
||||||
for (GENOTYPE genotype : GENOTYPE.values())
|
for (GENOTYPE genotype : GENOTYPE.values())
|
||||||
for (byte pileupBase : pileup.getBases()) {
|
for (int index = 0; index < pileup.getBases().length; index++) {
|
||||||
// todo -- epsilon isn't a constant variable, it's the de-phred error probabilities of the base
|
if (pileup.getQuals()[index] > 0) {
|
||||||
// you need to grab the qual score associated with this base and calculate
|
double epsilon = Math.pow(10, pileup.getQuals()[index] / -10.0);
|
||||||
// epsilon = pow(10, qual / -10.0)
|
byte pileupBase = pileup.getBases()[index];
|
||||||
// Also, only do the calculations below for bases with qual > 0
|
for (char genotypeBase : genotype.toString().toCharArray())
|
||||||
for (char genotypeBase : genotype.toString().toCharArray())
|
if (genotypeBase == pileupBase)
|
||||||
// todo -- all of these calculations should be in log10 space (like the priors are)
|
likelihoods[genotype.ordinal()] += Math.log10(0.5 * ((1 - epsilon) + epsilon / 3));
|
||||||
if (genotypeBase == pileupBase)
|
else
|
||||||
// todo -- potential int flow problems there. Needs parens
|
likelihoods[genotype.ordinal()] += Math.log10(epsilon / 3);
|
||||||
likelihoods[genotype.ordinal()] += 1 / 2 * (1 - EPSILON) + EPSILON / 3;
|
}
|
||||||
else
|
|
||||||
likelihoods[genotype.ordinal()] += EPSILON / 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Integer sortedList[] = Utils.SortPermutation(likelihoods);
|
Integer sortedList[] = Utils.SortPermutation(likelihoods);
|
||||||
|
|
@ -72,8 +67,8 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
// create call using the best genotype (GENOTYPE.values()[sortedList[9]].toString())
|
||||||
// and calculate the LOD score from best - ref (likelihoods[sortedList[9]] - likelihoods[sortedList[8]])
|
// and calculate the LOD score from best - ref (likelihoods[sortedList[9]] - likelihoods[sortedList[8]])
|
||||||
return new SimpleCall(context.getLocation(),
|
return new SimpleCall(context.getLocation(),
|
||||||
GENOTYPE.values()[sortedList[9]].toString(),
|
GENOTYPE.values()[sortedList[9]].toString(),
|
||||||
likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]);
|
likelihoods[sortedList[9]] - likelihoods[GENOTYPE.valueOf(refGenotype).ordinal()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -111,7 +106,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, SimpleCallList>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* when we finish traversing, close the result list
|
* when we finish traversing, close the result list
|
||||||
* @param result the final reduce result
|
* @param result the final reduce result
|
||||||
*/
|
*/
|
||||||
public void onTraversalDone(SimpleCallList result) {
|
public void onTraversalDone(SimpleCallList result) {
|
||||||
result.close();
|
result.close();
|
||||||
|
|
|
||||||
|
|
@ -85,19 +85,19 @@ abstract public class BasicPileup implements Pileup {
|
||||||
// byte[] methods
|
// byte[] methods
|
||||||
//
|
//
|
||||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets ));
|
return getBasesAsArray(reads,offsets,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
return ArrayList2byte(getBasesAsArrayList( reads, offsets, includeDeletions ));
|
return getBasesAsArray(reads,offsets,includeDeletions);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets ));
|
return getQualsAsArray( reads, offsets,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
return ArrayList2byte(getQualsAsArrayList( reads, offsets, includeDeletions ));
|
return getQualsAsArray( reads, offsets, includeDeletions);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -107,18 +107,27 @@ abstract public class BasicPileup implements Pileup {
|
||||||
return getBasesAsArrayList( reads, offsets, false );
|
return getBasesAsArrayList( reads, offsets, false );
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
public static byte[] getBasesAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
byte array[] = new byte[reads.size()];
|
||||||
|
int index = 0;
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
if ( offset == -1 ) {
|
if ( offset == -1 ) {
|
||||||
if ( includeDeletions )
|
if ( includeDeletions )
|
||||||
bases.add((byte)DELETION_CHAR);
|
array[index++] = ((byte)DELETION_CHAR);
|
||||||
} else {
|
} else {
|
||||||
bases.add(read.getReadBases()[offset]);
|
array[index++] = read.getReadBases()[offset];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
|
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||||
|
for (byte value : getBasesAsArray(reads, offsets, includeDeletions))
|
||||||
|
bases.add(value);
|
||||||
return bases;
|
return bases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -128,6 +137,14 @@ abstract public class BasicPileup implements Pileup {
|
||||||
|
|
||||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||||
|
for (byte value : getQualsAsArray(reads, offsets, includeDeletions))
|
||||||
|
quals.add(value);
|
||||||
|
return quals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets, boolean includeDeletions ) {
|
||||||
|
byte array[] = new byte[reads.size()];
|
||||||
|
int index = 0;
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
@ -135,13 +152,12 @@ abstract public class BasicPileup implements Pileup {
|
||||||
// skip deletion sites
|
// skip deletion sites
|
||||||
if ( offset == -1 ) {
|
if ( offset == -1 ) {
|
||||||
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
|
if ( includeDeletions ) // we need the qual vector to be the same length as base vector!
|
||||||
quals.add((byte)0);
|
array[index++] = ((byte)0);
|
||||||
} else {
|
} else {
|
||||||
byte qual = read.getBaseQualities()[offset];
|
array[index++] = read.getBaseQualities()[offset];
|
||||||
quals.add(qual);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return quals;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue