No longer allow the lod_threshold argument - use confidence instead.
Have UG output qscores in all cases. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1968 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2fb45dbd73
commit
11d950abe0
|
|
@ -26,28 +26,46 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
|
||||
// run the EM calculation
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
|
||||
double lod = overall.getPofD() - overall.getPofNull();
|
||||
logger.debug(String.format("LOD=%f", lod));
|
||||
|
||||
// return a null call if we don't pass the lod cutoff
|
||||
if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD )
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
|
||||
double PofD = Math.pow(10, overall.getPofD());
|
||||
double PofNull = Math.pow(10, overall.getPofNull());
|
||||
double sum = PofD + PofNull;
|
||||
|
||||
// calculate the phred-scaled confidence score
|
||||
double phredScaledConfidence;
|
||||
boolean bestIsRef = false;
|
||||
if ( PofD > PofNull ) {
|
||||
phredScaledConfidence = -10.0 * Math.log10(PofNull / sum);
|
||||
} else {
|
||||
phredScaledConfidence = -10.0 * Math.log10(PofD / sum);
|
||||
bestIsRef = true;
|
||||
}
|
||||
|
||||
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
|
||||
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
|
||||
}
|
||||
|
||||
// generate the calls
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
if ( locusdata != null ) {
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(lod);
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
}
|
||||
if ( locusdata instanceof SLODBacked ) {
|
||||
|
||||
// calculate strand score
|
||||
|
||||
double lod = overall.getPofD() - overall.getPofNull();
|
||||
logger.debug(String.format("LOD=%f", lod));
|
||||
|
||||
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
|
||||
EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE);
|
||||
double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
|
||||
double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
|
||||
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
logger.debug(String.format("SLOD=%f", lod, strandScore));
|
||||
logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
protected boolean GENOTYPE_MODE;
|
||||
protected boolean POOLED_INPUT;
|
||||
protected int POOL_SIZE;
|
||||
protected double LOD_THRESHOLD;
|
||||
protected double CONFIDENCE_THRESHOLD;
|
||||
protected double MINIMUM_ALLELE_FREQUENCY;
|
||||
protected int maxDeletionsInPileup;
|
||||
|
|
@ -68,7 +67,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
GENOTYPE_MODE = UAC.GENOTYPE;
|
||||
POOLED_INPUT = UAC.POOLED;
|
||||
POOL_SIZE = UAC.POOLSIZE;
|
||||
LOD_THRESHOLD = UAC.LOD_THRESHOLD;
|
||||
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
|
||||
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
|
||||
maxDeletionsInPileup = UAC.MAX_DELETIONS;
|
||||
|
|
|
|||
|
|
@ -45,16 +45,27 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
// get the genotype likelihoods
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
|
||||
|
||||
// calculate the lod threshold
|
||||
double[] posteriors = discoveryGL.second.getPosteriors();
|
||||
// find the index of the best genotype
|
||||
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
|
||||
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
|
||||
double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
|
||||
double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
|
||||
double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
|
||||
double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
|
||||
int bestIndex = sortedPosteriors[sortedPosteriors.length - 1];
|
||||
|
||||
// flag to determine if ref is the best call (not necessary in genotype mode)
|
||||
boolean bestIsRef = false;
|
||||
|
||||
// calculate the phred-scaled confidence score
|
||||
double phredScaledConfidence;
|
||||
if ( GENOTYPE_MODE ) {
|
||||
phredScaledConfidence = -10.0 * Math.log10(1.0 - posteriors[bestIndex]);
|
||||
} else {
|
||||
int refIndex = DiploidGenotype.createHomGenotype(ref).ordinal();
|
||||
bestIsRef = (refIndex == bestIndex);
|
||||
double pError = (bestIsRef ? 1.0 - posteriors[refIndex] : posteriors[refIndex]);
|
||||
phredScaledConfidence = -10.0 * Math.log10(pError);
|
||||
}
|
||||
|
||||
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
|
||||
if ( !ALL_BASE_MODE && lodConfidence < LOD_THRESHOLD ) {
|
||||
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +88,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
if ( locusdata != null ) {
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(lodConfidence);
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,11 +71,11 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
|
||||
// control the various parameters to be used
|
||||
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold by which variants should be filtered (for EM_POINT_ESTIMATE model)", required = false)
|
||||
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "[DEPRECATED] The lod threshold by which variants should be filtered", required = false)
|
||||
public double LOD_THRESHOLD = Double.MIN_VALUE;
|
||||
|
||||
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered (for JOINT_ESTIMATE model)", required = false)
|
||||
public double CONFIDENCE_THRESHOLD = Double.MIN_VALUE;
|
||||
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
|
||||
public double CONFIDENCE_THRESHOLD = 0.0;
|
||||
|
||||
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [default:1]", required = false)
|
||||
public Integer MAX_DELETIONS = 1;
|
||||
|
|
|
|||
|
|
@ -100,9 +100,17 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
**/
|
||||
public void initialize() {
|
||||
|
||||
// deal with input errors
|
||||
if ( UAC.POOLED && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||
throw new StingException("This was an attempt to use an EM Point Estimate model with pooled genotype calculations. This model does not work with pooled data.");
|
||||
}
|
||||
if ( UAC.LOD_THRESHOLD > Double.MIN_VALUE ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("\n***\tThe --lod_threshold argument is no longer supported; instead, please use --min_confidence_threshold.");
|
||||
sb.append("\n***\tThere is approximately a 10-to-1 mapping from confidence to LOD.");
|
||||
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
||||
throw new StingException(sb.toString());
|
||||
}
|
||||
|
||||
// get all of the unique sample names
|
||||
samples = new HashSet<String>();
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
public static String testGeliLod5() {
|
||||
return baseTestString() + " --variant_output_format GELI -lod 5";
|
||||
return baseTestString() + " --variant_output_format GELI -confidence 50";
|
||||
}
|
||||
|
||||
private static String OneMb1StateMD5 = "7e3fc1d8427329eb2a3e05a81011749a";
|
||||
|
|
@ -42,16 +42,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1,
|
||||
Arrays.asList("dba7f51b0ddb1afebcd8f229444f70e5"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("9985a92060b512b5d27b4074faf8b60b"));
|
||||
executeTest("testMultiSamplePilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1,
|
||||
Arrays.asList("c2a7dd9bec6819b2d7702564955c993b"));
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("394f009c9bad34eb584fa10d133d79e0"));
|
||||
executeTest("testMultiSamplePilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void genotypeTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
|
||||
Arrays.asList("6a930a8f115495f5319541c21a892d8e"));
|
||||
Arrays.asList("45da29e3b1306d546a7b80c30c979ad4"));
|
||||
executeTest("genotypeTest", spec);
|
||||
}
|
||||
|
||||
|
|
@ -167,12 +167,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testLOD() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 10.0, "94c5b48c0c956fcdacbffaa38a80d926" );
|
||||
e.put( 3.0, "f1f53b944b821f05d5dd1ad8fda2f57b" );
|
||||
e.put( 100.0, "94c5b48c0c956fcdacbffaa38a80d926" );
|
||||
e.put( 30.0, "df455abc1b2bc533aa1dc6eb088a835a" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -bm EMPIRICAL -lod " + entry.getKey(), 1,
|
||||
baseTestString() + " --variant_output_format GELI -L 1:10,000,000-11,000,000 -bm EMPIRICAL -confidence " + entry.getKey(), 1,
|
||||
Arrays.asList(entry.getValue()));
|
||||
executeTest("testLOD", spec);
|
||||
}
|
||||
|
|
@ -186,7 +186,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "953fe510a81e357d0b56feac2a90095d" );
|
||||
e.put( 0.01, "b8837be7e8beb3ab2ed7150cdc022c65" );
|
||||
e.put( 0.0001, "ef0f2af7d13f166829d86b15fabc2b81" );
|
||||
e.put( 1.0 / 1850, "a435c8c966c11f4393a25a9d01c4fc3d" );
|
||||
|
||||
|
|
@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void empirical1MbTestBinaryGeli() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " -L 1:10,000,000-11,000,000 -bm empirical --variant_output_format GELI_BINARY -lod 5", 1,
|
||||
baseTestString() + " -L 1:10,000,000-11,000,000 -bm empirical --variant_output_format GELI_BINARY -confidence 50", 1,
|
||||
Arrays.asList("915abf82a04fcd1842f6865501bae67c"));
|
||||
executeTest("empirical1MbTestBinaryGeli", spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue