Change the spanning-deletions argument from a hard read-count cutoff to a fraction of the reads in the pileup. The default is now 0.05 (5% of reads).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2155 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-25 04:54:44 +00:00
parent 03342c1fdd
commit e1e5b35b19
4 changed files with 27 additions and 19 deletions

View File

@ -39,7 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD;
protected int maxDeletionsInPileup;
protected double maxDeletionFractionInPileup;
protected String assumedSingleSample;
protected PrintWriter verboseWriter;
@ -70,7 +70,10 @@ public abstract class GenotypeCalculationModel implements Cloneable {
POOL_SIZE = UAC.POOLSIZE;
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
maxDeletionsInPileup = UAC.MAX_DELETIONS;
maxDeletionFractionInPileup = UAC.MAX_DELETION_FRACTION;
// disable if that's what is requested
if ( maxDeletionFractionInPileup < 0.0 || maxDeletionFractionInPileup > 1.0 )
maxDeletionFractionInPileup = -1.0;
assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE;
if ( UAC.VERBOSE != null ) {
try {
@ -163,17 +166,20 @@ public abstract class GenotypeCalculationModel implements Cloneable {
}
// check for deletions
if ( offset == -1 ) {
// are there too many deletions in the pileup?
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
return null;
}
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, offset);
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
return null;
return contexts;
}

View File

@ -56,17 +56,19 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
for (int i = 0; i < reads.size(); i++) {
// check for deletions
int offset = offsets.get(i);
if ( offset == -1 ) {
// are there too many deletions in the pileup?
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
return null;
}
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
pooledContext.add(reads.get(i), offset);
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
return null;
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
contexts.put(POOL_SAMPLE_NAME, pooledContext);
return contexts;

View File

@ -76,8 +76,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
public double CONFIDENCE_THRESHOLD = 0.0;
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [to disable, set to < 0; default:1]", required = false)
public Integer MAX_DELETIONS = 1;
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
@Argument(fullName = "max_coverage", shortName = "mc", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
public Integer MAX_READS_IN_PILEUP = 10000;

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("90072e7f935357695f8f48c40ff6e7a4"));
Arrays.asList("b14783b877e4857366cf9b1f516de343"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("cdbda4a455f11f75fa666e9317139f22"));
Arrays.asList("48f17694db2fc8744741e4eb73227e5b"));
executeTest("testPooled1", spec);
}
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("415f4a276005cad7fea0a8fcc3b8c3b5"));
Arrays.asList("e1467f309e407a037404535b07b7e969"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -125,7 +125,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -bm empirical" +
" -vf GELI",
1,
Arrays.asList("f490a407c9c62d43eb83107e83c858f6"));
Arrays.asList("46acd5a5d7301129ee3c0c9a4ab10259"));
executeTest(String.format("testMultiTechnologies"), spec);
}
@ -162,7 +162,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void genotypeTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
Arrays.asList("45da29e3b1306d546a7b80c30c979ad4"));
Arrays.asList("db34a348abd13ee894780296e8a7e909"));
executeTest("genotypeTest", spec);
}