Changed the default minimum mapping quality and the default minimum base quality to 10 each (and updated all integration tests accordingly), as suggested by Mark.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2494 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-03 21:31:04 +00:00
parent b6ecc9e151
commit b668d32cf1
3 changed files with 27 additions and 27 deletions

View File

@ -38,7 +38,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
public class MappingQualityReadFilter implements SamRecordFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 30;
public int MIN_MAPPING_QUALTY_SCORE = 10;
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() < MIN_MAPPING_QUALTY_SCORE);

View File

@ -72,6 +72,12 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
public double CONFIDENCE_THRESHOLD = 0.0;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public int MIN_BASE_QUALTY_SCORE = 20;
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
public int MAX_MISMATCHES = 3;
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
@ -80,10 +86,4 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public int MIN_BASE_QUALTY_SCORE = 20;
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
public int MAX_MISMATCHES = 3;
}

View File

@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("94c6c400cbeae33fcd6fea3388fcf73a"));
Arrays.asList("8acbb0879a7c9f35e7cf7d77a7cab850"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("ee14f4328fde95b35e3b1cb919c3712b"));
Arrays.asList("a5ec48e6695d9a5e7fcced7775baf6ff"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("68a4120d7dc9f1880f41311f095978ea"));
Arrays.asList("c20f370440ed55b0b6c38715985a850d"));
executeTest("testPooled1", spec);
}
@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("4504dd9c77dc502e9acbe687063a82c7"));
Arrays.asList("0a2df3fc9fc72a3ace46aab899275636"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("15fefcebedae65c1f0c94b8498bc647a"));
Arrays.asList("52249b69d5d984a8bdc0438e1f106dcb"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("d87b46694da0cc8b0ff82c1c69ee073f"));
Arrays.asList("172ca88b718f24fa0f2a3460b2dab33d"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testParallelization() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
Arrays.asList("bcbdd0369a0621d40bbdd6ef4c13f057"));
Arrays.asList("d86d9148b1eee16da9a5bb57c929c0c9"));
executeTest("test parallelization", spec);
}
@ -98,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "990d3e9b63310f56bf781959763804ae" );
e.put( "-all_bases", "6f24401c4b82b270739d596077da8582" );
e.put( "--min_base_quality_score 10", "2a53a3889fe1c32b066228f749ab4790" );
e.put( "--min_mapping_quality_score 10", "224c962fc6178059ae36ed9a4d614d26" );
e.put( "--max_mismatches_in_40bp_window 5", "fa8dd3c00d36ca62a88b5ceeb50ee33b" );
e.put( "-genotype", "2a8b8d4da5ceed494e2210b15c2c3eb0" );
e.put( "-all_bases", "69b20ce7fc18f78011ee02b48a8d69aa" );
e.put( "--min_base_quality_score 26", "67175e0262e626d46aaf3bc8680cf888" );
e.put( "--min_mapping_quality_score 26", "8edb24206529a7a344008280ab69cbb2" );
e.put( "--max_mismatches_in_40bp_window 5", "870812ed23b2c656439dc1c8e6376f89" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
Arrays.asList("13aad04333ef26eca6179221acf8abc0"));
Arrays.asList("eb708a714a0ecbebed707f2d9684e165"));
executeTest("testConfidence", spec);
}
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherFormat() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "GLF", "8c72131dfb2b830efb9938a582672a3e" );
e.put( "GELI_BINARY", "46162567eac3a5004f5f9b4c93d1b8d3" );
e.put( "GLF", "2dabd6313cbf65564933187511bf823e" );
e.put( "GELI_BINARY", "e0af9777893c4b872b9ad867f0a2758c" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -152,8 +152,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "700e6426c4142c823f7ac1dde2aa19ea" );
e.put( 1.0 / 1850, "e9e00bdb32ce63420988956c1a9b805f" );
e.put( 0.01, "0556dc160a18eeaf1a7303d5c1e1ce46" );
e.put( 1.0 / 1850, "0556dc160a18eeaf1a7303d5c1e1ce46" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -172,8 +172,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherBaseCallModel() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "one_state", "d69abadc3bf861d621017c0e41b87b0a" );
e.put( "three_state", "ebcc76cc4579393f98aecb59bdc56507" );
e.put( "one_state", "78c4cfdd9ef21650709de5355178449a" );
e.put( "three_state", "8ae5cebc06d520b1277b2f71265dd7e0" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -200,7 +200,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -gm JOINT_ESTIMATE" +
" -vf GELI",
1,
Arrays.asList("64ffb4ef633ad4c2ff6afbc75450f743"));
Arrays.asList("1eabfec6a377992edb847d84608ab562"));
executeTest(String.format("testMultiTechnologies"), spec);
}