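Remove the check in the Unified Genotyper that refused to make a call at a locus when the maximum number of reads at that locus had been exceeded. Instead, simply add a flag to the INFO field if any of the samples have been downsampled. 95% hooked up: the flag is declared in the header and the annotation code is in place, but the downsampling test itself is still stubbed out with `if ( false )` in place of rawContext.hasPileupBeenDownsampled().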

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4126 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-26 05:50:47 +00:00
parent e06b2c90ef
commit 45d895dcf4
3 changed files with 28 additions and 21 deletions

View File

@ -142,6 +142,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.add(new VCFInfoHeaderLine(dbSet.getValue(), 0, VCFHeaderLineType.Flag, (dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ? "dbSNP" : dbSet.getValue()) + " Membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
// FORMAT and INFO fields
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
@ -132,10 +133,6 @@ public class UnifiedGenotyperEngine {
if ( !BaseUtils.isRegularBase(ref) )
return null;
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
if ( rawContext.hasExceededMaxPileup() )
return null;
VariantCallContext call;
BadReadPileupFilter badReadPileupFilter = new BadReadPileupFilter(refContext);
@ -198,7 +195,16 @@ public class UnifiedGenotyperEngine {
}
}
if ( call != null ) call.setRefBase(ref);
if ( call != null && call.vc != null ) {
call.setRefBase(ref);
// if the site was downsampled, record that fact
if ( false ) { //rawContext.hasPileupBeenDownsampled() ) {
Map<String, Object> attrs = new HashMap<String, Object>(call.vc.getAttributes());
attrs.put(VCFConstants.DOWNSAMPLED_KEY, true);
VariantContextUtils.modifyAttributes(call.vc, attrs);
}
}
return call;
}

View File

@ -24,7 +24,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("99ff7d7031cc5038ec95f7872311c6b5"));
Arrays.asList("42f589f8743ec16e72a4697c728502ed"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -32,7 +32,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("497ac35ab217d5c0c21ee7733219ef5d"));
Arrays.asList("32ba9f34185a0aec3107efafe5130556"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -40,7 +40,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("29670c1c6ae4a1e7fc27c4d78c434a72"));
Arrays.asList("3d9de73b764a55deac6a956c56c46373"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParallelization() {
String md5 = "c6a4347807f624c2a31fb95080ed68b8";
String md5 = "6fdddf70e8320e04dba50a9ed0f26854";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -79,11 +79,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "fefaff399971f080c1a5393ea7f6d5d1" );
e.put( "-all_bases", "300decbc3ebd4022da1045cb6c1bdbbb" );
e.put( "--min_base_quality_score 26", "3fd8b652f2fca1dbd9156aec5978a90c" );
e.put( "--min_mapping_quality_score 26", "7d50c7cd55b3a9dee6aca3980c4612a8" );
e.put( "--max_mismatches_in_40bp_window 5", "3e347f056ed9cf2e64f2060771244a6c" );
e.put( "-genotype", "8428439ee41ea0e024fcfe1c267b5e2d" );
e.put( "-all_bases", "b8fd0a213362743b174bd2aeba7d0f8c" );
e.put( "--min_base_quality_score 26", "be88f3992f7da095e788ff372bf94190" );
e.put( "--min_mapping_quality_score 26", "266e9f95bd577a3b8ebdb5d66925a74d" );
e.put( "--max_mismatches_in_40bp_window 5", "61d7de457fd591344a7f8561aeb816a0" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -97,12 +97,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("0937f45888cab1aacfa129d45d82384f"));
Arrays.asList("758555ebda5a145e21edd3cba5193817"));
executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("99419b1852b744092bc7418747dc78f3"));
Arrays.asList("291e2b7d0f8c3b262306c18d61782711"));
executeTest("testConfidence2", spec2);
}
@ -114,8 +114,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "f64f88e8c085ff21db1570bb5cd74f8a" );
e.put( 1.0 / 1850, "0804d411ed7cf77c79e1ff540b8b85f1" );
e.put( 0.01, "61a54794bc262479a981b6eb83ce6243" );
e.put( 1.0 / 1850, "b34d30518143ea407e6570eff101f708" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -134,8 +134,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherBaseCallModel() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "one_state", "a2b9f07c406ff60b81b42a8d5da45f10" );
e.put( "three_state", "5a1b3c215e18686a4254849efcb45c40" );
e.put( "one_state", "97d22192536b1efe0163c062ecf64612" );
e.put( "three_state", "fd48388392b877afcfa28507067fbe63" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -158,7 +158,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("ce22e33e550ea62d8786deb38a70049a"));
Arrays.asList("bb112d6f907608a0e8de37cd0c887956"));
executeTest(String.format("testMultiTechnologies"), spec);
}