The recalibrator now uses all input RODs when looking for known polymorphic sites not just the one named dbsnp. Added an integration test which uses both dbsnp and an input vcf file and skips over the union of the two.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2564 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
16777e3875
commit
189829841b
|
|
@ -125,7 +125,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
}
|
||||
|
||||
/**
|
||||
* given a existing file, open it and append all the valid triplet lines to an existing list
|
||||
* given an existing file, open it and append all the valid triplet lines to an existing list
|
||||
*
|
||||
* @param rodTripletList the list of existing triplets
|
||||
* @param filename the file to attempt to extract ROD triplets from
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariationRod;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
|
|
@ -103,7 +104,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
|
||||
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
|
||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||
private static final String versionString = "v2.2.4"; // Major version, minor version, and build number
|
||||
private static final String versionString = "v2.2.5"; // Major version, minor version, and build number
|
||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
|
|
@ -149,8 +150,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
// Warn the user if no dbSNP file was specified
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
|
||||
if( rod != null ) {
|
||||
foundDBSNP = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !foundDBSNP ) {
|
||||
|
|
@ -232,15 +234,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
// Pull out anything passed by -B name,type,file that has the name "dbsnp"
|
||||
final RODRecordList<ReferenceOrderedDatum> dbsnpRODs = tracker.getTrackData( "dbsnp", null );
|
||||
// Pull out data for this locus for all the input RODs and check if this is a known variant site in any of them
|
||||
boolean isSNP = false;
|
||||
if (dbsnpRODs != null) {
|
||||
for( ReferenceOrderedDatum rod : dbsnpRODs ) {
|
||||
if( ((Variation)rod).isSNP() ) {
|
||||
isSNP = true; // At least one of the rods says this is a snp site
|
||||
break;
|
||||
}
|
||||
for( ReferenceOrderedDatum rod : tracker.getAllRods() ) {
|
||||
if( rod != null && rod instanceof Variation && ((Variation)rod).isSNP() ) {
|
||||
isSNP = true; // At least one of the rods says this is a snp site
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -133,6 +133,34 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesVCFPlusDBsnp() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" -B anyNameABCD,VCF," + validationDataLocation + "vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" --solid_recal_mode SET_Q_ZERO" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testCountCovariatesVCFPlusDBsnp", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue