implementation of the truth allele, different cases for REF , HOMVAR, FILTERED and HET.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6051 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3f974c62e6
commit
082abfd84f
|
|
@ -1,8 +1,10 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.replication_validation;
|
package org.broadinstitute.sting.playground.gatk.walkers.replication_validation;
|
||||||
|
|
||||||
import com.google.common.collect.ArrayListMultimap;
|
import com.google.common.collect.ArrayListMultimap;
|
||||||
|
import com.sun.xml.internal.ws.client.BindingProviderProperties;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
@ -34,26 +36,33 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public class ReplicationValidationWalker extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
|
public class ReplicationValidationWalker extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
|
||||||
|
|
||||||
|
|
||||||
|
@Argument(shortName="refsample", fullName="reference_sample_name", doc="Reference sample name.", required=true)
|
||||||
|
String referenceSampleName;
|
||||||
|
|
||||||
@Argument(shortName="nsamples", fullName="number_of_samples", doc="Number of samples in the dataset (not counting the reference sample).", required=true)
|
@Argument(shortName="nsamples", fullName="number_of_samples", doc="Number of samples in the dataset (not counting the reference sample).", required=true)
|
||||||
int nSamples = -1;
|
int nSamples = -1;
|
||||||
|
|
||||||
@Argument(shortName="nchr", fullName="number_of_chromosomes", doc="Number of chromosomes per sample (in case you're not dealing with diploids). Default: 2.", required=false)
|
@Argument(shortName="nchr", fullName="number_of_chromosomes", doc="Number of chromosomes per sample (in case you're not dealing with diploids). Default: 2.", required=false)
|
||||||
int nChromosomes = 2;
|
int nChromosomes = 2;
|
||||||
|
|
||||||
@Argument(shortName="maxac", fullName="max_allele_count", doc="Max number of alleles expected in a site. Default: 2 * number of samples. Smaller numbers process faster.", required=false)
|
@Argument(shortName="maxac", fullName="max_allele_count", doc="Max number of alleles expected in a site. Smaller numbers process faster. Default: 2 * number of samples. ", required=false)
|
||||||
int overrideMaxAlleleCount = -1;
|
int overrideMaxAlleleCount = -1;
|
||||||
|
|
||||||
@Argument(shortName="maxqs", fullName="max_quality_score", doc="Max quality score to consider. Default: Q40. Smaller numbers process faster.", required=false)
|
@Argument(shortName="maxqs", fullName="max_quality_score", doc="Max quality score to consider. Smaller numbers process faster. Default: Q40.", required=false)
|
||||||
int maxQualityScore= 40;
|
int maxQualityScore= 40;
|
||||||
|
|
||||||
@Argument(shortName="prior", fullName="site_quality_prior", doc="Phred-Scaled prior quality of the site. Default: Q20.", required=false)
|
@Argument(shortName="prior", fullName="site_quality_prior", doc="Phred-Scaled prior quality of the site. Default: Q20.", required=false)
|
||||||
byte defaultPrior= 20;
|
byte defaultPrior= 20;
|
||||||
|
|
||||||
|
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
|
||||||
|
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
|
||||||
|
|
||||||
|
|
||||||
@Output(doc="Write output to this file instead of STDOUT")
|
@Output(doc="Write output to this file instead of STDOUT")
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
int maxAlleleCount;
|
int maxAlleleCount;
|
||||||
String referenceSampleName;
|
|
||||||
boolean USE_TRUTH_ROD;
|
boolean USE_TRUTH_ROD;
|
||||||
|
|
||||||
final String REFERENCE_ROD_NAME = "reference";
|
final String REFERENCE_ROD_NAME = "reference";
|
||||||
|
|
@ -145,6 +154,36 @@ public class ReplicationValidationWalker extends LocusWalker<Integer, Long> impl
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
// Get reference base from VCF or Reference
|
||||||
|
VariantContext referenceContext = tracker.getVariantContext(ref, REFERENCE_ROD_NAME, context.getLocation());
|
||||||
|
byte trueReferenceBase;
|
||||||
|
|
||||||
|
// Site is not a variant, take from the reference
|
||||||
|
if (referenceContext == null) {
|
||||||
|
trueReferenceBase = ref.getBase();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Site has a VCF entry -- is variant
|
||||||
|
else {
|
||||||
|
// Site is filtered, don't mess with it if option is set
|
||||||
|
if (referenceContext.isFiltered() && EXCLUDE_FILTERED_REFERENCE_SITES) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Site is HomVar -- easy, take the only alt allele.
|
||||||
|
else if (referenceContext.getGenotype(referenceSampleName).isHomVar()) {
|
||||||
|
trueReferenceBase = referenceContext.getAlternateAllele(0).getBases()[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: treat HET cases specially?
|
||||||
|
else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ReadBackedPileup contextPileup = context.getBasePileup();
|
ReadBackedPileup contextPileup = context.getBasePileup();
|
||||||
Set<String> lanesInLocus = getLaneIDs(contextPileup.getReadGroups());
|
Set<String> lanesInLocus = getLaneIDs(contextPileup.getReadGroups());
|
||||||
for (String laneID : lanesInLocus) {
|
for (String laneID : lanesInLocus) {
|
||||||
|
|
@ -167,8 +206,6 @@ public class ReplicationValidationWalker extends LocusWalker<Integer, Long> impl
|
||||||
if (referenceSamplePileup != null && !referenceSamplePileup.isEmpty() && !poolPileup.isEmpty()) {
|
if (referenceSamplePileup != null && !referenceSamplePileup.isEmpty() && !poolPileup.isEmpty()) {
|
||||||
|
|
||||||
// Build error model
|
// Build error model
|
||||||
//TODO: Build a reference sample VCF and get true reference base from there.
|
|
||||||
byte trueReferenceBase = referenceSamplePileup.getBases()[0]; // THIS IS A QUICK FIX JUST TO BE ABLE TO TEST -- IT IS WRONG!
|
|
||||||
double [] errorModel = buildErrorModel(referenceSamplePileup.getBases(), trueReferenceBase, defaultPrior);
|
double [] errorModel = buildErrorModel(referenceSamplePileup.getBases(), trueReferenceBase, defaultPrior);
|
||||||
|
|
||||||
// Debug error model
|
// Debug error model
|
||||||
|
|
@ -185,10 +222,6 @@ public class ReplicationValidationWalker extends LocusWalker<Integer, Long> impl
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public Long reduceInit() {
|
public Long reduceInit() {
|
||||||
return 0l;
|
return 0l;
|
||||||
}
|
}
|
||||||
|
|
@ -197,10 +230,6 @@ public class ReplicationValidationWalker extends LocusWalker<Integer, Long> impl
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Reduces two subtrees together. In this case, the implementation of the tree reduce
|
|
||||||
* is exactly the same as the implementation of the single reduce.
|
|
||||||
*/
|
|
||||||
public Long treeReduce(Long lhs, Long rhs) {
|
public Long treeReduce(Long lhs, Long rhs) {
|
||||||
return lhs + rhs;
|
return lhs + rhs;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue