Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-09-07 16:49:49 -04:00
commit 45d54f6258
15 changed files with 72 additions and 114 deletions

View File

@ -149,9 +149,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
private String TRANCHE_FILENAME = null;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null; private File ancestralAlignmentsFile = null;
@ -226,16 +223,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
} }
sampleNamesForStratification.add(ALL_SAMPLE_NAME); sampleNamesForStratification.add(ALL_SAMPLE_NAME);
// Add select expressions for anything in the tranches file
if ( TRANCHE_FILENAME != null ) {
// we are going to build a few select names automatically from the tranches file
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
logger.info("Adding select for all variant above the pCut of : " + t);
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
}
}
// Initialize select expressions // Initialize select expressions
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
@ -245,18 +232,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Initialize the set of stratifications and evaluations to use // Initialize the set of stratifications and evaluations to use
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
boolean usingJEXL = false;
for ( VariantStratifier vs : getStratificationObjects() ) { for ( VariantStratifier vs : getStratificationObjects() ) {
if ( vs.getClass().getSimpleName().equals("Filter") ) if ( vs.getClass().getSimpleName().equals("Filter") )
byFilterIsEnabled = true; byFilterIsEnabled = true;
else if ( vs.getClass().getSimpleName().equals("Sample") ) else if ( vs.getClass().getSimpleName().equals("Sample") )
perSampleIsEnabled = true; perSampleIsEnabled = true;
usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class);
} }
if ( TRANCHE_FILENAME != null && ! usingJEXL )
throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled");
// Initialize the evaluation contexts // Initialize the evaluation contexts
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);

View File

@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
} }
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant(); boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
if (compIsGood) nCompVariants++; // count the number of comp events if (compIsGood) nCompVariants++; // count the number of comp events

View File

@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've // So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
// added in a proxy check for monomorphic status here. // added in a proxy check for monomorphic status here.
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) { if ( vc1.isMonomorphic() ) {
nRefLoci++; nRefLoci++;
} else { } else {
switch (vc1.getType()) { switch (vc1.getType()) {
case NO_VARIATION: case NO_VARIATION:
// shouldn't get here
break; break;
case SNP: case SNP:
nVariantLoci++; nVariantLoci++;

View File

@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
public int getComparisonOrder() { return 1; } // need only the evals public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
if ( vc1.isIndel() ) { if ( vc1.isIndel() && vc1.isPolymorphic() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
// only count simple insertions/deletions, not complex indels
if ( vc1.isSimpleInsertion() ) { if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length()); indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isSimpleDeletion() ) { } else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length()); indelHistogram.update(-vc1.getReference().length());
} else {
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
} }
} }

View File

@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null ) { if (eval != null && eval.isPolymorphic()) {
if ( indelStats == null ) { if ( indelStats == null ) {
indelStats = new IndelStats(eval); indelStats = new IndelStats(eval);
} }

View File

@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
} }
} }
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) { if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
metrics.incrValue(eval); metrics.incrValue(eval);
} }
} }

View File

@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
} }
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) { if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
return null; //no interesting sites return null; //no interesting sites
} }
if (vc.hasGenotypes()) { //this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
//this maps allele to a count int numHetsHere = 0;
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>(); float numGenosHere = 0;
int numIndsHere = 0;
int numHetsHere = 0; for (Genotype genotype : vc.getGenotypes().values()) {
float numGenosHere = 0; numIndsHere++;
int numIndsHere = 0; if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
for (Genotype genotype : vc.getGenotypes().values()) { numGenosHere++;
numIndsHere++; //increment stats for pairwise mismatches
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
numGenosHere++; for (Allele allele : genotype.getAlleles()) {
//increment stats for pairwise mismatches if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
for (Allele allele : genotype.getAlleles()) { alleleCounts.putIfAbsent(alleleString, 0);
if (allele.isNonNull() && allele.isCalled()) { alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
} }
} }
} }
if (numGenosHere > 0) { }
//only if have one called genotype at least if (numGenosHere > 0) {
this.numSites++; //only if have one called genotype at least
this.numSites++;
this.totalHet += numHetsHere / numGenosHere; this.totalHet += numHetsHere / numGenosHere;
//compute based on num sites //compute based on num sites
float harmonicFactor = 0; float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) { for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i; harmonicFactor += 1.0 / i;
} }
this.thetaRegionNumSites += 1.0 / harmonicFactor; this.thetaRegionNumSites += 1.0 / harmonicFactor;
//now compute pairwise mismatches //now compute pairwise mismatches
float numPairwise = 0; float numPairwise = 0;
float numDiffs = 0; float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) { for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1); int allele1Count = alleleCounts.get(allele1);
for (String allele2 : alleleCounts.keySet()) { for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) { if (allele1.compareTo(allele2) < 0) {
continue; continue;
} }
if (allele1 .compareTo(allele2) == 0) { if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5; numPairwise += allele1Count * (allele1Count - 1) * .5;
} }
else { else {
int allele2Count = alleleCounts.get(allele2); int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count; numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count; numDiffs += allele1Count * allele2Count;
}
} }
} }
}
if (numPairwise > 0) { if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise; this.totalAvgDiffs += numDiffs / numPairwise;
}
} }
} }

View File

@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
} }
public void updateTiTv(VariantContext vc, boolean updateStandard) { public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic()) { if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
if (VariantContextUtils.isTransition(vc)) { if (VariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++; if (updateStandard) nTiInComp++;
else nTi++; else nTi++;

View File

@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public SiteStatus calcSiteStatus(VariantContext vc) { public SiteStatus calcSiteStatus(VariantContext vc) {
if ( vc == null ) return SiteStatus.NO_CALL; if ( vc == null ) return SiteStatus.NO_CALL;
if ( vc.isFiltered() ) return SiteStatus.FILTERED; if ( vc.isFiltered() ) return SiteStatus.FILTERED;
if ( ! vc.isVariant() ) return SiteStatus.MONO; if ( vc.isMonomorphic() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
int ac = 0; int ac = 0;
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
else else
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY); ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
} else if ( vc.hasGenotypes() ) {
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
} else { } else {
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
//return SiteStatus.NO_CALL; // we can't figure out what to do //return SiteStatus.NO_CALL; // we can't figure out what to do

View File

@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null; final String interesting = null;
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( titvStats == null ) { titvStats = new TiTvStats(); } if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));

View File

@ -277,7 +277,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples * @return a new VariantContext with just the requested samples
*/ */
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) { public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes()); HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());

View File

@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return true if it's monomorphic * @return true if it's monomorphic
*/ */
public boolean isMonomorphic() { public boolean isMonomorphic() {
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
} }
/** /**

View File

@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest {
Object accumulator = countReadWalker.reduceInit(); Object accumulator = countReadWalker.reduceInit();
while (shardStrategy.hasNext()) { while (shardStrategy.hasNext()) {
traversalEngine.startTimersIfNecessary();
Shard shard = shardStrategy.next(); Shard shard = shardStrategy.next();
if (shard == null) { if (shard == null) {

View File

@ -261,10 +261,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
return String.format("%s -select '%s' -selectName %s", cmd, select, name); return String.format("%s -select '%s' -selectName %s", cmd, select, name);
} }
@Test @Test(enabled = false) // no longer supported in the GATK
public void testTranches() { public void testTranches() {
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
executeTestParallel("testTranches",spec); executeTestParallel("testTranches",spec);
} }

View File

@ -37,11 +37,6 @@ class DataProcessingPipeline extends QScript {
* Optional Parameters * Optional Parameters
****************************************************************************/ ****************************************************************************/
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
// var sortSamJar: File = _
//
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
var indels: List[File] = List() var indels: List[File] = List()
@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript {
} }
} }
return sampleTable.toMap return sampleTable.toMap
// println("\n\n*** INPUT FILES ***\n")
// // Creating one file for each sample in the dataset
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
// for ((sample, flist) <- sampleTable) {
//
// println(sample + ":")
// for (f <- flist)
// println (f)
// println()
//
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
// sampleBamFiles(sample) = sampleFileName
// //add(writeList(flist, sampleFileName))
// }
// println("*** INPUT FILES ***\n\n")
//
// return sampleBamFiles.toMap
} }
// Rebuilds the Read Group string to give BWA // Rebuilds the Read Group string to give BWA