Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
45d54f6258
|
|
@ -149,9 +149,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
|
||||
private String TRANCHE_FILENAME = null;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
||||
|
|
@ -226,16 +223,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
||||
|
||||
// Add select expressions for anything in the tranches file
|
||||
if ( TRANCHE_FILENAME != null ) {
|
||||
// we are going to build a few select names automatically from the tranches file
|
||||
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
|
||||
logger.info("Adding select for all variant above the pCut of : " + t);
|
||||
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
|
||||
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize select expressions
|
||||
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
|
||||
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
|
||||
|
|
@ -245,18 +232,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Initialize the set of stratifications and evaluations to use
|
||||
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
boolean usingJEXL = false;
|
||||
for ( VariantStratifier vs : getStratificationObjects() ) {
|
||||
if ( vs.getClass().getSimpleName().equals("Filter") )
|
||||
byFilterIsEnabled = true;
|
||||
else if ( vs.getClass().getSimpleName().equals("Sample") )
|
||||
perSampleIsEnabled = true;
|
||||
usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class);
|
||||
}
|
||||
|
||||
if ( TRANCHE_FILENAME != null && ! usingJEXL )
|
||||
throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled");
|
||||
|
||||
// Initialize the evaluation contexts
|
||||
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean evalIsGood = eval != null && eval.isVariant();
|
||||
boolean evalIsGood = eval != null && eval.isPolymorphic();
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
||||
|
||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||
|
|
|
|||
|
|
@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
||||
// added in a proxy check for monomorphic status here.
|
||||
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
|
||||
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
|
||||
if ( vc1.isMonomorphic() ) {
|
||||
nRefLoci++;
|
||||
} else {
|
||||
switch (vc1.getType()) {
|
||||
case NO_VARIATION:
|
||||
// shouldn't get here
|
||||
break;
|
||||
case SNP:
|
||||
nVariantLoci++;
|
||||
|
|
|
|||
|
|
@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
public int getComparisonOrder() { return 1; } // need only the evals
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
if ( vc1.isIndel() ) {
|
||||
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
|
||||
|
||||
if ( ! vc1.isBiallelic() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
// only count simple insertions/deletions, not complex indels
|
||||
if ( vc1.isSimpleInsertion() ) {
|
||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||
} else if ( vc1.isSimpleDeletion() ) {
|
||||
indelHistogram.update(-vc1.getReference().length());
|
||||
} else {
|
||||
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if (eval != null ) {
|
||||
if (eval != null && eval.isPolymorphic()) {
|
||||
if ( indelStats == null ) {
|
||||
indelStats = new IndelStats(eval);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
|||
}
|
||||
}
|
||||
|
||||
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
|
||||
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
|
||||
metrics.incrValue(eval);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
|
||||
return null; //no interesting sites
|
||||
}
|
||||
|
||||
if (vc.hasGenotypes()) {
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
|
||||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
|
||||
if (VariantContextUtils.isTransition(vc)) {
|
||||
if (updateStandard) nTiInComp++;
|
||||
else nTi++;
|
||||
|
|
|
|||
|
|
@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
||||
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
|
||||
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
|
||||
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
int ac = 0;
|
||||
|
|
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else if ( vc.hasGenotypes() ) {
|
||||
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
|
||||
|
||||
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
|
||||
|
||||
|
|
|
|||
|
|
@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return true if it's monomorphic
|
||||
*/
|
||||
public boolean isMonomorphic() {
|
||||
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
|
||||
return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
Object accumulator = countReadWalker.reduceInit();
|
||||
|
||||
while (shardStrategy.hasNext()) {
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
Shard shard = shardStrategy.next();
|
||||
|
||||
if (shard == null) {
|
||||
|
|
|
|||
|
|
@ -261,10 +261,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
return String.format("%s -select '%s' -selectName %s", cmd, select, name);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false) // no longer supported in the GATK
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,11 +37,6 @@ class DataProcessingPipeline extends QScript {
|
|||
* Optional Parameters
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
|
||||
// var sortSamJar: File = _
|
||||
//
|
||||
|
||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||
var indels: List[File] = List()
|
||||
|
||||
|
|
@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript {
|
|||
}
|
||||
}
|
||||
return sampleTable.toMap
|
||||
|
||||
// println("\n\n*** INPUT FILES ***\n")
|
||||
// // Creating one file for each sample in the dataset
|
||||
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
// for ((sample, flist) <- sampleTable) {
|
||||
//
|
||||
// println(sample + ":")
|
||||
// for (f <- flist)
|
||||
// println (f)
|
||||
// println()
|
||||
//
|
||||
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
// sampleBamFiles(sample) = sampleFileName
|
||||
// //add(writeList(flist, sampleFileName))
|
||||
// }
|
||||
// println("*** INPUT FILES ***\n\n")
|
||||
//
|
||||
// return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
|
|||
Loading…
Reference in New Issue