As per Eric and Mark's suggestions, separated the segregating MNP merger (MergeMNPs) from the more general merger employed for annotation purposes (MergeSegregatingAlternateAlleles). Both use the same core MergePhasedSegregatingAlternateAllelesVCFWriter
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4763 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
984e94521e
commit
b4ef716aaf
|
|
@ -701,13 +701,27 @@ public class VariantContextUtils {
|
|||
return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
|
||||
}
|
||||
|
||||
// NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record
|
||||
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||
public abstract static class AlleleMergeRule {
|
||||
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
||||
abstract public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2);
|
||||
|
||||
public String toString() {
|
||||
return "all samples are mergeable";
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: returns null if vc1 and vc2 are not merged into a single MNP record
|
||||
|
||||
public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
||||
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed):
|
||||
if (!allSamplesAreMergeable(vc1, vc2) || !someSampleHasDoubleNonReferenceAllele(vc1, vc2))
|
||||
// Check that it's logically possible to merge the VCs:
|
||||
if (!allSamplesAreMergeable(vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check if there's a "point" in merging the VCs (e.g., annotations could be changed)
|
||||
if (!alleleMergeRule.allelesShouldBeMerged(vc1, vc2))
|
||||
return null;
|
||||
|
||||
return reallyMergeIntoMNP(vc1, vc2, referenceFile);
|
||||
|
|
@ -788,7 +802,7 @@ public class VariantContextUtils {
|
|||
if (!(other instanceof AlleleOneAndTwo))
|
||||
return false;
|
||||
|
||||
AlleleOneAndTwo otherAot = (AlleleOneAndTwo)other;
|
||||
AlleleOneAndTwo otherAot = (AlleleOneAndTwo) other;
|
||||
return (this.all1.equals(otherAot.all1) && this.all2.equals(otherAot.all2));
|
||||
}
|
||||
}
|
||||
|
|
@ -877,9 +891,9 @@ public class VariantContextUtils {
|
|||
return mergedAttribs;
|
||||
}
|
||||
|
||||
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2) {
|
||||
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser,vc1);
|
||||
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser,vc2);
|
||||
private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
||||
GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
|
||||
GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);
|
||||
|
||||
if (!loc1.onSameContig(loc2))
|
||||
throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");
|
||||
|
|
@ -910,6 +924,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
// Assumes that vc1 and vc2 were already checked to have the same sample names:
|
||||
|
||||
private static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
|
|
@ -948,6 +963,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
// Assumes that alleleSegregationIsKnown(gt1, gt2):
|
||||
|
||||
private static PhaseAndQuality calcPhaseForMergedGenotypes(Genotype gt1, Genotype gt2) {
|
||||
if (gt2.genotypesArePhased() || gt2.isHom())
|
||||
return new PhaseAndQuality(gt1); // maintain the phase of gt1
|
||||
|
|
@ -972,7 +988,8 @@ public class VariantContextUtils {
|
|||
/* Checks if any sample has a MNP of ALT alleles (segregating together):
|
||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
||||
*/
|
||||
private static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
|
||||
public static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
|
|
@ -992,4 +1009,47 @@ public class VariantContextUtils {
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Checks if all samples are consistent in their haplotypes:
|
||||
[Assumes that vc1 and vc2 were already checked to have the same sample names && allSamplesAreMergeable(vc1, vc2)]
|
||||
*/
|
||||
|
||||
public static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
||||
Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||
Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||
|
||||
// Note the segregation of the alleles for the reference genome:
|
||||
allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
|
||||
allele2ToAllele1.put(vc2.getReference(), vc1.getReference());
|
||||
|
||||
// Note the segregation of the alleles for each sample (and check that it is consistent with the reference and all previous samples).
|
||||
for (Map.Entry<String, Genotype> gt1Entry : vc1.getGenotypes().entrySet()) {
|
||||
String sample = gt1Entry.getKey();
|
||||
Genotype gt1 = gt1Entry.getValue();
|
||||
Genotype gt2 = vc2.getGenotype(sample);
|
||||
|
||||
List<Allele> site1Alleles = gt1.getAlleles();
|
||||
List<Allele> site2Alleles = gt2.getAlleles();
|
||||
|
||||
Iterator<Allele> all2It = site2Alleles.iterator();
|
||||
for (Allele all1 : site1Alleles) {
|
||||
Allele all2 = all2It.next();
|
||||
|
||||
Allele all1To2 = allele1ToAllele2.get(all1);
|
||||
if (all1To2 == null)
|
||||
allele1ToAllele2.put(all1, all2);
|
||||
else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
|
||||
return false;
|
||||
|
||||
Allele all2To1 = allele2ToAllele1.get(all2);
|
||||
if (all2To1 == null)
|
||||
allele2ToAllele1.put(all2, all1);
|
||||
else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||
|
||||
|
||||
/**
|
||||
* Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD.
|
||||
*/
|
||||
@Allows(value = {DataSource.REFERENCE})
|
||||
@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
||||
@By(DataSource.REFERENCE_ORDERED_DATA)
|
||||
|
||||
public class MergeMNPsWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
protected VCFWriter writer = null;
|
||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
||||
|
||||
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
|
||||
protected int maxGenomicDistanceForMNP = 1;
|
||||
|
||||
private LinkedList<String> rodNames = null;
|
||||
|
||||
public void initialize() {
|
||||
rodNames = new LinkedList<String>();
|
||||
rodNames.add("variant");
|
||||
|
||||
initializeVcfWriter();
|
||||
}
|
||||
|
||||
private void initializeVcfWriter() {
|
||||
// false <-> don't take control of writer, since didn't create it:
|
||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, false);
|
||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||
|
||||
// setup the header fields:
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples())));
|
||||
}
|
||||
|
||||
public boolean generateExtendedEvents() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* For each site, send it to be (possibly) merged with previously observed sites.
|
||||
*
|
||||
* @param tracker the meta-data tracker
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @return dummy Integer
|
||||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (tracker == null)
|
||||
return null;
|
||||
|
||||
boolean requireStartHere = true; // only see each VariantContext once
|
||||
boolean takeFirstOnly = false; // take as many entries as the VCF file has
|
||||
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly))
|
||||
writeVCF(vc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void writeVCF(VariantContext vc) {
|
||||
byte refBase;
|
||||
if (!vc.isIndel()) {
|
||||
Allele varAllele = vc.getReference();
|
||||
refBase = SNPallelePair.getSingleBase(varAllele);
|
||||
}
|
||||
else {
|
||||
refBase = vc.getReferenceBaseForIndel();
|
||||
}
|
||||
|
||||
vcMergerWriter.add(vc, refBase);
|
||||
}
|
||||
|
||||
public Integer reduce(Integer result, Integer total) {
|
||||
if (result == null)
|
||||
return total;
|
||||
|
||||
return total + result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release any VariantContexts not yet processed.
|
||||
*
|
||||
* @param result Empty for now...
|
||||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
vcMergerWriter.close();
|
||||
|
||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
||||
}
|
||||
}
|
||||
|
|
@ -42,13 +42,15 @@ import java.util.*;
|
|||
|
||||
// Streams in VariantContext objects and streams out VariantContexts produced by merging phased segregating polymorphisms into MNP VariantContexts
|
||||
|
||||
public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||
public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter {
|
||||
private VCFWriter innerWriter;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
private ReferenceSequenceFile referenceFileForMNPmerging;
|
||||
private MergeRule mergeRule;
|
||||
|
||||
private VariantContextMergeRule vcMergeRule;
|
||||
private VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
||||
|
||||
private String useSingleSample = null;
|
||||
|
||||
|
|
@ -67,11 +69,12 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
// Should we call innerWriter.close() in close()
|
||||
private boolean takeOwnershipOfInner;
|
||||
|
||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, MergeRule mergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, VariantContextMergeRule vcMergeRule, VariantContextUtils.AlleleMergeRule alleleMergeRule, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
this.innerWriter = innerWriter;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile);
|
||||
this.mergeRule = mergeRule;
|
||||
this.vcMergeRule = vcMergeRule;
|
||||
this.alleleMergeRule = alleleMergeRule;
|
||||
this.useSingleSample = singleSample;
|
||||
this.emitOnlyMergedRecords = emitOnlyMergedRecords;
|
||||
|
||||
|
|
@ -87,8 +90,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
||||
}
|
||||
|
||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
|
||||
this(innerWriter, genomeLocParser, referenceFile, new DistanceMergeRule(maxGenomicDistanceForMNP, genomeLocParser), null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
|
||||
public MergeSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger, boolean takeOwnershipOfInner) {
|
||||
this(innerWriter, genomeLocParser, referenceFile, new DistanceMergeRule(maxGenomicDistanceForMNP, genomeLocParser), new SegregatingMNPmergeAllelesRule(), null, false, logger, takeOwnershipOfInner, true); // by default: consider all samples, emit all records, keep track of alt allele statistics
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
|
|
@ -146,7 +149,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
}
|
||||
else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them:
|
||||
numRecordsAttemptToMerge++;
|
||||
boolean shouldMerge = mergeRule.shouldMerge(vcfrWaitingToMerge.vc, vc);
|
||||
boolean shouldAttemptToMerge = vcMergeRule.shouldAttemptToMerge(vcfrWaitingToMerge.vc, vc);
|
||||
|
||||
/*
|
||||
TODO: -- CONSIDER THE FOLLOWING EXAMPLE: WHAT DO WE WANT HERE??? --
|
||||
|
|
@ -163,14 +166,20 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
But, since we merged sites 1 and 2, we get that sites 1-2 and 3 are counted as two haplotypes of: ALT-REF and ALT-ALT
|
||||
*/
|
||||
if (altAlleleStats != null)
|
||||
altAlleleStats.updateSampleStats(vcfrWaitingToMerge.vc, vc, shouldMerge);
|
||||
altAlleleStats.updateSampleStats(vcfrWaitingToMerge.vc, vc, shouldAttemptToMerge);
|
||||
|
||||
boolean mergedRecords = false;
|
||||
if (shouldMerge) {
|
||||
if (shouldAttemptToMerge) {
|
||||
numRecordsSatisfyingMergeRule++;
|
||||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
|
||||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser, vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging, alleleMergeRule);
|
||||
|
||||
if (mergedVc != null) {
|
||||
mergedRecords = true;
|
||||
|
||||
Map<String, Object> addedAttribs = vcMergeRule.addToMergedAttributes(vcfrWaitingToMerge.vc, vc);
|
||||
addedAttribs.putAll(mergedVc.getAttributes());
|
||||
mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs);
|
||||
|
||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
|
||||
numMergedRecords++;
|
||||
}
|
||||
|
|
@ -213,8 +222,12 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
return numMergedRecords;
|
||||
}
|
||||
|
||||
public MergeRule getMergeRule() {
|
||||
return mergeRule;
|
||||
public VariantContextMergeRule getVcMergeRule() {
|
||||
return vcMergeRule;
|
||||
}
|
||||
|
||||
public VariantContextUtils.AlleleMergeRule getAlleleMergeRule() {
|
||||
return alleleMergeRule;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -248,7 +261,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
|
||||
private class AltAlleleStats {
|
||||
public int numSuccessiveGenotypes;
|
||||
public int numSuccessiveGenotypesThatCouldBeMerged;
|
||||
public int numSuccessiveGenotypesAttemptedToBeMerged;
|
||||
|
||||
public int oneSampleMissing;
|
||||
public int atLeastOneSampleNotCalledOrFiltered;
|
||||
|
|
@ -267,7 +280,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
|
||||
public AltAlleleStats() {
|
||||
this.numSuccessiveGenotypes = 0;
|
||||
this.numSuccessiveGenotypesThatCouldBeMerged = 0;
|
||||
this.numSuccessiveGenotypesAttemptedToBeMerged = 0;
|
||||
|
||||
this.oneSampleMissing = 0;
|
||||
this.atLeastOneSampleNotCalledOrFiltered = 0;
|
||||
|
|
@ -292,11 +305,11 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
sb.append("Not called or filtered:\t" + atLeastOneSampleNotCalledOrFiltered + "\n");
|
||||
|
||||
sb.append("* Number of successive pairs of genotypes:\t" + numSuccessiveGenotypes + "\n");
|
||||
sb.append("Number of successive pairs of genotypes with " + mergeRule + ":\t" + numSuccessiveGenotypesThatCouldBeMerged + "\n");
|
||||
sb.append("Number of successive pairs of genotypes with " + vcMergeRule + ":\t" + numSuccessiveGenotypesAttemptedToBeMerged + "\n");
|
||||
|
||||
sb.append("Unknown segregation, " + mergeRule + ":\t" + segregationUnknown + "\n");
|
||||
sb.append("Not variant at least one of pair, segregation known, " + mergeRule + ":\t" + eitherNotVariant + "\n");
|
||||
sb.append("* Variant at both, segregation known, " + mergeRule + ":\t" + percentageString(bothInPairHaveVariant, numSuccessiveGenotypes) + "\n");
|
||||
sb.append("Unknown segregation, " + vcMergeRule + ":\t" + segregationUnknown + "\n");
|
||||
sb.append("Not variant at least one of pair, segregation known, " + vcMergeRule + ":\t" + eitherNotVariant + "\n");
|
||||
sb.append("* Variant at both, segregation known, " + vcMergeRule + ":\t" + percentageString(bothInPairHaveVariant, numSuccessiveGenotypes) + "\n");
|
||||
|
||||
sb.append("[Total haplotypes at pairs:\t" + (ref_ref_pair + ref_alt_pair + alt_ref_pair + alt_alt_pair) + "\n");
|
||||
sb.append("REF-REF:\t" + ref_ref_pair + "\n");
|
||||
|
|
@ -326,7 +339,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
this.sampleStats = new HashMap<String, AltAlleleStats>();
|
||||
}
|
||||
|
||||
public void updateSampleStats(VariantContext vc1, VariantContext vc2, boolean shouldMerge) {
|
||||
public void updateSampleStats(VariantContext vc1, VariantContext vc2, boolean shouldAttemptToMerge) {
|
||||
if (vc1.isFiltered() || vc2.isFiltered())
|
||||
return;
|
||||
|
||||
|
|
@ -351,8 +364,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
else {
|
||||
aas.numSuccessiveGenotypes++;
|
||||
|
||||
if (shouldMerge) {
|
||||
aas.numSuccessiveGenotypesThatCouldBeMerged++;
|
||||
if (shouldAttemptToMerge) {
|
||||
aas.numSuccessiveGenotypesAttemptedToBeMerged++;
|
||||
|
||||
if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) {
|
||||
aas.segregationUnknown++;
|
||||
|
|
@ -428,7 +441,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("-------------------------------------------------------------------------\n");
|
||||
sb.append("Per-sample alternate allele statistics [" + mergeRule + "]\n");
|
||||
sb.append("Per-sample alternate allele statistics [" + vcMergeRule + "]\n");
|
||||
sb.append("-------------------------------------------------------------------------");
|
||||
|
||||
for (Map.Entry<String, AltAlleleStats> sampAltAllStatsEntry : sampleStats.entrySet()) {
|
||||
|
|
@ -440,4 +453,66 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
External classes:
|
||||
*/
|
||||
|
||||
abstract class VariantContextMergeRule {
|
||||
abstract public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2);
|
||||
|
||||
public Map<String, Object> addToMergedAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
return new HashMap<String, Object>();
|
||||
}
|
||||
}
|
||||
|
||||
class DistanceMergeRule extends VariantContextMergeRule {
|
||||
private int maxGenomicDistanceForMNP;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
public DistanceMergeRule(int maxGenomicDistanceForMNP, GenomeLocParser genomeLocParser) {
|
||||
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2) {
|
||||
return minDistance(vc1, vc2) <= maxGenomicDistanceForMNP;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Merge distance <= " + maxGenomicDistanceForMNP;
|
||||
}
|
||||
|
||||
public int minDistance(VariantContext vc1, VariantContext vc2) {
|
||||
return VariantContextUtils.getLocation(genomeLocParser, vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser, vc2));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class ExistsDoubleAltAlleleMergeRule extends VariantContextUtils.AlleleMergeRule {
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
return VariantContextUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return super.toString() + ", some sample has a MNP of ALT alleles";
|
||||
}
|
||||
}
|
||||
|
||||
class SegregatingMNPmergeAllelesRule extends ExistsDoubleAltAlleleMergeRule {
|
||||
public SegregatingMNPmergeAllelesRule() {
|
||||
super();
|
||||
}
|
||||
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
// Must be interesting AND consistent:
|
||||
return super.allelesShouldBeMerged(vc1, vc2) && VariantContextUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return super.toString() + ", all alleles segregate consistently";
|
||||
}
|
||||
}
|
||||
|
|
@ -45,7 +45,7 @@ import java.util.*;
|
|||
import static org.broadinstitute.sting.utils.vcf.VCFUtils.getVCFHeadersFromRods;
|
||||
|
||||
/**
|
||||
* Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD.
|
||||
* Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD.
|
||||
*/
|
||||
@Allows(value = {DataSource.REFERENCE})
|
||||
@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class))
|
||||
|
|
@ -55,10 +55,10 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
protected VCFWriter writer = null;
|
||||
private MergePhasedSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
||||
private MergeSegregatingAlternateAllelesVCFWriter vcMergerWriter = null;
|
||||
|
||||
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
|
||||
protected int maxGenomicDistanceForMNP = 1;
|
||||
@Argument(fullName = "maxGenomicDistance", shortName = "maxDist", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records; [default:1]", required = false)
|
||||
protected int maxGenomicDistance = 1;
|
||||
|
||||
@Argument(fullName = "useSingleSample", shortName = "useSample", doc = "Only output genotypes for the single sample given; [default:use all samples]", required = false)
|
||||
protected String useSingleSample = null;
|
||||
|
|
@ -74,9 +74,12 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
public final static String UNION_REFSEQ = "UNION";
|
||||
public final static String INTERSECT_REFSEQ = "INTERSECT";
|
||||
|
||||
@Argument(fullName = "mergeBasedOnRefSeqAnnotation", shortName = "mergeBasedOnRefSeqAnnotation", doc = "'Should merging be performed if two sites lie on the same RefSeq sequence in the INFO field {" + IGNORE_REFSEQ + ", " + UNION_REFSEQ + ", " + INTERSECT_REFSEQ + "}; [default:"+ IGNORE_REFSEQ + "]", required = false)
|
||||
@Argument(fullName = "mergeBasedOnRefSeqAnnotation", shortName = "mergeBasedOnRefSeqAnnotation", doc = "'Should merging be performed if two sites lie on the same RefSeq sequence in the INFO field {" + IGNORE_REFSEQ + ", " + UNION_REFSEQ + ", " + INTERSECT_REFSEQ + "}; [default:" + IGNORE_REFSEQ + "]", required = false)
|
||||
protected String mergeBasedOnRefSeqAnnotation = IGNORE_REFSEQ;
|
||||
|
||||
@Argument(fullName = "dontRequireSomeSampleHasDoubleAltAllele", shortName = "dontRequireSomeSampleHasDoubleAltAllele", doc = "Should the requirement, that SUCCESSIVE records to be merged have at least one sample with a double alternate allele, be relaxed?; [default:false]", required = false)
|
||||
protected boolean dontRequireSomeSampleHasDoubleAltAllele = false;
|
||||
|
||||
private LinkedList<String> rodNames = null;
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -89,14 +92,20 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
private void initializeVcfWriter() {
|
||||
GenomeLocParser genomeLocParser = getToolkit().getGenomeLocParser();
|
||||
|
||||
MergeRule mergeRule = null;
|
||||
VariantContextMergeRule vcMergeRule;
|
||||
if (mergeBasedOnRefSeqAnnotation.equals(IGNORE_REFSEQ))
|
||||
mergeRule = new DistanceMergeRule(maxGenomicDistanceForMNP, genomeLocParser);
|
||||
vcMergeRule = new DistanceMergeRule(maxGenomicDistance, genomeLocParser);
|
||||
else
|
||||
mergeRule = new SameGenePlusWithinDistanceMergeRule(maxGenomicDistanceForMNP, genomeLocParser, mergeBasedOnRefSeqAnnotation);
|
||||
vcMergeRule = new SameGenePlusWithinDistanceMergeRule(maxGenomicDistance, genomeLocParser, mergeBasedOnRefSeqAnnotation);
|
||||
|
||||
VariantContextUtils.AlleleMergeRule alleleMergeRule;
|
||||
if (dontRequireSomeSampleHasDoubleAltAllele) // if a pair of VariantContext passes the vcMergeRule, then always merge them:
|
||||
alleleMergeRule = new AlwaysMergeAllelesRule();
|
||||
else
|
||||
alleleMergeRule = new ExistsDoubleAltAlleleMergeRule();
|
||||
|
||||
// false <-> don't take control of writer, since didn't create it:
|
||||
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,genomeLocParser, getToolkit().getArguments().referenceFile, mergeRule, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||
vcMergerWriter = new MergeSegregatingAlternateAllelesVCFWriter(writer, genomeLocParser, getToolkit().getArguments().referenceFile, vcMergeRule, alleleMergeRule, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||
|
||||
// setup the header fields:
|
||||
|
|
@ -168,8 +177,8 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
System.out.println("Only considered single sample: " + useSingleSample);
|
||||
|
||||
System.out.println("Number of successive pairs of records: " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
||||
System.out.println("Number of records merged [all samples are mergeable, some sample has a MNP of ALT alleles]: " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println("Number of potentially merged records (" + vcMergerWriter.getVcMergeRule() + "): " + vcMergerWriter.getNumRecordsSatisfyingMergeRule());
|
||||
System.out.println("Number of records merged ("+ vcMergerWriter.getAlleleMergeRule() + "): " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
||||
}
|
||||
}
|
||||
|
|
@ -179,32 +188,6 @@ enum MergeBasedOnRefSeqAnnotation {
|
|||
UNION_WITH_DIST, INTERSECT_WITH_DIST
|
||||
}
|
||||
|
||||
interface MergeRule {
|
||||
public boolean shouldMerge(VariantContext vc1, VariantContext vc2);
|
||||
}
|
||||
|
||||
class DistanceMergeRule implements MergeRule {
|
||||
private int maxGenomicDistanceForMNP;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
public DistanceMergeRule(int maxGenomicDistanceForMNP, GenomeLocParser genomeLocParser) {
|
||||
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
public boolean shouldMerge(VariantContext vc1, VariantContext vc2) {
|
||||
return minDistance(vc1, vc2) <= maxGenomicDistanceForMNP;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Merge distance <= " + maxGenomicDistanceForMNP;
|
||||
}
|
||||
|
||||
public int minDistance(VariantContext vc1, VariantContext vc2) {
|
||||
return VariantContextUtils.getLocation(genomeLocParser,vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser,vc2));
|
||||
}
|
||||
}
|
||||
|
||||
class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule {
|
||||
private MergeBasedOnRefSeqAnnotation mergeBasedOnRefSeqAnnotation;
|
||||
|
||||
|
|
@ -219,8 +202,8 @@ class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule {
|
|||
throw new UserException("Must provide " + MergeSegregatingAlternateAllelesWalker.IGNORE_REFSEQ + ", " + MergeSegregatingAlternateAllelesWalker.UNION_REFSEQ + ", or " + MergeSegregatingAlternateAllelesWalker.INTERSECT_REFSEQ + " as argument to mergeBasedOnRefSeqAnnotation!");
|
||||
}
|
||||
|
||||
public boolean shouldMerge(VariantContext vc1, VariantContext vc2) {
|
||||
boolean withinDistance = super.shouldMerge(vc1, vc2);
|
||||
public boolean shouldAttemptToMerge(VariantContext vc1, VariantContext vc2) {
|
||||
boolean withinDistance = super.shouldAttemptToMerge(vc1, vc2);
|
||||
|
||||
if (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST)
|
||||
return withinDistance || sameGene(vc1, vc2);
|
||||
|
|
@ -247,4 +230,18 @@ class SameGenePlusWithinDistanceMergeRule extends DistanceMergeRule {
|
|||
public String toString() {
|
||||
return super.toString() + " " + (mergeBasedOnRefSeqAnnotation == MergeBasedOnRefSeqAnnotation.UNION_WITH_DIST ? "OR" : "AND") + " on the same gene";
|
||||
}
|
||||
|
||||
public Map<String, Object> addToMergedAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> addedAttribs = super.addToMergedAttributes(vc1, vc2);
|
||||
addedAttribs.putAll(RefSeqDataParser.getMergedRefSeqNameAttributes(vc1, vc2));
|
||||
return addedAttribs;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
class AlwaysMergeAllelesRule extends VariantContextUtils.AlleleMergeRule {
|
||||
public boolean allelesShouldBeMerged(VariantContext vc1, VariantContext vc2) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -138,8 +138,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
// Wrapper VCFWriters will take ownership of inner writers iff: inner writer != origWriter [which wasn't created here]
|
||||
VCFWriter origWriter = writer;
|
||||
|
||||
if (enableMergePhasedSegregatingPolymorphismsToMNP) // null <-> use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged:
|
||||
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, new DistanceMergeRule(maxGenomicDistanceForMNP, getToolkit().getGenomeLocParser()), null, false, logger, writer != origWriter, false);
|
||||
if (enableMergePhasedSegregatingPolymorphismsToMNP)
|
||||
writer = new MergeSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getGenomeLocParser(), getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, writer != origWriter);
|
||||
|
||||
/* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow
|
||||
Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/* Some methods for extracting RefSeq-related data from annotated VCF INFO fields:
|
||||
|
|
@ -36,6 +37,8 @@ public class RefSeqDataParser {
|
|||
private static String NAME_KEY = REFSEQ_PREFIX + "name";
|
||||
private static String NAME2_KEY = REFSEQ_PREFIX + "name2";
|
||||
|
||||
private static String[] NAME_KEYS = {NAME_KEY, NAME2_KEY};
|
||||
|
||||
private static Map<String, String> getRefSeqEntriesToNames(VariantContext vc, boolean getName2) {
|
||||
String nameKeyToUse = getName2 ? NAME2_KEY : NAME_KEY;
|
||||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
||||
|
|
@ -78,6 +81,57 @@ public class RefSeqDataParser {
|
|||
return getRefSeqNames(vc, false);
|
||||
}
|
||||
|
||||
public static Map<String, Object> getMergedRefSeqNameAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> refSeqNameAttribs = new HashMap<String, Object>();
|
||||
|
||||
Map<String, RefSeqEntry> entriesMap1 = getAllRefSeqEntriesByName(vc1);
|
||||
Map<String, RefSeqEntry> entriesMap2 = getAllRefSeqEntriesByName(vc2);
|
||||
|
||||
Set<String> commonNames = entriesMap1.keySet();
|
||||
commonNames.retainAll(entriesMap2.keySet());
|
||||
boolean addSuffix = commonNames.size() > 1;
|
||||
int count = 1;
|
||||
|
||||
for (String name : commonNames) {
|
||||
RefSeqEntry refseq1 = entriesMap1.get(name);
|
||||
RefSeqEntry refseq2 = entriesMap2.get(name);
|
||||
|
||||
String keySuffix = "";
|
||||
if (addSuffix)
|
||||
keySuffix = "_" + count;
|
||||
|
||||
boolean added = false;
|
||||
for (String key : NAME_KEYS) {
|
||||
Object obj1 = refseq1.info.get(key);
|
||||
Object obj2 = refseq2.info.get(key);
|
||||
if (obj1 != null && obj2 != null && obj1.equals(obj2)) {
|
||||
added = true;
|
||||
String useKey = key + keySuffix;
|
||||
refSeqNameAttribs.put(useKey, obj1);
|
||||
}
|
||||
}
|
||||
if (added)
|
||||
count++;
|
||||
}
|
||||
if (count > 1)
|
||||
refSeqNameAttribs.put(NUM_RECORDS_KEY, count - 1); // since incremented count one extra time
|
||||
|
||||
return refSeqNameAttribs;
|
||||
}
|
||||
|
||||
private static Map<String, RefSeqEntry> getAllRefSeqEntriesByName(VariantContext vc) {
|
||||
Map<String, RefSeqEntry> nameToEntries = new TreeMap<String, RefSeqEntry>();
|
||||
|
||||
List<RefSeqEntry> allEntries = getAllRefSeqEntries(vc);
|
||||
for (RefSeqEntry entry : allEntries) {
|
||||
Object name = entry.info.get(NAME_KEY);
|
||||
if (name != null)
|
||||
nameToEntries.put(name.toString(), entry);
|
||||
}
|
||||
|
||||
return nameToEntries;
|
||||
}
|
||||
|
||||
// Returns a List of SEPARATE Map<refseq.ENTRY, refseq.VALUE> for EACH RefSeq annotation (i.e., each gene), stripping out the "_1", "_2", etc.
|
||||
private static List<RefSeqEntry> getAllRefSeqEntries(VariantContext vc) {
|
||||
List<RefSeqEntry> allRefSeq = new LinkedList<RefSeqEntry>();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class MergeMNPsIntegrationTest extends WalkerTest {
|
||||
|
||||
public static String baseTestString(String reference, String VCF, int maxDistMNP) {
|
||||
return "-T MergeMNPs" +
|
||||
" -R " + reference +
|
||||
" -B:variant,VCF " + validationDataLocation + VCF +
|
||||
" --maxGenomicDistanceForMNP " + maxDistMNP +
|
||||
" -o %s" +
|
||||
" -NO_HEADER";
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("19d0b2361367024bb9a83b9c15ef2453"));
|
||||
executeTest("Merge MNP sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("f25a6403579dab1395773b3ba365c327"));
|
||||
executeTest("Merge MNP sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test3() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("a064955ffeea7fc4e09512f3e9cdbb9e"));
|
||||
executeTest("Merge MNP sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -7,11 +7,11 @@ import java.util.Arrays;
|
|||
|
||||
public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest {
|
||||
|
||||
public static String baseTestString(String reference, String VCF, int maxDistMNP) {
|
||||
public static String baseTestString(String reference, String VCF, int maxDist) {
|
||||
return "-T MergeSegregatingAlternateAlleles" +
|
||||
" -R " + reference +
|
||||
" -B:variant,VCF " + validationDataLocation + VCF +
|
||||
" --maxGenomicDistanceForMNP " + maxDistMNP +
|
||||
" --maxGenomicDistance " + maxDist +
|
||||
" -o %s" +
|
||||
" -NO_HEADER";
|
||||
}
|
||||
|
|
@ -24,7 +24,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("e6a14fc97dbd0aaa8e6a4d9a7f1616a6"));
|
||||
executeTest("Merge MNP het sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -34,7 +34,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("cc2b45c85a51b4998e30758c48f61940"));
|
||||
executeTest("Merge MNP het sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -44,8 +44,8 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest
|
|||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("47300cc7a5a7d84b3c279f04c4567739"));
|
||||
executeTest("Merge MNP het sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue