Added ability to focus only on a single sample and/or emit only merged records in MNP merger
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4587 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5cdd7a7ba4
commit
34538bf2b3
|
|
@ -47,6 +47,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
private ReferenceSequenceFile referenceFileForMNPmerging;
|
||||
private int maxGenomicDistanceForMNP;
|
||||
|
||||
private String useSingleSample = null;
|
||||
|
||||
private boolean emitOnlyMergedRecords;
|
||||
|
||||
private VCFRecord vcfrWaitingToMerge;
|
||||
private List<VCFRecord> filteredVcfrList;
|
||||
|
||||
|
|
@ -60,10 +64,13 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
// Should we call innerWriter.close() in close()
|
||||
private boolean takeOwnershipOfInner;
|
||||
|
||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) {
|
||||
this.innerWriter = innerWriter;
|
||||
this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile);
|
||||
this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP;
|
||||
this.useSingleSample = singleSample;
|
||||
this.emitOnlyMergedRecords = emitOnlyMergedRecords;
|
||||
|
||||
this.vcfrWaitingToMerge = null;
|
||||
this.filteredVcfrList = new LinkedList<VCFRecord>();
|
||||
this.numRecordsWithinDistance = 0;
|
||||
|
|
@ -77,7 +84,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
}
|
||||
|
||||
public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) {
|
||||
this(innerWriter, referenceFile, maxGenomicDistanceForMNP, logger, false, false); // by default, don't own inner and don't keep track of alt allele statistics
|
||||
this(innerWriter, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics
|
||||
}
|
||||
|
||||
public void writeHeader(VCFHeader header) {
|
||||
|
|
@ -92,6 +99,14 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
}
|
||||
|
||||
public void add(VariantContext vc, byte refBase) {
|
||||
if (useSingleSample != null) { // only want to output context for one sample
|
||||
Genotype sampGt = vc.getGenotype(useSingleSample);
|
||||
if (sampGt != null)
|
||||
vc = vc.subContextFromGenotypes(sampGt);
|
||||
else // asked for a sample that this vc does not contain, so ignore this vc:
|
||||
return;
|
||||
}
|
||||
|
||||
logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc));
|
||||
boolean curVcIsNotFiltered = vc.isNotFiltered();
|
||||
|
||||
|
|
@ -103,9 +118,9 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
|
||||
if (curVcIsNotFiltered) { // still need to wait before can release vc
|
||||
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc));
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase);
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
||||
}
|
||||
else {
|
||||
else if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||
logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc));
|
||||
innerWriter.add(vc, refBase);
|
||||
}
|
||||
|
|
@ -114,8 +129,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc));
|
||||
|
||||
if (!curVcIsNotFiltered) {
|
||||
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc));
|
||||
filteredVcfrList.add(new VCFRecord(vc, refBase));
|
||||
if (!emitOnlyMergedRecords) { // filtered records are never merged
|
||||
logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc));
|
||||
filteredVcfrList.add(new VCFRecord(vc, refBase, false));
|
||||
}
|
||||
}
|
||||
else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them:
|
||||
numRecordsAttemptToMerge++;
|
||||
|
|
@ -144,14 +161,14 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging);
|
||||
if (mergedVc != null) {
|
||||
mergedRecords = true;
|
||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase);
|
||||
vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true);
|
||||
numMergedRecords++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mergedRecords) {
|
||||
stopWaitingToMerge();
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase);
|
||||
vcfrWaitingToMerge = new VCFRecord(vc, refBase, false);
|
||||
}
|
||||
logger.debug("Merged? = " + mergedRecords);
|
||||
}
|
||||
|
|
@ -165,7 +182,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
return;
|
||||
}
|
||||
|
||||
innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase);
|
||||
if (!emitOnlyMergedRecords || vcfrWaitingToMerge.resultedFromMerge)
|
||||
innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase);
|
||||
vcfrWaitingToMerge = null;
|
||||
|
||||
for (VCFRecord vcfr : filteredVcfrList)
|
||||
|
|
@ -209,10 +227,12 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
private static class VCFRecord {
|
||||
public VariantContext vc;
|
||||
public byte refBase;
|
||||
public boolean resultedFromMerge;
|
||||
|
||||
public VCFRecord(VariantContext vc, byte refBase) {
|
||||
public VCFRecord(VariantContext vc, byte refBase, boolean resultedFromMerge) {
|
||||
this.vc = vc;
|
||||
this.refBase = refBase;
|
||||
this.resultedFromMerge = resultedFromMerge;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +304,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite
|
|||
|
||||
private String percentageString(int count, int baseCount) {
|
||||
int NUM_DECIMAL_PLACES = 1;
|
||||
String percent = new Formatter().format("%."+ NUM_DECIMAL_PLACES + "f", MathUtils.percentage(count, baseCount)).toString();
|
||||
String percent = new Formatter().format("%." + NUM_DECIMAL_PLACES + "f", MathUtils.percentage(count, baseCount)).toString();
|
||||
return count + " (" + percent + "%)";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,12 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
|
||||
protected int maxGenomicDistanceForMNP = 1;
|
||||
|
||||
@Argument(fullName = "useSingleSample", shortName = "useSample", doc = "Only output genotypes for the single sample given; [default:use all samples]", required = false)
|
||||
protected String useSingleSample = null;
|
||||
|
||||
@Argument(fullName = "emitOnlyMergedRecords", shortName = "emitOnlyMerged", doc = "Only output records that resulted from merging; [default:false]", required = false)
|
||||
protected boolean emitOnlyMergedRecords = false;
|
||||
|
||||
@Argument(fullName = "disablePrintAltAlleleStats", shortName = "noAlleleStats", doc = "Should the print-out of alternate allele statistics be disabled?; [default:false]", required = false)
|
||||
protected boolean disablePrintAlternateAlleleStatistics = false;
|
||||
|
||||
|
|
@ -70,7 +76,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
|
||||
private void initializeVcfWriter() {
|
||||
// false <-> don't take control of writer, since didn't create it:
|
||||
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||
vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics);
|
||||
writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter]
|
||||
|
||||
// setup the header fields:
|
||||
|
|
@ -139,7 +145,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker<Integer, I
|
|||
vcMergerWriter.close();
|
||||
|
||||
System.out.println("Number of successive pairs of records (any distance): " + vcMergerWriter.getNumRecordsAttemptToMerge());
|
||||
System.out.println("Number of potentially merged records (distance <= "+ maxGenomicDistanceForMNP + "): " + vcMergerWriter.getNumRecordsWithinDistance());
|
||||
System.out.println("Number of potentially merged records (distance <= " + maxGenomicDistanceForMNP + "): " + vcMergerWriter.getNumRecordsWithinDistance());
|
||||
System.out.println("Number of records merged [all samples are mergeable, some sample has a MNP of ALT alleles]: " + vcMergerWriter.getNumMergedRecords());
|
||||
System.out.println(vcMergerWriter.getAltAlleleStats());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -129,8 +129,8 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
// Wrapper VCFWriters will take ownership of inner writers iff: inner writer != origWriter [which wasn't created here]
|
||||
VCFWriter origWriter = writer;
|
||||
|
||||
if (enableMergePhasedSegregatingPolymorphismsToMNP) // false <-> don't track the statistics of alternate alleles being merged:
|
||||
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, logger, writer != origWriter, false);
|
||||
if (enableMergePhasedSegregatingPolymorphismsToMNP) // null <-> use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged:
|
||||
writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false);
|
||||
|
||||
/* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow
|
||||
Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow
|
||||
|
|
|
|||
Loading…
Reference in New Issue