Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
5ec737f008
|
|
@ -2,6 +2,7 @@ library(gsalib)
|
|||
library(ggplot2)
|
||||
library(gplots)
|
||||
library(tools)
|
||||
library(reshape)
|
||||
|
||||
#
|
||||
# Standard command line switch. Can be loaded interactively for development
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -221,6 +222,10 @@ public class GenomeAnalysisEngine {
|
|||
if (this.getArguments().nonDeterministicRandomSeed)
|
||||
resetRandomGenerator(System.currentTimeMillis());
|
||||
|
||||
// TODO -- REMOVE ME WHEN WE STOP BCF testing
|
||||
if ( this.getArguments().USE_SLOW_GENOTYPES )
|
||||
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
|
||||
|
||||
// if the user specified an input BQSR recalibration table then enable on the fly recalibration
|
||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);
|
||||
|
|
|
|||
|
|
@ -51,11 +51,6 @@ public class ReadProperties {
|
|||
return includeReadsWithDeletionAtLoci;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean generateExtendedEvents() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the files acting as sources of reads.
|
||||
* @return A list of files storing reads data.
|
||||
|
|
|
|||
|
|
@ -336,6 +336,11 @@ public class GATKArgumentCollection {
|
|||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
|
||||
@Hidden
|
||||
public boolean USE_SLOW_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.contexts;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -89,36 +88,9 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getBasePileup() {
|
||||
if(!hasBasePileup())
|
||||
throw new ReviewedStingException("No base pileup is available. Please check for a base pileup with hasBasePileup() before attempting to retrieve a pileup.");
|
||||
return basePileup;
|
||||
}
|
||||
|
||||
/** Returns extended event (indel) pileup over the current genomic location. May return null if this context keeps
|
||||
* only base pileup.
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public ReadBackedExtendedEventPileup getExtendedEventPileup() {
|
||||
if(!hasExtendedEventPileup())
|
||||
throw new ReviewedStingException("No extended event pileup is present.");
|
||||
return (ReadBackedExtendedEventPileup)basePileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this alignment context keeps base pileup over the current genomic location.
|
||||
* TODO: Syntax of AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface.
|
||||
* @return
|
||||
*/
|
||||
public boolean hasBasePileup() { return !(basePileup instanceof ReadBackedExtendedEventPileup); }
|
||||
|
||||
/** Returns true if this alignment context keeps extended event (indel) pileup over the current genomic location.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public boolean hasExtendedEventPileup() { return basePileup instanceof ReadBackedExtendedEventPileup; }
|
||||
|
||||
/**
|
||||
* Returns true if any reads have been filtered out of the pileup due to excess DoC.
|
||||
* @return True if reads have been filtered out. False otherwise.
|
||||
|
|
|
|||
|
|
@ -116,19 +116,15 @@ public class AlignmentContextUtils {
|
|||
*
|
||||
**/
|
||||
public static Map<SAMReadGroupRecord, AlignmentContext> splitContextByReadGroup(AlignmentContext context, Collection<SAMReadGroupRecord> readGroups) {
|
||||
if ( ! context.hasBasePileup() ) {
|
||||
return Collections.emptyMap();
|
||||
} else {
|
||||
HashMap<SAMReadGroupRecord, AlignmentContext> contexts = new HashMap<SAMReadGroupRecord, AlignmentContext>();
|
||||
HashMap<SAMReadGroupRecord, AlignmentContext> contexts = new HashMap<SAMReadGroupRecord, AlignmentContext>();
|
||||
|
||||
for (SAMReadGroupRecord rg : readGroups) {
|
||||
ReadBackedPileup rgPileup = context.getBasePileup().getPileupForReadGroup(rg.getReadGroupId());
|
||||
if ( rgPileup != null ) // there we some reads for RG
|
||||
contexts.put(rg, new AlignmentContext(context.getLocation(), rgPileup));
|
||||
}
|
||||
|
||||
return contexts;
|
||||
for (SAMReadGroupRecord rg : readGroups) {
|
||||
ReadBackedPileup rgPileup = context.getBasePileup().getPileupForReadGroup(rg.getReadGroupId());
|
||||
if ( rgPileup != null ) // there we some reads for RG
|
||||
contexts.put(rg, new AlignmentContext(context.getLocation(), rgPileup));
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup) {
|
||||
|
|
@ -139,32 +135,16 @@ public class AlignmentContextUtils {
|
|||
public static AlignmentContext joinContexts(Collection<AlignmentContext> contexts) {
|
||||
// validation
|
||||
GenomeLoc loc = contexts.iterator().next().getLocation();
|
||||
boolean isExtended = contexts.iterator().next().basePileup instanceof ReadBackedExtendedEventPileup;
|
||||
for(AlignmentContext context: contexts) {
|
||||
if(!loc.equals(context.getLocation()))
|
||||
throw new ReviewedStingException("Illegal attempt to join contexts from different genomic locations");
|
||||
if(isExtended != (context.basePileup instanceof ReadBackedExtendedEventPileup))
|
||||
throw new ReviewedStingException("Illegal attempt to join simple and extended contexts");
|
||||
}
|
||||
|
||||
AlignmentContext jointContext;
|
||||
if(isExtended) {
|
||||
List<ExtendedEventPileupElement> pe = new ArrayList<ExtendedEventPileupElement>();
|
||||
for(AlignmentContext context: contexts) {
|
||||
for(PileupElement pileupElement: context.basePileup)
|
||||
pe.add((ExtendedEventPileupElement)pileupElement);
|
||||
}
|
||||
jointContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc,pe));
|
||||
List<PileupElement> pe = new ArrayList<PileupElement>();
|
||||
for(AlignmentContext context: contexts) {
|
||||
for(PileupElement pileupElement: context.basePileup)
|
||||
pe.add(pileupElement);
|
||||
}
|
||||
else {
|
||||
List<PileupElement> pe = new ArrayList<PileupElement>();
|
||||
for(AlignmentContext context: contexts) {
|
||||
for(PileupElement pileupElement: context.basePileup)
|
||||
pe.add(pileupElement);
|
||||
}
|
||||
jointContext = new AlignmentContext(loc, new ReadBackedPileupImpl(loc,pe));
|
||||
}
|
||||
|
||||
return jointContext;
|
||||
return new AlignmentContext(loc, new ReadBackedPileupImpl(loc,pe));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * The basic downsampler API, with no reads-specific operations
 *
 * @param <T> the type of item being downsampled
 *
 * @author David Roazen
 */
public interface Downsampler<T> {

    /**
     * Submit one item to the downsampler for consideration. Some downsamplers will be able to determine
     * immediately whether the item survives the downsampling process, while others will need to see
     * more items before making that determination.
     *
     * @param item the item to consider for inclusion
     */
    public void submit( T item );

    /**
     * Submit a collection of items to the downsampler for consideration.
     *
     * @param items the items to consider for inclusion
     */
    public void submit( Collection<T> items );

    /**
     * Are there items that have survived the downsampling process waiting to be retrieved?
     *
     * @return true if survivors are waiting to be retrieved, false otherwise
     */
    public boolean hasDownsampledItems();

    /**
     * Return (and remove) all items that have survived downsampling and are waiting to be retrieved.
     *
     * @return the items that have survived downsampling so far
     */
    public List<T> consumeDownsampledItems();

    /**
     * Are there items stored in this downsampler that it doesn't yet know whether they will
     * ultimately survive the downsampling process?
     *
     * @return true if some stored items are still undecided, false otherwise
     */
    public boolean hasPendingItems();

    /**
     * Used to tell the downsampler that no more items will be submitted to it, and that it should
     * finalize any pending items.
     */
    public void signalEndOfInput();

    /**
     * Reset the downsampler to a clean state, devoid of any pending/downsampled items or tracked state
     * information.
     */
    public void clear();
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
|
||||
/**
|
||||
* StingSAMIterator wrapper around our generic reads downsampler interface
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class DownsamplingReadsIterator implements StingSAMIterator {
|
||||
|
||||
private StingSAMIterator nestedSAMIterator;
|
||||
private ReadsDownsampler<SAMRecord> downsampler;
|
||||
private Collection<SAMRecord> downsampledReadsCache;
|
||||
private Iterator<SAMRecord> downsampledReadsCacheIterator;
|
||||
|
||||
public DownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsampler<SAMRecord> downsampler ) {
|
||||
nestedSAMIterator = iter;
|
||||
this.downsampler = downsampler;
|
||||
fillDownsampledReadsCache();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
if ( downsampledReadsCacheIterator.hasNext() ) {
|
||||
return true;
|
||||
}
|
||||
else if ( ! nestedSAMIterator.hasNext() || ! fillDownsampledReadsCache() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
if ( ! downsampledReadsCacheIterator.hasNext() && ! fillDownsampledReadsCache() ) {
|
||||
throw new NoSuchElementException("next() called when there are no more items");
|
||||
}
|
||||
|
||||
return downsampledReadsCacheIterator.next();
|
||||
}
|
||||
|
||||
private boolean fillDownsampledReadsCache() {
|
||||
while ( nestedSAMIterator.hasNext() && ! downsampler.hasDownsampledItems() ) {
|
||||
downsampler.submit(nestedSAMIterator.next());
|
||||
}
|
||||
|
||||
if ( ! nestedSAMIterator.hasNext() ) {
|
||||
downsampler.signalEndOfInput();
|
||||
}
|
||||
|
||||
downsampledReadsCache = downsampler.consumeDownsampledItems();
|
||||
downsampledReadsCacheIterator = downsampledReadsCache.iterator();
|
||||
|
||||
return downsampledReadsCacheIterator.hasNext();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
public void close() {
|
||||
nestedSAMIterator.close();
|
||||
}
|
||||
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Fractional Downsampler: selects a specified fraction of the reads for inclusion
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
|
||||
|
||||
private ArrayList<T> selectedReads;
|
||||
|
||||
private int cutoffForInclusion;
|
||||
|
||||
private static final int RANDOM_POOL_SIZE = 10000;
|
||||
|
||||
public FractionalDownsampler( double fraction ) {
|
||||
if ( fraction < 0.0 || fraction > 1.0 ) {
|
||||
throw new ReviewedStingException("Fraction of reads to include must be between 0.0 and 1.0, inclusive");
|
||||
}
|
||||
|
||||
cutoffForInclusion = (int)(fraction * RANDOM_POOL_SIZE);
|
||||
clear();
|
||||
}
|
||||
|
||||
public void submit( T newRead ) {
|
||||
if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoffForInclusion ) {
|
||||
selectedReads.add(newRead);
|
||||
}
|
||||
}
|
||||
|
||||
public void submit( Collection<T> newReads ) {
|
||||
for ( T read : newReads ) {
|
||||
submit(read);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasDownsampledItems() {
|
||||
return selectedReads.size() > 0;
|
||||
}
|
||||
|
||||
public List<T> consumeDownsampledItems() {
|
||||
List<T> downsampledItems = selectedReads;
|
||||
clear();
|
||||
return downsampledItems;
|
||||
}
|
||||
|
||||
public boolean hasPendingItems() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void signalEndOfInput() {
|
||||
// NO-OP
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
selectedReads = new ArrayList<T>();
|
||||
}
|
||||
|
||||
public boolean requiresCoordinateSortOrder() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,259 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Positional Downsampler: When eliminating reads, try to do so evenly based on the alignment start positions
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class PositionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
|
||||
|
||||
private int targetCoverage;
|
||||
|
||||
private ReservoirDownsampler<T> reservoir;
|
||||
|
||||
private int currentContigIndex;
|
||||
|
||||
private int currentAlignmentStart;
|
||||
|
||||
private LinkedList<PositionalReadGrouping> pendingReads;
|
||||
|
||||
private ArrayList<T> finalizedReads;
|
||||
|
||||
public PositionalDownsampler ( int targetCoverage ) {
|
||||
this.targetCoverage = targetCoverage;
|
||||
clear();
|
||||
}
|
||||
|
||||
public void submit ( T newRead ) {
|
||||
if ( readIsPastCurrentPosition(newRead) ) {
|
||||
updateAndDownsamplePendingReads();
|
||||
}
|
||||
|
||||
reservoir.submit(newRead);
|
||||
updateCurrentPosition(newRead);
|
||||
}
|
||||
|
||||
public void submit ( Collection<T> newReads ) {
|
||||
for ( T read : newReads ) {
|
||||
submit(read);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasDownsampledItems() {
|
||||
return finalizedReads.size() > 0;
|
||||
}
|
||||
|
||||
public List<T> consumeDownsampledItems() {
|
||||
List<T> toReturn = finalizedReads;
|
||||
finalizedReads = new ArrayList<T>();
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public boolean hasPendingItems() {
|
||||
return pendingReads.size() > 0;
|
||||
}
|
||||
|
||||
public void signalEndOfInput() {
|
||||
updateAndDownsamplePendingReads();
|
||||
|
||||
for ( PositionalReadGrouping group : pendingReads ) {
|
||||
group.finalizeAllActiveReads();
|
||||
finalizedReads.addAll(group.getFinalizedReads());
|
||||
}
|
||||
|
||||
pendingReads.clear();
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
reservoir = new ReservoirDownsampler<T>(targetCoverage);
|
||||
pendingReads = new LinkedList<PositionalReadGrouping>();
|
||||
finalizedReads = new ArrayList<T>();
|
||||
}
|
||||
|
||||
public boolean requiresCoordinateSortOrder() {
|
||||
return true;
|
||||
}
|
||||
|
||||
private void updateCurrentPosition ( T read ) {
|
||||
currentContigIndex = read.getReferenceIndex();
|
||||
currentAlignmentStart = read.getAlignmentStart();
|
||||
}
|
||||
|
||||
private boolean readIsPastCurrentPosition ( T read ) {
|
||||
return read.getReferenceIndex() != currentContigIndex || read.getAlignmentStart() > currentAlignmentStart;
|
||||
}
|
||||
|
||||
private void updateAndDownsamplePendingReads() {
|
||||
finalizeOutOfScopeReads();
|
||||
|
||||
List<T> oldLocusReads = reservoir.consumeDownsampledItems();
|
||||
pendingReads.add(new PositionalReadGrouping(oldLocusReads, currentContigIndex, currentAlignmentStart));
|
||||
|
||||
downsampleOverlappingGroups();
|
||||
}
|
||||
|
||||
private void finalizeOutOfScopeReads() {
|
||||
Iterator<PositionalReadGrouping> iter = pendingReads.iterator();
|
||||
boolean noPrecedingUnfinalizedGroups = true;
|
||||
|
||||
while ( iter.hasNext() ) {
|
||||
PositionalReadGrouping currentGroup = iter.next();
|
||||
currentGroup.finalizeActiveReadsBeforePosition(currentContigIndex, currentAlignmentStart);
|
||||
|
||||
if ( currentGroup.isFinalized() && noPrecedingUnfinalizedGroups ) {
|
||||
iter.remove();
|
||||
finalizedReads.addAll(currentGroup.getFinalizedReads());
|
||||
}
|
||||
else {
|
||||
noPrecedingUnfinalizedGroups = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void downsampleOverlappingGroups() {
|
||||
int[] groupReadCounts = new int[pendingReads.size()];
|
||||
int totalCoverage = 0;
|
||||
int numActiveGroups = 0;
|
||||
int currentGroup = 0;
|
||||
|
||||
for ( PositionalReadGrouping group : pendingReads ) {
|
||||
groupReadCounts[currentGroup] = group.numActiveReads();
|
||||
totalCoverage += groupReadCounts[currentGroup];
|
||||
|
||||
if ( groupReadCounts[currentGroup] > 0 ) {
|
||||
numActiveGroups++;
|
||||
}
|
||||
|
||||
currentGroup++;
|
||||
}
|
||||
|
||||
if ( totalCoverage <= targetCoverage ) {
|
||||
return;
|
||||
}
|
||||
|
||||
int numReadsToRemove = Math.min(totalCoverage - targetCoverage, totalCoverage - numActiveGroups);
|
||||
currentGroup = 0;
|
||||
|
||||
while ( numReadsToRemove > 0 ) {
|
||||
if ( groupReadCounts[currentGroup] > 1 ) {
|
||||
groupReadCounts[currentGroup]--;
|
||||
numReadsToRemove--;
|
||||
}
|
||||
|
||||
currentGroup = (currentGroup + 1) % groupReadCounts.length;
|
||||
}
|
||||
|
||||
currentGroup = 0;
|
||||
for ( PositionalReadGrouping group : pendingReads ) {
|
||||
if ( ! group.isFinalized() ) {
|
||||
group.downsampleActiveReads(groupReadCounts[currentGroup]);
|
||||
}
|
||||
currentGroup++;
|
||||
}
|
||||
}
|
||||
|
||||
private class PositionalReadGrouping {
|
||||
private List<T> activeReads;
|
||||
private List<T> finalizedReads;
|
||||
|
||||
private int contig;
|
||||
private int alignmentStart;
|
||||
|
||||
public PositionalReadGrouping( Collection<T> reads, int contig, int alignmentStart ) {
|
||||
activeReads = new LinkedList<T>(reads);
|
||||
finalizedReads = new ArrayList<T>();
|
||||
this.contig = contig;
|
||||
this.alignmentStart = alignmentStart;
|
||||
}
|
||||
|
||||
public int numActiveReads() {
|
||||
return activeReads.size();
|
||||
}
|
||||
|
||||
public boolean isFinalized() {
|
||||
return activeReads.size() == 0;
|
||||
}
|
||||
|
||||
public List<T> getFinalizedReads() {
|
||||
return finalizedReads;
|
||||
}
|
||||
|
||||
public void finalizeActiveReadsBeforePosition( int contig, int position ) {
|
||||
if ( this.contig != contig ) {
|
||||
finalizeAllActiveReads();
|
||||
return;
|
||||
}
|
||||
|
||||
Iterator<T> iter = activeReads.iterator();
|
||||
|
||||
while ( iter.hasNext() ) {
|
||||
T read = iter.next();
|
||||
if ( read.getAlignmentEnd() < position ) {
|
||||
iter.remove();
|
||||
finalizedReads.add(read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void finalizeAllActiveReads() {
|
||||
finalizedReads.addAll(activeReads);
|
||||
activeReads.clear();
|
||||
}
|
||||
|
||||
public void downsampleActiveReads( int numReadsToKeep ) {
|
||||
if ( numReadsToKeep > activeReads.size() || numReadsToKeep < 0 ) {
|
||||
throw new ReviewedStingException(String.format("Cannot retain %d reads out of %d total reads",
|
||||
numReadsToKeep, activeReads.size()));
|
||||
}
|
||||
|
||||
BitSet itemsToKeep = new BitSet(activeReads.size());
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(activeReads.size(), numReadsToKeep) ) {
|
||||
itemsToKeep.set(selectedIndex);
|
||||
}
|
||||
|
||||
int currentIndex = 0;
|
||||
Iterator<T> iter = activeReads.iterator();
|
||||
|
||||
while ( iter.hasNext() ) {
|
||||
T read = iter.next();
|
||||
|
||||
if ( ! itemsToKeep.get(currentIndex) ) {
|
||||
iter.remove();
|
||||
}
|
||||
|
||||
currentIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* An extension of the basic downsampler API with reads-specific operations
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public interface ReadsDownsampler<T extends SAMRecord> extends Downsampler<T> {
|
||||
|
||||
/*
|
||||
* Does this downsampler require that reads be fed to it in coordinate order?
|
||||
*/
|
||||
public boolean requiresCoordinateSortOrder();
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Reservoir Downsampler: Selects n reads out of a stream whose size is not known in advance, with
|
||||
* every read in the stream having an equal chance of being selected for inclusion.
|
||||
*
|
||||
* An implementation of "Algorithm R" from the paper "Random Sampling with a Reservoir" (Jeffrey Scott Vitter, 1985)
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
|
||||
|
||||
private ArrayList<T> reservoir;
|
||||
|
||||
private int targetSampleSize;
|
||||
|
||||
private int totalReadsSeen;
|
||||
|
||||
public ReservoirDownsampler ( int targetSampleSize ) {
|
||||
if ( targetSampleSize <= 0 ) {
|
||||
throw new ReviewedStingException("Cannot do reservoir downsampling with a sample size <= 0");
|
||||
}
|
||||
|
||||
this.targetSampleSize = targetSampleSize;
|
||||
clear();
|
||||
}
|
||||
|
||||
public void submit ( T newRead ) {
|
||||
totalReadsSeen++;
|
||||
|
||||
if ( totalReadsSeen <= targetSampleSize ) {
|
||||
reservoir.add(newRead);
|
||||
}
|
||||
else {
|
||||
int randomSlot = GenomeAnalysisEngine.getRandomGenerator().nextInt(totalReadsSeen);
|
||||
if ( randomSlot < targetSampleSize ) {
|
||||
reservoir.set(randomSlot, newRead);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void submit ( Collection<T> newReads ) {
|
||||
for ( T read : newReads ) {
|
||||
submit(read);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasDownsampledItems() {
|
||||
return reservoir.size() > 0;
|
||||
}
|
||||
|
||||
public List<T> consumeDownsampledItems() {
|
||||
List<T> downsampledItems = reservoir;
|
||||
clear();
|
||||
return downsampledItems;
|
||||
}
|
||||
|
||||
public boolean hasPendingItems() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void signalEndOfInput() {
|
||||
// NO-OP
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
reservoir = new ArrayList<T>(targetSampleSize);
|
||||
totalReadsSeen = 0;
|
||||
}
|
||||
|
||||
public boolean requiresCoordinateSortOrder() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -38,10 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
|||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
|
@ -58,6 +55,8 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
*/
|
||||
private static Logger logger = Logger.getLogger(VariantContextWriterStorage.class);
|
||||
|
||||
private final static int BUFFER_SIZE = 1048576;
|
||||
|
||||
protected final File file;
|
||||
protected OutputStream stream;
|
||||
protected final VariantContextWriter writer;
|
||||
|
|
@ -93,7 +92,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
if ( stub.isCompressed() )
|
||||
stream = new BlockCompressedOutputStream(file);
|
||||
else
|
||||
stream = new PrintStream(file);
|
||||
stream = new PrintStream(new BufferedOutputStream(new FileOutputStream(file), BUFFER_SIZE));
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream", ex);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ import java.util.List;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VariantContextWriterStub implements Stub<VariantContextWriter>, VariantContextWriter {
|
||||
public final static boolean UPDATE_CONTIG_HEADERS = true;
|
||||
|
||||
/**
|
||||
* The engine, central to the GATK's processing.
|
||||
*/
|
||||
|
|
@ -215,7 +217,8 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
}
|
||||
|
||||
//vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
|
||||
if ( UPDATE_CONTIG_HEADERS )
|
||||
vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
|
||||
}
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
|
|
|
|||
|
|
@ -40,9 +40,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -63,7 +61,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// member fields
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position
|
||||
|
||||
/**
|
||||
* Used to create new GenomeLocs.
|
||||
|
|
@ -92,26 +89,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
|
||||
// events immediately preceding the current reference base).
|
||||
|
||||
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
||||
// the only purpose of this flag is to shield away a few additional lines of code
|
||||
// when extended piles are not needed, it may not be even worth it...
|
||||
|
||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
||||
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
||||
|
||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
||||
// we cache it here mainly for convenience
|
||||
|
||||
|
||||
public SAMRecordState(SAMRecord read, boolean extended) {
|
||||
public SAMRecordState(SAMRecord read) {
|
||||
this.read = read;
|
||||
cigar = read.getCigar();
|
||||
nCigarElements = cigar.numCigarElements();
|
||||
generateExtendedEvents = extended;
|
||||
|
||||
//System.out.printf("Creating a SAMRecordState: %s%n", this);
|
||||
}
|
||||
|
|
@ -150,27 +131,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return curElement.getOperator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if we just stepped over insertion/into a deletion prior to the last return from stepForwardOnGenome.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean hadIndel() {
|
||||
return (eventLength > 0);
|
||||
}
|
||||
|
||||
public int getEventLength() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
public byte[] getEventBases() {
|
||||
return insertedBases;
|
||||
}
|
||||
|
||||
public int getReadEventStartOffset() {
|
||||
return eventStart;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
}
|
||||
|
|
@ -208,19 +168,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
|
||||
// we do step forward on the ref, and by returning null we also indicate that we are past the read end.
|
||||
|
||||
if (generateExtendedEvents && eventDelayedFlag > 0) {
|
||||
|
||||
// if we had an indel right before the read ended (i.e. insertion was the last cigar element),
|
||||
// we keep it until next reference base; then we discard it and this will allow the LocusIterator to
|
||||
// finally discard this read
|
||||
eventDelayedFlag--;
|
||||
if (eventDelayedFlag == 0) {
|
||||
eventLength = -1; // reset event when we are past it
|
||||
insertedBases = null;
|
||||
eventStart = -1;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -232,17 +179,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
cigarElementCounter = curElement.getLength();
|
||||
break;
|
||||
case I: // insertion w.r.t. the reference
|
||||
if (generateExtendedEvents) {
|
||||
// we see insertions only once, when we step right onto them; the position on the read is scrolled
|
||||
// past the insertion right after that
|
||||
if (eventDelayedFlag > 1)
|
||||
throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s. This is an indication of a malformed file, but the SAM spec allows reads with adjacent insertion/deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar", read.getReadName(), read.getCigarString()));
|
||||
insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + curElement.getLength());
|
||||
eventLength = curElement.getLength();
|
||||
eventStart = readOffset;
|
||||
eventDelayedFlag = 2; // insertion causes re-entry into stepForwardOnGenome, so we set the delay to 2
|
||||
// System.out.println("Inserted "+(new String (insertedBases)) +" after "+readOffset);
|
||||
} // continue onto the 'S' case !
|
||||
case S: // soft clip
|
||||
cigarElementCounter = curElement.getLength();
|
||||
readOffset += curElement.getLength();
|
||||
|
|
@ -250,19 +186,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
|
||||
if (generateExtendedEvents) {
|
||||
if (cigarElementCounter == 1) {
|
||||
// generate an extended event only if we just stepped into the deletion (i.e. don't
|
||||
// generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!)
|
||||
if (eventDelayedFlag > 1)
|
||||
throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s. This is an indication of a malformed file, but the SAM spec allows reads with adjacent insertion/deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar", read.getReadName(), read.getCigarString()));
|
||||
eventLength = curElement.getLength();
|
||||
eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only
|
||||
eventStart = readOffset;
|
||||
insertedBases = null;
|
||||
// System.out.println("Deleted "+eventLength +" bases after "+readOffset);
|
||||
}
|
||||
}
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
@ -280,21 +203,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
if (generateExtendedEvents) {
|
||||
if (eventDelayedFlag > 0 && done) {
|
||||
// if we did make a successful step on the ref, decrement delayed flag. If, upon decrementing,
|
||||
// the flag is 1, we are standing on the reference base right after the indel (so we have to keep it).
|
||||
// Otherwise, we are away from the previous indel and have to clear our memories...
|
||||
eventDelayedFlag--; // when we notice an indel, we set delayed flag to 2, so now
|
||||
// if eventDelayedFlag == 1, an indel occurred right before the current base
|
||||
if (eventDelayedFlag == 0) {
|
||||
eventLength = -1; // reset event when we are past it
|
||||
insertedBases = null;
|
||||
eventStart = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return done ? curElement.getOperator() : stepForwardOnGenome();
|
||||
}
|
||||
}
|
||||
|
|
@ -374,147 +282,69 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||
readStates.collectPendingReads();
|
||||
|
||||
int size = 0;
|
||||
int nDeletions = 0;
|
||||
int nInsertions = 0;
|
||||
int nMQ0Reads = 0;
|
||||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
boolean hasBeenSampled = false;
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
||||
int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
// if extended events are requested, and if previous traversal step brought us over an indel in
|
||||
// at least one read, we emit extended pileup (making sure that it is associated with the previous base,
|
||||
// i.e. the one right *before* the indel) and do NOT shift the current position on the ref.
|
||||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||
// and shift the position.
|
||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||
Map<String, ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String, ReadBackedExtendedEventPileupImpl>();
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
||||
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
|
||||
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
||||
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
|
||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||
// are associated with the *previous* reference base
|
||||
GenomeLoc loc = genomeLocParser.incPos(getLocation(), -1);
|
||||
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
||||
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
||||
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
|
||||
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION;
|
||||
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for (final String sample : samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
int nextElementLength = nextElement.getLength();
|
||||
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nInsertions = 0;
|
||||
nMQ0Reads = 0;
|
||||
int maxDeletionLength = 0;
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next();
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||
final int eventLength = state.getEventLength();
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
|
||||
if (op == CigarOperator.D) {
|
||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||
pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
|
||||
size++;
|
||||
ExtendedEventPileupElement pileupElement;
|
||||
if (state.getEventBases() == null) { // Deletion event
|
||||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
|
||||
}
|
||||
else { // Insertion event
|
||||
nInsertions++;
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
|
||||
}
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
|
||||
indelPile.add(pileupElement);
|
||||
}
|
||||
|
||||
// this read has no indel so add it to the pileup as a NOEVENT:
|
||||
// a deletion that didn't start here (therefore, not an extended event)
|
||||
// we add (mis)matches as no events.
|
||||
else if (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci()) {
|
||||
size++;
|
||||
indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset));
|
||||
nDeletions++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!filterBaseInRead(read, location.getStart())) {
|
||||
String insertedBaseString = null;
|
||||
if (nextOp == CigarOperator.I)
|
||||
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()));
|
||||
|
||||
if (indelPile.size() != 0)
|
||||
fullExtendedEventPileup.put(sample, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
|
||||
}
|
||||
hasExtendedEvents = false; // we are done with extended events prior to current ref base
|
||||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
|
||||
}
|
||||
else { // this is a regular event pileup (not extended)
|
||||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
boolean hasBeenSampled = false;
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
||||
size = 0; // number of elements in this sample's pileup
|
||||
nDeletions = 0; // number of deletions in this sample's pileup
|
||||
nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
|
||||
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
|
||||
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
|
||||
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
|
||||
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
|
||||
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
|
||||
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
|
||||
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION;
|
||||
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
|
||||
|
||||
int nextElementLength = nextElement.getLength();
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (op == CigarOperator.D) {
|
||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||
pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
|
||||
size++;
|
||||
nDeletions++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!filterBaseInRead(read, location.getStart())) {
|
||||
String insertedBaseString = null;
|
||||
if (nextOp == CigarOperator.I)
|
||||
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()));
|
||||
|
||||
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
|
||||
size++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
|
||||
size++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
|
||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||
if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
|
||||
fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
|
||||
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -546,9 +376,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
while (it.hasNext()) {
|
||||
SAMRecordState state = it.next();
|
||||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if (state.hadIndel() && readInfo.generateExtendedEvents())
|
||||
hasExtendedEvents = true;
|
||||
else if (op == null) {
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
|
|
@ -757,12 +585,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
int readCount = 0;
|
||||
for (SAMRecord read : reads) {
|
||||
if (readCount < maxReads) {
|
||||
SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents());
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
// TODO: What if we downsample the extended events away?
|
||||
if (state.hadIndel())
|
||||
hasExtendedEvents = true;
|
||||
readCount++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
Genotype call = new Genotype(name, genotypeAlleles);
|
||||
Genotype call = GenotypeBuilder.create(name, genotypeAlleles);
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
|
|
@ -344,7 +344,7 @@ public class VariantContextAdaptors {
|
|||
alleles.add(allele2);
|
||||
}
|
||||
|
||||
Genotype g = new Genotype(samples[i], myAlleles);
|
||||
Genotype g = GenotypeBuilder.create(samples[i], myAlleles);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,19 +53,6 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
if ( locus.hasExtendedEventPileup() ) {
|
||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
||||
// associated with the current site), we need to update the location. The updated location still starts
|
||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
||||
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
||||
|
||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
||||
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
||||
// the next call will not do anything to the view:
|
||||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
|
|
|||
|
|
@ -34,9 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -79,13 +77,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
|
||||
String rods = getReferenceOrderedData( tracker );
|
||||
|
||||
if ( context.hasBasePileup() ) {
|
||||
ReadBackedPileup basePileup = context.getBasePileup();
|
||||
out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
|
||||
if ( SHOW_VERBOSE )
|
||||
out.printf(" %s", createVerboseOutput(basePileup));
|
||||
out.println();
|
||||
}
|
||||
ReadBackedPileup basePileup = context.getBasePileup();
|
||||
out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
|
||||
if ( SHOW_VERBOSE )
|
||||
out.printf(" %s", createVerboseOutput(basePileup));
|
||||
out.println();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,11 +30,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
|
|
@ -72,7 +70,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
// we care only about het calls
|
||||
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null || !context.hasBasePileup() )
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -21,15 +22,12 @@ import java.util.*;
|
|||
*/
|
||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) {
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", ratio.doubleValue()));
|
||||
return map;
|
||||
return;
|
||||
|
||||
gb.attribute(getKeyNames().get(0), Double.valueOf(String.format("%.2f", ratio.doubleValue())));
|
||||
}
|
||||
|
||||
private Double annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
|
|
@ -51,9 +49,6 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
if ( altAlleles.size() == 0 )
|
||||
return null;
|
||||
|
||||
if ( !stratifiedContext.hasBasePileup() )
|
||||
return null;
|
||||
|
||||
final String bases = new String(stratifiedContext.getBasePileup().getBases());
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -59,8 +59,6 @@ public class BaseCounts extends InfoFieldAnnotation {
|
|||
int[] counts = new int[4];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
if ( !sample.getValue().hasBasePileup() )
|
||||
continue;
|
||||
for (byte base : sample.getValue().getBasePileup().getBases() ) {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
if ( index != -1 )
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||
depth += sample.getValue().hasBasePileup() ? sample.getValue().getBasePileup().depthOfCoverage() : 0;
|
||||
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||
return map;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -44,22 +45,17 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
|
||||
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
if ( vc.isSNP() )
|
||||
return annotateSNP(stratifiedContext, vc);
|
||||
if ( vc.isIndel() )
|
||||
return annotateIndel(stratifiedContext, vc);
|
||||
|
||||
return null;
|
||||
annotateSNP(stratifiedContext, vc, gb);
|
||||
else if ( vc.isIndel() )
|
||||
annotateIndel(stratifiedContext, vc, gb);
|
||||
}
|
||||
|
||||
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
|
||||
|
||||
if ( ! stratifiedContext.hasBasePileup() )
|
||||
return null;
|
||||
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
|
|
@ -72,22 +68,18 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
// we need to add counts in the correct order
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
||||
|
||||
return toADAnnotation(counts);
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
|
||||
|
||||
if ( ! stratifiedContext.hasBasePileup() )
|
||||
return null;
|
||||
|
||||
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
if ( pileup == null )
|
||||
return null;
|
||||
return;
|
||||
|
||||
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
|
||||
alleleCounts.put(REF_ALLELE, 0);
|
||||
|
|
@ -123,16 +115,12 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
}
|
||||
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||
|
||||
return toADAnnotation(counts);
|
||||
}
|
||||
|
||||
private final Map<String, Object> toADAnnotation(final Integer[] counts) {
|
||||
return Collections.singletonMap(getKeyNames().get(0), (Object)Arrays.asList(counts));
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private String getAlleleRepresentation(Allele allele) {
|
||||
|
|
@ -145,7 +133,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
|
|
|
|||
|
|
@ -296,7 +296,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
final AlignmentContext context = stratifiedContexts.get(sample);
|
||||
if ( context == null || !context.hasBasePileup() )
|
||||
if ( context == null )
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
|
|
|||
|
|
@ -74,9 +74,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
final int locus = ref.getLocus().getStart() + (ref.getLocus().getStop() - ref.getLocus().getStart()) / 2;
|
||||
|
||||
if ( !context.hasBasePileup() )
|
||||
return null;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
||||
// Compute all haplotypes consistent with the current read pileup
|
||||
|
|
@ -86,7 +83,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
if (haplotypes != null) {
|
||||
for (final Genotype genotype : vc.getGenotypes()) {
|
||||
final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
|
||||
if (thisContext != null && thisContext.hasBasePileup()) {
|
||||
if (thisContext != null) {
|
||||
final ReadBackedPileup thisPileup = thisContext.getBasePileup();
|
||||
if (vc.isSNP())
|
||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||
|
|
|
|||
|
|
@ -31,9 +31,6 @@ public class LowMQ extends InfoFieldAnnotation {
|
|||
double total = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||
{
|
||||
if ( !sample.getValue().hasBasePileup() )
|
||||
continue;
|
||||
|
||||
for ( PileupElement p : sample.getValue().getBasePileup() )
|
||||
{
|
||||
if ( p.getMappingQual() == 0 ) { mq0 += 1; }
|
||||
|
|
|
|||
|
|
@ -31,12 +31,10 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
|
|||
int mq0 = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
final AlignmentContext context = sample.getValue();
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
|
|
|||
|
|
@ -36,33 +36,30 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Count for each sample of mapping quality zero reads
|
||||
*/
|
||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
||||
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker,
|
||||
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
int mq0 = 0;
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", mq0));
|
||||
return map;
|
||||
|
||||
gb.attribute(getKeyNames().get(0), mq0);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
|
|
|
|||
|
|
@ -31,12 +31,10 @@ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements E
|
|||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
depth += context.size();
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() == 0 )
|
||||
mq0++;
|
||||
}
|
||||
}
|
||||
if (depth > 0) {
|
||||
|
|
|
|||
|
|
@ -28,15 +28,13 @@ public class NBaseCount extends InfoFieldAnnotation {
|
|||
int countRegularBaseSolid = 0;
|
||||
|
||||
for( final AlignmentContext context : stratifiedContexts.values() ) {
|
||||
if ( context.hasBasePileup() ) { // must be called as getBasePileup may throw error when pileup has no bases
|
||||
for( final PileupElement p : context.getBasePileup()) {
|
||||
final String platform = p.getRead().getReadGroup().getPlatform();
|
||||
if( platform != null && platform.toUpperCase().contains("SOLID") ) {
|
||||
if( BaseUtils.isNBase( p.getBase() ) ) {
|
||||
countNBaseSolid++;
|
||||
} else if( BaseUtils.isRegularBase( p.getBase() ) ) {
|
||||
countRegularBaseSolid++;
|
||||
}
|
||||
for( final PileupElement p : context.getBasePileup()) {
|
||||
final String platform = p.getRead().getReadGroup().getPlatform();
|
||||
if( platform != null && platform.toUpperCase().contains("SOLID") ) {
|
||||
if( BaseUtils.isNBase( p.getBase() ) ) {
|
||||
countNBaseSolid++;
|
||||
} else if( BaseUtils.isRegularBase( p.getBase() ) ) {
|
||||
countRegularBaseSolid++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( context == null )
|
||||
continue;
|
||||
|
||||
depth += context.hasBasePileup() ? context.getBasePileup().depthOfCoverage() : 0;
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
|
|
|
|||
|
|
@ -42,12 +42,10 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
|||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = p.getMappingQual();
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||
qualities[index++] = p.getMappingQual();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,9 +63,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!context.hasBasePileup())
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup == null)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -35,11 +35,9 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
|
|||
int depth = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
deletions += pileup.getNumberOfDeletions();
|
||||
depth += pileup.getNumberOfElements();
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
deletions += pileup.getNumberOfDeletions();
|
||||
depth += pileup.getNumberOfElements();
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", depth == 0 ? 0.0 : (double)deletions/(double)depth));
|
||||
|
|
|
|||
|
|
@ -39,18 +39,16 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi
|
|||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
AlignmentContext context = sample.getValue();
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
if(ReadUtils.is454Read(p.getRead()))
|
||||
reads454++;
|
||||
else if (ReadUtils.isSOLiDRead(p.getRead()))
|
||||
readsSolid++;
|
||||
else if (ReadUtils.isIlluminaRead(p.getRead()))
|
||||
readsIllumina++;
|
||||
else
|
||||
readsOther++;
|
||||
}
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
if(ReadUtils.is454Read(p.getRead()))
|
||||
reads454++;
|
||||
else if (ReadUtils.isSOLiDRead(p.getRead()))
|
||||
readsSolid++;
|
||||
else if (ReadUtils.isIlluminaRead(p.getRead()))
|
||||
readsIllumina++;
|
||||
else
|
||||
readsOther++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -305,12 +305,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
// if the reference base is not ambiguous, we can annotate
|
||||
Map<String, AlignmentContext> stratifiedContexts;
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
if ( context.hasBasePileup() ) {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
|
||||
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
|
||||
for ( VariantContext vc : VCs )
|
||||
annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
|
||||
}
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
|
||||
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
|
||||
for ( VariantContext vc : VCs )
|
||||
annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
|
||||
}
|
||||
|
||||
for ( VariantContext annotatedVC : annotatedVCs )
|
||||
|
|
|
|||
|
|
@ -261,24 +261,22 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
private GenotypesContext annotateGenotypes(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( requestedGenotypeAnnotations.size() == 0 )
|
||||
if ( requestedGenotypeAnnotations.isEmpty() )
|
||||
return vc.getGenotypes();
|
||||
|
||||
GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
|
||||
if ( context == null ) {
|
||||
genotypes.add(genotype);
|
||||
continue;
|
||||
} else {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb);
|
||||
}
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
Map<String, Object> result = annotation.annotate(tracker, walker, ref, context, vc, genotype);
|
||||
if ( result != null )
|
||||
genotypeAnnotations.putAll(result);
|
||||
}
|
||||
genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -13,8 +14,9 @@ import java.util.Map;
|
|||
public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation {
|
||||
|
||||
// return annotations for the given contexts/genotype split by sample
|
||||
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker,
|
||||
ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g);
|
||||
public abstract void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker,
|
||||
ReferenceContext ref, AlignmentContext stratifiedContext,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb );
|
||||
|
||||
// return the descriptions used for the VCF FORMAT meta field
|
||||
public abstract List<VCFFormatHeaderLine> getDescriptions();
|
||||
|
|
|
|||
|
|
@ -204,8 +204,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
for ( final Genotype g : vc_input.getGenotypes() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
|
||||
boolean genotypeIsPhased = true;
|
||||
String sample = g.getSampleName();
|
||||
|
||||
|
|
@ -271,7 +269,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
||||
// Beagle gives probability that genotype is AA, AB and BB.
|
||||
// Which, by definition, are prob of hom ref, het and hom var.
|
||||
Double probWrongGenotype, genotypeQuality;
|
||||
double probWrongGenotype, genotypeQuality;
|
||||
Double homRefProbability = Double.valueOf(beagleProbabilities.get(0));
|
||||
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
||||
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
||||
|
|
@ -300,7 +298,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
genotypeQuality = log10(probWrongGenotype);
|
||||
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getAttributes());
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getExtendedAttributes());
|
||||
|
||||
// get original encoding and add to keynotype attributes
|
||||
String a1, a2, og;
|
||||
|
|
@ -328,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
}
|
||||
Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g).alleles(alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
beagleVarCounts++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -26,147 +27,135 @@ import java.util.*;
|
|||
*/
|
||||
public class BQSRKeyManager {
|
||||
|
||||
private final List<Covariate> requiredCovariates;
|
||||
private final List<Covariate> optionalCovariates;
|
||||
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
|
||||
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
|
||||
private final Covariate[] requiredCovariates;
|
||||
private final Covariate[] optionalCovariates;
|
||||
private final RequiredCovariateInfo[] requiredCovariatesInfo;
|
||||
private final OptionalCovariateInfo[] optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
private int nOptionalBits; // Number of bits used to represent the standard covaraites
|
||||
private final int nOptionalIDBits; // Number of bits used to represent the optional covariates IDs
|
||||
private final int totalNumberOfBits; // Sum of all of the above plus the event bits
|
||||
|
||||
private final BitSet optionalCovariateMask; // Standard mask for optional covariates bitset
|
||||
private final BitSet optionalCovariateIDMask; // Standard mask for optional covariates order bitset
|
||||
|
||||
|
||||
private final int optionalCovariateOffset;
|
||||
private final int optionalCovariateIDOffset;
|
||||
|
||||
private final long optionalCovariateMask; // Standard mask for optional covariates key
|
||||
private final long optionalCovariateIDMask; // Standard mask for optional covariates order key
|
||||
private final long eventIDMask; // Standard mask for event ID
|
||||
|
||||
/**
|
||||
* Initializes the KeyManager with the total number of covariates to use
|
||||
*
|
||||
* @param requiredCovariates the ordered list of required covariates
|
||||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new Covariate[requiredCovariates.size()];
|
||||
this.optionalCovariates = new Covariate[optionalCovariates.size()];
|
||||
requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
|
||||
optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
|
||||
final Covariate required = requiredCovariates.get(i);
|
||||
final int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates[i] = required;
|
||||
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length);
|
||||
eventIDMask = genericMask(nRequiredBits, bitsInEventType);
|
||||
|
||||
short id = 0;
|
||||
nOptionalBits = 0;
|
||||
for (Covariate optional : optionalCovariates) {
|
||||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
BitSet optionalID = bitSetFromId(id); // calculate the optional covariate ID for this covariate
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
int nOptionalBits = 0;
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
final Covariate optional = optionalCovariates.get(i);
|
||||
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
this.optionalCovariates[i] = optional;
|
||||
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
|
||||
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType(); // total number of bits used in the final key
|
||||
optionalCovariateOffset = nRequiredBits + bitsInEventType;
|
||||
optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits;
|
||||
final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
|
||||
final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key
|
||||
if ( totalNumberOfBits > 64 )
|
||||
throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a long");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one key per optional covariate.
|
||||
* Generates one key given the optional covariate (or none if it is null)
|
||||
*
|
||||
* Keys include all required covariates, the standard covariate and the event type.
|
||||
*
|
||||
* Example allKeys:
|
||||
* RG, QUAL, CYCLE, CONTEXT
|
||||
*
|
||||
* List of BitSets returned by this example (given eventType):
|
||||
* RG, QUAL, CYCLE, EVENT
|
||||
* RG, QUAL, CONTEXT, EVENT
|
||||
*
|
||||
* Note: If there are no optional covariates, only one bitset key will be returned with all the required covariates and the event type
|
||||
*
|
||||
* @param allKeys The keys in bitset representation for each covariate
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in bitset representation per covariate
|
||||
* @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested)
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation (non-negative) or -1 for a bad key
|
||||
*/
|
||||
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
|
||||
List<BitSet> allBitSets = new ArrayList<BitSet>(); // Generate one key per optional covariate
|
||||
public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) {
|
||||
|
||||
BitSet eventBitSet = bitSetFromEvent(eventType); // create a bitset with the event type
|
||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
|
||||
|
||||
int covariateIndex = 0;
|
||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||
int keyIndex = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||
masterKey |= (allKeys[keyIndex++] << infoRequired.offset);
|
||||
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||
if (covariateKey == null)
|
||||
continue; // do not add nulls to the final set of keys.
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
BitSet optionalKey = new BitSet(totalNumberOfBits); // create a new key for this optional covariate
|
||||
optionalKey.or(requiredKey); // import all the required covariates
|
||||
addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||
addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||
if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) {
|
||||
final long covariateKey = allKeys[keyIndex + optionalCovariateIndex];
|
||||
if (covariateKey < 0) // do not add "nulls" to the final set of keys
|
||||
return -1;
|
||||
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
return allBitSets;
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one bitset key for the covariates represented in Object[] key
|
||||
* Generates one key for the covariates represented in Object[] key
|
||||
*
|
||||
* The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file)
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one bitset key, not many.
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many.
|
||||
*
|
||||
* Example key:
|
||||
* RG, QUAL, CYCLE, CYCLE_ID, EventType
|
||||
*
|
||||
* @param key list of objects produced by the required covariates followed by one or zero optional covariates.
|
||||
* @return a bitset key representing these objects. Bitset encryption is done using the covariate's interface.
|
||||
* @return a key representing these objects.
|
||||
*/
|
||||
public BitSet bitSetFromKey(Object[] key) {
|
||||
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
||||
|
||||
public long longFromKey(Object[] key) {
|
||||
int requiredCovariate = 0;
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
|
||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||
}
|
||||
|
||||
int eventIndex = key.length - 1; // the event type is always the last key
|
||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // location in the bit set to add the event type bits
|
||||
BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]); // get the bit set representation of the event type
|
||||
addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex); // add the event type
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset);
|
||||
|
||||
return bitSetKey;
|
||||
final int eventIndex = key.length - 1; // the event type is always the last key
|
||||
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
|
||||
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
|
||||
|
||||
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -176,116 +165,82 @@ public class BQSRKeyManager {
|
|||
* @param id the string or short representation of the optional covariate id
|
||||
* @return the short representation of the optional covariate id.
|
||||
*/
|
||||
private short parseCovariateID(Object id) {
|
||||
private short parseCovariateID(final Object id) {
|
||||
return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a key set of objects from a combined bitset key.
|
||||
* Generates a key set of objects from a combined master key.
|
||||
*
|
||||
* Masks out each covariate independently and decodes their values (Object) into a keyset
|
||||
*
|
||||
* @param key the bitset representation of the keys
|
||||
* @param master the master representation of the keys
|
||||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
public List<Object> keySetFrom(final long master) {
|
||||
final List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
final long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key
|
||||
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
||||
Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
|
||||
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
|
||||
Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set
|
||||
|
||||
objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set
|
||||
|
||||
return objectKeys;
|
||||
}
|
||||
|
||||
public List<Covariate> getRequiredCovariates() {
|
||||
public Covariate[] getRequiredCovariates() {
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
public List<Covariate> getOptionalCovariates() {
|
||||
public Covariate[] getOptionalCovariates() {
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translates a masked bitset into a bitset starting at 0
|
||||
*
|
||||
* @param key the masked out bitset
|
||||
* @param n the number of bits to chop
|
||||
* @return a translated bitset starting at 0 for the covariate machinery to decode
|
||||
*/
|
||||
private BitSet chopNBitsFrom(BitSet key, int n) {
|
||||
BitSet choppedKey = new BitSet();
|
||||
for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1))
|
||||
choppedKey.set(i - n); // Set every bit translocated to the beginning of the BitSet
|
||||
return choppedKey;
|
||||
public int getNumRequiredCovariates() {
|
||||
return requiredCovariates.length;
|
||||
}
|
||||
|
||||
public int getNumOptionalCovariates() {
|
||||
return optionalCovariates.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a mask for the requested covariate to extract the relevant bitset from a combined bitset key
|
||||
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
|
||||
*
|
||||
* @param leadingBits the index of the covariate in the ordered covariate list
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values in BitSet form
|
||||
* @return the bitset relevant to the covariate
|
||||
* @param offset the offset into the master key
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values
|
||||
* @return the mask relevant to the covariate
|
||||
*/
|
||||
|
||||
private BitSet genericMask(int leadingBits, int nBits) {
|
||||
BitSet mask = new BitSet(leadingBits + nBits);
|
||||
mask.set(leadingBits, leadingBits + nBits);
|
||||
private long genericMask(final int offset, final int nBits) {
|
||||
long mask = 0L;
|
||||
for ( int i = 0; i < nBits; i++ )
|
||||
mask |= 1L << (offset+i);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the event type (enum) from the full bitset key
|
||||
*
|
||||
* @param fullKey the full key of all covariates + event type
|
||||
* @return the decoded event type.
|
||||
*/
|
||||
private EventType eventFromBitSet(BitSet fullKey) {
|
||||
BitSet eventKey = new BitSet();
|
||||
int firstBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;
|
||||
for (int i = fullKey.nextSetBit(firstBitIndex); i >= 0; i = fullKey.nextSetBit(i + 1))
|
||||
eventKey.set(i - firstBitIndex);
|
||||
return EventType.eventFrom(BitSetUtils.shortFrom(eventKey));
|
||||
private long extractKeyFromMaster(final long master, final long mask, final int offset) {
|
||||
long key = master & mask;
|
||||
return key >> offset;
|
||||
}
|
||||
|
||||
// cache the BitSet representing an event since it's otherwise created a massive amount of times
|
||||
private static final Map<EventType, BitSet> eventTypeCache = new HashMap<EventType, BitSet>(EventType.values().length);
|
||||
// cache the key representing an event since it's otherwise created a massive amount of times
|
||||
private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache.put(eventType, BitSetUtils.bitSetFrom(eventType.index));
|
||||
eventTypeCache[eventType.index] = (long)eventType.index;
|
||||
}
|
||||
|
||||
private BitSet bitSetFromEvent(final EventType eventType) {
|
||||
return eventTypeCache.get(eventType);
|
||||
}
|
||||
|
||||
private BitSet bitSetFromId(final short id) {
|
||||
return BitSetUtils.bitSetFrom(id);
|
||||
}
|
||||
|
||||
private int bitsInEventType() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(EventType.values().length);
|
||||
}
|
||||
|
||||
private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) {
|
||||
for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1))
|
||||
key.set(j + location); // translate the bits set in the key to their corresponding position in the full key
|
||||
}
|
||||
|
||||
private BitSet extractBitSetFromKey (BitSet key, BitSet mask, int leadingBits) {
|
||||
BitSet bitSet = (BitSet) key.clone();
|
||||
bitSet.and(mask);
|
||||
return chopNBitsFrom(bitSet, leadingBits);
|
||||
private long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache[eventType.index];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -297,22 +252,22 @@ public class BQSRKeyManager {
|
|||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
|
||||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
|
||||
if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
|
||||
optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates.get(i);
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
|
||||
for (int i = 0; i < requiredCovariates.length; i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates[i];
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates[i];
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates.get(i);
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
|
||||
for (int i = 0; i < optionalCovariates.length; i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates[i];
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates[i];
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
|
|
@ -322,27 +277,50 @@ public class BQSRKeyManager {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate information for each Covariate
|
||||
*/
|
||||
class RequiredCovariateInfo {
|
||||
public final int bitsBefore; // number of bits before this covariate in the combined bitset key
|
||||
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
private static class RequiredCovariateInfo {
|
||||
public final int nBits; // number of bits for this key
|
||||
public final int offset; // the offset into the master key
|
||||
public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||
|
||||
RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
|
||||
this.bitsBefore = bitsBefore;
|
||||
RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) {
|
||||
this.nBits = nBits;
|
||||
this.offset = offset;
|
||||
this.mask = mask;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
class OptionalCovariateInfo {
|
||||
public final BitSet covariateID; // cache the covariate ID
|
||||
private static class OptionalCovariateInfo {
|
||||
public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works)
|
||||
public final Covariate covariate;
|
||||
|
||||
OptionalCovariateInfo(BitSet covariateID, Covariate covariate) {
|
||||
OptionalCovariateInfo(final long covariateID, final Covariate covariate) {
|
||||
this.covariateID = covariateID;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,15 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -44,8 +41,7 @@ import java.util.BitSet;
|
|||
public class ContextCovariate implements StandardCovariate {
|
||||
|
||||
private int mismatchesContextSize;
|
||||
private int insertionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
private int indelsContextSize;
|
||||
|
||||
private byte LOW_QUAL_TAIL;
|
||||
|
||||
|
|
@ -53,42 +49,33 @@ public class ContextCovariate implements StandardCovariate {
|
|||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
|
||||
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
|
||||
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
|
||||
indelsContextSize = RAC.INDELS_CONTEXT_SIZE;
|
||||
if (mismatchesContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
|
||||
if (indelsContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("indels_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, indelsContextSize));
|
||||
|
||||
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
|
||||
|
||||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||
|
||||
if (mismatchesContextSize <= 0 || indelsContextSize <= 0)
|
||||
throw new UserException(String.format("Context size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Indels: %d", mismatchesContextSize, indelsContextSize));
|
||||
}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int l = read.getReadLength();
|
||||
BitSet[] mismatches = new BitSet[l];
|
||||
BitSet[] insertions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
|
||||
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
byte[] bases = clippedRead.getReadBases();
|
||||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
for (int i = 0; i < clippedRead.getReadLength(); i++) {
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
final int readLength = clippedRead.getReadLength();
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final long indelKey = contextWith(bases, i, indelsContextSize);
|
||||
values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
|
||||
}
|
||||
|
||||
if (negativeStrand) {
|
||||
reverse(mismatches);
|
||||
reverse(insertions);
|
||||
reverse(deletions);
|
||||
}
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
@ -98,21 +85,21 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
public String formatKey(final long key) {
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
return BitSetUtils.dnaFrom(key);
|
||||
return contextFromKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return BitSetUtils.bitSetFrom((String) key);
|
||||
public long longFromKey(Object key) {
|
||||
return keyFromContext((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Long.bitCount(-1L);
|
||||
return Integer.bitCount(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,29 +108,132 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @return the bitSet representing the Context
|
||||
* @return the key representing the context
|
||||
*/
|
||||
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
||||
BitSet result = null;
|
||||
if (offset - contextSize + 1 >= 0) {
|
||||
final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1);
|
||||
if (!BaseUtils.containsBase(context, BaseUtils.N))
|
||||
result = BitSetUtils.bitSetFrom(context);
|
||||
}
|
||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
final int start = offset - contextSize + 1;
|
||||
final long result;
|
||||
if (start >= 0)
|
||||
result = keyFromContext(bases, start, offset + 1);
|
||||
else
|
||||
result = -1L;
|
||||
return result;
|
||||
}
|
||||
|
||||
public static long keyFromContext(final String dna) {
|
||||
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses the given array in place.
|
||||
* Creates a long representation of a given dna string.
|
||||
*
|
||||
* @param array any array
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* The bit representation of a dna string is simply:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1363 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
private static void reverse(final Object[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final Object temp = array[l];
|
||||
array[l] = array[r];
|
||||
array[r] = temp;
|
||||
public static long keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (int i = start; i < end; i++) {
|
||||
baseTen = (baseTen << 2); // multiply by 4
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1L;
|
||||
baseTen += (long)baseIndex;
|
||||
}
|
||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
// The maximum context size (number of bases) permitted by the long-based key encoding.
private static final int MAX_DNA_CONTEXT = 31;

// Memoized table indexed by DNA context length; every entry from 0 to MAX_DNA_CONTEXT
// is filled once by the static initializer below.
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1];

static {
    for (int length = 0; length <= MAX_DNA_CONTEXT; length++)
        computeCombinationsFor(length);
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
*/
|
||||
private static void computeCombinationsFor(final int length) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a key into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param key the key representing the dna sequence
|
||||
* @return the dna sequence represented by the key
|
||||
*/
|
||||
public static String contextFromKey(long key) {
|
||||
if (key < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
final byte base = (byte) (key & 3); // equivalent to (key % 4)
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(base));
|
||||
key = key >> 2; // divide by 4
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the key
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(final long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsPerLength[length]; // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -45,15 +43,15 @@ public interface Covariate {
|
|||
*
|
||||
* @param RAC the recalibration argument collection
|
||||
*/
|
||||
public void initialize(RecalibrationArgumentCollection RAC);
|
||||
public void initialize(final RecalibrationArgumentCollection RAC);
|
||||
|
||||
/**
|
||||
* Calculates covariate values for all positions in the read.
|
||||
*
|
||||
* @param read the read to calculate the covariates on.
|
||||
* @return all the covariate values for every base in the read.
|
||||
* @param read the read to calculate the covariates on.
|
||||
* @param values the object to record the covariate values for every base in the read.
|
||||
*/
|
||||
public CovariateValues getValues(GATKSAMRecord read);
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
|
||||
|
||||
/**
|
||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
@ -61,26 +59,26 @@ public interface Covariate {
|
|||
* @param str the key in string type (read from the csv)
|
||||
* @return the key in its correct type.
|
||||
*/
|
||||
public Object getValue(String str);
|
||||
public Object getValue(final String str);
|
||||
|
||||
/**
|
||||
* Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
|
||||
* Converts the internal representation of the key to String format for file output.
|
||||
*
|
||||
* @param key the bitset representation of the key
|
||||
* @param key the long representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String keyFromBitSet(BitSet key);
|
||||
public String formatKey(final long key);
|
||||
|
||||
/**
|
||||
* Converts a key into a bitset
|
||||
* Converts an Object key into a long key using only the lowest numberOfBits() bits
|
||||
*
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in bitset format. For counting covariates
|
||||
* the getValues method already returns all values in BitSet format.
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
|
||||
* the getValues method already returns all values in long format.
|
||||
*
|
||||
* @param key the object corresponding to the covariate
|
||||
* @return a bitset representation of the object
|
||||
* @return a long representation of the object
|
||||
*/
|
||||
public BitSet bitSetFromKey(Object key);
|
||||
public long longFromKey(final Object key);
|
||||
|
||||
/**
|
||||
* Each covariate should determine how many bits are necessary to encode its data
|
||||
|
|
|
|||
|
|
@ -1,39 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* An object to hold the different covariate values for all bases in the read.
|
||||
*
|
||||
* Currently we have three different covariates for each read:
|
||||
* - Mismatch
|
||||
* - Insertion
|
||||
* - Deletion
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
    // one entry per read position, for each of the three event types;
    // the arrays are stored and returned by reference (no copies are made)
    private final BitSet[] mismatches;
    private final BitSet[] insertions;
    private final BitSet[] deletions;

    /**
     * @param mismatch  covariate values for the mismatch model
     * @param insertion covariate values for the insertion model
     * @param deletion  covariate values for the deletion model
     */
    public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
        mismatches = mismatch;
        insertions = insertion;
        deletions = deletion;
    }

    /** @return the array given as the mismatch values at construction */
    public BitSet[] getMismatches() { return this.mismatches; }

    /** @return the array given as the insertion values at construction */
    public BitSet[] getInsertions() { return this.insertions; }

    /** @return the array given as the deletion values at construction */
    public BitSet[] getDeletions() { return this.deletions; }
}
|
||||
|
|
@ -1,12 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.EnumSet;
|
||||
|
||||
/*
|
||||
|
|
@ -60,18 +58,18 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
BitSet[] cycles = new BitSet[read.getReadLength()];
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final int readLength = read.getReadLength();
|
||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||
|
||||
// Discrete cycle platforms
|
||||
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||
final short readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? (short) -1 : 1;
|
||||
final short increment;
|
||||
short cycle;
|
||||
final int readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? -1 : 1;
|
||||
final int increment;
|
||||
int cycle;
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
cycle = (short) (read.getReadLength() * readOrderFactor);
|
||||
increment = (short) (-1 * readOrderFactor);
|
||||
cycle = readLength * readOrderFactor;
|
||||
increment = -1 * readOrderFactor;
|
||||
}
|
||||
else {
|
||||
cycle = readOrderFactor;
|
||||
|
|
@ -79,9 +77,10 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
|
||||
final int MAX_CYCLE = readLength - CUSHION - 1;
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final long key = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, i);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
@ -89,7 +88,6 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Flow cycle platforms
|
||||
else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||
|
||||
final int readLength = read.getReadLength();
|
||||
final byte[] bases = read.getReadBases();
|
||||
|
||||
// Differentiate between first and second of pair.
|
||||
|
|
@ -100,7 +98,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// the current sequential model would consider the effects independently instead of jointly.
|
||||
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||
|
||||
short cycle = multiplyByNegative1 ? (short) -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||
|
||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||
|
|
@ -108,19 +106,23 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
if (iii < readLength) {
|
||||
|
|
@ -130,7 +132,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
|
||||
|
|
@ -140,19 +143,23 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
if (iii >= 0) {
|
||||
|
|
@ -162,7 +169,8 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
final long key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
}
|
||||
|
|
@ -173,28 +181,38 @@ public class CycleCovariate implements StandardCovariate {
|
|||
else {
|
||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||
}
|
||||
|
||||
return new CovariateValues(cycles, cycles, cycles);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return Short.parseShort(str);
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.shortFrom(key));
|
||||
public String formatKey(final long key) {
|
||||
long cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
return String.format("%d", cycle);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key);
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? keyFromCycle(Integer.parseInt((String) key)) : keyFromCycle((Integer) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
|
||||
return Integer.bitCount(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
private static long keyFromCycle(final int cycle) {
|
||||
// no negative values because values must fit into the first few bits of the long
|
||||
long result = Math.abs(cycle);
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -43,28 +40,17 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
}
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int readLength = read.getReadLength();
|
||||
|
||||
BitSet[] mismatches = new BitSet[readLength];
|
||||
BitSet[] insertions = new BitSet[readLength];
|
||||
BitSet[] deletions = new BitSet[readLength];
|
||||
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final byte[] baseQualities = read.getBaseQualities();
|
||||
final byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
final byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
|
||||
insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
|
||||
deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
|
||||
values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i);
|
||||
}
|
||||
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
@ -74,17 +60,17 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.longFrom(key));
|
||||
public String formatKey(final long key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key);
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -31,15 +30,15 @@ public class QuantizationInfo {
|
|||
this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals));
|
||||
}
|
||||
|
||||
public QuantizationInfo(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
|
||||
public QuantizationInfo(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
Map<BitSet, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
Map<Long, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites
|
||||
if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
|
||||
qualTable = entry.getValue();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of its covariates.
|
||||
*
|
||||
|
|
@ -13,27 +11,29 @@ import java.util.BitSet;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private final BitSet[][] mismatchesKeySet;
|
||||
private final BitSet[][] insertionsKeySet;
|
||||
private final BitSet[][] deletionsKeySet;
|
||||
private final long[][] mismatchesKeySet;
|
||||
private final long[][] insertionsKeySet;
|
||||
private final long[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
private int currentCovariateIndex = 0;
|
||||
|
||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||
this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.nextCovariateIndex = 0;
|
||||
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
||||
}
|
||||
|
||||
public void addCovariate(CovariateValues covariate) {
|
||||
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
|
||||
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
|
||||
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
|
||||
nextCovariateIndex++;
|
||||
public void setCovariateIndex(final int index) {
|
||||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
public BitSet[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) {
|
||||
mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch;
|
||||
insertionsKeySet[readOffset][currentCovariateIndex] = insertion;
|
||||
deletionsKeySet[readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
switch (errorModel) {
|
||||
case BASE_SUBSTITUTION:
|
||||
return getMismatchesKeySet(readPosition);
|
||||
|
|
@ -46,35 +46,30 @@ public class ReadCovariates {
|
|||
}
|
||||
}
|
||||
|
||||
public BitSet[] getMismatchesKeySet(int readPosition) {
|
||||
public long[] getMismatchesKeySet(final int readPosition) {
|
||||
return mismatchesKeySet[readPosition];
|
||||
}
|
||||
|
||||
public BitSet[] getInsertionsKeySet(int readPosition) {
|
||||
public long[] getInsertionsKeySet(final int readPosition) {
|
||||
return insertionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
public BitSet[] getDeletionsKeySet(int readPosition) {
|
||||
public long[] getDeletionsKeySet(final int readPosition) {
|
||||
return deletionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
|
||||
for (int i = 0; i < covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing routines
|
||||
*/
|
||||
protected BitSet[][] getMismatchesKeySet() {
|
||||
protected long[][] getMismatchesKeySet() {
|
||||
return mismatchesKeySet;
|
||||
}
|
||||
|
||||
protected BitSet[][] getInsertionsKeySet() {
|
||||
protected long[][] getInsertionsKeySet() {
|
||||
return insertionsKeySet;
|
||||
}
|
||||
|
||||
protected BitSet[][] getDeletionsKeySet() {
|
||||
protected long[][] getDeletionsKeySet() {
|
||||
return deletionsKeySet;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
|
|
@ -43,23 +40,22 @@ import java.util.HashMap;
|
|||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||
private short nextId = 0;
|
||||
private final HashMap<String, Long> readGroupLookupTable = new HashMap<String, Long>();
|
||||
private final HashMap<Long, String> readGroupReverseLookupTable = new HashMap<Long, String>();
|
||||
private long nextId = 0L;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
}
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
|
||||
BitSet[] readGroups = new BitSet[l];
|
||||
Arrays.fill(readGroups, rg);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
final long key = keyForReadGroup(readGroupId);
|
||||
|
||||
final int l = read.getReadLength();
|
||||
for (int i = 0; i < l; i++)
|
||||
values.addCovariate(key, key, key, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -68,35 +64,28 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return decodeReadGroup((short) BitSetUtils.longFrom(key));
|
||||
public String formatKey(final long key) {
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return bitSetForReadGroup((String) key);
|
||||
public long longFromKey(Object key) {
|
||||
return keyForReadGroup((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
}
|
||||
|
||||
private String decodeReadGroup(final short id) {
|
||||
return readGroupReverseLookupTable.get(id);
|
||||
}
|
||||
|
||||
private BitSet bitSetForReadGroup(String readGroupId) {
|
||||
short shortId;
|
||||
if (readGroupLookupTable.containsKey(readGroupId))
|
||||
shortId = readGroupLookupTable.get(readGroupId);
|
||||
else {
|
||||
shortId = nextId;
|
||||
private long keyForReadGroup(final String readGroupId) {
|
||||
if (!readGroupLookupTable.containsKey(readGroupId)) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
return BitSetUtils.bitSetFrom(shortId);
|
||||
}
|
||||
|
||||
return readGroupLookupTable.get(readGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,8 +94,8 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
* @param rg the read group record
|
||||
* @return platform unit or readgroup id
|
||||
*/
|
||||
private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) {
|
||||
String platformUnit = rg.getPlatformUnit();
|
||||
private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) {
|
||||
final String platformUnit = rg.getPlatformUnit();
|
||||
return platformUnit == null ? rg.getId() : platformUnit;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -149,17 +149,17 @@ public class RecalDataManager {
|
|||
* @param optionalCovariates list of optional covariates (in order)
|
||||
* @return a map with each key manager and its corresponding recalibration table properly initialized
|
||||
*/
|
||||
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
public static LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||
final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
final ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
final ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
}
|
||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
return tablesAndKeysMap;
|
||||
|
|
@ -181,7 +181,7 @@ public class RecalDataManager {
|
|||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||
|
||||
ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
if (argumentCollection.USE_STANDARD_COVARIATES)
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
|
|
@ -223,7 +223,7 @@ public class RecalDataManager {
|
|||
logger.info("");
|
||||
}
|
||||
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
|
||||
|
|
@ -235,23 +235,23 @@ public class RecalDataManager {
|
|||
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
Map<BitSet, RecalDatum> recalTable = entry.getValue();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
final BQSRKeyManager keyManager = entry.getKey();
|
||||
final Map<Long, RecalDatum> recalTable = entry.getValue();
|
||||
|
||||
boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
|
||||
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
|
||||
ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
|
||||
for (Covariate covariate : requiredList) {
|
||||
String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||
for (final Covariate covariate : requiredList) {
|
||||
final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
|
||||
}
|
||||
|
||||
if (optionalList.size() > 0) {
|
||||
if (optionalList.length > 0) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
|
|
@ -263,30 +263,30 @@ public class RecalDataManager {
|
|||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
||||
for (Pair<String, String> columnName : columnNames)
|
||||
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
||||
for (final Pair<String, String> columnName : columnNames)
|
||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
|
||||
|
||||
int rowIndex = 0;
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
BitSet bitSetKey = recalTableEntry.getKey();
|
||||
Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||
for (Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||
String columnName = iterator.next().getFirst();
|
||||
for (Map.Entry<Long, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
final Long bitSetKey = recalTableEntry.getKey();
|
||||
final Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
final Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||
for (final Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||
final String columnName = iterator.next().getFirst();
|
||||
columnData.put(columnName, key);
|
||||
}
|
||||
RecalDatum datum = recalTableEntry.getValue();
|
||||
final RecalDatum datum = recalTableEntry.getValue();
|
||||
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality());
|
||||
if (isReadGroupTable)
|
||||
columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
columnData.put(iterator.next().getFirst(), datum.numObservations);
|
||||
columnData.put(iterator.next().getFirst(), datum.numMismatches);
|
||||
|
||||
for (Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||
String columnName = dataEntry.getKey();
|
||||
Object value = dataEntry.getValue();
|
||||
for (final Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||
final String columnName = dataEntry.getKey();
|
||||
final Object value = dataEntry.getValue();
|
||||
reportTable.set(rowIndex, columnName, value.toString());
|
||||
}
|
||||
rowIndex++;
|
||||
|
|
@ -296,16 +296,16 @@ public class RecalDataManager {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
}
|
||||
|
||||
private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
|
||||
GATKReport report = new GATKReport();
|
||||
final GATKReport report = new GATKReport();
|
||||
report.addTable(argumentTable);
|
||||
report.addTable(quantizationTable);
|
||||
report.addTables(recalTables);
|
||||
|
|
@ -328,7 +328,7 @@ public class RecalDataManager {
|
|||
final File plotFileName = new File(csvFileName + ".pdf");
|
||||
files.getFirst().close();
|
||||
|
||||
RScriptExecutor executor = new RScriptExecutor();
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
|
||||
executor.addArgs(csvFileName.getAbsolutePath());
|
||||
executor.addArgs(plotFileName.getAbsolutePath());
|
||||
|
|
@ -340,34 +340,34 @@ public class RecalDataManager {
|
|||
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, boolean keepIntermediates) {
|
||||
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", true);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", false);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
final int QUALITY_SCORE_COVARIATE_INDEX = 1;
|
||||
final Map<BitSet, RecalDatum> deltaTable = new HashMap<BitSet, RecalDatum>();
|
||||
final Map<Long, RecalDatum> deltaTable = new HashMap<Long, RecalDatum>();
|
||||
BQSRKeyManager deltaKeyManager = null;
|
||||
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table
|
||||
if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
|
||||
// create a key manager for the delta table
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate
|
||||
List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
|
||||
final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
|
||||
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
}
|
||||
}
|
||||
|
|
@ -376,37 +376,37 @@ public class RecalDataManager {
|
|||
throw new ReviewedStingException ("Couldn't find the covariates table");
|
||||
|
||||
boolean readyToPrint = false;
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
List<Object> newCovs = new ArrayList<Object>(4);
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score
|
||||
newCovs.add(1, covs.get(1));
|
||||
newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||
newCovs.add(3, covs.get(2));
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
final long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
final long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
readyToPrint = true;
|
||||
|
|
@ -416,7 +416,7 @@ public class RecalDataManager {
|
|||
if (readyToPrint) {
|
||||
|
||||
if (printHeader) {
|
||||
List<String> header = new LinkedList<String>();
|
||||
final List<String> header = new LinkedList<String>();
|
||||
header.add("ReadGroup");
|
||||
header.add("CovariateValue");
|
||||
header.add("CovariateName");
|
||||
|
|
@ -431,9 +431,9 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
// print each data line
|
||||
for(Map.Entry<BitSet, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
for (final Map.Entry<Long, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
final List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
final RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
|
|
@ -453,8 +453,8 @@ public class RecalDataManager {
|
|||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
*/
|
||||
private static void addToDeltaTable(Map<BitSet, RecalDatum> deltaTable, BitSet deltaKey, RecalDatum recalDatum) {
|
||||
RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(Map<Long, RecalDatum> deltaTable, Long deltaKey, RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
|
|
@ -611,18 +611,32 @@ public class RecalDataManager {
|
|||
* @param requestedCovariates The list of requested covariates.
|
||||
* @return a matrix with all the covariates calculated for every base in the read
|
||||
*/
|
||||
public static ReadCovariates computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
|
||||
final int numRequestedCovariates = requestedCovariates.size();
|
||||
final int readLength = read.getReadLength();
|
||||
final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates);
|
||||
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for (Covariate covariate : requestedCovariates)
|
||||
readCovariates.addCovariate(covariate.getValues(read));
|
||||
|
||||
public static ReadCovariates computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates) {
|
||||
final ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), requestedCovariates.length);
|
||||
computeCovariates(read, requestedCovariates, readCovariates);
|
||||
return readCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes all requested covariates for every offset in the given read
|
||||
* by calling covariate.getValues(..).
|
||||
*
|
||||
* It populates an array of covariate values where result[i][j] is the covariate
|
||||
* value for the ith position in the read and the jth covariate in
|
||||
* reqeustedCovariates list.
|
||||
*
|
||||
* @param read The read for which to compute covariate values.
|
||||
* @param requestedCovariates The list of requested covariates.
|
||||
* @param readCovariates The object to store the covariate values
|
||||
*/
|
||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for (int i = 0; i < requestedCovariates.length; i++) {
|
||||
readCovariates.setCovariateIndex(i);
|
||||
requestedCovariates[i].recordValues(read, readCovariates);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a certain transversion (A <-> C or G <-> T) on the base.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -114,16 +114,10 @@ public class RecalibrationArgumentCollection {
|
|||
public int MISMATCHES_CONTEXT_SIZE = 2;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate it's covariate value for base insertions
|
||||
* The context covariate will use a context of this size to calculate it's covariate value for base insertions and deletions
|
||||
*/
|
||||
@Argument(fullName = "insertions_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions", required = false)
|
||||
public int INSERTIONS_CONTEXT_SIZE = 8;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate it's covariate value for base deletions
|
||||
*/
|
||||
@Argument(fullName = "deletions_context_size", shortName = "dcs", doc = "size of the k-mer context to be used for base deletions", required = false)
|
||||
public int DELETIONS_CONTEXT_SIZE = 8;
|
||||
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
|
||||
public int INDELS_CONTEXT_SIZE = 8;
|
||||
|
||||
/**
|
||||
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
|
||||
|
|
@ -188,10 +182,8 @@ public class RecalibrationArgumentCollection {
|
|||
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.addRowID("mismatches_context_size", true);
|
||||
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("insertions_context_size", true);
|
||||
argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("deletions_context_size", true);
|
||||
argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("indels_context_size", true);
|
||||
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ import java.util.*;
|
|||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
|
@ -36,21 +36,25 @@ public class RecalibrationReport {
|
|||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates
|
||||
requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates
|
||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
int covariateIndex = 0;
|
||||
for (final Covariate covariate : requiredCovariates)
|
||||
requestedCovariates[covariateIndex++] = covariate;
|
||||
for (final Covariate covariate : optionalCovariates)
|
||||
requestedCovariates[covariateIndex++] = covariate;
|
||||
|
||||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<BitSet, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||
final Map<Long, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
|
||||
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
|
|
@ -69,15 +73,16 @@ public class RecalibrationReport {
|
|||
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
final Map<BitSet, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||
final Map<Long, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||
keysAndTablesMap.put(keyManager, table);
|
||||
}
|
||||
|
||||
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) {
|
||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.keysAndTablesMap = keysAndTablesMap;
|
||||
this.argumentTable = argumentTable;
|
||||
this.RAC = RAC;
|
||||
this.requestedCovariates = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -94,25 +99,25 @@ public class RecalibrationReport {
|
|||
* @param other the recalibration report to combine with this one
|
||||
*/
|
||||
public void combine(RecalibrationReport other) {
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> thisEntry = thisIterator.next();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> thisEntry = thisIterator.next();
|
||||
|
||||
Map<BitSet, RecalDatum> thisTable = thisEntry.getValue();
|
||||
BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
final Map<Long, RecalDatum> thisTable = thisEntry.getValue();
|
||||
final BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
final BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
BitSet otherBitKey = otherTableEntry.getKey();
|
||||
List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
for (Map.Entry<Long, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
final RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
final Long otherBitKey = otherTableEntry.getKey();
|
||||
final List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
|
||||
BitSet thisBitKey = thisKeyManager.bitSetFromKey(otherObjectKey.toArray());
|
||||
RecalDatum thisDatum = thisTable.get(thisBitKey);
|
||||
final long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray());
|
||||
final RecalDatum thisDatum = thisTable.get(thisKey);
|
||||
|
||||
if (thisDatum == null)
|
||||
thisTable.put(thisBitKey, otherDatum);
|
||||
thisTable.put(thisKey, otherDatum);
|
||||
else
|
||||
thisDatum.combine(otherDatum);
|
||||
}
|
||||
|
|
@ -123,11 +128,11 @@ public class RecalibrationReport {
|
|||
return quantizationInfo;
|
||||
}
|
||||
|
||||
public LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> getKeysAndTablesMap() {
|
||||
public LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> getKeysAndTablesMap() {
|
||||
return keysAndTablesMap;
|
||||
}
|
||||
|
||||
public ArrayList<Covariate> getRequestedCovariates() {
|
||||
public Covariate[] getRequestedCovariates() {
|
||||
return requestedCovariates;
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +143,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
|
|
@ -155,7 +160,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
|
|
@ -170,7 +175,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||
|
|
@ -185,26 +190,26 @@ public class RecalibrationReport {
|
|||
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
|
||||
Map<BitSet, RecalDatum> result = new HashMap<BitSet, RecalDatum>(reportTable.getNumRows()*2);
|
||||
private Map<Long, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
|
||||
final Map<Long, RecalDatum> result = new HashMap<Long, RecalDatum>(reportTable.getNumRows()*2);
|
||||
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
int nKeys = columnNamesOrderedList.size();
|
||||
Object [] keySet = new Object[nKeys];
|
||||
final int nKeys = columnNamesOrderedList.size();
|
||||
final Object [] keySet = new Object[nKeys];
|
||||
for (int j = 0; j < nKeys; j++)
|
||||
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
|
||||
BitSet bitKey = keyManager.bitSetFromKey(keySet);
|
||||
final long bitKey = keyManager.longFromKey(keySet);
|
||||
|
||||
long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
result.put(bitKey, recalDatum);
|
||||
}
|
||||
return result;
|
||||
|
|
@ -217,14 +222,14 @@ public class RecalibrationReport {
|
|||
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
||||
*/
|
||||
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||
Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
byte originalQual = (byte)i;
|
||||
Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
long quantizedCount = Long.parseLong(countObject.toString());
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
final long quantizedCount = Long.parseLong(countObject.toString());
|
||||
quals[originalQual] = quantizedQual;
|
||||
counts[originalQual] = quantizedCount;
|
||||
}
|
||||
|
|
@ -238,7 +243,7 @@ public class RecalibrationReport {
|
|||
* @return a RAC object properly initialized with all the objects in the table
|
||||
*/
|
||||
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
|
||||
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final String argument = table.get(i, "Argument").toString();
|
||||
|
|
@ -261,11 +266,8 @@ public class RecalibrationReport {
|
|||
else if (argument.equals("mismatches_context_size"))
|
||||
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("insertions_context_size"))
|
||||
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("deletions_context_size"))
|
||||
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
else if (argument.equals("indels_context_size"))
|
||||
RAC.INDELS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("mismatches_default_quality"))
|
||||
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||
|
|
@ -306,7 +308,7 @@ public class RecalibrationReport {
|
|||
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||
*/
|
||||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (Map<Long, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (RecalDatum datum : table.values())
|
||||
datum.calcCombinedEmpiricalQuality();
|
||||
|
||||
|
|
@ -331,26 +333,26 @@ public class RecalibrationReport {
|
|||
return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap);
|
||||
}
|
||||
|
||||
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t2) {
|
||||
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t2) {
|
||||
if (t1.size() != t2.size())
|
||||
return false;
|
||||
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||
|
||||
while (t1Iterator.hasNext() && t2Iterator.hasNext()) {
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||
|
||||
if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey())))
|
||||
return false;
|
||||
|
||||
Map<BitSet, RecalDatum> table2 = t2MapEntry.getValue();
|
||||
for (Map.Entry<BitSet, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||
BitSet t1Key = t1TableEntry.getKey();
|
||||
final Map<Long, RecalDatum> table2 = t2MapEntry.getValue();
|
||||
for (Map.Entry<Long, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||
final Long t1Key = t1TableEntry.getKey();
|
||||
if (!table2.containsKey(t1Key))
|
||||
return false;
|
||||
RecalDatum t1Datum = t1TableEntry.getValue();
|
||||
final RecalDatum t1Datum = t1TableEntry.getValue();
|
||||
if (!t1Datum.equals(table2.get(t1Key)))
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,12 +33,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -147,7 +145,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header
|
||||
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header
|
||||
vcfWriter.writeHeader(new VCFHeader(ThresHolder.getHeaderInfo(), samples)); // initialize the VCF header
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -249,6 +247,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
|
||||
GenomeLoc interval = stats.getInterval();
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
|
@ -258,73 +257,46 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
|
||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
|
||||
vcb = vcb.attributes(attributes);
|
||||
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
|
||||
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
infos.put("MED", sampleStat.getQuantileDepth(0.50));
|
||||
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
|
||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
for (String sample : samples) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
gb.DP((int)sampleStat.averageCoverage());
|
||||
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
||||
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||
}
|
||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
|
||||
vcfWriter.add(vcb.make());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header lines for the VCF writer
|
||||
*
|
||||
* @return A set of VCF header lines
|
||||
*/
|
||||
private static Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));
|
||||
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that process a set of statuses into strings
|
||||
*
|
||||
* @param statuses the set of statuses to be converted
|
||||
* @return a matching set of strings
|
||||
*/
|
||||
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
Set<String> output = new HashSet<String>(statuses.size());
|
||||
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
List<String> output = new ArrayList<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
output.add(status.name());
|
||||
|
|
@ -333,6 +305,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
}
|
||||
|
||||
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
|
||||
return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/);
|
||||
return new IntervalStatistics(samples, interval);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||
@Output(required = true)
|
||||
private PrintStream out;
|
||||
|
||||
@Override
|
||||
// Look to see if the region has sufficient coverage
|
||||
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
||||
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
|
||||
|
||||
// note the linear probability scale
|
||||
int coverageThreshold = 20;
|
||||
return Math.min((double) depth / coverageThreshold, 1);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(final ActiveRegion activeRegion, final RefMetaDataTracker tracker) {
|
||||
if (activeRegion.isActive)
|
||||
return activeRegion.getLocation();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long reduce(final GenomeLoc value, Long reduce) {
|
||||
if (value != null) {
|
||||
out.println(value.toString());
|
||||
return reduce++;
|
||||
} else
|
||||
return reduce;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Long reduce) {
|
||||
logger.info(String.format("Found %d intervals", reduce));
|
||||
}
|
||||
}
|
||||
|
|
@ -79,14 +79,12 @@ class SampleStatistics {
|
|||
* @return the callable statuses of the entire sample
|
||||
*/
|
||||
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// We check if reads are present to prevent div / 0 exceptions
|
||||
if (nReads == 0) {
|
||||
output.add(CallableStatus.NO_READS);
|
||||
return output;
|
||||
return Collections.singleton(CallableStatus.NO_READS);
|
||||
}
|
||||
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
|
||||
|
||||
// initialize map
|
||||
|
|
@ -104,19 +102,19 @@ class SampleStatistics {
|
|||
|
||||
double intervalSize = interval.size();
|
||||
|
||||
if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold())
|
||||
if (((double) nBadMates / nReads) >= thresholds.getBadMateStatusThreshold())
|
||||
output.add(CallableStatus.BAD_MATE);
|
||||
|
||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.COVERAGE_GAPS);
|
||||
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold())
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) >= thresholds.getExcessiveCoverageThreshold())
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold())
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) >= thresholds.getQualityStatusThreshold())
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
|
||||
if (totals.get(CallableStatus.REF_N) > 0)
|
||||
|
|
@ -126,6 +124,7 @@ class SampleStatistics {
|
|||
if (output.isEmpty()) {
|
||||
output.add(CallableStatus.PASS);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
|
@ -146,7 +145,7 @@ class SampleStatistics {
|
|||
int locusIndex = locus.getStart() - interval.getStart();
|
||||
|
||||
int rawCoverage = pileup.depthOfCoverage();
|
||||
int coverage = pileup.getBaseAndMappingFilteredPileup(thresholds.getMinimumBaseQuality(), thresholds.getMinimumMappingQuality()).depthOfCoverage();
|
||||
int coverage = thresholds.getFilteredCoverage(pileup);
|
||||
|
||||
LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage);
|
||||
|
||||
|
|
@ -161,7 +160,7 @@ class SampleStatistics {
|
|||
// Was this read already processed?
|
||||
if (read.getTemporaryAttribute("checkedBadMate") == null) {
|
||||
nReads++;
|
||||
if (hasValidMate(read, thresholds))
|
||||
if (!hasValidMate(read, thresholds))
|
||||
nBadMates++;
|
||||
read.setTemporaryAttribute("checkedBadMate", true);
|
||||
}
|
||||
|
|
@ -254,7 +253,7 @@ class SampleStatistics {
|
|||
* reasonable insert size?
|
||||
* inverted?
|
||||
* same orientation?
|
||||
* todo - same contig?
|
||||
* same contig?
|
||||
* is pair mapped?
|
||||
* todo - is forced mate?
|
||||
*
|
||||
|
|
@ -264,6 +263,10 @@ class SampleStatistics {
|
|||
if (!read.getReadPairedFlag())
|
||||
return false;
|
||||
|
||||
// different contigs
|
||||
if (read.getMateReferenceIndex() != read.getReferenceIndex())
|
||||
return false;
|
||||
|
||||
// unmapped
|
||||
if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag())
|
||||
return false;
|
||||
|
|
@ -277,10 +280,19 @@ class SampleStatistics {
|
|||
read.getAlignmentStart() < read.getMateAlignmentStart())
|
||||
return false;
|
||||
|
||||
// TODO note: IGV uses a different algorithm for insert size, there should be a common util class that does this for you
|
||||
// mates are too far apart
|
||||
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * @return the current value of the nReads counter, which is incremented once for each read
 *         that has not yet been tagged with the "checkedBadMate" temporary attribute
 */
public int getnReads() {
|
||||
return nReads;
|
||||
}
|
||||
|
||||
/**
 * @return the current value of the nBadMates counter, which is incremented for each newly
 *         checked read for which hasValidMate() returns false
 */
public int getnBadMates() {
|
||||
return nBadMates;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
class ThresHolder {
|
||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
|
||||
|
|
@ -69,14 +75,6 @@ class ThresHolder {
|
|||
this.qualityStatusThreshold = qualityStatusThreshold;
|
||||
}
|
||||
|
||||
public int getMinimumBaseQuality() {
|
||||
return minimumBaseQuality;
|
||||
}
|
||||
|
||||
public int getMinimumMappingQuality() {
|
||||
return minimumMappingQuality;
|
||||
}
|
||||
|
||||
public int getMinimumCoverage() {
|
||||
return minimumCoverage;
|
||||
}
|
||||
|
|
@ -116,4 +114,37 @@ class ThresHolder {
|
|||
/**
 * @return the qualityStatusThreshold this holder was constructed with; SampleStatistics compares
 *         the POOR_QUALITY total divided by the interval size against this value
 */
public double getQualityStatusThreshold() {
|
||||
return qualityStatusThreshold;
|
||||
}
|
||||
|
||||
public int getFilteredCoverage(ReadBackedPileup pileup) {
|
||||
return pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header lines for the VCF writer
|
||||
*
|
||||
* @return A set of VCF header lines
|
||||
*/
|
||||
public static Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));
|
||||
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,8 +55,6 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
|
||||
int count = 0;
|
||||
while ( iterator.hasNext() ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
final SAMRecord record = iterator.next();
|
||||
|
||||
// name is the read name + first of pair
|
||||
|
|
@ -88,6 +86,9 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
if ( ! root.hasElement(name) )
|
||||
// protect ourselves from malformed files
|
||||
root.add(readRoot);
|
||||
count += readRoot.size();
|
||||
if ( count > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ public class DiffEngine {
|
|||
* @param diffs the list of differences to summarize
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizedDifferencesOfPaths(diffs, params.maxRawDiffsToSummarize), params );
|
||||
printSummaryReport(summarizedDifferencesOfPaths(diffs, params.doPairwise, params.maxRawDiffsToSummarize), params );
|
||||
}
|
||||
|
||||
final protected static String[] diffNameToPath(String diffName) {
|
||||
|
|
@ -161,9 +161,17 @@ public class DiffEngine {
|
|||
diffs.add(new Difference(diff));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffs, -1);
|
||||
return summarizedDifferencesOfPaths(diffs, true, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a minimum set of potential differences between all singleton differences
|
||||
* in singletonDiffs. Employs an expensive pairwise O(n^2) algorithm.
|
||||
*
|
||||
* @param singletonDiffs
|
||||
* @param maxRawDiffsToSummarize
|
||||
* @return
|
||||
*/
|
||||
private Map<String, Difference> initialPairwiseSummaries(final List<? extends Difference> singletonDiffs,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||
|
|
@ -191,9 +199,41 @@ public class DiffEngine {
|
|||
return summaries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the possible leaf differences among the singleton diffs.
|
||||
*
|
||||
* The leaf differences are all of the form *.*...*.X where all internal
|
||||
* differences are wildcards and the only summarized difference considered
|
||||
* interesting to compute is
|
||||
*
|
||||
* @param singletonDiffs
|
||||
* @param maxRawDiffsToSummarize
|
||||
* @return
|
||||
*/
|
||||
private Map<String, Difference> initialLeafSummaries(final List<? extends Difference> singletonDiffs,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||
|
||||
// create the initial set of differences
|
||||
for ( final Difference d : singletonDiffs ) {
|
||||
final String path = summarizedPath(d.getParts(), 1);
|
||||
Difference sumDiff = new Difference(path, d.getMaster(), d.getTest());
|
||||
sumDiff.setCount(0);
|
||||
addSummaryIfMissing(summaries, sumDiff);
|
||||
|
||||
if ( maxRawDiffsToSummarize != -1 && summaries.size() > maxRawDiffsToSummarize)
|
||||
return summaries;
|
||||
}
|
||||
|
||||
return summaries;
|
||||
}
|
||||
|
||||
protected List<Difference> summarizedDifferencesOfPaths(final List<? extends Difference> singletonDiffs,
|
||||
final boolean doPairwise,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = initialPairwiseSummaries(singletonDiffs, maxRawDiffsToSummarize);
|
||||
final Map<String, Difference> summaries = doPairwise
|
||||
? initialPairwiseSummaries(singletonDiffs, maxRawDiffsToSummarize)
|
||||
: initialLeafSummaries(singletonDiffs, maxRawDiffsToSummarize);
|
||||
|
||||
// count differences
|
||||
for ( Difference diffPath : singletonDiffs ) {
|
||||
|
|
@ -372,18 +412,21 @@ public class DiffEngine {
|
|||
final int maxCountOneItems;
|
||||
final int minSumDiffToShow;
|
||||
final int maxRawDiffsToSummarize;
|
||||
final boolean doPairwise;
|
||||
boolean descending = true;
|
||||
|
||||
public SummaryReportParams(PrintStream out,
|
||||
int maxItemsToDisplay,
|
||||
int maxCountOneItems,
|
||||
int minSumDiffToShow,
|
||||
int maxRawDiffsToSummarize) {
|
||||
int maxRawDiffsToSummarize,
|
||||
final boolean doPairwise) {
|
||||
this.out = out;
|
||||
this.maxItemsToDisplay = maxItemsToDisplay;
|
||||
this.maxCountOneItems = maxCountOneItems;
|
||||
this.minSumDiffToShow = minSumDiffToShow;
|
||||
this.maxRawDiffsToSummarize = maxRawDiffsToSummarize;
|
||||
this.doPairwise = doPairwise;
|
||||
}
|
||||
|
||||
public void setDescending(boolean descending) {
|
||||
|
|
|
|||
|
|
@ -111,21 +111,21 @@ import java.util.List;
|
|||
* <p>
|
||||
*
|
||||
* <pre>
|
||||
[testng] path count
|
||||
[testng] *.*.*.AC 6
|
||||
[testng] *.*.*.AF 6
|
||||
[testng] *.*.*.AN 6
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1
|
||||
</pre>
|
||||
[testng] path count
|
||||
[testng] *.*.*.AC 6
|
||||
[testng] *.*.*.AF 6
|
||||
[testng] *.*.*.AN 6
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1
|
||||
</pre>
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 7/4/11
|
||||
|
|
@ -165,6 +165,8 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="maxRawDiffsToSummary", shortName="maxRawDiffsToSummary", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
|
||||
int maxRawDiffsToSummary = -1;
|
||||
|
||||
@Argument(fullName="doPairwise", shortName="doPairwise", doc="If provided, we will compute the minimum pairwise differences to summary, which can be extremely expensive", required=false)
|
||||
boolean doPairwise = false;
|
||||
|
||||
/**
|
||||
* The max number of differences to display when summarizing. For example, if there are 10M differences, but
|
||||
|
|
@ -199,11 +201,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
|
||||
boolean showItemizedDifferences = false;
|
||||
|
||||
@Argument(fullName="iterations", doc="Number of iterations to perform, should be 1 unless you are doing memory testing", required=false)
|
||||
int iterations = 1;
|
||||
|
||||
DiffEngine diffEngine;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
this.diffEngine = new DiffEngine();
|
||||
this.diffEngine = new DiffEngine();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -223,29 +228,39 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Integer sum) {
|
||||
//out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", master.size()));
|
||||
//out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", test.size()));
|
||||
if ( iterations > 1 ) {
|
||||
for ( int i = 0; i < iterations; i++ ) {
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, 20, 10, 0, -1, false);
|
||||
boolean success = DiffEngine.simpleDiffFiles(masterFile, testFile, MAX_OBJECTS_TO_READ, params);
|
||||
logger.info("Iteration " + i + " success " + success);
|
||||
}
|
||||
} else {
|
||||
//out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", master.size()));
|
||||
//out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", test.size()));
|
||||
|
||||
// out.printf("Master diff objects%n");
|
||||
// out.println(master.toString());
|
||||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
logger.info(String.format("Done computing diff with %d differences found", diffs.size()));
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
logger.info(String.format("Done computing diff with %d differences found", diffs.size()));
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff, maxRawDiffsToSummary);
|
||||
params.setDescending(false);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
logger.info(String.format("Done summarizing differences"));
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out,
|
||||
MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff,
|
||||
maxRawDiffsToSummary, doPairwise);
|
||||
params.setDescending(false);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
logger.info(String.format("Done summarizing differences"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ import org.broad.tribble.AbstractFeatureReader;
|
|||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -79,9 +81,6 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
String prevName = "";
|
||||
Iterator<VariantContext> it = reader.iterator();
|
||||
while ( it.hasNext() ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
|
||||
VariantContext vc = it.next();
|
||||
String name = vc.getChr() + ":" + vc.getStart();
|
||||
if ( name.equals(prevName) ) {
|
||||
|
|
@ -109,9 +108,12 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
for (Genotype g : vc.getGenotypes() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasLog10PError() ? g.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
if ( g.hasGQ() ) gRoot.add("GQ", g.getGQ() );
|
||||
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
|
||||
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
|
||||
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
|
||||
|
||||
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
|
||||
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") )
|
||||
gRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
|
@ -120,6 +122,9 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
}
|
||||
|
||||
root.add(vcRoot);
|
||||
count += vcRoot.size();
|
||||
if ( count > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -297,13 +297,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
// for each genotype, check filters then create a new object
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
List<String> filters = new ArrayList<String>(g.getFilters());
|
||||
|
||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||
if ( VariantContextUtils.match(vc, g, exp) )
|
||||
filters.add(exp.name);
|
||||
}
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
|
||||
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
||||
} else {
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,11 +98,9 @@ public class ConsensusAlleleCounter {
|
|||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
final AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup indelPileup = context.getBasePileup();
|
||||
insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
|
||||
delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
|
||||
}
|
||||
final ReadBackedPileup indelPileup = context.getBasePileup();
|
||||
insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
|
||||
delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
|
||||
}
|
||||
|
||||
if ( insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping )
|
||||
|
|
@ -112,9 +110,6 @@ public class ConsensusAlleleCounter {
|
|||
// todo -- warning, can be duplicating expensive partition here
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
if ( !context.hasBasePileup() )
|
||||
continue;
|
||||
|
||||
final ReadBackedPileup indelPileup = context.getBasePileup();
|
||||
|
||||
final int nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
* @param ref reference context
|
||||
* @param contexts stratified alignment contexts
|
||||
* @param contextType stratified context type
|
||||
* @param alternateAllelesToUse the alternate allele to use, null if not set
|
||||
* @param allAllelesToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @param locParser Genome Loc Parser
|
||||
* @return variant context where genotypes are no-called but with GLs
|
||||
|
|
@ -98,7 +98,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> alternateAllelesToUse,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,8 +35,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -44,14 +43,13 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.util.*;
|
||||
|
||||
public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
private final int HAPLOTYPE_SIZE;
|
||||
|
||||
private final boolean getAlleleListFromVCF;
|
||||
private static final int HAPLOTYPE_SIZE = 80;
|
||||
|
||||
private boolean DEBUG = false;
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
||||
private boolean allelesArePadded;
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
|
|
@ -75,124 +73,56 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
super(UAC, logger);
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
||||
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
|
||||
}
|
||||
|
||||
protected List<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
protected static List<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenomeLocParser locParser) {
|
||||
GenomeLocParser locParser, UnifiedArgumentCollection UAC) {
|
||||
ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
|
||||
return counter.computeConsensusAlleles(ref, contexts, contextType);
|
||||
}
|
||||
|
||||
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
|
||||
|
||||
|
||||
public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> alternateAllelesToUse,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
|
||||
if (tracker == null)
|
||||
return null;
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
Allele refAllele, altAllele;
|
||||
VariantContext vc = null;
|
||||
|
||||
boolean allelesArePadded = true;
|
||||
|
||||
if (!ref.getLocus().equals(lastSiteVisited)) {
|
||||
// if (!ref.getLocus().equals(lastSiteVisited)) {
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
// starting a new site: clear allele list
|
||||
alleleList.clear();
|
||||
lastSiteVisited = ref.getLocus();
|
||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
if (getAlleleListFromVCF) {
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
|
||||
if (vc_input != null &&
|
||||
allowableTypes.contains(vc_input.getType()) &&
|
||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ignore places where we don't have a variant
|
||||
if (vc == null)
|
||||
return null;
|
||||
|
||||
alleleList.clear();
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
for (Allele a : vc.getAlleles())
|
||||
if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
|
||||
continue;
|
||||
else
|
||||
alleleList.add(a);
|
||||
|
||||
} else {
|
||||
for (Allele a : vc.getAlleles())
|
||||
alleleList.add(a);
|
||||
}
|
||||
if (vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
||||
allelesArePadded = false;
|
||||
|
||||
} else {
|
||||
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
}
|
||||
// protect against having an indel too close to the edge of a contig
|
||||
if (loc.getStart() <= HAPLOTYPE_SIZE)
|
||||
return null;
|
||||
|
||||
// check if there is enough reference window to create haplotypes (can be an issue at end of contigs)
|
||||
if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE)
|
||||
return null;
|
||||
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
|
||||
refAllele = alleleList.get(0);
|
||||
altAllele = alleleList.get(1);
|
||||
|
||||
// look for alt allele that has biggest length distance to ref allele
|
||||
int maxLenDiff = 0;
|
||||
for (Allele a : alleleList) {
|
||||
if (a.isNonReference()) {
|
||||
int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
|
||||
if (lenDiff > maxLenDiff) {
|
||||
maxLenDiff = lenDiff;
|
||||
altAllele = a;
|
||||
}
|
||||
}
|
||||
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
alleleList = pair.first;
|
||||
allelesArePadded = pair.second;
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
||||
final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||
final int hsize = ref.getWindow().size() - Math.abs(eventLength) - 1;
|
||||
final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||
|
||||
if (hsize <= 0) {
|
||||
logger.warn(String.format("Warning: event at location %s can't be genotyped, skipping", loc.toString()));
|
||||
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
||||
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
||||
return null;
|
||||
}
|
||||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||
ref, hsize, numPrefBases);
|
||||
|
||||
// start making the VariantContext
|
||||
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
|
||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
||||
if (allelesArePadded)
|
||||
endLoc++;
|
||||
|
||||
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
|
||||
final int eventLength = getEventLength(alleleList);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
|
|
@ -206,23 +136,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
||||
if (context.hasBasePileup()) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
|
||||
for (int k = 0; k < genotypeLikelihoods.length; k++)
|
||||
System.out.format("%1.4f ", genotypeLikelihoods[k]);
|
||||
System.out.println();
|
||||
}
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
|
||||
for (int k = 0; k < genotypeLikelihoods.length; k++)
|
||||
System.out.format("%1.4f ", genotypeLikelihoods[k]);
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -234,6 +160,102 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
|
||||
Allele refAllele = alleles.get(0);
|
||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
||||
if (allelesArePadded)
|
||||
endLoc++;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc loc,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap) {
|
||||
// protect against having an indel too close to the edge of a contig
|
||||
if (loc.getStart() <= HAPLOTYPE_SIZE)
|
||||
haplotypeMap.clear();
|
||||
// check if there is enough reference window to create haplotypes (can be an issue at end of contigs)
|
||||
else if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE)
|
||||
haplotypeMap.clear();
|
||||
else if (alleleList.isEmpty())
|
||||
haplotypeMap.clear();
|
||||
else {
|
||||
final int eventLength = getEventLength(alleleList);
|
||||
final int hsize = ref.getWindow().size() - Math.abs(eventLength) - 1;
|
||||
final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||
|
||||
haplotypeMap.putAll(Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||
ref, hsize, numPrefBases));
|
||||
}
|
||||
}
|
||||
|
||||
public static int getEventLength(List<Allele> alleleList) {
|
||||
Allele refAllele = alleleList.get(0);
|
||||
Allele altAllele = alleleList.get(1);
|
||||
// look for alt allele that has biggest length distance to ref allele
|
||||
int maxLenDiff = 0;
|
||||
for (Allele a : alleleList) {
|
||||
if (a.isNonReference()) {
|
||||
int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
|
||||
if (lenDiff > maxLenDiff) {
|
||||
maxLenDiff = lenDiff;
|
||||
altAllele = a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||
|
||||
}
|
||||
|
||||
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC,
|
||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
boolean allelesArePadded = true;
|
||||
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
VariantContext vc = null;
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||
if (vc_input != null &&
|
||||
allowableTypes.contains(vc_input.getType()) &&
|
||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ignore places where we don't have a variant
|
||||
if (vc == null)
|
||||
return new Pair<List<Allele>,Boolean>(alleles,false);
|
||||
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
for (Allele a : vc.getAlleles())
|
||||
if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
|
||||
continue;
|
||||
else
|
||||
alleles.add(a);
|
||||
|
||||
} else {
|
||||
alleles.addAll(vc.getAlleles());
|
||||
}
|
||||
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
||||
allelesArePadded = false;
|
||||
|
||||
|
||||
|
||||
} else {
|
||||
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
}
|
||||
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
|
||||
}
|
||||
|
||||
// Override function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||
// so that per-sample DP will include deletions covering the event.
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final List<Allele> alternateAllelesToUse,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
|
||||
|
|
@ -70,11 +70,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
final Allele refAllele = Allele.create(refBase, true);
|
||||
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
|
||||
|
||||
// calculate the GLs
|
||||
ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
|
||||
|
|
@ -90,9 +85,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
GLs.add(new SampleGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup)));
|
||||
}
|
||||
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
|
||||
// find the alternate allele(s) that we should be using
|
||||
if ( alternateAllelesToUse != null ) {
|
||||
alleles.addAll(alternateAllelesToUse);
|
||||
if ( allAllelesToUse != null ) {
|
||||
alleles.addAll(allAllelesToUse.subList(1,allAllelesToUse.size())); // this includes ref allele
|
||||
} else if ( useAlleleFromVCF ) {
|
||||
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||
|
||||
|
|
@ -156,12 +158,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
myLikelihoods[i] = allLikelihoods[PLordering[i]];
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth);
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name);
|
||||
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
|
||||
gb.PL(genotypeLikelihoods);
|
||||
gb.DP(sampleData.depth);
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).make();
|
||||
|
|
|
|||
|
|
@ -65,18 +65,15 @@ public class UnifiedArgumentCollection {
|
|||
/**
|
||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
|
||||
* is the default). Note that the confidence (QUAL) values for multi-sample low-pass (e.g. 4x per sample) calling might
|
||||
* be significantly smaller with the new EXACT model than with our older GRID_SEARCH model, as the latter tended to
|
||||
* over-estimate the confidence; for low-pass calling we tend to use much smaller thresholds (e.g. 4).
|
||||
* is the default).
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
|
||||
/**
|
||||
* the minimum phred-scaled Qscore threshold to emit low confidence calls. Genotypes with confidence >= this but less
|
||||
* than the calling threshold are emitted but marked as filtered.
|
||||
* This argument allows you to emit low quality calls as filtered records.
|
||||
*/
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -252,7 +252,7 @@ public class UnifiedGenotyperEngine {
|
|||
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make();
|
||||
}
|
||||
|
||||
if ( annotationEngine != null && rawContext.hasBasePileup() ) {
|
||||
if ( annotationEngine != null ) {
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
|
@ -378,10 +378,10 @@ public class UnifiedGenotyperEngine {
|
|||
double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();
|
||||
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, alternateAllelesToUse, false, model);
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
|
|
@ -390,7 +390,7 @@ public class UnifiedGenotyperEngine {
|
|||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, alternateAllelesToUse, false, model);
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model);
|
||||
AFresult.reset();
|
||||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
|
|
@ -422,7 +422,7 @@ public class UnifiedGenotyperEngine {
|
|||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext && rawContext.hasBasePileup() ) {
|
||||
if ( annotationEngine != null && !limitedContext ) {
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
|
@ -441,7 +441,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
if ( !BaseUtils.isRegularBase(refContext.getBase()) || !rawContext.hasBasePileup() )
|
||||
if ( !BaseUtils.isRegularBase(refContext.getBase()) )
|
||||
return null;
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||
|
|
@ -507,9 +507,7 @@ public class UnifiedGenotyperEngine {
|
|||
int depth = 0;
|
||||
|
||||
if ( isCovered ) {
|
||||
AlignmentContext context = contexts.get(sample);
|
||||
if ( context.hasBasePileup() )
|
||||
depth = context.getBasePileup().depthOfCoverage();
|
||||
depth = contexts.get(sample).getBasePileup().depthOfCoverage();
|
||||
}
|
||||
|
||||
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||
|
|
@ -571,37 +569,35 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
|
||||
|
||||
if ( rawContext.hasBasePileup() ) {
|
||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||
if ( vcInput == null )
|
||||
return models;
|
||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||
if ( vcInput == null )
|
||||
return models;
|
||||
|
||||
if ( vcInput.isSNP() ) {
|
||||
// ignore SNPs if the user chose INDEL mode only
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
||||
// ignore INDELs if the user chose SNP mode only
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
// No support for other types yet
|
||||
if ( vcInput.isSNP() ) {
|
||||
// ignore SNPs if the user chose INDEL mode only
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
||||
// ignore INDELs if the user chose SNP mode only
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
// No support for other types yet
|
||||
}
|
||||
else {
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) {
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
}
|
||||
else {
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) {
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
}
|
||||
else {
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
models.add(UAC.GLmodel);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
static private void getContextHomopolymerLength(final byte[] refBytes, final int[] hrunArray) {
|
||||
static private void getContextHomopolymerLength(final byte[] refBytes, final int[] hrunArray) {
|
||||
// compute forward hrun length, example:
|
||||
// AGGTGACCCCCCTGAGAG
|
||||
// 001000012345000000
|
||||
|
|
@ -164,10 +164,24 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
}
|
||||
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap, ReferenceContext ref, int eventLength, HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
||||
|
||||
|
||||
public synchronized double[] computeDiploidReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele, Haplotype> haplotypeMap, ReferenceContext ref, int eventLength, HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap){
|
||||
final int numHaplotypes = haplotypeMap.size();
|
||||
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][numHaplotypes];
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, indelLikelihoodMap, readCounts);
|
||||
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||
|
||||
}
|
||||
|
||||
public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap,
|
||||
final int[] readCounts) {
|
||||
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
|
||||
final PairHMM pairHMM = new PairHMM(bandedLikelihoods);
|
||||
|
||||
int readIdx=0;
|
||||
|
|
@ -367,7 +381,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||
return readLikelihoods;
|
||||
}
|
||||
|
||||
private boolean useSoftClippedBases(GATKSAMRecord read, long eventStartPos, int eventLength) {
|
||||
|
|
@ -385,7 +399,7 @@ public class PairHMMIndelErrorModel {
|
|||
return b1.length;
|
||||
}
|
||||
|
||||
private static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
||||
private static double[] getDiploidHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
||||
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||
|
||||
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
|
||||
|
|
|
|||
|
|
@ -185,38 +185,36 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
}
|
||||
|
||||
// look at the normal context to get deletions and positions with high entropy
|
||||
if ( context.hasBasePileup() ) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
||||
int mismatchQualities = 0, totalQualities = 0;
|
||||
final byte refBase = ref.getBase();
|
||||
for ( PileupElement p : pileup ) {
|
||||
int mismatchQualities = 0, totalQualities = 0;
|
||||
final byte refBase = ref.getBase();
|
||||
for ( PileupElement p : pileup ) {
|
||||
|
||||
// check the ends of the reads to see how far they extend
|
||||
furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());
|
||||
// check the ends of the reads to see how far they extend
|
||||
furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());
|
||||
|
||||
// is it a deletion or insertion?
|
||||
if ( p.isDeletion() || p.isBeforeInsertion() ) {
|
||||
hasIndel = true;
|
||||
if ( p.isBeforeInsertion() )
|
||||
hasInsertion = true;
|
||||
}
|
||||
|
||||
// look for mismatches
|
||||
else if ( lookForMismatchEntropy ) {
|
||||
if ( p.getBase() != refBase )
|
||||
mismatchQualities += p.getQual();
|
||||
totalQualities += p.getQual();
|
||||
}
|
||||
// is it a deletion or insertion?
|
||||
if ( p.isDeletion() || p.isBeforeInsertion() ) {
|
||||
hasIndel = true;
|
||||
if ( p.isBeforeInsertion() )
|
||||
hasInsertion = true;
|
||||
}
|
||||
|
||||
// make sure we're supposed to look for high entropy
|
||||
if ( lookForMismatchEntropy &&
|
||||
pileup.getNumberOfElements() >= minReadsAtLocus &&
|
||||
(double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
|
||||
hasPointEvent = true;
|
||||
// look for mismatches
|
||||
else if ( lookForMismatchEntropy ) {
|
||||
if ( p.getBase() != refBase )
|
||||
mismatchQualities += p.getQual();
|
||||
totalQualities += p.getQual();
|
||||
}
|
||||
}
|
||||
|
||||
// make sure we're supposed to look for high entropy
|
||||
if ( lookForMismatchEntropy &&
|
||||
pileup.getNumberOfElements() >= minReadsAtLocus &&
|
||||
(double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
|
||||
hasPointEvent = true;
|
||||
|
||||
// return null if no event occurred
|
||||
if ( !hasIndel && !hasPointEvent )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -316,6 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
// first, the basic info
|
||||
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
|
||||
// FORMAT and INFO fields
|
||||
// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
|
|
@ -616,7 +617,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
throw new StingException("Read "+read.getReadName()+" from "+getToolkit().getSourceFileForReaderID(getToolkit().getReaderIDForRead(read))+
|
||||
"has no Normal/Tumor tag associated with it");
|
||||
|
||||
// String rg = (String)read.getAttribute("RG");
|
||||
// String rg = (String)read.getExtendedAttribute("RG");
|
||||
// if ( rg == null )
|
||||
// throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has no read group in merged stream. RG is required for somatic calls.");
|
||||
|
||||
|
|
@ -1147,13 +1148,12 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( String sample : normalSamples ) {
|
||||
|
||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||
|
||||
if ( ! discard_event ) // we made a call - put actual het genotype here:
|
||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
gb.attributes(call.makeStatsAttributes(null));
|
||||
gb.alleles(! discard_event
|
||||
? alleles // we made a call - put actual het genotype here:
|
||||
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(gb.make());
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
|
|
@ -1237,11 +1237,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsNormal,false));
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsTumor,false) );
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
|
|
@ -2143,7 +2143,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
|
||||
class VCFIndelAttributes {
|
||||
public static String ALLELIC_DEPTH_KEY = "AD";
|
||||
public static String ALLELIC_DEPTH_KEY = VCFConstants.GENOTYPE_ALLELE_DEPTHS;
|
||||
public static String DEPTH_TOTAL_KEY = VCFConstants.DEPTH_KEY;
|
||||
|
||||
public static String MAPQ_KEY = "MQS";
|
||||
|
|
|
|||
|
|
@ -97,10 +97,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
private ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
||||
//Matrix of priors for all genotype combinations
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> mvCountMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix;
|
||||
|
||||
//Matrix of allele transmission
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>> transmissionMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>> transmissionMatrix;
|
||||
|
||||
//Metrics counters hash keys
|
||||
private final Byte NUM_TRIO_GENOTYPES_CALLED = 0;
|
||||
|
|
@ -138,17 +138,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
private EnumMap<FamilyMember,Genotype> trioPhasedGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
|
||||
|
||||
private ArrayList<Allele> getAlleles(Genotype.Type genotype){
|
||||
private ArrayList<Allele> getAlleles(GenotypeType genotype){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
if(genotype == Genotype.Type.HOM_REF){
|
||||
if(genotype == GenotypeType.HOM_REF){
|
||||
alleles.add(REF);
|
||||
alleles.add(REF);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HET){
|
||||
else if(genotype == GenotypeType.HET){
|
||||
alleles.add(REF);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HOM_VAR){
|
||||
else if(genotype == GenotypeType.HOM_VAR){
|
||||
alleles.add(VAR);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
|
|
@ -158,27 +158,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
return alleles;
|
||||
}
|
||||
|
||||
private boolean isPhasable(Genotype.Type genotype){
|
||||
return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR;
|
||||
private boolean isPhasable(GenotypeType genotype){
|
||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
//Create a new Genotype based on information from a single individual
|
||||
//Homozygous genotypes will be set as phased, heterozygous won't be
|
||||
private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){
|
||||
if(genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HOM_VAR){
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME, getAlleles(genotype), Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
}
|
||||
else
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME,getAlleles(genotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
private void phaseSingleIndividualAlleles(GenotypeType genotype, FamilyMember familyMember){
|
||||
boolean phase = genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HOM_VAR;
|
||||
trioPhasedGenotypes.put(familyMember, makeGenotype(genotype, phase));
|
||||
}
|
||||
|
||||
private Genotype makeGenotype(final GenotypeType type, boolean phase) {
|
||||
return makeGenotype(getAlleles(type), phase);
|
||||
}
|
||||
|
||||
private Genotype makeGenotype(final List<Allele> alleles, boolean phase) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
|
||||
gb.phased(phase);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
//Find the phase for a parent/child pair
|
||||
private void phasePairAlleles(Genotype.Type parentGenotype, Genotype.Type childGenotype, FamilyMember parent){
|
||||
private void phasePairAlleles(GenotypeType parentGenotype, GenotypeType childGenotype, FamilyMember parent){
|
||||
|
||||
//Special case for Het/Het as it is ambiguous
|
||||
if(parentGenotype == Genotype.Type.HET && childGenotype == Genotype.Type.HET){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, getAlleles(parentGenotype), Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
if(parentGenotype == GenotypeType.HET && childGenotype == GenotypeType.HET){
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -190,34 +197,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//If there is a possible phasing between the parent and child => phase
|
||||
int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
|
||||
if(childTransmittedAlleleIndex > -1){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
|
||||
parentPhasedAlleles.add(parentAlleles.get(1));
|
||||
parentPhasedAlleles.add(parentAlleles.get(0));
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentPhasedAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
//This is a Mendelian Violation => Do not phase
|
||||
else{
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME,getAlleles(parentGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
}
|
||||
}
|
||||
|
||||
//Phases a family by transmission
|
||||
private void phaseFamilyAlleles(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private void phaseFamilyAlleles(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
Set<ArrayList<Allele>> possiblePhasedChildGenotypes = new HashSet<ArrayList<Allele>>();
|
||||
ArrayList<Allele> motherAlleles = getAlleles(mother);
|
||||
|
|
@ -246,7 +253,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
motherPhasedAlleles.add(motherAlleles.get(0));
|
||||
else
|
||||
motherPhasedAlleles.add(motherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,motherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(motherPhasedAlleles, true));
|
||||
|
||||
//Create father's genotype
|
||||
ArrayList<Allele> fatherPhasedAlleles = new ArrayList<Allele>(2);
|
||||
|
|
@ -255,10 +262,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
fatherPhasedAlleles.add(fatherAlleles.get(0));
|
||||
else
|
||||
fatherPhasedAlleles.add(fatherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,fatherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(fatherPhasedAlleles,true));
|
||||
|
||||
//Create child's genotype
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,childPhasedAllelesAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAllelesAlleles,true));
|
||||
|
||||
//Once a phased combination is found; exit
|
||||
return;
|
||||
|
|
@ -266,16 +273,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//If this is reached then no phasing could be found
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,getAlleles(mother),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,getAlleles(father),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(child),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(father,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(child,false));
|
||||
}
|
||||
|
||||
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
|
||||
If one or more genotypes are set as NO_CALL or UNAVAILABLE, it will phase them like a pair
|
||||
or single individual.
|
||||
*/
|
||||
public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
public TrioPhase(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Take care of cases where one or more family members are no call
|
||||
if(!isPhasable(child)){
|
||||
|
|
@ -297,7 +304,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
}
|
||||
//Special case for Het/Het/Het as it is ambiguous
|
||||
else if(mother == Genotype.Type.HET && father == Genotype.Type.HET && child == Genotype.Type.HET){
|
||||
else if(mother == GenotypeType.HET && father == GenotypeType.HET && child == GenotypeType.HET){
|
||||
phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
|
||||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
|
||||
|
|
@ -311,7 +318,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){
|
||||
ArrayList<Allele> childAlleles = new ArrayList<Allele>(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles());
|
||||
childAlleles.add(childAlleles.remove(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,makeGenotype(childAlleles,true));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -347,7 +354,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Add the transmission probability
|
||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getAttributes());
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
|
||||
|
|
@ -370,7 +377,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
else
|
||||
log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType());
|
||||
|
||||
return new Genotype(genotype.getSampleName(), phasedAlleles, log10Error, null, genotypeAttributes, phasedGenotype.isPhased());
|
||||
return new GenotypeBuilder(genotype).alleles(phasedAlleles)
|
||||
.log10PError(log10Error)
|
||||
.attributes(genotypeAttributes)
|
||||
.phased(phasedGenotype.isPhased()).make();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -438,15 +448,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Create the transmission matrices
|
||||
private void buildMatrices(){
|
||||
mvCountMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
transmissionMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mother : Genotype.Type.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>(Genotype.Type.class));
|
||||
for(Genotype.Type father : Genotype.Type.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<Genotype.Type, Integer>(Genotype.Type.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<Genotype.Type,TrioPhase>(Genotype.Type.class));
|
||||
for(Genotype.Type child : Genotype.Type.values()){
|
||||
mvCountMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
transmissionMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>>(GenotypeType.class);
|
||||
for(GenotypeType mother : GenotypeType.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>(GenotypeType.class));
|
||||
for(GenotypeType father : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioPhase>(GenotypeType.class));
|
||||
for(GenotypeType child : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
||||
transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child));
|
||||
}
|
||||
|
|
@ -457,16 +467,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Returns the number of Mendelian Violations for a given genotype combination.
|
||||
//If one of the parents genotype is missing, it will consider it as a parent/child pair
|
||||
//If the child genotype or both parents genotypes are missing, 0 is returned.
|
||||
private int getCombinationMVCount(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private int getCombinationMVCount(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Child is no call => No MV
|
||||
if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE)
|
||||
if(child == GenotypeType.NO_CALL || child == GenotypeType.UNAVAILABLE)
|
||||
return 0;
|
||||
//Add parents with genotypes for the evaluation
|
||||
ArrayList<Genotype.Type> parents = new ArrayList<Genotype.Type>();
|
||||
if (!(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE))
|
||||
ArrayList<GenotypeType> parents = new ArrayList<GenotypeType>();
|
||||
if (!(mother == GenotypeType.NO_CALL || mother == GenotypeType.UNAVAILABLE))
|
||||
parents.add(mother);
|
||||
if (!(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE))
|
||||
if (!(father == GenotypeType.NO_CALL || father == GenotypeType.UNAVAILABLE))
|
||||
parents.add(father);
|
||||
|
||||
//Both parents no calls => No MV
|
||||
|
|
@ -477,35 +487,35 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int parentsNumRefAlleles = 0;
|
||||
int parentsNumAltAlleles = 0;
|
||||
|
||||
for(Genotype.Type parent : parents){
|
||||
if(parent == Genotype.Type.HOM_REF){
|
||||
for(GenotypeType parent : parents){
|
||||
if(parent == GenotypeType.HOM_REF){
|
||||
parentsNumRefAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HET){
|
||||
else if(parent == GenotypeType.HET){
|
||||
parentsNumRefAlleles++;
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HOM_VAR){
|
||||
else if(parent == GenotypeType.HOM_VAR){
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
}
|
||||
|
||||
//Case Child is HomRef
|
||||
if(child == Genotype.Type.HOM_REF){
|
||||
if(child == GenotypeType.HOM_REF){
|
||||
if(parentsNumRefAlleles == parents.size())
|
||||
return 0;
|
||||
else return (parents.size()-parentsNumRefAlleles);
|
||||
}
|
||||
|
||||
//Case child is HomVar
|
||||
if(child == Genotype.Type.HOM_VAR){
|
||||
if(child == GenotypeType.HOM_VAR){
|
||||
if(parentsNumAltAlleles == parents.size())
|
||||
return 0;
|
||||
else return parents.size()-parentsNumAltAlleles;
|
||||
}
|
||||
|
||||
//Case child is Het
|
||||
if(child == Genotype.Type.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
if(child == GenotypeType.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
return 0;
|
||||
|
||||
//MV
|
||||
|
|
@ -513,7 +523,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//Given two trio genotypes combinations, returns the number of different genotypes between the two combinations.
|
||||
private int countFamilyGenotypeDiff(Genotype.Type motherOriginal,Genotype.Type fatherOriginal,Genotype.Type childOriginal,Genotype.Type motherNew,Genotype.Type fatherNew,Genotype.Type childNew){
|
||||
private int countFamilyGenotypeDiff(GenotypeType motherOriginal,GenotypeType fatherOriginal,GenotypeType childOriginal,GenotypeType motherNew,GenotypeType fatherNew,GenotypeType childNew){
|
||||
int count = 0;
|
||||
if(motherOriginal!=motherNew)
|
||||
count++;
|
||||
|
|
@ -526,21 +536,21 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Get a Map of genotype likelihoods.
|
||||
//In case of null, unavailable or no call, all likelihoods are 1/3.
|
||||
private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
if(genotype == null || !genotype.isCalled()){
|
||||
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HET,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
|
||||
EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HET,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
|
||||
return likelihoods;
|
||||
}
|
||||
return genotype.getLikelihoods().getAsMap(true);
|
||||
}
|
||||
|
||||
//Returns the Genotype.Type; returns UNVAILABLE if given null
|
||||
private Genotype.Type getTypeSafeNull(Genotype genotype){
|
||||
//Returns the GenotypeType; returns UNAVAILABLE if given null
|
||||
private GenotypeType getTypeSafeNull(Genotype genotype){
|
||||
if(genotype == null)
|
||||
return Genotype.Type.UNAVAILABLE;
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
return genotype.getType();
|
||||
}
|
||||
|
||||
|
|
@ -561,18 +571,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Always assign the first parent as the parent having genotype information in pairs
|
||||
//Always assign the mother as the first parent in trios
|
||||
int parentsCalled = 0;
|
||||
Map<Genotype.Type,Double> firstParentLikelihoods;
|
||||
Map<Genotype.Type,Double> secondParentLikelihoods;
|
||||
ArrayList<Genotype.Type> bestFirstParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestSecondParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
|
||||
Genotype.Type pairSecondParentGenotype = null;
|
||||
Map<GenotypeType,Double> firstParentLikelihoods;
|
||||
Map<GenotypeType,Double> secondParentLikelihoods;
|
||||
ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
|
||||
GenotypeType pairSecondParentGenotype = null;
|
||||
if(mother == null || !mother.isCalled()){
|
||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
bestFirstParentGenotype.add(getTypeSafeNull(father));
|
||||
bestSecondParentGenotype.add(getTypeSafeNull(mother));
|
||||
pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
|
||||
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
|
||||
if(father != null && father.isCalled())
|
||||
parentsCalled = 1;
|
||||
}
|
||||
|
|
@ -583,12 +593,12 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
bestSecondParentGenotype.add(getTypeSafeNull(father));
|
||||
if(father == null || !father.isCalled()){
|
||||
parentsCalled = 1;
|
||||
pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
|
||||
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
|
||||
}else{
|
||||
parentsCalled = 2;
|
||||
}
|
||||
}
|
||||
Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
bestChildGenotype.add(getTypeSafeNull(child));
|
||||
|
||||
//Prior vars
|
||||
|
|
@ -604,9 +614,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int mvCount;
|
||||
int cumulativeMVCount = 0;
|
||||
double configurationLikelihood = 0;
|
||||
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
|
||||
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
|
||||
if(parentsCalled<2){
|
||||
|
|
@ -797,9 +807,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoodsString(),
|
||||
phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
|
||||
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); // child fields in GT:DP:AD:PL order, same argument shape as the mother/father fields above
|
||||
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
}
|
||||
|
|
@ -809,8 +819,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
}
|
||||
else{
|
||||
|
|
@ -820,8 +830,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
|
||||
//Report violation if set so
|
||||
|
|
|
|||
|
|
@ -109,14 +109,13 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
|
||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||
|
||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -269,10 +269,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
logger.debug("Unprocessed variant = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
|
||||
}
|
||||
|
||||
int numReads = 0;
|
||||
if (context.hasBasePileup()) {
|
||||
numReads = context.getBasePileup().getNumberOfElements();
|
||||
}
|
||||
int numReads = context.getBasePileup().getNumberOfElements();
|
||||
PhasingStats addInPhaseStats = new PhasingStats(numReads, 1);
|
||||
phaseStats.addIn(addInPhaseStats);
|
||||
}
|
||||
|
|
@ -288,7 +285,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
@ -374,7 +371,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
|
||||
// true <-> can trivially phase a hom site relative to ANY previous site:
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getLog10PError(), gt.getFilters(), gt.getAttributes(), true);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt).phased(true).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
|
||||
|
|
@ -408,9 +405,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (DEBUG) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||
|
||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
gtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt)
|
||||
.alleles(allelePair.getAllelesAsList())
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
|
||||
|
|
@ -428,9 +426,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
interiorUvc.setPhasingInconsistent();
|
||||
|
||||
if (genotypesArePhased) {
|
||||
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
|
||||
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
|
||||
Genotype phasedHomGt = new GenotypeBuilder(handledGt)
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
interiorUvc.setGenotype(samp, phasedHomGt);
|
||||
}
|
||||
}
|
||||
|
|
@ -1106,10 +1104,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
this.sampleReadBases = new HashMap<String, ReadBasesAtPosition>();
|
||||
|
||||
if (alignment != null) {
|
||||
ReadBackedPileup pileup = null;
|
||||
if (alignment.hasBasePileup()) {
|
||||
pileup = alignment.getBasePileup();
|
||||
}
|
||||
ReadBackedPileup pileup = alignment.getBasePileup();
|
||||
if (pileup != null) {
|
||||
// filter the read-base pileup based on min base and mapping qualities:
|
||||
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
|
||||
|
|
@ -1439,7 +1434,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
public static boolean isUnfilteredCalledDiploidGenotype(Genotype gt) {
|
||||
return (gt.isNotFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
return (! gt.isFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
}
|
||||
|
||||
private class MultipleBaseCountsWriter {
|
||||
|
|
|
|||
|
|
@ -365,7 +365,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
return counter;
|
||||
|
||||
// Do not operate on variants that are not covered to the optional minimum depth
|
||||
if (!context.hasReads() || !context.hasBasePileup() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
|
||||
if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) {
|
||||
counter.nUncovered = 1L;
|
||||
if (vcComp.getAttribute("GV").equals("T"))
|
||||
counter.nAltNotCalled = 1L;
|
||||
|
|
@ -423,7 +423,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
}
|
||||
}
|
||||
else {
|
||||
// if (!vcComp.hasAttribute("GV"))
|
||||
// if (!vcComp.hasExtendedAttribute("GV"))
|
||||
// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart());
|
||||
|
||||
if (call.isCalledAlt(callConf)) {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -54,7 +55,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* Initialize this object
|
||||
*/
|
||||
public GenotypeConcordance() {
|
||||
final int nGenotypeTypes = Genotype.Type.values().length;
|
||||
final int nGenotypeTypes = GenotypeType.values().length;
|
||||
truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes];
|
||||
}
|
||||
|
||||
|
|
@ -75,11 +76,11 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
if (eval != null) {
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
final GenotypeType called = g.getType();
|
||||
final GenotypeType truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
truth = Genotype.Type.NO_CALL;
|
||||
truth = GenotypeType.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
}
|
||||
|
|
@ -90,19 +91,19 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
// otherwise, mark no-calls for all samples
|
||||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
final GenotypeType called = GenotypeType.NO_CALL;
|
||||
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
final GenotypeType truth = g.getType();
|
||||
incrValue(truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||
if ( (truth == GenotypeType.HOM_VAR || truth == GenotypeType.HET) && called == GenotypeType.NO_CALL ) {
|
||||
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
|
||||
}
|
||||
if ( (called == Genotype.Type.HOM_VAR || called == Genotype.Type.HET) && truth == Genotype.Type.HOM_REF ) {
|
||||
if ( (called == GenotypeType.HOM_VAR || called == GenotypeType.HET) && truth == GenotypeType.HOM_REF ) {
|
||||
super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation);
|
||||
}
|
||||
}
|
||||
|
|
@ -121,36 +122,36 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* @param truth the truth type
|
||||
* @param called the called type
|
||||
*/
|
||||
private void incrValue(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private void incrValue(final GenotypeType truth, final GenotypeType called) {
|
||||
truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++;
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private long count(final GenotypeType truth, final GenotypeType called) {
|
||||
return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()];
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final Genotype.Type called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final GenotypeType called) {
|
||||
return count(truth, EnumSet.of(called));
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final GenotypeType truth, final EnumSet<GenotypeType> called) {
|
||||
return count(EnumSet.of(truth), called);
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final EnumSet<GenotypeType> called) {
|
||||
long sum = 0;
|
||||
for ( final Genotype.Type truth1 : truth ) {
|
||||
for ( final Genotype.Type called1 : called ) {
|
||||
for ( final GenotypeType truth1 : truth ) {
|
||||
for ( final GenotypeType called1 : called ) {
|
||||
sum += count(truth1, called1);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
private long countDiag( final EnumSet<Genotype.Type> d1 ) {
|
||||
private long countDiag( final EnumSet<GenotypeType> d1 ) {
|
||||
long sum = 0;
|
||||
|
||||
for(final Genotype.Type e1 : d1 ) {
|
||||
for(final GenotypeType e1 : d1 ) {
|
||||
sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -159,13 +160,13 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
final EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||
final EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||
final EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||
final EnumSet<GenotypeType> allVariantGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET);
|
||||
final EnumSet<GenotypeType> allCalledGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET, GenotypeType.HOM_REF);
|
||||
final EnumSet<GenotypeType> allGenotypes = EnumSet.allOf(GenotypeType.class);
|
||||
|
||||
// exact values of the table
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("n_true_%s_called_%s", truth, called);
|
||||
final Long value = count(truth, called);
|
||||
map.put(field, value.toString());
|
||||
|
|
@ -173,20 +174,20 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
// counts of called genotypes
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("total_called_%s", called);
|
||||
final Long value = count(allGenotypes, called);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
// counts of true genotypes
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
final String field = String.format("total_true_%s", truth);
|
||||
final Long value = count(truth, allGenotypes);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
for ( final Genotype.Type genotype : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType genotype : GenotypeType.values() ) {
|
||||
final String field = String.format("percent_%s_called_%s", genotype, genotype);
|
||||
long numer = count(genotype, genotype);
|
||||
long denom = count(EnumSet.of(genotype), allGenotypes);
|
||||
|
|
@ -215,7 +216,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
// overall genotype concordance of sites called non-ref in eval track
|
||||
// MAD: this is the non-reference discrepancy rate
|
||||
final String field = "percent_non_reference_discrepancy_rate";
|
||||
long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF);
|
||||
long homrefConcords = count(GenotypeType.HOM_REF, GenotypeType.HOM_REF);
|
||||
long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
long numer = allNoHomRef - countDiag(allVariantGenotypes);
|
||||
long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
|
|
|
|||
|
|
@ -121,9 +121,9 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
int ac = 0;
|
||||
if ( vc.getNAlleles() > 2 ) {
|
||||
return SiteStatus.POLY;
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes
|
||||
// for ( String v : (List<String>)vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// for ( String v : (List<String>)vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// ac += Integer.valueOf(v);
|
||||
//// System.out.printf(" ac = %d%n", ac);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -241,7 +241,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
// update transition / transversion ratio
|
||||
if ( titvTable != null ) titvTable.inc(type, g.getSampleName());
|
||||
|
||||
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||
if ( g.hasDP() )
|
||||
depthPerSample.inc(type, g.getSampleName());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
|
||||
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
newA = Allele.NO_CALL;
|
||||
newAlleles.add(newA);
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
|
|
|
|||
|
|
@ -315,6 +315,15 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
|
||||
private boolean fullyDecode = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="forceGenotypesDecode", doc="If true, the incoming VariantContext will have its genotypes forcibly decoded by computing AC across all genotypes. For efficiency testing only", required=false)
|
||||
private boolean forceGenotypesDecode = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="justRead", doc="If true, we won't actually write the output file. For efficiency testing only", required=false)
|
||||
private boolean justRead = false;
|
||||
|
||||
|
||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||
private class RandomVariantStructure {
|
||||
private VariantContext vc;
|
||||
|
|
@ -392,11 +401,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
|
||||
samples.removeAll(XLsamplesFromFile);
|
||||
samples.removeAll(XLsampleNames);
|
||||
NO_SAMPLES_SPECIFIED = NO_SAMPLES_SPECIFIED && XLsampleNames.isEmpty();
|
||||
|
||||
if ( samples.size() == 0 && !NO_SAMPLES_SPECIFIED )
|
||||
throw new UserException("All samples requested to be included were also requested to be excluded.");
|
||||
|
||||
for ( String sample : samples )
|
||||
if ( ! NO_SAMPLES_SPECIFIED )
|
||||
for ( String sample : samples )
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
|
|
@ -494,7 +505,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if ( fullyDecode ) vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
|
||||
// an option for performance testing only
|
||||
if ( fullyDecode )
|
||||
vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
|
||||
|
||||
// an option for performance testing only
|
||||
if ( forceGenotypesDecode ) {
|
||||
final int x = vc.getCalledChrCount();
|
||||
//logger.info("forceGenotypesDecode with getCalledChrCount() = " + x);
|
||||
}
|
||||
|
||||
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||
continue;
|
||||
|
||||
|
|
@ -538,7 +558,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (!selectedTypes.contains(vc.getType()))
|
||||
continue;
|
||||
|
||||
VariantContext sub = subsetRecord(vc, samples, EXCLUDE_NON_VARIANTS);
|
||||
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
||||
|
||||
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
|
||||
|
|
@ -559,7 +579,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
if ( ! justRead )
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -687,18 +708,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
|
||||
*
|
||||
* @param vc the VariantContext record to subset
|
||||
* @param samples the samples to extract
|
||||
* @return the subsetted VariantContext
|
||||
*/
|
||||
private VariantContext subsetRecord(final VariantContext vc, final Set<String> samples, final boolean excludeNonVariants) {
|
||||
if ( samples == null || samples.isEmpty() )
|
||||
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
|
||||
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
|
||||
return vc;
|
||||
|
||||
final VariantContext sub;
|
||||
if ( excludeNonVariants )
|
||||
sub = vc.subContextFromSamples(samples); // strip out the alternate alleles that aren't being used
|
||||
else
|
||||
sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
|
@ -708,15 +725,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
if ( fractionGenotypes > 0 ){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
@ -750,14 +765,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
for (String sample : originalVC.getSampleNames()) {
|
||||
Genotype g = originalVC.getGenotype(sample);
|
||||
|
||||
if ( g.isNotFiltered() ) {
|
||||
|
||||
String dp = (String) g.getAttribute("DP");
|
||||
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
depth += Integer.valueOf(dp);
|
||||
}
|
||||
if ( ! g.isFiltered() ) {
|
||||
if ( g.hasDP() )
|
||||
depth += g.getDP();
|
||||
}
|
||||
}
|
||||
|
||||
builder.attribute("DP", depth);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -288,8 +288,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getStandardEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_REF;
|
||||
} else if ( g.isHomVar() ) {
|
||||
|
|
@ -305,7 +305,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getFlippedEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -314,8 +315,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
if ( addGenotypeFields ) {
|
||||
for ( final String sample : samples ) {
|
||||
for ( final String gf : genotypeFields ) {
|
||||
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAttribute(gf) )
|
||||
addFieldValue(vc.getGenotype(sample).getAttribute(gf), records);
|
||||
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) {
|
||||
if ( gf.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records);
|
||||
else
|
||||
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
|
||||
}
|
||||
else
|
||||
addFieldValue(MISSING_DATA, records);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
||||
Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
|
||||
builder.genotypes(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,5 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -16,10 +12,8 @@ import java.util.Map;
|
|||
*/
|
||||
public class BitSetUtils {
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
/**
|
||||
* Creates a long out of a bitset
|
||||
|
|
@ -112,173 +106,4 @@ public class BitSetUtils {
|
|||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param bitSet the bitset representation of the dna sequence
|
||||
* @return the dna sequence represented by the bitset
|
||||
*/
|
||||
public static String dnaFrom(final BitSet bitSet) {
|
||||
long number = longFrom(bitSet); // the base_10 representation of the bit set
|
||||
if (number < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0:
|
||||
dna.append('A');
|
||||
break;
|
||||
case 1:
|
||||
dna.append('C');
|
||||
break;
|
||||
case 2:
|
||||
dna.append('G');
|
||||
break;
|
||||
case 3:
|
||||
dna.append('T');
|
||||
break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
return bitSetFrom(dna.getBytes());
|
||||
}
|
||||
|
||||
public static BitSet bitSetFrom(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (final byte base : dna) {
|
||||
baseTen *= 4;
|
||||
baseTen += BaseUtils.simpleBaseToBaseIndex(base);
|
||||
}
|
||||
return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the bitset
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsFor(length); // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
* @return the sum of all combinations leading up to this context length.
|
||||
*/
|
||||
private static long combinationsFor(int length) {
|
||||
if (length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
||||
|
||||
// only calculate the number of combinations if the table hasn't already cached the value
|
||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
return combinationsPerLength[length];
|
||||
}
|
||||
|
||||
|
||||
public static byte[] sizeOf(Object obj) throws java.io.IOException
|
||||
{
|
||||
ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
|
||||
ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
|
||||
objectOutputStream.writeObject(obj);
|
||||
objectOutputStream.flush();
|
||||
objectOutputStream.close();
|
||||
byteObject.close();
|
||||
|
||||
return byteObject.toByteArray();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -30,7 +31,7 @@ public class MendelianViolation {
|
|||
private boolean allCalledOnly = true;
|
||||
|
||||
//Stores occurrences of inheritance
|
||||
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||
private EnumMap<GenotypeType, EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inheritance;
|
||||
|
||||
private int violations_total=0;
|
||||
|
||||
|
|
@ -74,119 +75,119 @@ public class MendelianViolation {
|
|||
|
||||
//Count of HomRef/HomRef/HomRef trios
|
||||
public int getRefRefRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of HomVar/HomVar/HomVar trios
|
||||
public int getVarVarVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of HomRef/HomVar/Het trios
|
||||
public int getRefVarHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HET) +
|
||||
inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/Het trios
|
||||
public int getHetHetHet(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomRef trios
|
||||
public int getHetHetHomRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomVar trios
|
||||
public int getHetHetHomVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
//return parentsHetHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHetHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
//return parentsHomRefHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
//return parentsHomRefHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
//return parentsHomVarHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHomVarHet_childVar;
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||
public int getParentsRefRefChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||
public int getParentsRefRefChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||
public int getParentsRefHetChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||
public int getParentsRefVarChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||
public int getParentsRefVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||
public int getParentsVarHetChildRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||
public int getParentsVarVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||
public int getParentsVarVarChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -362,12 +363,12 @@ public class MendelianViolation {
|
|||
|
||||
private void createInheritanceMap(){
|
||||
|
||||
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
inheritance.put(mType, new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<GenotypeType,Integer>(GenotypeType.class));
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
@ -376,9 +377,9 @@ public class MendelianViolation {
|
|||
}
|
||||
|
||||
private void clearInheritanceMap(){
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -225,9 +225,9 @@ public class SequenceDictionaryUtils {
|
|||
return false;
|
||||
|
||||
// todo -- reenable if we want to be really strict here
|
||||
// if (me.getAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getAttribute(SAMSequenceRecord.MD5_TAG) != null) {
|
||||
// final BigInteger thisMd5 = new BigInteger((String)me.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// final BigInteger thatMd5 = new BigInteger((String)that.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// if (me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null) {
|
||||
// final BigInteger thisMd5 = new BigInteger((String)me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// final BigInteger thatMd5 = new BigInteger((String)that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// if (!thisMd5.equals(thatMd5)) {
|
||||
// return false;
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -223,6 +223,20 @@ public class Utils {
|
|||
return ret.toString();
|
||||
}
|
||||
|
||||
public static String join(String separator, int[] ints) {
|
||||
if ( ints == null || ints.length == 0)
|
||||
return "";
|
||||
else {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
ret.append(ints[0]);
|
||||
for (int i = 1; i < ints.length; ++i) {
|
||||
ret.append(separator);
|
||||
ret.append(ints[i]);
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
|
|
@ -33,9 +35,7 @@ import org.broad.tribble.readers.AsciiLineReader;
|
|||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
|
|
@ -45,15 +45,45 @@ import java.io.FileNotFoundException;
|
|||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||
/**
|
||||
* Decode BCF2 files
|
||||
*/
|
||||
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private VCFHeader header = null;
|
||||
|
||||
/**
|
||||
* Maps offsets (encoded in BCF) into contig names (from header) for the CHROM field
|
||||
*/
|
||||
private final ArrayList<String> contigNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Maps header string names (encoded in VCF) into strings found in the BCF header
|
||||
*
|
||||
* Initialized when processing the header
|
||||
*/
|
||||
private ArrayList<String> dictionary;
|
||||
|
||||
/**
|
||||
* Our decoder that reads low-level objects from the BCF2 records
|
||||
*/
|
||||
private final BCF2Decoder decoder = new BCF2Decoder();
|
||||
private boolean skipGenotypes = false;
|
||||
|
||||
/**
|
||||
* Provides some sanity checking on the header
|
||||
*/
|
||||
private final static int MAX_HEADER_SIZE = 0x08000000;
|
||||
|
||||
/**
|
||||
* Genotype field decoders that are initialized when the header is read
|
||||
*/
|
||||
private BCF2GenotypeFieldDecoders gtFieldDecoders = null;
|
||||
|
||||
// for error handling
|
||||
private int recordNo = 0;
|
||||
private int pos = 0;
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Feature codec interface functions
|
||||
|
|
@ -62,28 +92,30 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
|
||||
@Override
|
||||
public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
|
||||
return decode(inputStream);
|
||||
// TODO: a less expensive version of decodeLoc() that doesn't use VariantContext
|
||||
// TODO: very easy -- just decodeSitesBlock, and then skip to the end of the sites block
|
||||
// TODO: and then skip genotypes block
|
||||
recordNo++;
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
|
||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream); // necessary because it's in the stream
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
decodeSiteLoc(builder);
|
||||
|
||||
return builder.fullyDecoded(true).make();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VariantContext decode( final PositionalBufferedStream inputStream ) {
|
||||
recordNo++;
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
|
||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
final SitesInfoForDecoding info = decodeSitesBlock(builder);
|
||||
|
||||
if ( isSkippingGenotypes() ) {
|
||||
decoder.skipNextBlock(genotypeBlockSize, inputStream);
|
||||
} else {
|
||||
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
decodeGenotypes(info, builder);
|
||||
}
|
||||
decodeSiteLoc(builder);
|
||||
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
|
||||
|
||||
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
createLazyGenotypesDecoder(info, builder);
|
||||
return builder.fullyDecoded(true).make();
|
||||
}
|
||||
|
||||
|
|
@ -97,16 +129,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
try {
|
||||
// note that this reads the magic as well, and so does double duty
|
||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||
throw new UserException.MalformedBCF2("Input stream does not begin with BCF2 magic");
|
||||
error("Input stream does not begin with BCF2 magic");
|
||||
|
||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||
|
||||
if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB
|
||||
throw new UserException.MalformedBCF2("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
|
||||
error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
|
||||
|
||||
final byte[] headerBytes = new byte[headerSizeInBytes];
|
||||
if ( inputStream.read(headerBytes) != headerSizeInBytes )
|
||||
throw new UserException.MalformedBCF2("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes);
|
||||
error("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes);
|
||||
|
||||
final PositionalBufferedStream bps = new PositionalBufferedStream(new ByteArrayInputStream(headerBytes));
|
||||
final AsciiLineReader headerReader = new AsciiLineReader(bps);
|
||||
|
|
@ -118,12 +150,24 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
}
|
||||
|
||||
// create the contig offsets
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines())
|
||||
contigNames.add(contig.getID());
|
||||
if ( ! header.getContigLines().isEmpty() ) {
|
||||
logger.info("Found contig lines in BCF2 file, using those");
|
||||
contigNames.clear();
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines()) {
|
||||
if ( contig.getID() == null || contig.getID().equals("") )
|
||||
error("found a contig with an invalid ID " + contig);
|
||||
contigNames.add(contig.getID());
|
||||
}
|
||||
} else {
|
||||
logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary");
|
||||
}
|
||||
|
||||
// create the string dictionary
|
||||
dictionary = parseDictionary(header);
|
||||
|
||||
// prepare the genotype field decoders
|
||||
gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
|
||||
|
||||
// position right before next line (would be right before first real record byte at end of header)
|
||||
return new FeatureCodecHeader(header, inputStream.getPosition());
|
||||
}
|
||||
|
|
@ -153,7 +197,6 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@Override
|
||||
public void setGenomeLocParser(final GenomeLocParser genomeLocParser) {
|
||||
// initialize contigNames to standard ones in reference
|
||||
|
|
@ -161,14 +204,6 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
contigNames.add(contig.getSequenceName());
|
||||
}
|
||||
|
||||
public boolean isSkippingGenotypes() {
|
||||
return skipGenotypes;
|
||||
}
|
||||
|
||||
public void setSkipGenotypes(final boolean skipGenotypes) {
|
||||
this.skipGenotypes = skipGenotypes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implicit block
|
||||
|
|
@ -182,50 +217,83 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final SitesInfoForDecoding decodeSitesBlock(final VariantContextBuilder builder) {
|
||||
final int contigOffset = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
/**
|
||||
* Decode the sites level data from this class's decoder
|
||||
*
|
||||
* @param builder
|
||||
* @return
|
||||
*/
|
||||
@Requires({"builder != null"})
|
||||
private final void decodeSiteLoc(final VariantContextBuilder builder) {
|
||||
final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
|
||||
final String contig = lookupContigName(contigOffset);
|
||||
builder.chr(contig);
|
||||
|
||||
final int pos = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int refLength = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
this.pos = decoder.decodeInt(BCF2Type.INT32);
|
||||
final int refLength = decoder.decodeInt(BCF2Type.INT32);
|
||||
builder.start((long)pos);
|
||||
builder.stop((long)(pos + refLength - 1)); // minus one because of our open intervals
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the sites level data from this class's decoder
|
||||
*
|
||||
* @param builder
|
||||
* @return
|
||||
*/
|
||||
@Requires({"builder != null", "decoder != null"})
|
||||
@Ensures({"result != null", "result.isValid()"})
|
||||
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) {
|
||||
final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
|
||||
if ( qual != null ) {
|
||||
builder.log10PError(((Double)qual) / -10.0);
|
||||
}
|
||||
|
||||
final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
|
||||
final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
|
||||
final int nAlleles = nAlleleInfo >> 16;
|
||||
final int nInfo = nAlleleInfo & 0x00FF;
|
||||
final int nFormatFields = nFormatSamples >> 24;
|
||||
final int nSamples = nFormatSamples & 0x0FFF;
|
||||
final int nInfo = nAlleleInfo & 0x0000FFFF;
|
||||
final int nFormatFields = nFormatSamples >> 24;
|
||||
final int nSamples = nFormatSamples & 0x00FFFFF;
|
||||
|
||||
decodeID(builder);
|
||||
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
||||
decodeFilter(builder);
|
||||
decodeInfo(builder, nInfo);
|
||||
|
||||
return new SitesInfoForDecoding(pos, nFormatFields, nSamples, alleles);
|
||||
final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
|
||||
if ( ! info.isValid() )
|
||||
error("Sites info is malformed: " + info);
|
||||
return info;
|
||||
}
|
||||
|
||||
private final static class SitesInfoForDecoding {
|
||||
final int pos;
|
||||
protected final static class SitesInfoForDecoding {
|
||||
final int nFormatFields;
|
||||
final int nSamples;
|
||||
final ArrayList<Allele> alleles;
|
||||
|
||||
private SitesInfoForDecoding(final int pos, final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
||||
this.pos = pos;
|
||||
private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
||||
this.nFormatFields = nFormatFields;
|
||||
this.nSamples = nSamples;
|
||||
this.alleles = alleles;
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return nFormatFields >= 0 &&
|
||||
nSamples >= 0 &&
|
||||
alleles != null && ! alleles.isEmpty() && alleles.get(0).isReference();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("nFormatFields = %d, nSamples = %d, alleles = %s", nFormatFields, nSamples, alleles);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the id field in this BCF2 file and store it in the builder
|
||||
* @param builder
|
||||
*/
|
||||
private void decodeID( final VariantContextBuilder builder ) {
|
||||
final String id = (String)decoder.decodeTypedValue();
|
||||
|
||||
|
|
@ -235,6 +303,15 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.id(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
|
||||
* GATK encoding.
|
||||
*
|
||||
* @param position
|
||||
* @param ref
|
||||
* @param unclippedAlleles
|
||||
* @return
|
||||
*/
|
||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
|
|
@ -244,6 +321,14 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
return unclippedAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the alleles from this BCF2 file and put the results in builder
|
||||
* @param builder
|
||||
* @param pos
|
||||
* @param nAlleles
|
||||
* @return the alleles
|
||||
*/
|
||||
@Requires("nAlleles > 0")
|
||||
private ArrayList<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
||||
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
||||
|
|
@ -259,15 +344,21 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
alleles.add(Allele.create(allele, false));
|
||||
}
|
||||
}
|
||||
assert ref != null;
|
||||
|
||||
alleles = clipAllelesIfNecessary(pos, ref, alleles);
|
||||
builder.alleles(alleles);
|
||||
|
||||
assert ref.length() > 0;
|
||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the filter field of this BCF2 file and store the result in the builder
|
||||
* @param builder
|
||||
*/
|
||||
private void decodeFilter( final VariantContextBuilder builder ) {
|
||||
final Object value = decoder.decodeTypedValue();
|
||||
|
||||
|
|
@ -275,17 +366,28 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.unfiltered();
|
||||
else {
|
||||
if ( value instanceof Integer )
|
||||
// fast path for single integer result
|
||||
builder.filter(getDictionaryString((Integer)value));
|
||||
else {
|
||||
for ( int offset : (List<Integer>)value )
|
||||
for ( final int offset : (List<Integer>)value )
|
||||
builder.filter(getDictionaryString(offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop over the info field key / value pairs in this BCF2 file and decode them into the builder
|
||||
*
|
||||
* @param builder
|
||||
* @param numInfoFields
|
||||
*/
|
||||
@Requires("numInfoFields >= 0")
|
||||
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
|
||||
final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
|
||||
if ( numInfoFields == 0 )
|
||||
// fast path, don't bother doing any work if there are no fields
|
||||
return;
|
||||
|
||||
final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
|
||||
for ( int i = 0; i < numInfoFields; i++ ) {
|
||||
final String key = getDictionaryString();
|
||||
Object value = decoder.decodeTypedValue();
|
||||
|
|
@ -297,143 +399,98 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.attributes(infoFieldEntries);
|
||||
}
|
||||
|
||||
private void decodeGenotypes( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) {
|
||||
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
||||
final int nSamples = siteInfo.nSamples;
|
||||
final int nFields = siteInfo.nFormatFields;
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Decoding Genotypes
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
/**
|
||||
* Create the lazy loader for the genotypes data, and store it in the builder
|
||||
* so that the VC will be able to decode on demand the genotypes data
|
||||
*
|
||||
* @param siteInfo
|
||||
* @param builder
|
||||
*/
|
||||
private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo,
|
||||
final VariantContextBuilder builder ) {
|
||||
if (siteInfo.nSamples > 0) {
|
||||
final LazyGenotypesContext.LazyParser lazyParser =
|
||||
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
|
||||
final int nGenotypes = header.getGenotypeSamples().size();
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
|
||||
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
|
||||
nGenotypes);
|
||||
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
// all of the information we need for each genotype, with default values
|
||||
final String sampleName = samples.get(i);
|
||||
List<Allele> alleles = null;
|
||||
boolean isPhased = false;
|
||||
double log10PError = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = null;
|
||||
double[] log10Likelihoods = null;
|
||||
// did we resort the sample names? If so, we need to load the genotype data
|
||||
if ( !header.samplesWereAlreadySorted() )
|
||||
lazy.decode();
|
||||
|
||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
Object value = entry.getValue().get(i);
|
||||
try {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)value);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
log10PError = ((Integer)value) / -10.0;
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final List<Integer> pls = (List<Integer>)value;
|
||||
if ( pls != null ) { // we have a PL field
|
||||
log10Likelihoods = new double[pls.size()];
|
||||
for ( int j = 0; j < log10Likelihoods.length; j++ ) {
|
||||
final double d = pls.get(j);
|
||||
log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
|
||||
}
|
||||
}
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
if ( value instanceof List && ((List)value).size() == 1)
|
||||
value = ((List)value).get(0);
|
||||
attributes.put(field, value);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value in the "
|
||||
+ " BCF file. Value was " + value);
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||
|
||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
builder.genotypes(genotypes);
|
||||
}
|
||||
|
||||
private final List<Allele> decodeGenotypeAlleles(final ArrayList<Allele> siteAlleles, final List<Integer> encoded) {
|
||||
if ( encoded == null )
|
||||
// no called sample GT = .
|
||||
return Collections.emptyList();
|
||||
else {
|
||||
// we have at least some alleles to decode
|
||||
final List<Allele> gt = new ArrayList<Allele>(encoded.size());
|
||||
for ( final Integer encode : encoded ) {
|
||||
if ( encode == null ) // absent, as are all following by definition
|
||||
return gt;
|
||||
else {
|
||||
final int offset = encode >> 1;
|
||||
if ( offset == 0 )
|
||||
gt.add(Allele.NO_CALL);
|
||||
else
|
||||
gt.add(siteAlleles.get(offset - 1));
|
||||
}
|
||||
}
|
||||
|
||||
return gt;
|
||||
builder.genotypesNoValidation(lazy);
|
||||
}
|
||||
}
|
||||
|
||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
||||
assert (nFields > 0 && nSamples > 0) || (nFields == 0 && nSamples == 0);
|
||||
public static class LazyData {
|
||||
final public int nGenotypeFields;
|
||||
final public byte[] bytes;
|
||||
|
||||
if ( nFields == 0 ) // fast path exit for sites only file
|
||||
return Collections.emptyMap();
|
||||
else {
|
||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final String field = getDictionaryString();
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
map.put(field, values);
|
||||
}
|
||||
|
||||
return map;
|
||||
@Requires({"nGenotypeFields > 0", "bytes != null"})
|
||||
public LazyData(final int nGenotypeFields, final byte[] bytes) {
|
||||
this.nGenotypeFields = nGenotypeFields;
|
||||
this.bytes = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
private final String getDictionaryString() {
|
||||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||
}
|
||||
|
||||
private final String getDictionaryString(final int offset) {
|
||||
if ( offset >= dictionary.size() ) throw new UserException.MalformedBCF2("BUG: no dictionary field found at offset " + offset);
|
||||
final String field = dictionary.get(offset);
|
||||
return field;
|
||||
@Requires("offset < dictionary.size()")
|
||||
@Ensures("result != null")
|
||||
protected final String getDictionaryString(final int offset) {
|
||||
return dictionary.get(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate the contig offset as encoded in the BCF file into the actual string
|
||||
* name of the contig from the dictionary
|
||||
*
|
||||
* @param contigOffset
|
||||
* @return
|
||||
*/
|
||||
@Requires({"contigOffset >= 0", "contigOffset < contigNames.size()"})
|
||||
@Ensures("result != null")
|
||||
private final String lookupContigName( final int contigOffset ) {
|
||||
if ( contigOffset < contigNames.size() ) {
|
||||
return contigNames.get(contigOffset);
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBCF2(String.format("No contig at index %d present in the sequence dictionary from the BCF2 header (%s)", contigOffset, contigNames));
|
||||
}
|
||||
return contigNames.get(contigOffset);
|
||||
}
|
||||
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "! result.isEmpty()"})
|
||||
private final ArrayList<String> parseDictionary(final VCFHeader header) {
|
||||
final ArrayList<String> dict = BCF2Utils.makeDictionary(header);
|
||||
|
||||
// if we got here we never found a dictionary, or there are no elements in the dictionary
|
||||
if ( dict.size() == 0 )
|
||||
throw new UserException.MalformedBCF2("Dictionary header element was absent or empty");
|
||||
if ( dict.isEmpty() )
|
||||
error("Dictionary header element was absent or empty");
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the VCFHeader we found in this BCF2 file
|
||||
*/
|
||||
protected VCFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
@Requires("field != null")
|
||||
@Ensures("result != null")
|
||||
protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String field) {
|
||||
return gtFieldDecoders.getDecoder(field);
|
||||
}
|
||||
|
||||
private final void error(final String message) throws RuntimeException {
|
||||
throw new UserException.MalformedBCF2(String.format("At record %d with position %d:", recordNo, pos, message));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -33,12 +35,13 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class BCF2Decoder {
|
||||
public final class BCF2Decoder {
|
||||
final protected static Logger logger = Logger.getLogger(FeatureCodec.class);
|
||||
|
||||
byte[] recordBytes;
|
||||
ByteArrayInputStream recordStream;
|
||||
byte[] recordBytes = null;
|
||||
ByteArrayInputStream recordStream = null;
|
||||
|
||||
public BCF2Decoder() {
|
||||
// nothing to do
|
||||
|
|
@ -66,6 +69,7 @@ public class BCF2Decoder {
|
|||
* @return
|
||||
*/
|
||||
public void readNextBlock(final int blockSizeInBytes, final InputStream stream) {
|
||||
if ( blockSizeInBytes < 0 ) throw new UserException.MalformedBCF2("Invalid block size " + blockSizeInBytes);
|
||||
setRecordBytes(readRecordBytes(blockSizeInBytes, stream));
|
||||
}
|
||||
|
||||
|
|
@ -112,9 +116,9 @@ public class BCF2Decoder {
|
|||
*
|
||||
* @param recordBytes
|
||||
*/
|
||||
@Requires("recordBytes != null")
|
||||
@Ensures({"this.recordBytes == recordBytes", "recordStream != null"})
|
||||
public void setRecordBytes(final byte[] recordBytes) {
|
||||
assert recordBytes != null;
|
||||
|
||||
this.recordBytes = recordBytes;
|
||||
this.recordStream = new ByteArrayInputStream(recordBytes);
|
||||
}
|
||||
|
|
@ -131,7 +135,7 @@ public class BCF2Decoder {
|
|||
}
|
||||
|
||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
||||
final int size = BCF2Utils.sizeIsOverflow(typeDescriptor) ? decodeVectorSize() : BCF2Utils.decodeSize(typeDescriptor);
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size >= 0;
|
||||
|
|
@ -155,7 +159,7 @@ public class BCF2Decoder {
|
|||
|
||||
public final Object decodeSingleValue(final BCF2Type type) {
|
||||
// TODO -- decodeTypedValue should integrate this routine
|
||||
final int value = BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
||||
final int value = decodeInt(type);
|
||||
|
||||
if ( value == type.getMissingBytes() )
|
||||
return null;
|
||||
|
|
@ -184,26 +188,107 @@ public class BCF2Decoder {
|
|||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||
try {
|
||||
recordStream.read(bytes);
|
||||
final String s = new String(bytes);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
|
||||
int goodLength = 0;
|
||||
for ( ; goodLength < bytes.length ; goodLength++ )
|
||||
if ( bytes[goodLength] == 0 ) break;
|
||||
|
||||
if ( goodLength == 0 )
|
||||
return null;
|
||||
else {
|
||||
final String s = new String(bytes, 0, goodLength);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
}
|
||||
}
|
||||
|
||||
private final int decodeVectorSize() {
|
||||
final byte typeDescriptor = readTypeDescriptor();
|
||||
final int size = BCF2Utils.decodeSize(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size == 1;
|
||||
assert type == BCF2Type.INT8 || type == BCF2Type.INT16 || type == BCF2Type.INT32;
|
||||
|
||||
return decodeInt(type.getSizeInBytes());
|
||||
@Ensures("result >= 0")
|
||||
public final int decodeNumberOfElements(final byte typeDescriptor) {
|
||||
if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
|
||||
// -1 ensures we explode immediately with a bad size if the result is missing
|
||||
return decodeInt(readTypeDescriptor(), -1);
|
||||
else
|
||||
// the size is inline, so just decode it
|
||||
return BCF2Utils.decodeSize(typeDescriptor);
|
||||
}
|
||||
|
||||
public final int decodeInt(int bytesForEachInt) {
|
||||
return BCF2Utils.readInt(bytesForEachInt, recordStream);
|
||||
/**
|
||||
* Decode an int from the stream. If the value in the stream is missing,
|
||||
* returns missingValue. Requires the typeDescriptor indicate an inline
|
||||
* single element event
|
||||
*
|
||||
* @param typeDescriptor
|
||||
* @return
|
||||
*/
|
||||
@Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
|
||||
public final int decodeInt(final byte typeDescriptor, final int missingValue) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
final int i = decodeInt(type);
|
||||
return i == type.getMissingBytes() ? missingValue : i;
|
||||
}
|
||||
|
||||
@Requires("type != null")
|
||||
public final int decodeInt(final BCF2Type type) {
|
||||
return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Low-level reader for int[]
|
||||
*
|
||||
* Requires a typeDescriptor so the function knows how many elements to read,
|
||||
* and how they are encoded.
|
||||
*
|
||||
* If size == 0 => result is null
|
||||
* If size > 0 => result depends on the actual values in the stream
|
||||
* -- If the first element read is MISSING, result is null (all values are missing)
|
||||
* -- Else result = int[N] where N is the first N non-missing values decoded
|
||||
*
|
||||
* @param maybeDest if not null we'll not allocate space for the vector, but instead use
|
||||
* the externally allocated array of ints to store values. If the
|
||||
* size of this vector is < the actual size of the elements, we'll be
|
||||
* forced to use freshly allocated arrays. Also note that padded
|
||||
* int elements are still forced to do a fresh allocation as well.
|
||||
* @return see description
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"})
|
||||
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
||||
if ( size == 0 ) {
|
||||
return null;
|
||||
} else {
|
||||
if ( maybeDest != null && maybeDest.length < size )
|
||||
maybeDest = null; // by nulling this out we ensure that we do fresh allocations as maybeDest is too small
|
||||
|
||||
final int val1 = decodeInt(type);
|
||||
if ( val1 == type.getMissingBytes() ) {
|
||||
// fast path for first element being missing
|
||||
for ( int i = 1; i < size; i++ ) decodeInt(type);
|
||||
return null;
|
||||
} else {
|
||||
// we know we will have at least 1 element, so making the int[] is worth it
|
||||
final int[] ints = maybeDest == null ? new int[size] : maybeDest;
|
||||
ints[0] = val1; // we already read the first one
|
||||
for ( int i = 1; i < size; i++ ) {
|
||||
ints[i] = decodeInt(type);
|
||||
if ( ints[i] == type.getMissingBytes() ) {
|
||||
// read the rest of the missing values, dropping them
|
||||
for ( int j = i + 1; j < size; j++ ) decodeInt(type);
|
||||
// deal with auto-pruning by returning an int[] containing
|
||||
// only the non-MISSING values. We do this by copying the first
|
||||
// i elements, as i itself is missing
|
||||
return Arrays.copyOf(ints, i);
|
||||
}
|
||||
}
|
||||
return ints; // all of the elements were non-MISSING
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final int[] decodeIntArray(final byte typeDescriptor) {
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
return decodeIntArray(size, type, null);
|
||||
}
|
||||
|
||||
public final double rawFloatToFloat(final int rawFloat) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An efficient scheme for building and obtaining specialized
|
||||
* genotype field decoders. Used by the BCFCodec to parse
|
||||
* with little overhead the fields from BCF2 encoded genotype
|
||||
* records
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
public class BCF2GenotypeFieldDecoders {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2GenotypeFieldDecoders.class);
|
||||
private final static boolean ENABLE_FASTPATH_GT = true;
|
||||
private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number
|
||||
|
||||
// initialized once per writer to allow parallel writers to work
|
||||
private final HashMap<String, Decoder> genotypeFieldDecoder = new HashMap<String, Decoder>();
|
||||
private final Decoder defaultDecoder = new GenericDecoder();
|
||||
|
||||
public BCF2GenotypeFieldDecoders(final VCFHeader header) {
|
||||
// TODO -- fill in appropriate decoders for each FORMAT field in the header
|
||||
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
|
||||
// currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_QUALITY_KEY, new GQDecoder());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Genotype field decoder
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return decoder appropriate for field, or the generic decoder if no
|
||||
* specialized one is bound
|
||||
* @param field the GT field to decode
|
||||
* @return a non-null decoder
|
||||
*/
|
||||
@Requires("field != null")
|
||||
@Ensures("result != null")
|
||||
public Decoder getDecoder(final String field) {
|
||||
final Decoder d = genotypeFieldDecoder.get(field);
|
||||
return d == null ? defaultDecoder : d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decoder a field (implicit from creation) encoded as
|
||||
* typeDescriptor in the decoder object in the GenotypeBuilders
|
||||
* one for each sample in order.
|
||||
*
|
||||
* The way this works is that this decode method
|
||||
* iterates over the builders, decoding a genotype field
|
||||
* in BCF2 for each sample from decoder.
|
||||
*
|
||||
* This system allows us to easily use specialized
|
||||
* decoders for specific genotype field values. For example,
|
||||
* we use a special decoder to directly read the BCF2 data for
|
||||
* the PL field into a int[] rather than the generic List of Integer
|
||||
*/
|
||||
public interface Decoder {
|
||||
@Requires({"siteAlleles != null", "! siteAlleles.isEmpty()",
|
||||
"field != null", "decoder != null", "gbs != null", "! gbs.isEmpty()"})
|
||||
public void decode(final List<Allele> siteAlleles,
|
||||
final String field,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs);
|
||||
}
|
||||
|
||||
private class GTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
// we have to do a bit of low-level processing here as we want to know the size upfronta
|
||||
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
|
||||
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.size() >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||
else {
|
||||
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* fast path for many samples with diploid genotypes
|
||||
*
|
||||
* The way this would work is simple. Create a List<Allele> diploidGenotypes[] object
|
||||
* After decoding the offset, if that sample is diploid compute the
|
||||
* offset into the alleles vector which is simply offset = allele0 * nAlleles + allele1
|
||||
* if there's a value at diploidGenotypes[offset], use it, otherwise create the genotype
|
||||
* cache it and use that
|
||||
*
|
||||
* Some notes. If there are nAlleles at the site, there are implicitly actually
|
||||
* n + 1 options including
|
||||
*/
|
||||
@Requires("siteAlleles.size() == 2")
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
final int nPossibleGenotypes = 3 * 3;
|
||||
final Object allGenotypes[] = new Object[nPossibleGenotypes];
|
||||
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
final int a1 = decoder.decodeInt(type);
|
||||
final int a2 = decoder.decodeInt(type);
|
||||
|
||||
if ( a1 == type.getMissingBytes() ) {
|
||||
assert a2 == type.getMissingBytes();
|
||||
// no called sample GT = .
|
||||
gb.alleles(null);
|
||||
} else if ( a2 == type.getMissingBytes() ) {
|
||||
gb.alleles(Arrays.asList(getAlleleFromEncoded(siteAlleles, a1)));
|
||||
} else {
|
||||
// downshift to remove phase
|
||||
final int offset = (a1 >> 1) * 3 + (a2 >> 1);
|
||||
assert offset < allGenotypes.length;
|
||||
|
||||
// TODO -- how can I get rid of this cast?
|
||||
List<Allele> gt = (List<Allele>)allGenotypes[offset];
|
||||
if ( gt == null ) {
|
||||
final Allele allele1 = getAlleleFromEncoded(siteAlleles, a1);
|
||||
final Allele allele2 = getAlleleFromEncoded(siteAlleles, a2);
|
||||
gt = Arrays.asList(allele1, allele2);
|
||||
allGenotypes[offset] = gt;
|
||||
}
|
||||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final void generalDecode(final List<Allele> siteAlleles,
|
||||
final int ploidy,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
// a single cache for the encoded genotypes, since we don't actually need this vector
|
||||
final int[] tmp = new int[ploidy];
|
||||
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
final int[] encoded = decoder.decodeIntArray(ploidy, type, tmp);
|
||||
if ( encoded == null )
|
||||
// no called sample GT = .
|
||||
gb.alleles(null);
|
||||
else {
|
||||
assert encoded.length > 0;
|
||||
|
||||
// we have at least some alleles to decode
|
||||
final List<Allele> gt = new ArrayList<Allele>(encoded.length);
|
||||
|
||||
// note that the auto-pruning of fields magically handles different
|
||||
// ploidy per sample at a site
|
||||
for ( final int encode : encoded )
|
||||
gt.add(getAlleleFromEncoded(siteAlleles, encode));
|
||||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"siteAlleles != null && ! siteAlleles.isEmpty()", "encode >= 0"})
|
||||
@Ensures("result != null")
|
||||
private final Allele getAlleleFromEncoded(final List<Allele> siteAlleles, final int encode) {
|
||||
final int offset = encode >> 1;
|
||||
return offset == 0 ? Allele.NO_CALL : siteAlleles.get(offset - 1);
|
||||
}
|
||||
}
|
||||
|
||||
private class DPDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class GQDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class ADDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.AD(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class PLDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.PL(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class GenericDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( value instanceof List && ((List)value).size() == 1) {
|
||||
// todo -- I really hate this, and it suggests that the code isn't completely right
|
||||
// the reason it's here is that it's possible to prune down a vector to a singleton
|
||||
// value and there we have the contract that the value comes back as an atomic value
|
||||
// not a vector of size 1
|
||||
value = ((List)value).get(0);
|
||||
}
|
||||
gb.attribute(field, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class FTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Lazy version of genotypes decoder for BCF2 genotypes
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 5/12
|
||||
*/
|
||||
class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class);
|
||||
|
||||
// the essential information for us to use to decode the genotypes data
|
||||
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
|
||||
// and its stored here again for code cleanliness
|
||||
private final BCF2Codec codec;
|
||||
private final ArrayList<Allele> siteAlleles;
|
||||
private final int nSamples;
|
||||
private final int nFields;
|
||||
|
||||
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples, final int nFields) {
|
||||
this.codec = codec;
|
||||
this.siteAlleles = alleles;
|
||||
this.nSamples = nSamples;
|
||||
this.nFields = nFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
if ( logger.isDebugEnabled() )
|
||||
logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
|
||||
// load our byte[] data into the decoder
|
||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||
|
||||
// TODO -- fast path for sites only
|
||||
|
||||
// go ahead and decode everyone
|
||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
// create and initialize the genotypes array
|
||||
final ArrayList<GenotypeBuilder> builders = new ArrayList<GenotypeBuilder>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
builders.add(new GenotypeBuilder(samples.get(i)));
|
||||
}
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
// get the field name
|
||||
final int offset = (Integer) decoder.decodeTypedValue();
|
||||
final String field = codec.getDictionaryString(offset);
|
||||
|
||||
// the type of each element
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||
try {
|
||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value");
|
||||
}
|
||||
}
|
||||
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( final GenotypeBuilder gb : builders )
|
||||
genotypes.add(gb.make());
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Testing BCF2
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2012
|
||||
*/
|
||||
public class BCF2TestWalker extends RodWalker<Integer, Integer> {
|
||||
/**
|
||||
* Variants from this VCF file are used by this tool as input.
|
||||
* The file must at least contain the standard VCF header lines, but
|
||||
* can be empty (i.e., no variants are contained in the file).
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
@Argument(doc="keep variants", required=false)
|
||||
public boolean keepVariants = false;
|
||||
|
||||
@Argument(doc="quiet", required=false)
|
||||
public boolean quiet = false;
|
||||
|
||||
@Argument(doc="dontIndexOnTheFly", required=false)
|
||||
public boolean dontIndexOnTheFly = false;
|
||||
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected File bcfFile;
|
||||
|
||||
private final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
protected VariantContextWriter writer;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
final Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Collections.singletonList(variants));
|
||||
final VCFHeader header = VCFUtils.withUpdatedContigs(vcfRods.values().iterator().next(), getToolkit());
|
||||
try {
|
||||
EnumSet<Options> options = EnumSet.of(Options.FORCE_BCF);
|
||||
if ( !dontIndexOnTheFly ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
writer = VariantContextWriterFactory.create(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), options);
|
||||
writer.writeHeader(header);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) {
|
||||
writer.add(vc);
|
||||
if ( keepVariants ) vcs.add(vc);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// default reduce -- doesn't do anything at all
|
||||
//
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer counter, Integer sum) { return counter + sum; }
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
try {
|
||||
writer.close();
|
||||
logger.info("Closed writer");
|
||||
|
||||
// read in the BCF records
|
||||
BCF2Codec codec = new BCF2Codec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
Iterator<VariantContext> it = vcs.iterator();
|
||||
while ( ! pbs.isDone() ) {
|
||||
if ( keepVariants ) {
|
||||
VariantContext expected = it.next();
|
||||
if ( ! quiet )
|
||||
System.out.printf("vcf = %s %d %s%n", expected.getChr(), expected.getStart(), expected);
|
||||
}
|
||||
VariantContext bcfRaw = codec.decode(pbs);
|
||||
VariantContext bcf = new VariantContextBuilder(bcfRaw).source("variant").make();
|
||||
if ( ! quiet ) {
|
||||
System.out.printf("bcf = %s %d %s%n", bcf.getChr(), bcf.getStart(), bcf.toString());
|
||||
System.out.printf("--------------------------------------------------%n");
|
||||
}
|
||||
}
|
||||
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, "bad user!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -24,18 +24,22 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* BCF2 types and information
|
||||
* BCF2 types and associated information
|
||||
*
|
||||
* @author depristo
|
||||
* @since 05/12
|
||||
*/
|
||||
public enum BCF2Type {
|
||||
INT8(1, 1, BCF2Utils.INT8_MISSING_VALUE, -127, 127), // todo -- confirm range
|
||||
INT16(2, 2, BCF2Utils.INT16_MISSING_VALUE, -32767, 32767),
|
||||
INT32(3, 4, BCF2Utils.INT32_MISSING_VALUE, -2147483647, 2147483647),
|
||||
FLOAT(5, 4, BCF2Utils.FLOAT_MISSING_VALUE),
|
||||
CHAR(7);
|
||||
INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range
|
||||
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
||||
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
||||
FLOAT(5, 4, 0x7F800001),
|
||||
CHAR (7, 1, 0x00000000);
|
||||
|
||||
private final int id;
|
||||
private final Object missingJavaValue;
|
||||
|
|
@ -60,11 +64,53 @@ public enum BCF2Type {
|
|||
this.maxValue = maxValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bytes are used to represent this type on disk?
|
||||
* @return
|
||||
*/
|
||||
public int getSizeInBytes() {
|
||||
return sizeInBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* The ID according to the BCF2 specification
|
||||
* @return
|
||||
*/
|
||||
public int getID() { return id; }
|
||||
|
||||
/**
|
||||
* Can we encode value v in this type, according to its declared range.
|
||||
*
|
||||
* Only makes sense for integer values
|
||||
*
|
||||
* @param v
|
||||
* @return
|
||||
*/
|
||||
@Requires("INTEGERS.contains(this)")
|
||||
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
||||
|
||||
/**
|
||||
* Return the java object (aka null) that is used to represent a missing value for this
|
||||
* type in Java
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Object getMissingJavaValue() { return missingJavaValue; }
|
||||
|
||||
/**
|
||||
* The bytes (encoded as an int) that are used to represent a missing value
|
||||
* for this type in BCF2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getMissingBytes() { return missingBytes; }
|
||||
|
||||
/**
|
||||
* An enum set of the types that might represent Integer values
|
||||
*/
|
||||
public final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
||||
|
||||
public boolean isIntegerType() {
|
||||
return INTEGERS.contains(this);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
|
|
@ -33,9 +35,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Common utilities for working with BCF2 files
|
||||
|
|
@ -45,7 +46,7 @@ import java.util.List;
|
|||
* @author depristo
|
||||
* @since 5/12
|
||||
*/
|
||||
public class BCF2Utils {
|
||||
public final class BCF2Utils {
|
||||
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
|
||||
|
||||
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
||||
|
|
@ -53,12 +54,6 @@ public class BCF2Utils {
|
|||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||
public static final int MAX_INLINE_ELEMENTS = 14;
|
||||
|
||||
// Note that these values are prefixed by FFFFFF for convenience
|
||||
public static final int INT8_MISSING_VALUE = 0xFFFFFF80;
|
||||
public static final int INT16_MISSING_VALUE = 0xFFFF8000;
|
||||
public static final int INT32_MISSING_VALUE = 0x80000000;
|
||||
public static final int FLOAT_MISSING_VALUE = 0x7F800001;
|
||||
|
||||
public final static BCF2Type[] INTEGER_TYPES_BY_SIZE = new BCF2Type[]{BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32};
|
||||
public final static BCF2Type[] ID_TO_ENUM;
|
||||
|
||||
|
|
@ -77,11 +72,17 @@ public class BCF2Utils {
|
|||
* The dictionary is an ordered list of common VCF identifiers (FILTER, INFO, and FORMAT)
|
||||
* fields.
|
||||
*
|
||||
* Note that it's critical that the list be deduplicated and sorted in a consistent manner each time,
|
||||
* as the BCF2 offsets are encoded relative to this dictionary, and if it isn't determined exactly
|
||||
* the same way as in the header each time it's very bad
|
||||
*
|
||||
* @param header the VCFHeader from which to build the dictionary
|
||||
* @return a non-null dictionary of elements, may be empty
|
||||
*/
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
final Set<String> dict = new TreeSet<String>();
|
||||
|
||||
// set up the strings dictionary
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
|
@ -92,23 +93,27 @@ public class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
return dict;
|
||||
return new ArrayList<String>(dict);
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||
return typeByte;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeSize(final byte typeDescriptor) {
|
||||
return (0xF0 & typeDescriptor) >> 4;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
||||
return typeDescriptor & 0x0F;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public final static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
||||
}
|
||||
|
|
@ -117,6 +122,7 @@ public class BCF2Utils {
|
|||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||
}
|
||||
|
||||
@Requires("nElements >= 0")
|
||||
public final static boolean willOverflow(final long nElements) {
|
||||
return nElements > MAX_INLINE_ELEMENTS;
|
||||
}
|
||||
|
|
@ -128,6 +134,7 @@ public class BCF2Utils {
|
|||
}
|
||||
|
||||
public final static byte readByte(final InputStream stream) {
|
||||
// TODO -- shouldn't be capturing error here
|
||||
try {
|
||||
return (byte)(stream.read() & 0xFF);
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -135,6 +142,7 @@ public class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
|
|
@ -161,10 +169,10 @@ public class BCF2Utils {
|
|||
* @param strings size > 1 list of strings
|
||||
* @return
|
||||
*/
|
||||
@Requires({"strings != null", "strings.size() > 1"})
|
||||
@Ensures("result != null")
|
||||
public static final String collapseStringList(final List<String> strings) {
|
||||
assert strings.size() > 1;
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
final StringBuilder b = new StringBuilder();
|
||||
for ( final String s : strings ) {
|
||||
assert s.indexOf(",") == -1; // no commas in individual strings
|
||||
b.append(",").append(s);
|
||||
|
|
@ -181,12 +189,15 @@ public class BCF2Utils {
|
|||
* @param collapsed
|
||||
* @return
|
||||
*/
|
||||
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
||||
@Ensures("result != null")
|
||||
public static final List<String> exploreStringList(final String collapsed) {
|
||||
assert isCollapsedString(collapsed);
|
||||
final String[] exploded = collapsed.substring(1).split(",");
|
||||
return Arrays.asList(exploded);
|
||||
}
|
||||
|
||||
@Requires("s != null")
|
||||
public static final boolean isCollapsedString(final String s) {
|
||||
return s.charAt(0) == ',';
|
||||
}
|
||||
|
|
@ -200,6 +211,8 @@ public class BCF2Utils {
|
|||
* @param vcfFile
|
||||
* @return
|
||||
*/
|
||||
@Requires("vcfFile != null")
|
||||
@Ensures("result != null")
|
||||
public static final File shadowBCF(final File vcfFile) {
|
||||
final String path = vcfFile.getAbsolutePath();
|
||||
if ( path.contains(".vcf") )
|
||||
|
|
@ -207,4 +220,109 @@ public class BCF2Utils {
|
|||
else
|
||||
return new File( path + ".bcf" );
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum BCF2 integer size of t1 and t2
|
||||
*
|
||||
* For example, if t1 == INT8 and t2 == INT16 returns INT16
|
||||
*
|
||||
* @param t1
|
||||
* @param t2
|
||||
* @return
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"})
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||
case INT32: return t1;
|
||||
default: throw new ReviewedStingException("BUG: unexpected BCF2Type " + t1);
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 2:
|
||||
encodeStream.write((0xFF00 & value) >> 8);
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 4:
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x000000FF & value));
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: unexpected type size " + type);
|
||||
}
|
||||
// general case for reference
|
||||
// for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
|
||||
// final int shift = i * 8;
|
||||
// int mask = 0xFF << shift;
|
||||
// int byteValue = (mask & value) >> shift;
|
||||
// encodeStream.write(byteValue);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
protected VCFHeader header = null;
|
||||
protected VCFHeaderVersion version = null;
|
||||
|
||||
// a mapping of the allele
|
||||
protected Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
|
||||
|
|
@ -48,7 +49,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
protected final String[] locParts = new String[6];
|
||||
|
||||
// for performance we cache the hashmap of filter encodings for quick lookup
|
||||
protected HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
|
||||
protected HashMap<String,List<String>> filterHash = new HashMap<String,List<String>>();
|
||||
|
||||
// we store a name to give to each of the variant contexts we emit
|
||||
protected String name = "Unknown";
|
||||
|
|
@ -91,24 +92,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
*/
|
||||
public abstract Object readHeader(LineReader reader);
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @param chr chrom
|
||||
* @param pos position
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public abstract LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos);
|
||||
|
||||
|
||||
/**
|
||||
* parse the filter string, first checking to see if we already have parsed it in a previous attempt
|
||||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected abstract Set<String> parseFilters(String filterString);
|
||||
protected abstract List<String> parseFilters(String filterString);
|
||||
|
||||
/**
|
||||
* create a VCF header from a set of header record lines
|
||||
|
|
@ -117,6 +106,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
* @return a VCFHeader object
|
||||
*/
|
||||
protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion version ) {
|
||||
this.version = version;
|
||||
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<String> sampleNames = new LinkedHashSet<String>();
|
||||
int contigCounter = 0;
|
||||
|
|
@ -320,7 +311,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
String ref = getCachedString(parts[3].toUpperCase());
|
||||
String alts = getCachedString(parts[4].toUpperCase());
|
||||
builder.log10PError(parseQual(parts[5]));
|
||||
builder.filters(parseFilters(getCachedString(parts[6])));
|
||||
|
||||
final List<String> filters = parseFilters(getCachedString(parts[6]));
|
||||
if ( filters != null ) builder.filters(new HashSet<String>(filters));
|
||||
final Map<String, Object> attrs = parseInfo(parts[7]);
|
||||
builder.attributes(attrs);
|
||||
|
||||
|
|
@ -719,4 +712,115 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
try { stream.close(); } catch ( IOException e ) {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(final String str,
|
||||
final List<Allele> alleles,
|
||||
final String chr,
|
||||
final int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
final String sampleName = sampleNameIterator.next();
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gb.maxAttributes(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = genotypeKeyArray[i];
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if ( missing ) {
|
||||
// if its truly missing (there no provided value) skip adding it to the attributes
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
|
||||
if ( filters != null ) gb.filters(filters);
|
||||
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// don't add missing values to the map
|
||||
} else {
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
if ( GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) )
|
||||
gb.noGQ();
|
||||
else
|
||||
gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i])));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
gb.AD(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(GenotypeLikelihoods.fromGLField(GTValueArray[i]).getAsPLs());
|
||||
} else if (gtKey.equals(VCFConstants.DEPTH_KEY)) {
|
||||
gb.DP(Integer.valueOf(GTValueArray[i]));
|
||||
} else {
|
||||
gb.attribute(gtKey, GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field if our version is less than 4.1 file
|
||||
if ( version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
final List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
gb.alleles(GTalleles);
|
||||
gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1);
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(gb.make());
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset());
|
||||
}
|
||||
|
||||
|
||||
private final static String[] INT_DECODE_ARRAY = new String[10000];
|
||||
private final static int[] decodeInts(final String string) {
|
||||
final int nValues = ParsingUtils.split(string, INT_DECODE_ARRAY, ',');
|
||||
final int[] values = new int[nValues];
|
||||
for ( int i = 0; i < nValues; i++ )
|
||||
values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
|
|
@ -78,24 +102,24 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new ArrayList<String>();
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
return fFields;
|
||||
return new ArrayList<String>(fFields);
|
||||
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status");
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
return new ArrayList<String>(filterHash.get(filterString));
|
||||
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
|
|
@ -108,93 +132,6 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
return fFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @param chr chrom
|
||||
* @param pos position
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = new String(genotypeKeyArray[i]);
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
|
||||
} else if ( missing || GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) ) {
|
||||
gtAttributes.put(gtKey, VCFConstants.MISSING_VALUE_v4);
|
||||
} else {
|
||||
gtAttributes.put(gtKey, new String(GTValueArray[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field
|
||||
if ( genotypeAlleleLocation < 0 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes");
|
||||
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.sampleNamesInOrder, header.sampleNameToOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode(final String potentialInput) {
|
||||
return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ import java.util.*;
|
|||
public class VCFCodec extends AbstractVCFCodec {
|
||||
// Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
|
||||
private VCFHeaderVersion version = null;
|
||||
|
||||
/**
|
||||
* A VCF header that contains master info/filter/format records that we use to 'fill in'
|
||||
|
|
@ -127,121 +126,33 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
return parseFilters(filterHash, lineNo, filterString);
|
||||
}
|
||||
|
||||
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||
return Collections.emptySet();
|
||||
return Collections.emptyList();
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( cache != null && cache.containsKey(filterString) )
|
||||
return Collections.unmodifiableSet(cache.get(filterString));
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new LinkedList<String>();
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
fFields.add(filterString);
|
||||
else
|
||||
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
||||
|
||||
fFields = fFields;
|
||||
if ( cache != null ) cache.put(filterString, fFields);
|
||||
filterHash.put(filterString, Collections.unmodifiableList(fFields));
|
||||
|
||||
return Collections.unmodifiableSet(fFields);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = new String(genotypeKeyArray[i]);
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
|
||||
} else if ( missing ) {
|
||||
// if its truly missing (there no provided value) skip adding it to the attributes
|
||||
} else {
|
||||
gtAttributes.put(gtKey, GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field if we are a VCF4.0 file
|
||||
if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.sampleNamesInOrder, header.sampleNameToOffset);
|
||||
return fFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -56,8 +56,9 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
public String getDescription() { return description; }
|
||||
public VCFHeaderLineType getType() { return type; }
|
||||
public VCFHeaderLineCount getCountType() { return countType; }
|
||||
public boolean isFixedCount() { return countType == VCFHeaderLineCount.INTEGER; }
|
||||
public int getCount() {
|
||||
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||
if ( ! isFixedCount() )
|
||||
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||
return count;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ public final class VCFConstants {
|
|||
public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
|
||||
public static final String GENOTYPE_POSTERIORS_KEY = "GP";
|
||||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String GENOTYPE_ALLELE_DEPTHS = "AD";
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
|
|
@ -113,7 +114,5 @@ public final class VCFConstants {
|
|||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final int MAX_GENOTYPE_QUAL = 99;
|
||||
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00";
|
||||
public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue