Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7204fcc2c3
|
|
@ -48,8 +48,16 @@ public class AlleleFrequencyCalculationResult {
|
||||||
double log10LikelihoodOfAFzero = 0.0;
|
double log10LikelihoodOfAFzero = 0.0;
|
||||||
double log10PosteriorOfAFzero = 0.0;
|
double log10PosteriorOfAFzero = 0.0;
|
||||||
|
|
||||||
AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
|
public AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
|
||||||
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
|
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
|
||||||
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
|
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public double getLog10LikelihoodOfAFzero() {
|
||||||
|
return log10LikelihoodOfAFzero;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getLog10PosteriorOfAFzero() {
|
||||||
|
return log10PosteriorOfAFzero;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -38,10 +38,10 @@ public abstract class FrequencyModeSelector implements Cloneable{
|
||||||
protected FrequencyModeSelector(GenomeLocParser parser) {
|
protected FrequencyModeSelector(GenomeLocParser parser) {
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
}
|
}
|
||||||
protected void logCurrentSiteData(VariantContext vc, VariantContext subVC) {
|
protected void logCurrentSiteData(VariantContext vc, boolean passesCriteria) {
|
||||||
logCurrentSiteData(vc, subVC, false, false);
|
logCurrentSiteData(vc, passesCriteria, false, false);
|
||||||
}
|
}
|
||||||
protected abstract void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC);
|
protected abstract void logCurrentSiteData(VariantContext vc, boolean included, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC);
|
||||||
protected abstract ArrayList<VariantContext> selectValidationSites(int numValidationSites);
|
protected abstract ArrayList<VariantContext> selectValidationSites(int numValidationSites);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,21 +23,52 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
|
||||||
public class GLBasedSampleSelector extends SampleSelector {
|
public class GLBasedSampleSelector extends SampleSelector {
|
||||||
public GLBasedSampleSelector(TreeSet<String> sm) {
|
Map<Integer,double[][]> numAllelePriorMatrix = new HashMap<Integer,double[][]>();
|
||||||
|
double referenceLikelihood;
|
||||||
|
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
|
||||||
super(sm);
|
super(sm);
|
||||||
|
referenceLikelihood = refLik;
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
public boolean selectSiteInSamples(VariantContext vc) {
|
||||||
/* todo - Look at sample array, and create a new vc with samples for which GL's indicate they should be included.
|
if ( samples == null || samples.isEmpty() )
|
||||||
For example, include all samples (and corresponding genotypes) whose GL's are such that argmax(GL) = HET or HOMVAR. */
|
return true;
|
||||||
throw new ReviewedStingException("GLBasedSampleSelector not implemented yet!");
|
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||||
//return true;
|
// first subset to the samples
|
||||||
|
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||||
|
|
||||||
|
// now check to see (using EXACT model) whether this should be variant
|
||||||
|
// do we want to apply a prior? maybe user-spec?
|
||||||
|
double[][] flatPrior = createFlatPrior(vc.getAlleles());
|
||||||
|
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size(),2*samples.size());
|
||||||
|
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPrior,result,true);
|
||||||
|
// do we want to let this qual go up or down?
|
||||||
|
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double[][] createFlatPrior(List<Allele> alleles) {
|
||||||
|
if ( ! numAllelePriorMatrix.containsKey(alleles.size()) ) {
|
||||||
|
numAllelePriorMatrix.put(alleles.size(), new double[alleles.size()][1+2*samples.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return numAllelePriorMatrix.get(alleles.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,14 +38,18 @@ public class GTBasedSampleSelector extends SampleSelector{
|
||||||
super(sm);
|
super(sm);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
public boolean selectSiteInSamples(VariantContext vc) {
|
||||||
// Super class already defined initialization which filled data structure "samples" with desired samples.
|
// Super class already defined initialization which filled data structure "samples" with desired samples.
|
||||||
// We only need to check if current vc if polymorphic in that set of samples
|
// We only need to check if current vc if polymorphic in that set of samples
|
||||||
|
|
||||||
if ( samples == null || samples.isEmpty() )
|
if ( samples == null || samples.isEmpty() )
|
||||||
return vc;
|
return true;
|
||||||
|
|
||||||
return vc.subContextFromSamples(samples, vc.getAlleles());
|
VariantContext subContext = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||||
|
if ( subContext.isPolymorphicInSamples() ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
|
||||||
postSampleSelectionHistogram = new int[NUM_BINS];
|
postSampleSelectionHistogram = new int[NUM_BINS];
|
||||||
}
|
}
|
||||||
|
|
||||||
public void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
public void logCurrentSiteData(VariantContext vc, boolean selectedInTargetSamples, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
||||||
|
|
||||||
// this method is called for every variant of a selected type, regardless of whether it will be selectable or not
|
// this method is called for every variant of a selected type, regardless of whether it will be selectable or not
|
||||||
// get AC,AF,AN attributes from vc
|
// get AC,AF,AN attributes from vc
|
||||||
|
|
@ -107,7 +107,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
|
||||||
numTotalSites++;
|
numTotalSites++;
|
||||||
|
|
||||||
// now process VC subsetted to samples of interest
|
// now process VC subsetted to samples of interest
|
||||||
if (!subVC.isPolymorphicInSamples() && !IGNORE_POLYMORPHIC)
|
if (! selectedInTargetSamples && !IGNORE_POLYMORPHIC)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
//System.out.format("Post:%4.4f %d\n",af0, binIndex);
|
//System.out.format("Post:%4.4f %d\n",af0, binIndex);
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,7 @@ public class NullSampleSelector extends SampleSelector{
|
||||||
super(sm);
|
super(sm);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
public boolean selectSiteInSamples(VariantContext vc) {
|
||||||
return vc;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ public abstract class SampleSelector implements Cloneable {
|
||||||
samples = new TreeSet<String>(sm);
|
samples = new TreeSet<String>(sm);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract VariantContext subsetSiteToSamples(VariantContext vc);
|
protected abstract boolean selectSiteInSamples(VariantContext vc);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,14 +42,14 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
public void logCurrentSiteData(VariantContext vc, boolean selectedInTargetSamples, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
|
||||||
|
|
||||||
if (vc.hasGenotypes() && !IGNORE_GENOTYPES) {
|
if (vc.hasGenotypes() && !IGNORE_GENOTYPES) {
|
||||||
// recompute AF,AC,AN based on genotypes:
|
// recompute AF,AC,AN based on genotypes:
|
||||||
VariantContextUtils.calculateChromosomeCounts(vc, attributes, false);
|
VariantContextUtils.calculateChromosomeCounts(vc, attributes, false);
|
||||||
if (!subVC.isPolymorphicInSamples() && !IGNORE_POLYMORPHIC)
|
if (! selectedInTargetSamples && !IGNORE_POLYMORPHIC)
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) {
|
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,10 @@ public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="sampleMode", shortName="sampleMode", doc="Sample selection mode", required=false)
|
@Argument(fullName="sampleMode", shortName="sampleMode", doc="Sample selection mode", required=false)
|
||||||
private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;
|
private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;
|
||||||
|
|
||||||
|
@Argument(shortName="samplePNonref",fullName="samplePNonref", doc="GL-based selection mode only: the probability" +
|
||||||
|
" that a site is non-reference in the samples for which to include the site",required=false)
|
||||||
|
private double samplePNonref = 0.99;
|
||||||
|
|
||||||
@Argument(fullName="numValidationSites", shortName="numSites", doc="Number of output validation sites", required=true)
|
@Argument(fullName="numValidationSites", shortName="numSites", doc="Number of output validation sites", required=true)
|
||||||
private int numValidationSites;
|
private int numValidationSites;
|
||||||
|
|
||||||
|
|
@ -231,13 +235,14 @@ public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
||||||
// do anything required by frequency selector before we select for samples
|
// does this site pass the criteria for the samples we are interested in?
|
||||||
VariantContext subVC;
|
boolean passesSampleSelectionCriteria;
|
||||||
if (samples.isEmpty())
|
if (samples.isEmpty())
|
||||||
subVC = vc;
|
passesSampleSelectionCriteria = true;
|
||||||
else
|
else
|
||||||
subVC = sampleSelector.subsetSiteToSamples(vc);
|
passesSampleSelectionCriteria = sampleSelector.selectSiteInSamples(vc);
|
||||||
frequencyModeSelector.logCurrentSiteData(vc, subVC, IGNORE_GENOTYPES, IGNORE_POLYMORPHIC);
|
|
||||||
|
frequencyModeSelector.logCurrentSiteData(vc,passesSampleSelectionCriteria,IGNORE_GENOTYPES,IGNORE_POLYMORPHIC);
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
@ -263,7 +268,7 @@ public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
|
||||||
SampleSelector sm;
|
SampleSelector sm;
|
||||||
switch ( sampleMode ) {
|
switch ( sampleMode ) {
|
||||||
case POLY_BASED_ON_GL:
|
case POLY_BASED_ON_GL:
|
||||||
sm = new GLBasedSampleSelector(samples);
|
sm = new GLBasedSampleSelector(samples, Math.log10(1.0-samplePNonref));
|
||||||
break;
|
break;
|
||||||
case POLY_BASED_ON_GT:
|
case POLY_BASED_ON_GT:
|
||||||
sm = new GTBasedSampleSelector(samples);
|
sm = new GTBasedSampleSelector(samples);
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,10 @@ public class ClippingOp {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case REVERT_SOFTCLIPPED_BASES:
|
||||||
|
read = revertSoftClippedBases(read);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
||||||
}
|
}
|
||||||
|
|
@ -111,6 +115,38 @@ public class ClippingOp {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
|
||||||
|
GATKSAMRecord unclipped;
|
||||||
|
|
||||||
|
// shallow copy of the read bases and quals should be fine here because they are immutable in the original read
|
||||||
|
try {
|
||||||
|
unclipped = (GATKSAMRecord) read.clone();
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new ReviewedStingException("Where did the clone go?");
|
||||||
|
}
|
||||||
|
|
||||||
|
Cigar unclippedCigar = new Cigar();
|
||||||
|
int matchesCount = 0;
|
||||||
|
for (CigarElement element : read.getCigar().getCigarElements()) {
|
||||||
|
if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
|
||||||
|
matchesCount += element.getLength();
|
||||||
|
else if (matchesCount > 0) {
|
||||||
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
|
matchesCount = 0;
|
||||||
|
unclippedCigar.add(element);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
unclippedCigar.add(element);
|
||||||
|
}
|
||||||
|
if (matchesCount > 0)
|
||||||
|
unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
|
||||||
|
|
||||||
|
unclipped.setCigar(unclippedCigar);
|
||||||
|
unclipped.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar));
|
||||||
|
|
||||||
|
return unclipped;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
||||||
*/
|
*/
|
||||||
|
|
@ -472,7 +508,7 @@ public class ClippingOp {
|
||||||
|
|
||||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
||||||
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
|
oldShift += cigarElement.getLength();
|
||||||
else if (readHasStarted)
|
else if (readHasStarted)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,5 +29,10 @@ public enum ClippingRepresentation {
|
||||||
* lossy) operation. Note that this can only be applied to cases where the clipped
|
* lossy) operation. Note that this can only be applied to cases where the clipped
|
||||||
* bases occur at the start or end of a read.
|
* bases occur at the start or end of a read.
|
||||||
*/
|
*/
|
||||||
HARDCLIP_BASES
|
HARDCLIP_BASES,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Turn all soft-clipped bases into matches
|
||||||
|
*/
|
||||||
|
REVERT_SOFTCLIPPED_BASES,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -213,4 +213,9 @@ public class ReadClipper {
|
||||||
}
|
}
|
||||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GATKSAMRecord revertSoftClippedBases() {
|
||||||
|
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
|
||||||
|
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -851,50 +851,6 @@ public class ReadUtils {
|
||||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GATKSAMRecord unclipSoftClippedBases(GATKSAMRecord read) {
|
|
||||||
int newReadStart = read.getAlignmentStart();
|
|
||||||
int newReadEnd = read.getAlignmentEnd();
|
|
||||||
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
|
|
||||||
int heldOver = -1;
|
|
||||||
boolean sSeen = false;
|
|
||||||
for ( CigarElement e : read.getCigar().getCigarElements() ) {
|
|
||||||
if ( e.getOperator().equals(CigarOperator.S) ) {
|
|
||||||
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
|
|
||||||
if ( sSeen ) {
|
|
||||||
newReadEnd += e.getLength();
|
|
||||||
sSeen = true;
|
|
||||||
} else {
|
|
||||||
newReadStart -= e.getLength();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
newCigarElements.add(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// merge duplicate operators together
|
|
||||||
int idx = 0;
|
|
||||||
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
|
|
||||||
while ( idx < newCigarElements.size() -1 ) {
|
|
||||||
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
|
|
||||||
int combSize = newCigarElements.get(idx).getLength();
|
|
||||||
int offset = 0;
|
|
||||||
while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) {
|
|
||||||
combSize += newCigarElements.get(idx+offset+1).getLength();
|
|
||||||
offset++;
|
|
||||||
}
|
|
||||||
finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator()));
|
|
||||||
idx = idx + offset -1;
|
|
||||||
} else {
|
|
||||||
finalCigarElements.add(newCigarElements.get(idx));
|
|
||||||
}
|
|
||||||
idx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
read.setCigar(new Cigar(finalCigarElements));
|
|
||||||
read.setAlignmentStart(newReadStart);
|
|
||||||
|
|
||||||
return read;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compares two SAMRecords only the basis on alignment start. Note that
|
* Compares two SAMRecords only the basis on alignment start. Note that
|
||||||
* comparisons are performed ONLY on the basis of alignment start; any
|
* comparisons are performed ONLY on the basis of alignment start; any
|
||||||
|
|
|
||||||
|
|
@ -279,9 +279,9 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
||||||
|
|
||||||
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
|
int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
|
||||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||||
expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar()));
|
expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);
|
||||||
|
|
||||||
if (! clippedRead.isEmpty()) {
|
if (! clippedRead.isEmpty()) {
|
||||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||||
|
|
@ -293,6 +293,27 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testRevertSoftClippedBases()
|
||||||
|
{
|
||||||
|
for (Cigar cigar: cigarList) {
|
||||||
|
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||||
|
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
||||||
|
|
||||||
|
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
|
final GATKSAMRecord unclipped = (new ReadClipper(read)).revertSoftClippedBases();
|
||||||
|
|
||||||
|
if ( leadingSoftClips > 0 || tailSoftClips > 0) {
|
||||||
|
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
||||||
|
final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;
|
||||||
|
|
||||||
|
Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
|
||||||
|
Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||||
|
|
@ -304,12 +325,12 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean startsWithInsertion(Cigar cigar) {
|
private boolean startsWithInsertion(Cigar cigar) {
|
||||||
return leadingInsertionLength(cigar) > 0;
|
return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int leadingInsertionLength(Cigar cigar) {
|
private int leadingCigarElementLength(Cigar cigar, CigarOperator operator) {
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
if (cigarElement.getOperator() == operator)
|
||||||
return cigarElement.getLength();
|
return cigarElement.getLength();
|
||||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue