Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
13591b169f
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||||
|
|
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@ -39,6 +41,7 @@ import java.util.Map;
|
||||||
* @since 10/30/11
|
* @since 10/30/11
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||||
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
|
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
|
||||||
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
|
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
||||||
|
|
||||||
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
|
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
|
||||||
referenceSampleVC.getAlleles())
|
referenceSampleVC.getAlleles())
|
||||||
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
|
|
||||||
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
|
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
|
||||||
.make();
|
.make();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
||||||
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
||||||
|
|
||||||
private PairHMMIndelErrorModel pairModel;
|
private PairHMMIndelErrorModel pairModel;
|
||||||
private boolean allelesArePadded = false;
|
|
||||||
/*
|
/*
|
||||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||||
|
|
@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
||||||
final List<Allele> allAllelesToUse){
|
final List<Allele> allAllelesToUse){
|
||||||
|
|
||||||
|
|
||||||
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||||
List<Allele> alleles = pair.first;
|
|
||||||
|
|
||||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||||
allelesArePadded = pair.second;
|
|
||||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
|
@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
||||||
protected int getEndLocation(final RefMetaDataTracker tracker,
|
protected int getEndLocation(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final List<Allele> allelesToUse) {
|
final List<Allele> allelesToUse) {
|
||||||
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
|
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -184,8 +183,13 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
|
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
|
||||||
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
final ArrayList<Haplotype> haplotypes,
|
||||||
|
final byte[] ref,
|
||||||
|
final GenomeLoc refLoc,
|
||||||
|
final GenomeLoc activeRegionWindow,
|
||||||
|
final GenomeLocParser genomeLocParser,
|
||||||
|
final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||||
|
|
||||||
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
|
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
|
||||||
|
|
||||||
|
|
@ -423,24 +427,21 @@ public class GenotypingEngine {
|
||||||
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
||||||
final int thisStart = thisVC.getStart();
|
final int thisStart = thisVC.getStart();
|
||||||
final int nextStart = nextVC.getStart();
|
final int nextStart = nextVC.getStart();
|
||||||
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
byte[] refBases = new byte[]{};
|
||||||
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
byte[] altBases = new byte[]{};
|
||||||
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
||||||
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
||||||
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
int locus;
|
||||||
|
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||||
final byte refByte = ref[locus - refLoc.getStart()];
|
final byte refByte = ref[locus - refLoc.getStart()];
|
||||||
refBases = ArrayUtils.add(refBases, refByte);
|
refBases = ArrayUtils.add(refBases, refByte);
|
||||||
altBases = ArrayUtils.add(altBases, refByte);
|
altBases = ArrayUtils.add(altBases, refByte);
|
||||||
}
|
}
|
||||||
if( nextVC.hasReferenceBaseForIndel() ) {
|
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
|
||||||
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
|
|
||||||
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
|
|
||||||
}
|
|
||||||
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
|
|
||||||
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
||||||
|
|
||||||
int iii = 0;
|
int iii = 0;
|
||||||
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
||||||
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
||||||
}
|
}
|
||||||
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
|
||||||
|
|
@ -533,10 +534,10 @@ public class GenotypingEngine {
|
||||||
final int elementLength = ce.getLength();
|
final int elementLength = ce.getLength();
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case I:
|
case I:
|
||||||
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
|
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
|
||||||
boolean allN = true;
|
boolean allN = true;
|
||||||
for( final byte b : insertionBases ) {
|
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
|
||||||
if( b != (byte) 'N' ) {
|
if( insertionBases[i] != (byte) 'N' ) {
|
||||||
allN = false;
|
allN = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -544,14 +545,13 @@ public class GenotypingEngine {
|
||||||
if( !allN ) {
|
if( !allN ) {
|
||||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||||
|
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
|
||||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||||
} else {
|
} else {
|
||||||
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
|
|
||||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -561,7 +561,7 @@ public class GenotypingEngine {
|
||||||
alignmentPos += elementLength;
|
alignmentPos += elementLength;
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
|
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||||
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||||
// BUGBUG: how often does this symbolic deletion allele case happen?
|
// BUGBUG: how often does this symbolic deletion allele case happen?
|
||||||
|
|
@ -572,8 +572,8 @@ public class GenotypingEngine {
|
||||||
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
||||||
//} else {
|
//} else {
|
||||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||||
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
|
deletionAlleles.add( Allele.create(ref[refPos-1], false) );
|
||||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||||
//}
|
//}
|
||||||
refPos += elementLength;
|
refPos += elementLength;
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
|
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
|
||||||
public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -290,20 +289,22 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
// TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
|
||||||
|
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
|
||||||
|
@Test (enabled = false)
|
||||||
public void testIndelErrorModel() {
|
public void testIndelErrorModel() {
|
||||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||||
final byte refByte = refPileupTestProvider.getRefByte();
|
final byte refByte = refPileupTestProvider.getRefByte();
|
||||||
final String altBases = "TCA";
|
final String altBases = (char)refByte + "TCA";
|
||||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||||
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
||||||
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
|
trueAlleles.add(Allele.create(refByte, true));
|
||||||
trueAlleles.add(Allele.create("TC", false));
|
trueAlleles.add(Allele.create((char)refByte + "TC", false));
|
||||||
|
|
||||||
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
|
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
|
||||||
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||||
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
|
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
|
||||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
|
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
|
||||||
|
|
||||||
|
|
||||||
final int[] matchArray = {95, 995, 9995, 10000};
|
final int[] matchArray = {95, 995, 9995, 10000};
|
||||||
|
|
@ -333,12 +334,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
// create deletion VC
|
// create deletion VC
|
||||||
final int delLength = 4;
|
final int delLength = 4;
|
||||||
final List<Allele> delAlleles = new ArrayList<Allele>();
|
final List<Allele> delAlleles = new ArrayList<Allele>();
|
||||||
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
|
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
|
||||||
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
|
delAlleles.add(Allele.create(refByte, false));
|
||||||
|
|
||||||
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||||
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
|
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
|
||||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
|
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
|
||||||
|
|
||||||
for (int matches: matchArray) {
|
for (int matches: matchArray) {
|
||||||
for (int mismatches: mismatchArray) {
|
for (int mismatches: mismatchArray) {
|
||||||
|
|
@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
final byte refByte = readPileupTestProvider.getRefByte();
|
final byte refByte = readPileupTestProvider.getRefByte();
|
||||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||||
|
|
||||||
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
|
|
||||||
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
|
|
||||||
|
|
||||||
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
||||||
final Set<String> laneIDs = new TreeSet<String>();
|
final Set<String> laneIDs = new TreeSet<String>();
|
||||||
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
|
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
|
||||||
|
|
@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
||||||
for (String laneID : laneIDs)
|
for (String laneID : laneIDs)
|
||||||
noisyErrorModels.put(laneID, Q30ErrorModel);
|
noisyErrorModels.put(laneID, Q30ErrorModel);
|
||||||
|
|
||||||
|
final int refIdx = 0;
|
||||||
|
int altIdx = 2;
|
||||||
|
|
||||||
|
// ref allele must be first
|
||||||
|
allAlleles.add(Allele.create(refByte, true));
|
||||||
for (byte b: BaseUtils.BASES) {
|
for (byte b: BaseUtils.BASES) {
|
||||||
if (refByte == b)
|
if (refByte != b) {
|
||||||
allAlleles.add(Allele.create(b,true));
|
if (b == altByte)
|
||||||
else
|
altIdx = allAlleles.size();
|
||||||
allAlleles.add(Allele.create(b, false));
|
allAlleles.add(Allele.create(b, false));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PrintStream out = null;
|
PrintStream out = null;
|
||||||
|
|
|
||||||
|
|
@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// SNP + ref + SNP = MNP with ref base gap
|
// SNP + ref + SNP = MNP with ref base gap
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||||
|
|
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// insertion + SNP
|
// insertion + SNP
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
|
|
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// SNP + insertion
|
// SNP + insertion
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// deletion + SNP
|
// deletion + SNP
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
|
|
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// SNP + deletion
|
// SNP + deletion
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// insertion + deletion = MNP
|
// insertion + deletion = MNP
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// insertion + deletion
|
// insertion + deletion
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// insertion + insertion
|
// insertion + insertion
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
|
|
||||||
// deletion + deletion
|
// deletion + deletion
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
|
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
|
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
|
||||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
logger.warn(truthVC + " == " + mergedVC);
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
// deletion + insertion (abutting)
|
||||||
|
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||||
|
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
|
||||||
|
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
|
||||||
|
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||||
|
logger.warn(truthVC + " == " + mergedVC);
|
||||||
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
|
|
||||||
// complex + complex
|
// complex + complex
|
||||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
|
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
|
||||||
|
|
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6");
|
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||||
|
|
@ -43,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
|
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
@ -46,6 +48,7 @@ import java.util.Iterator;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
|
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
|
||||||
/**
|
/**
|
||||||
* The supporting BWT index generated using BWT.
|
* The supporting BWT index generated using BWT.
|
||||||
|
|
|
||||||
|
|
@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -50,6 +52,7 @@ import java.io.File;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@WalkerName("Align")
|
@WalkerName("Align")
|
||||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||||
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
||||||
|
|
|
||||||
|
|
@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -48,6 +50,7 @@ import java.util.TreeMap;
|
||||||
* @author mhanna
|
* @author mhanna
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
|
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
|
||||||
/**
|
/**
|
||||||
* The supporting BWT index generated using BWT.
|
* The supporting BWT index generated using BWT.
|
||||||
|
|
|
||||||
|
|
@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
||||||
// can't close tribble index when writing
|
// can't close tribble index when writing
|
||||||
if ( message.indexOf("Unable to close index for") != -1 )
|
if ( message.indexOf("Unable to close index for") != -1 )
|
||||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||||
|
|
||||||
|
// disk is full
|
||||||
|
if ( message.indexOf("No space left on device") != -1 )
|
||||||
|
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||||
|
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||||
|
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
|
|
@ -17,8 +19,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the coverage per sample.
|
* Computes the coverage per sample for every position (use with -L argument!).
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -46,6 +48,7 @@ import java.io.PrintStream;
|
||||||
*
|
*
|
||||||
* @author aaron
|
* @author aaron
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
||||||
// the possible diploid genotype strings
|
// the possible diploid genotype strings
|
||||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||||
|
|
|
||||||
|
|
@ -163,43 +163,58 @@ public class VariantContextAdaptors {
|
||||||
@Override
|
@Override
|
||||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
|
||||||
return null;
|
|
||||||
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
|
||||||
|
|
||||||
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||||
// add the reference allele
|
if ( index < 0 )
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
return null; // we weren't given enough reference context to create the VariantContext
|
||||||
alleles.add(refAllele);
|
|
||||||
|
|
||||||
// add all of the alt alleles
|
final byte refBaseForIndel = ref.getBases()[index];
|
||||||
boolean sawNullAllele = refAllele.isNull();
|
|
||||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
|
||||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
|
||||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
Allele altAllele = Allele.create(alt, false);
|
|
||||||
alleles.add(altAllele);
|
|
||||||
if ( altAllele.isNull() )
|
|
||||||
sawNullAllele = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
boolean addPaddingBase;
|
||||||
|
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
addPaddingBase = false;
|
||||||
if ( index < 0 )
|
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||||
return null; // we weren't given enough reference context to create the VariantContext
|
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
else
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
|
||||||
builder.source(name).id(dbsnp.getRsID());
|
|
||||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
|
|
||||||
builder.alleles(alleles);
|
|
||||||
builder.referenceBaseForIndel(refBaseForIndel);
|
|
||||||
return builder.make();
|
|
||||||
} else
|
|
||||||
return null; // can't handle anything else
|
return null; // can't handle anything else
|
||||||
|
|
||||||
|
Allele refAllele;
|
||||||
|
if ( dbsnp.getNCBIRefBase().equals("-") )
|
||||||
|
refAllele = Allele.create(refBaseForIndel, true);
|
||||||
|
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
|
||||||
|
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
alleles.add(refAllele);
|
||||||
|
|
||||||
|
// add all of the alt alleles
|
||||||
|
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||||
|
if ( Allele.wouldBeNullAllele(alt.getBytes()))
|
||||||
|
alt = "";
|
||||||
|
else if ( ! Allele.acceptableAlleleBases(alt) )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
|
builder.source(name).id(dbsnp.getRsID());
|
||||||
|
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
|
||||||
|
builder.alleles(alleles);
|
||||||
|
return builder.make();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> stripNullDashes(final List<String> alleles) {
|
||||||
|
final List<String> newAlleles = new ArrayList<String>(alleles.size());
|
||||||
|
for ( final String allele : alleles ) {
|
||||||
|
if ( allele.equals("-") )
|
||||||
|
newAlleles.add("");
|
||||||
|
else
|
||||||
|
newAlleles.add(allele);
|
||||||
|
}
|
||||||
|
return newAlleles;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -351,7 +366,7 @@ public class VariantContextAdaptors {
|
||||||
long end = hapmap.getEnd();
|
long end = hapmap.getEnd();
|
||||||
if ( deletionLength > 0 )
|
if ( deletionLength > 0 )
|
||||||
end += deletionLength;
|
end += deletionLength;
|
||||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
|
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,10 +42,6 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static final String REF_ALLELE = "REF";
|
|
||||||
|
|
||||||
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
|
||||||
|
|
||||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||||
if ( g == null || !g.isCalled() )
|
if ( g == null || !g.isCalled() )
|
||||||
return;
|
return;
|
||||||
|
|
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
if ( vc.isSNP() )
|
if ( vc.isSNP() )
|
||||||
annotateSNP(stratifiedContext, vc, gb);
|
annotateSNP(stratifiedContext, vc, gb);
|
||||||
else if ( vc.isIndel() )
|
else if ( vc.isIndel() )
|
||||||
annotateIndel(stratifiedContext, vc, gb);
|
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||||
|
|
||||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||||
for ( Allele allele : vc.getAlleles() )
|
for ( Allele allele : vc.getAlleles() )
|
||||||
|
|
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
gb.AD(counts);
|
gb.AD(counts);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
|
||||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||||
if ( pileup == null )
|
if ( pileup == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
|
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||||
alleleCounts.put(REF_ALLELE, 0);
|
|
||||||
final Allele refAllele = vc.getReference();
|
final Allele refAllele = vc.getReference();
|
||||||
|
|
||||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
for ( final Allele allele : vc.getAlleles() ) {
|
||||||
|
alleleCounts.put(allele, 0);
|
||||||
if ( allele.isNoCall() ) {
|
|
||||||
continue; // this does not look so good, should we die???
|
|
||||||
}
|
|
||||||
|
|
||||||
alleleCounts.put(getAlleleRepresentation(allele), 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
if ( p.isBeforeInsertion() ) {
|
if ( p.isBeforeInsertion() ) {
|
||||||
|
|
||||||
final String b = p.getEventBases();
|
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
|
||||||
if ( alleleCounts.containsKey(b) ) {
|
if ( alleleCounts.containsKey(insertion) ) {
|
||||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( p.isBeforeDeletionStart() ) {
|
} else if ( p.isBeforeDeletionStart() ) {
|
||||||
if ( p.getEventLength() == refAllele.length() ) {
|
if ( p.getEventLength() == refAllele.length() - 1 ) {
|
||||||
// this is indeed the deletion allele recorded in VC
|
// this is indeed the deletion allele recorded in VC
|
||||||
final String b = DEL;
|
final Allele deletion = Allele.create(refBase);
|
||||||
if ( alleleCounts.containsKey(b) ) {
|
if ( alleleCounts.containsKey(deletion) ) {
|
||||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
|
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
|
||||||
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
|
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] counts = new int[alleleCounts.size()];
|
final int[] counts = new int[alleleCounts.size()];
|
||||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
counts[0] = alleleCounts.get(refAllele);
|
||||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
|
||||||
|
|
||||||
gb.AD(counts);
|
gb.AD(counts);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getAlleleRepresentation(Allele allele) {
|
|
||||||
if ( allele.isNull() ) { // deletion wrt the ref
|
|
||||||
return DEL;
|
|
||||||
} else { // insertion, pass actual bases
|
|
||||||
return allele.getBaseString();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// public String getIndelBases()
|
// public String getIndelBases()
|
||||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -250,8 +250,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
||||||
String refString = vc_input.getReference().getDisplayString();
|
String refString = vc_input.getReference().getDisplayString();
|
||||||
if (refString.length() == 0) // ref was null
|
|
||||||
refString = Allele.NULL_ALLELE_STRING;
|
|
||||||
|
|
||||||
Allele bglAlleleA, bglAlleleB;
|
Allele bglAlleleA, bglAlleleB;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
|
||||||
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
|
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
|
||||||
for ( Allele allele : preferredVC.getAlleles() ) {
|
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||||
String bglPrintString;
|
String bglPrintString;
|
||||||
if (allele.isNoCall() || allele.isNull())
|
if (allele.isNoCall())
|
||||||
bglPrintString = "-";
|
bglPrintString = "-";
|
||||||
else
|
else
|
||||||
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
||||||
|
|
|
||||||
|
|
@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
// write out the alleles at this site
|
// write out the alleles at this site
|
||||||
for ( Allele allele : vc.getAlleles() ) {
|
for ( Allele allele : vc.getAlleles() ) {
|
||||||
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
|
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
|
||||||
}
|
}
|
||||||
|
|
||||||
// write out sample level genotypes
|
// write out sample level genotypes
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
|
|
||||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
||||||
* @author Mauricio Carneiro, Roger Zurawicki
|
* @author Mauricio Carneiro, Roger Zurawicki
|
||||||
* @since 5/8/12
|
* @since 5/8/12
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@By(value = DataSource.READS)
|
@By(value = DataSource.READS)
|
||||||
@PartitionBy(PartitionType.INTERVAL)
|
@PartitionBy(PartitionType.INTERVAL)
|
||||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -33,9 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@PartitionBy(PartitionType.CONTIG)
|
@PartitionBy(PartitionType.CONTIG)
|
||||||
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
||||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||||
|
|
|
||||||
|
|
@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( vc.isSimpleDeletion()) {
|
if ( vc.isSimpleDeletion()) {
|
||||||
deletionBasesRemaining = vc.getReference().length();
|
deletionBasesRemaining = vc.getReference().length() - 1;
|
||||||
// delete the next n bases, not this one
|
// delete the next n bases, not this one
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||||
} else if ( vc.isSimpleInsertion()) {
|
} else if ( vc.isSimpleInsertion()) {
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||||
} else if (vc.isSNP()) {
|
} else if (vc.isSNP()) {
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,17 +26,20 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates basic statistics about the reference sequence itself
|
* Calculates basic statistics about the reference sequence itself
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
|
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
|
||||||
@Output PrintStream out;
|
@Output PrintStream out;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
|
||||||
// get ref bases of accurate deletion
|
// get ref bases of accurate deletion
|
||||||
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
|
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
|
||||||
stop = loc.getStart() + dLen;
|
stop = loc.getStart() + dLen;
|
||||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
|
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
|
||||||
|
|
||||||
if (Allele.acceptableAlleleBases(refBases, false)) {
|
if (Allele.acceptableAlleleBases(refBases, false)) {
|
||||||
refAllele = Allele.create(refBases, true);
|
refAllele = Allele.create(refBases, true);
|
||||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
altAllele = Allele.create(ref.getBase(), false);
|
||||||
}
|
}
|
||||||
else continue; // don't go on with this allele if refBases are non-standard
|
else continue; // don't go on with this allele if refBases are non-standard
|
||||||
} else {
|
} else {
|
||||||
// insertion case
|
// insertion case
|
||||||
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
|
final String insertionBases = (char)ref.getBase() + s; // add reference padding
|
||||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
|
||||||
altAllele = Allele.create(s, false);
|
refAllele = Allele.create(ref.getBase(), true);
|
||||||
|
altAllele = Allele.create(insertionBases, false);
|
||||||
stop = loc.getStart();
|
stop = loc.getStart();
|
||||||
}
|
}
|
||||||
else continue; // go on to next allele if consensus insertion has any non-standard base.
|
else continue; // go on to next allele if consensus insertion has any non-standard base.
|
||||||
|
|
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||||
builder.alleles(Arrays.asList(refAllele, altAllele));
|
builder.alleles(Arrays.asList(refAllele, altAllele));
|
||||||
builder.referenceBaseForIndel(ref.getBase());
|
|
||||||
builder.noGenotypes();
|
builder.noGenotypes();
|
||||||
if (doMultiAllelicCalls) {
|
if (doMultiAllelicCalls) {
|
||||||
vcs.add(builder.make());
|
vcs.add(builder.make());
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -48,7 +47,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
private boolean DEBUG = false;
|
private boolean DEBUG = false;
|
||||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||||
private PairHMMIndelErrorModel pairModel;
|
private PairHMMIndelErrorModel pairModel;
|
||||||
private boolean allelesArePadded;
|
|
||||||
|
|
||||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||||
|
|
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
|
||||||
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||||
alleleList = pair.first;
|
|
||||||
allelesArePadded = pair.second;
|
|
||||||
if (alleleList.isEmpty())
|
if (alleleList.isEmpty())
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
||||||
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// start making the VariantContext
|
// start making the VariantContext
|
||||||
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
|
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
|
||||||
|
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
|
||||||
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
|
|
||||||
final int eventLength = getEventLength(alleleList);
|
final int eventLength = getEventLength(alleleList);
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
|
||||||
|
|
||||||
// create the genotypes; no-call everyone for now
|
// create the genotypes; no-call everyone for now
|
||||||
GenotypesContext genotypes = GenotypesContext.create();
|
GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
|
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
return indelLikelihoodMap.get();
|
return indelLikelihoodMap.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
|
|
||||||
Allele refAllele = alleles.get(0);
|
|
||||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
|
||||||
if (allelesArePadded)
|
|
||||||
endLoc++;
|
|
||||||
|
|
||||||
return endLoc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final GenomeLoc loc,
|
final GenomeLoc loc,
|
||||||
|
|
@ -213,7 +198,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
|
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> contexts,
|
final Map<String, AlignmentContext> contexts,
|
||||||
final AlignmentContextUtils.ReadOrientation contextType,
|
final AlignmentContextUtils.ReadOrientation contextType,
|
||||||
|
|
@ -222,7 +207,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||||
|
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
List<Allele> alleles = new ArrayList<Allele>();
|
||||||
boolean allelesArePadded = true;
|
|
||||||
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||||
|
|
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
}
|
}
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a variant
|
||||||
if (vc == null)
|
if (vc == null)
|
||||||
return new Pair<List<Allele>,Boolean>(alleles,false);
|
return alleles;
|
||||||
|
|
||||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||||
|
|
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
} else {
|
} else {
|
||||||
alleles.addAll(vc.getAlleles());
|
alleles.addAll(vc.getAlleles());
|
||||||
}
|
}
|
||||||
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
|
||||||
allelesArePadded = false;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||||
}
|
}
|
||||||
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
|
return alleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine {
|
||||||
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
|
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||||
if ( vcInput == null )
|
if ( vcInput == null )
|
||||||
return null;
|
return null;
|
||||||
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make();
|
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make();
|
||||||
} else {
|
} else {
|
||||||
// deal with bad/non-standard reference bases
|
// deal with bad/non-standard reference bases
|
||||||
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
||||||
|
|
@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine {
|
||||||
builder.log10PError(phredScaledConfidence/-10.0);
|
builder.log10PError(phredScaledConfidence/-10.0);
|
||||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||||
builder.filters(filter);
|
builder.filters(filter);
|
||||||
if ( limitedContext ) {
|
|
||||||
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
|
|
||||||
} else {
|
|
||||||
builder.referenceBaseForIndel(refContext.getBase());
|
|
||||||
}
|
|
||||||
|
|
||||||
// create the genotypes
|
// create the genotypes
|
||||||
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
|
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
|
||||||
|
|
@ -493,8 +487,8 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
|
||||||
vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall);
|
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||||
|
|
||||||
if ( annotationEngine != null && !limitedContext ) {
|
if ( annotationEngine != null && !limitedContext ) {
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
|
|
|
||||||
|
|
@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
||||||
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
||||||
continue;
|
continue;
|
||||||
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
final byte[] indelStr;
|
||||||
|
if ( knownIndel.isSimpleInsertion() ) {
|
||||||
|
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
|
||||||
|
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
|
||||||
|
} else {
|
||||||
|
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
|
||||||
|
}
|
||||||
int start = knownIndel.getStart() - leftmostIndex + 1;
|
int start = knownIndel.getStart() - leftmostIndex + 1;
|
||||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
||||||
if ( c != null )
|
if ( c != null )
|
||||||
|
|
|
||||||
|
|
@ -1131,12 +1131,13 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
|
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
|
||||||
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
|
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
|
||||||
|
|
||||||
|
final byte referencePaddingBase = refBases[(int)start-1];
|
||||||
|
|
||||||
if ( call.getVariant() == null ) {
|
if ( call.getVariant() == null ) {
|
||||||
// we will need to cteate genotype with two (hom) ref alleles (below).
|
// we will need to create genotype with two (hom) ref alleles (below).
|
||||||
// we can not use 'alleles' list here, since that list is supposed to contain
|
// we can not use 'alleles' list here, since that list is supposed to contain
|
||||||
// only *distinct* alleles observed at the site or VCFContext will frown upon us...
|
// only *distinct* alleles observed at the site or VCFContext will frown upon us...
|
||||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
alleles.add( Allele.create(referencePaddingBase,true) );
|
||||||
homref_alleles = new ArrayList<Allele>(2);
|
homref_alleles = new ArrayList<Allele>(2);
|
||||||
homref_alleles.add( alleles.get(0));
|
homref_alleles.add( alleles.get(0));
|
||||||
homref_alleles.add( alleles.get(0));
|
homref_alleles.add( alleles.get(0));
|
||||||
|
|
@ -1145,7 +1146,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
// (Genotype will tell us whether it is an actual call or not!)
|
// (Genotype will tell us whether it is an actual call or not!)
|
||||||
int event_length = call.getVariant().lengthOnRef();
|
int event_length = call.getVariant().lengthOnRef();
|
||||||
if ( event_length < 0 ) event_length = 0;
|
if ( event_length < 0 ) event_length = 0;
|
||||||
fillAlleleList(alleles,call);
|
fillAlleleList(alleles,call,referencePaddingBase);
|
||||||
stop += event_length;
|
stop += event_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1165,7 +1166,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
filters.add("NoCall");
|
filters.add("NoCall");
|
||||||
}
|
}
|
||||||
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
||||||
.genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make();
|
.genotypes(genotypes).filters(filters).make();
|
||||||
vcf.add(vc);
|
vcf.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1175,16 +1176,16 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
* @param l
|
* @param l
|
||||||
* @param call
|
* @param call
|
||||||
*/
|
*/
|
||||||
private void fillAlleleList(List<Allele> l, IndelPrecall call) {
|
private void fillAlleleList(List<Allele> l, IndelPrecall call, byte referencePaddingBase) {
|
||||||
int event_length = call.getVariant().lengthOnRef();
|
int event_length = call.getVariant().lengthOnRef();
|
||||||
if ( event_length == 0 ) { // insertion
|
if ( event_length == 0 ) { // insertion
|
||||||
|
|
||||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
l.add( Allele.create(referencePaddingBase,true) );
|
||||||
l.add( Allele.create(call.getVariant().getBases(), false ));
|
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
|
||||||
|
|
||||||
} else { //deletion:
|
} else { //deletion:
|
||||||
l.add( Allele.create(call.getVariant().getBases(), true ));
|
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
|
||||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
l.add( Allele.create(referencePaddingBase,false) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1218,19 +1219,20 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
// }
|
// }
|
||||||
boolean homRefT = ( tCall.getVariant() == null );
|
boolean homRefT = ( tCall.getVariant() == null );
|
||||||
boolean homRefN = ( nCall.getVariant() == null );
|
boolean homRefN = ( nCall.getVariant() == null );
|
||||||
|
final byte referencePaddingBase = refBases[(int)start-1];
|
||||||
if ( tCall.getVariant() == null && nCall.getVariant() == null) {
|
if ( tCall.getVariant() == null && nCall.getVariant() == null) {
|
||||||
// no indel at all ; create base-representation ref/ref alleles for genotype construction
|
// no indel at all ; create base-representation ref/ref alleles for genotype construction
|
||||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
alleles.add( Allele.create(referencePaddingBase,true) );
|
||||||
} else {
|
} else {
|
||||||
// we got indel(s)
|
// we got indel(s)
|
||||||
int event_length = 0;
|
int event_length = 0;
|
||||||
if ( tCall.getVariant() != null ) {
|
if ( tCall.getVariant() != null ) {
|
||||||
// indel in tumor
|
// indel in tumor
|
||||||
event_length = tCall.getVariant().lengthOnRef();
|
event_length = tCall.getVariant().lengthOnRef();
|
||||||
fillAlleleList(alleles, tCall);
|
fillAlleleList(alleles, tCall, referencePaddingBase);
|
||||||
} else {
|
} else {
|
||||||
event_length = nCall.getVariant().lengthOnRef();
|
event_length = nCall.getVariant().lengthOnRef();
|
||||||
fillAlleleList(alleles, nCall);
|
fillAlleleList(alleles, nCall, referencePaddingBase);
|
||||||
}
|
}
|
||||||
if ( event_length > 0 ) stop += event_length;
|
if ( event_length > 0 ) stop += event_length;
|
||||||
}
|
}
|
||||||
|
|
@ -1262,7 +1264,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
||||||
.genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make();
|
.genotypes(genotypes).filters(filters).attributes(attrs).make();
|
||||||
vcf.add(vc);
|
vcf.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -70,6 +72,7 @@ import java.util.*;
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
|
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
|
||||||
@Output
|
@Output
|
||||||
public PrintStream out;
|
public PrintStream out;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
sequenceInvalid = true;
|
sequenceInvalid = true;
|
||||||
invReason.add("SITE_IS_FILTERED");
|
invReason.add("SITE_IS_FILTERED");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String refString = validate.getReference().getDisplayString();
|
||||||
|
String altString = validate.getAlternateAllele(0).getDisplayString();
|
||||||
|
|
||||||
if ( validate.isIndel() ) {
|
if ( validate.isIndel() ) {
|
||||||
sequence.append(Character.toUpperCase((char)ref.getBase()));
|
sequence.append(Character.toUpperCase((char)ref.getBase()));
|
||||||
rawSequence.append(Character.toUpperCase((char)ref.getBase()));
|
rawSequence.append(Character.toUpperCase((char)ref.getBase()));
|
||||||
|
final byte[] refAllele = validate.getReference().getBases();
|
||||||
|
refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length));
|
||||||
|
if ( refString.isEmpty() )
|
||||||
|
refString = "-";
|
||||||
|
final byte[] altAllele = validate.getAlternateAllele(0).getBases();
|
||||||
|
altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length));
|
||||||
|
if ( altString.isEmpty() )
|
||||||
|
altString = "-";
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.append('[');
|
sequence.append('[');
|
||||||
sequence.append(validate.getAlternateAllele(0).toString());
|
sequence.append(altString);
|
||||||
sequence.append('/');
|
sequence.append('/');
|
||||||
sequence.append(validate.getReference().toString());
|
sequence.append(refString);
|
||||||
sequence.append(']');
|
sequence.append(']');
|
||||||
// do this to the raw sequence to -- the indeces will line up that way
|
// do this to the raw sequence to -- the indeces will line up that way
|
||||||
rawSequence.append('[');
|
rawSequence.append('[');
|
||||||
rawSequence.append(validate.getAlternateAllele(0).getBaseString());
|
rawSequence.append(altString);
|
||||||
rawSequence.append('/');
|
rawSequence.append('/');
|
||||||
rawSequence.append(validate.getReference().getBaseString());
|
rawSequence.append(refString);
|
||||||
rawSequence.append(']');
|
rawSequence.append(']');
|
||||||
allelePos = ref.getLocus();
|
allelePos = ref.getLocus();
|
||||||
if ( indelCounter > 0 ) {
|
if ( indelCounter > 0 ) {
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable {
|
||||||
final protected GenomeLoc loc;
|
final protected GenomeLoc loc;
|
||||||
/** A set of the alleles segregating in this context */
|
/** A set of the alleles segregating in this context */
|
||||||
final protected List<Allele> alleles;
|
final protected List<Allele> alleles;
|
||||||
final protected Byte refBase;
|
|
||||||
// final protected HashMap<String, Object> attributes;
|
// final protected HashMap<String, Object> attributes;
|
||||||
|
|
||||||
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes,
|
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes) {
|
||||||
byte base) {
|
|
||||||
this.loc = parser.createGenomeLoc(contig, start, stop);
|
this.loc = parser.createGenomeLoc(contig, start, stop);
|
||||||
this.alleles = alleles;
|
this.alleles = alleles;
|
||||||
this.refBase = base;
|
|
||||||
// this.attributes = attributes;
|
// this.attributes = attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable {
|
||||||
|
|
||||||
public VariantContext createVariantContextFromEvent() {
|
public VariantContext createVariantContextFromEvent() {
|
||||||
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||||
.log10PError(0.0).referenceBaseForIndel(refBase).make();
|
.log10PError(0.0).make();
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
|
||||||
|
|
||||||
// create bare-bones event and log in corresponding bin
|
// create bare-bones event and log in corresponding bin
|
||||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
|
||||||
|
|
||||||
binnedEventArray[binIndex].add(event);
|
binnedEventArray[binIndex].add(event);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
|
||||||
}
|
}
|
||||||
// create bare-bones event and log in corresponding bin
|
// create bare-bones event and log in corresponding bin
|
||||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
|
||||||
binnedEventArray.add(event);
|
binnedEventArray.add(event);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
||||||
//increment stats for pairwise mismatches
|
//increment stats for pairwise mismatches
|
||||||
|
|
||||||
for (Allele allele : genotype.getAlleles()) {
|
for (Allele allele : genotype.getAlleles()) {
|
||||||
if (allele.isNonNull() && allele.isCalled()) {
|
if (allele.isCalled()) {
|
||||||
String alleleString = allele.toString();
|
String alleleString = allele.toString();
|
||||||
alleleCounts.putIfAbsent(alleleString, 0);
|
alleleCounts.putIfAbsent(alleleString, 0);
|
||||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||||
|
|
|
||||||
|
|
@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
final byte[] refSeq = ref.getBases();
|
final byte[] refSeq = ref.getBases();
|
||||||
|
|
||||||
// get the indel length
|
// get the indel length
|
||||||
int indelLength;
|
final int indelLength;
|
||||||
if ( vc.isSimpleDeletion() )
|
if ( vc.isSimpleDeletion() )
|
||||||
indelLength = vc.getReference().length();
|
indelLength = vc.getReference().length() - 1;
|
||||||
else
|
else
|
||||||
indelLength = vc.getAlternateAllele(0).length();
|
indelLength = vc.getAlternateAllele(0).length() - 1;
|
||||||
|
|
||||||
if ( indelLength > 200 ) {
|
if ( indelLength > 200 ) {
|
||||||
writer.add(vc);
|
writer.add(vc);
|
||||||
|
|
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// create an indel haplotype
|
// create an indel haplotype
|
||||||
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||||
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
|
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
|
||||||
|
|
||||||
// create a CIGAR string to represent the event
|
// create a CIGAR string to represent the event
|
||||||
|
|
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
|
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
|
||||||
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
|
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
|
||||||
|
|
||||||
int indelIndex = originalIndex-difference;
|
final int indelIndex = originalIndex-difference;
|
||||||
byte[] newBases = new byte[indelLength];
|
final byte[] newBases = new byte[indelLength + 1];
|
||||||
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
|
newBases[0] = refSeq[indelIndex-1];
|
||||||
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
|
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
|
||||||
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
|
final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
|
||||||
|
newVC = updateAllele(newVC, newAllele);
|
||||||
|
|
||||||
writer.add(newVC);
|
writer.add(newVC);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
if ( vc.isSimpleDeletion() ) {
|
if ( vc.isSimpleDeletion() ) {
|
||||||
indexOfRef += indelLength;
|
indexOfRef += indelLength;
|
||||||
} else {
|
} else {
|
||||||
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
|
System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
|
||||||
currentPos += indelLength;
|
currentPos += indelLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
return hap;
|
return hap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
|
public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
|
||||||
// create a mapping from original allele to new allele
|
// create a mapping from original allele to new allele
|
||||||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||||
if ( newAllele.isReference() ) {
|
if ( newAllele.isReference() ) {
|
||||||
alleleMap.put(vc.getReference(), newAllele);
|
alleleMap.put(vc.getReference(), newAllele);
|
||||||
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
|
alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
|
||||||
} else {
|
} else {
|
||||||
alleleMap.put(vc.getReference(), vc.getReference());
|
alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
|
||||||
alleleMap.put(vc.getAlternateAllele(0), newAllele);
|
alleleMap.put(vc.getAlternateAllele(0), newAllele);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -229,6 +230,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
||||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||||
}
|
}
|
||||||
|
|
||||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
|
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -119,7 +119,6 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
if ( toInterval != null ) {
|
if ( toInterval != null ) {
|
||||||
// check whether the strand flips, and if so reverse complement everything
|
// check whether the strand flips, and if so reverse complement everything
|
||||||
// TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
|
|
||||||
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
|
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
|
||||||
vc = VariantContextUtils.reverseComplement(vc);
|
vc = VariantContextUtils.reverseComplement(vc);
|
||||||
}
|
}
|
||||||
|
|
@ -132,11 +131,10 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
|
||||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
|
||||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
|
||||||
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
|
originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.add(vc);
|
writer.add(vc);
|
||||||
|
|
|
||||||
|
|
@ -130,35 +130,16 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// get the true reference allele
|
// get the true reference allele
|
||||||
Allele reportedRefAllele = vc.getReference();
|
final Allele reportedRefAllele = vc.getReference();
|
||||||
Allele observedRefAllele = null;
|
final int refLength = reportedRefAllele.length();
|
||||||
// insertions
|
if ( refLength > 100 ) {
|
||||||
if ( vc.isSimpleInsertion() ) {
|
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
|
||||||
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
|
return;
|
||||||
}
|
}
|
||||||
// deletions
|
|
||||||
else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
|
|
||||||
// we can't validate arbitrarily long deletions
|
|
||||||
if ( reportedRefAllele.length() > 100 ) {
|
|
||||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// deletions are associated with the (position of) the last (preceding) non-deleted base;
|
final byte[] observedRefBases = new byte[refLength];
|
||||||
// hence to get actually deleted bases we need offset = 1
|
System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
|
||||||
int offset = vc.isMNP() ? 0 : 1;
|
final Allele observedRefAllele = Allele.create(observedRefBases);
|
||||||
byte[] refBytes = ref.getBases();
|
|
||||||
byte[] trueRef = new byte[reportedRefAllele.length()];
|
|
||||||
for (int i = 0; i < reportedRefAllele.length(); i++)
|
|
||||||
trueRef[i] = refBytes[i+offset];
|
|
||||||
observedRefAllele = Allele.create(trueRef, true);
|
|
||||||
}
|
|
||||||
// SNPs, etc. but not mixed types because they are too difficult
|
|
||||||
else if ( !vc.isMixed() ) {
|
|
||||||
byte[] refByte = new byte[1];
|
|
||||||
refByte[0] = ref.getBase();
|
|
||||||
observedRefAllele = Allele.create(refByte, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the RS IDs
|
// get the RS IDs
|
||||||
Set<String> rsIDs = null;
|
Set<String> rsIDs = null;
|
||||||
|
|
@ -171,10 +152,10 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
||||||
try {
|
try {
|
||||||
switch( type ) {
|
switch( type ) {
|
||||||
case ALL:
|
case ALL:
|
||||||
vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
|
vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
|
||||||
break;
|
break;
|
||||||
case REF:
|
case REF:
|
||||||
vc.validateReferenceBases(observedRefAllele, ref.getBase());
|
vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
|
||||||
break;
|
break;
|
||||||
case IDS:
|
case IDS:
|
||||||
vc.validateRSIDs(rsIDs);
|
vc.validateRSIDs(rsIDs);
|
||||||
|
|
|
||||||
|
|
@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
getters.put("REF", new Getter() {
|
getters.put("REF", new Getter() {
|
||||||
public String get(VariantContext vc) {
|
public String get(VariantContext vc) {
|
||||||
StringBuilder x = new StringBuilder();
|
StringBuilder x = new StringBuilder();
|
||||||
x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
|
x.append(vc.getReference().getDisplayString());
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
@ -393,7 +393,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
for ( int i = 0; i < n; i++ ) {
|
for ( int i = 0; i < n; i++ ) {
|
||||||
if ( i != 0 ) x.append(",");
|
if ( i != 0 ) x.append(",");
|
||||||
x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
|
x.append(vc.getAlternateAllele(i));
|
||||||
}
|
}
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -435,11 +435,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
private static Object splitAltAlleles(VariantContext vc) {
|
private static Object splitAltAlleles(VariantContext vc) {
|
||||||
final int numAltAlleles = vc.getAlternateAlleles().size();
|
final int numAltAlleles = vc.getAlternateAlleles().size();
|
||||||
if ( numAltAlleles == 1 )
|
if ( numAltAlleles == 1 )
|
||||||
return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
|
return vc.getAlternateAllele(0);
|
||||||
|
|
||||||
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
return vc.getAlternateAlleles();
|
||||||
for ( Allele allele : vc.getAlternateAlleles() )
|
|
||||||
alleles.add(vc.getAlleleStringWithRefPadding(allele));
|
|
||||||
return alleles;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
|
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
|
||||||
protected String sampleName = null;
|
protected String sampleName = null;
|
||||||
|
|
||||||
/**
|
|
||||||
* This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
|
|
||||||
*/
|
|
||||||
@Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
|
|
||||||
protected boolean fixReferenceBase = false;
|
|
||||||
|
|
||||||
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
||||||
private boolean wroteHeader = false;
|
private boolean wroteHeader = false;
|
||||||
private Set<String> samples;
|
private Set<String> samples;
|
||||||
|
|
@ -140,10 +134,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
builder.genotypes(g);
|
builder.genotypes(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( fixReferenceBase ) {
|
|
||||||
builder.referenceBaseForIndel(ref.getBase());
|
|
||||||
}
|
|
||||||
|
|
||||||
writeRecord(builder.make(), tracker, ref.getLocus());
|
writeRecord(builder.make(), tracker, ref.getLocus());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -169,8 +159,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
||||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
|
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
|
||||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||||
hapmap.setActualAlleles(alleleMap);
|
hapmap.setActualAlleles(alleleMap);
|
||||||
|
|
||||||
// also, use the correct positioning for insertions
|
// also, use the correct positioning for insertions
|
||||||
|
|
|
||||||
|
|
@ -431,6 +431,37 @@ public class BaseUtils {
|
||||||
return new String(simpleComplement(bases.getBytes()));
|
return new String(simpleComplement(bases.getBytes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the uppercased version of the bases
|
||||||
|
*
|
||||||
|
* @param bases the bases
|
||||||
|
* @return the upper cased version
|
||||||
|
*/
|
||||||
|
static public byte[] convertToUpperCase(final byte[] bases) {
|
||||||
|
for ( int i = 0; i < bases.length; i++ ) {
|
||||||
|
if ( (char)bases[i] >= 'a' )
|
||||||
|
bases[i] = toUpperCaseBase(bases[i]);
|
||||||
|
}
|
||||||
|
return bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public byte toUpperCaseBase(final byte base) {
|
||||||
|
switch (base) {
|
||||||
|
case 'a':
|
||||||
|
return 'A';
|
||||||
|
case 'c':
|
||||||
|
return 'C';
|
||||||
|
case 'g':
|
||||||
|
return 'G';
|
||||||
|
case 't':
|
||||||
|
return 'T';
|
||||||
|
case 'n':
|
||||||
|
return 'N';
|
||||||
|
default:
|
||||||
|
return base;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the index of the most common base in the basecounts array. To be used with
|
* Returns the index of the most common base in the basecounts array. To be used with
|
||||||
* pileup.getBaseCounts.
|
* pileup.getBaseCounts.
|
||||||
|
|
|
||||||
|
|
@ -176,7 +176,7 @@ public class Haplotype {
|
||||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||||
}
|
}
|
||||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||||
final int altAlleleLength = altAllele.length();
|
final int altAlleleLength = altAllele.length() - 1;
|
||||||
newHaplotype = new byte[bases.length + altAlleleLength];
|
newHaplotype = new byte[bases.length + altAlleleLength];
|
||||||
for( int iii = 0; iii < bases.length; iii++ ) {
|
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||||
newHaplotype[iii] = bases[iii];
|
newHaplotype[iii] = bases[iii];
|
||||||
|
|
@ -185,15 +185,16 @@ public class Haplotype {
|
||||||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||||
}
|
}
|
||||||
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
||||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
|
||||||
}
|
}
|
||||||
} else { // deletion
|
} else { // deletion
|
||||||
final int shift = refAllele.length() - altAllele.length();
|
final int shift = refAllele.length() - altAllele.length();
|
||||||
|
final int altAlleleLength = altAllele.length() - 1;
|
||||||
newHaplotype = new byte[bases.length - shift];
|
newHaplotype = new byte[bases.length - shift];
|
||||||
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
|
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
|
||||||
newHaplotype[iii] = bases[iii];
|
newHaplotype[iii] = bases[iii];
|
||||||
}
|
}
|
||||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
|
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
|
||||||
newHaplotype[iii] = bases[iii+shift];
|
newHaplotype[iii] = bases[iii+shift];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -204,8 +205,11 @@ public class Haplotype {
|
||||||
return new Haplotype(newHaplotype);
|
return new Haplotype(newHaplotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
|
||||||
final int haplotypeSize, final int numPrefBases) {
|
final int startPos,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final int haplotypeSize,
|
||||||
|
final int numPrefBases) {
|
||||||
|
|
||||||
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||||
|
|
||||||
|
|
@ -216,7 +220,6 @@ public class Haplotype {
|
||||||
refAllele = a;
|
refAllele = a;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (refAllele == null)
|
if (refAllele == null)
|
||||||
|
|
@ -224,19 +227,12 @@ public class Haplotype {
|
||||||
|
|
||||||
byte[] refBases = ref.getBases();
|
byte[] refBases = ref.getBases();
|
||||||
|
|
||||||
|
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
|
||||||
|
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
|
||||||
|
|
||||||
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
|
|
||||||
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
|
|
||||||
|
|
||||||
|
|
||||||
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
|
|
||||||
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
|
|
||||||
// protect against long events that overrun available reference context
|
// protect against long events that overrun available reference context
|
||||||
if (startAfter > refBases.length)
|
final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
|
||||||
startAfter = refBases.length;
|
final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
|
||||||
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
|
|
||||||
startAfter, refBases.length);
|
|
||||||
|
|
||||||
|
|
||||||
// Create location for all haplotypes
|
// Create location for all haplotypes
|
||||||
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
|
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
|
||||||
|
|
@ -244,16 +240,14 @@ public class Haplotype {
|
||||||
|
|
||||||
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
|
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
|
||||||
|
|
||||||
|
|
||||||
for (final Allele a : alleleList) {
|
for (final Allele a : alleleList) {
|
||||||
|
|
||||||
byte[] alleleBases = a.getBases();
|
final byte[] alleleBases = a.getBases();
|
||||||
// use string concatenation
|
// use string concatenation
|
||||||
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
|
||||||
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
||||||
|
|
||||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return haplotypeMap;
|
return haplotypeMap;
|
||||||
|
|
|
||||||
|
|
@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
builder.id(id);
|
builder.id(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
|
|
||||||
* GATK encoding.
|
|
||||||
*
|
|
||||||
* @param position
|
|
||||||
* @param ref
|
|
||||||
* @param unclippedAlleles
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
@Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
|
|
||||||
@Ensures("result.size() == unclippedAlleles.size()")
|
|
||||||
protected List<Allele> clipAllelesIfNecessary(final int position,
|
|
||||||
final String ref,
|
|
||||||
final List<Allele> unclippedAlleles) {
|
|
||||||
// the last argument of 1 allows us to safely ignore the end, because we are
|
|
||||||
// ultimately going to use the end in the record itself
|
|
||||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
|
|
||||||
if ( clipped.getError() != null ) error(clipped.getError());
|
|
||||||
return clipped.getClippedAlleles();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode the alleles from this BCF2 file and put the results in builder
|
* Decode the alleles from this BCF2 file and put the results in builder
|
||||||
* @param builder
|
* @param builder
|
||||||
|
|
@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
}
|
}
|
||||||
assert ref != null;
|
assert ref != null;
|
||||||
|
|
||||||
alleles = clipAllelesIfNecessary(pos, ref, alleles);
|
|
||||||
builder.alleles(alleles);
|
builder.alleles(alleles);
|
||||||
|
|
||||||
assert ref.length() > 0;
|
assert ref.length() > 0;
|
||||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
|
||||||
|
|
||||||
return alleles;
|
return alleles;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -256,9 +256,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
final Map<String, Object> attrs = parseInfo(parts[7]);
|
final Map<String, Object> attrs = parseInfo(parts[7]);
|
||||||
builder.attributes(attrs);
|
builder.attributes(attrs);
|
||||||
|
|
||||||
|
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
|
||||||
|
// update stop with the end key if provided
|
||||||
|
try {
|
||||||
|
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
|
||||||
|
} catch (Exception e) {
|
||||||
|
generateException("the END value in the INFO field is not valid");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder.stop(pos + ref.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
// get our alleles, filters, and setup an attribute map
|
// get our alleles, filters, and setup an attribute map
|
||||||
final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo);
|
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
||||||
final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs);
|
builder.alleles(alleles);
|
||||||
|
|
||||||
// do we have genotyping data
|
// do we have genotyping data
|
||||||
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
|
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
|
||||||
|
|
@ -275,7 +286,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
|
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
try {
|
try {
|
||||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
|
||||||
vc = builder.make();
|
vc = builder.make();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
generateException(e.getMessage());
|
generateException(e.getMessage());
|
||||||
|
|
@ -284,31 +294,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
|
|
||||||
final String ref,
|
|
||||||
final int pos,
|
|
||||||
final List<Allele> rawAlleles,
|
|
||||||
final Map<String, Object> attrs) {
|
|
||||||
int endForSymbolicAlleles = pos; // by default we use the pos
|
|
||||||
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
|
|
||||||
// update stop with the end key if provided
|
|
||||||
try {
|
|
||||||
endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
|
|
||||||
} catch (Exception e) {
|
|
||||||
generateException("the END value in the INFO field is not valid");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// find out our current location, and clip the alleles down to their minimum length
|
|
||||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
|
|
||||||
if ( clipped.getError() != null )
|
|
||||||
generateException(clipped.getError(), lineNo);
|
|
||||||
|
|
||||||
builder.stop(clipped.getStop());
|
|
||||||
builder.alleles(clipped.getClippedAlleles());
|
|
||||||
return clipped.getClippedAlleles();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the name of this codec
|
* get the name of this codec
|
||||||
* @return our set name
|
* @return our set name
|
||||||
|
|
|
||||||
|
|
@ -1,434 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import com.google.java.contract.Invariant;
|
|
||||||
import com.google.java.contract.Requires;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* All of the gross allele clipping and padding routines in one place
|
|
||||||
*
|
|
||||||
* Having attempted to understand / fix / document this code myself
|
|
||||||
* I can only conclude that this entire approach needs to be rethought. This
|
|
||||||
* code just doesn't work robustly with symbolic alleles, with multiple alleles,
|
|
||||||
* requires a special "reference base for indels" stored in the VariantContext
|
|
||||||
* whose correctness isn't enforced, and overall has strange special cases
|
|
||||||
* all over the place.
|
|
||||||
*
|
|
||||||
* The reason this code is so complex is due to symbolics and multi-alleleic
|
|
||||||
* variation, which frequently occur when combining variants from multiple
|
|
||||||
* VCF files.
|
|
||||||
*
|
|
||||||
* TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
|
|
||||||
* TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
|
|
||||||
*
|
|
||||||
* @author Mark DePristo
|
|
||||||
* @since 6/12
|
|
||||||
*/
|
|
||||||
public final class VCFAlleleClipper {
|
|
||||||
private VCFAlleleClipper() { }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determine whether we should clip off the first base of all unclippped alleles or not
|
|
||||||
*
|
|
||||||
* Returns true if all of the alleles in unclippedAlleles share a common first base with
|
|
||||||
* ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may
|
|
||||||
* contain the reference allele itself, or just the alternate alleles, it doesn't matter.
|
|
||||||
*
|
|
||||||
* The algorithm returns true if the first base should be clipped off, or false otherwise
|
|
||||||
*
|
|
||||||
* This algorithm works even in the presence of symbolic alleles, logically ignoring these
|
|
||||||
* values. It
|
|
||||||
*
|
|
||||||
* @param unclippedAlleles list of unclipped alleles to assay
|
|
||||||
* @param ref0 the first base of the reference allele
|
|
||||||
* @return true if we should clip the first base of unclippedAlleles
|
|
||||||
*/
|
|
||||||
@Requires("unclippedAlleles != null")
|
|
||||||
public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
|
|
||||||
final byte ref0) {
|
|
||||||
boolean allSymbolicAlt = true;
|
|
||||||
|
|
||||||
for ( final Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// already know we aren't symbolic, so we only need to decide if we have only seen a ref
|
|
||||||
if ( ! a.isReference() )
|
|
||||||
allSymbolicAlt = false;
|
|
||||||
|
|
||||||
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// to reach here all alleles are consistent with clipping the first base matching ref0
|
|
||||||
// but we don't clip if all ALT alleles are symbolic
|
|
||||||
return ! allSymbolicAlt;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
|
||||||
final byte[] ref,
|
|
||||||
final int forwardClipping,
|
|
||||||
final boolean allowFullClip) {
|
|
||||||
int clipping = 0;
|
|
||||||
boolean stillClipping = true;
|
|
||||||
|
|
||||||
while ( stillClipping ) {
|
|
||||||
for ( final Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
|
||||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
|
||||||
if ( a.length() - clipping == 0 )
|
|
||||||
return clipping - (allowFullClip ? 0 : 1);
|
|
||||||
|
|
||||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
|
||||||
stillClipping = false;
|
|
||||||
}
|
|
||||||
else if ( ref.length == clipping ) {
|
|
||||||
if ( allowFullClip )
|
|
||||||
stillClipping = false;
|
|
||||||
else
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
|
||||||
stillClipping = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( stillClipping )
|
|
||||||
clipping++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return clipping;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Are the alleles describing a polymorphism substitution one base for another?
|
|
||||||
*
|
|
||||||
* @param alleles a list of alleles, must not be empty
|
|
||||||
* @return Return true if the length of any allele in alleles isn't 1
|
|
||||||
*/
|
|
||||||
@Requires("!alleles.isEmpty()")
|
|
||||||
private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
|
|
||||||
for ( final Allele a : alleles ) {
|
|
||||||
if ( a.length() != 1 )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
|
|
||||||
*
|
|
||||||
* The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
|
|
||||||
* position, after clipping. It also contains the list of alleles, in the same order as the provided
|
|
||||||
* unclipped ones, that are the fully clipped version of the input alleles. If an error occurs
|
|
||||||
* during this option the getError() function returns a string describing the problem (for use in parsers).
|
|
||||||
*
|
|
||||||
* The basic operation are:
|
|
||||||
*
|
|
||||||
* single allele
|
|
||||||
* => stop == start and clipped == unclipped
|
|
||||||
* any number of single nucleotide events
|
|
||||||
* => stop == start and clipped == unclipped
|
|
||||||
* two alleles, second being symbolic
|
|
||||||
* => stop == start and clipped == unclipped
|
|
||||||
* Note in this case that the STOP should be computed by other means (from END in VCF, for example)
|
|
||||||
* Note that if there's more than two alleles and the second is a symbolic the code produces an error
|
|
||||||
* Any other case:
|
|
||||||
* The alleles are trimmed of any sequence shared at the end of the alleles. If N bases
|
|
||||||
* are common then the alleles will all be at least N bases shorter.
|
|
||||||
* The stop position returned is the start position + the length of the
|
|
||||||
* reverse trimmed only reference allele - 1.
|
|
||||||
* If the alleles all share a single common starting sequence (just one base is considered)
|
|
||||||
* then the alleles have this leading common base removed as well.
|
|
||||||
*
|
|
||||||
* TODO This code is gross and brittle and needs to be rethought from scratch
|
|
||||||
*
|
|
||||||
* @param start the unadjusted start position (pre-clipping)
|
|
||||||
* @param ref the reference string
|
|
||||||
* @param unclippedAlleles the list of unclipped alleles, including the reference allele
|
|
||||||
* @return the new reference end position of this event
|
|
||||||
*/
|
|
||||||
@Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
|
|
||||||
@Ensures("result != null")
|
|
||||||
public static ClippedAlleles clipAlleles(final int start,
|
|
||||||
final String ref,
|
|
||||||
final List<Allele> unclippedAlleles,
|
|
||||||
final int endForSymbolicAllele ) {
|
|
||||||
// no variation or single nucleotide events are by definition fully clipped
|
|
||||||
if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
|
|
||||||
return new ClippedAlleles(start, unclippedAlleles, null);
|
|
||||||
|
|
||||||
// we've got to sort out the clipping by looking at the alleles themselves
|
|
||||||
final byte firstRefBase = (byte) ref.charAt(0);
|
|
||||||
final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
|
|
||||||
final int forwardClipping = firstBaseIsClipped ? 1 : 0;
|
|
||||||
final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
|
|
||||||
final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
|
|
||||||
|
|
||||||
if ( reverseClipping == -1 )
|
|
||||||
return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
|
|
||||||
|
|
||||||
boolean sawSymbolic = false;
|
|
||||||
List<Allele> clippedAlleles;
|
|
||||||
if ( ! needsClipping ) {
|
|
||||||
// there's nothing to clip, so clippedAlleles are the original alleles
|
|
||||||
clippedAlleles = unclippedAlleles;
|
|
||||||
} else {
|
|
||||||
clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
|
||||||
for ( final Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() ) {
|
|
||||||
sawSymbolic = true;
|
|
||||||
clippedAlleles.add(a);
|
|
||||||
} else {
|
|
||||||
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
|
|
||||||
if ( !Allele.acceptableAlleleBases(allele) )
|
|
||||||
return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
|
|
||||||
clippedAlleles.add(Allele.create(allele, a.isReference()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
|
|
||||||
|
|
||||||
// TODO
|
|
||||||
// TODO
|
|
||||||
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
|
||||||
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
|
||||||
// TODO
|
|
||||||
// TODO
|
|
||||||
if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
|
|
||||||
// TODO
|
|
||||||
// TODO
|
|
||||||
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
|
||||||
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
|
||||||
// TODO
|
|
||||||
// TODO
|
|
||||||
|
|
||||||
final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
|
|
||||||
return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the alleles in inputVC should have reference bases added for padding
|
|
||||||
*
|
|
||||||
* We need to pad a VC with a common base if the length of the reference allele is
|
|
||||||
* less than the length of the VariantContext. This happens because the position of
|
|
||||||
* e.g. an indel is always one before the actual event (as per VCF convention).
|
|
||||||
*
|
|
||||||
* @param inputVC the VC to evaluate, cannot be null
|
|
||||||
* @return true if
|
|
||||||
*/
|
|
||||||
public static boolean needsPadding(final VariantContext inputVC) {
|
|
||||||
// biallelic sites with only symbolic never need padding
|
|
||||||
if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
|
||||||
final int referenceLength = inputVC.getReference().length();
|
|
||||||
|
|
||||||
if ( referenceLength == recordLength )
|
|
||||||
return false;
|
|
||||||
else if ( referenceLength == recordLength - 1 )
|
|
||||||
return true;
|
|
||||||
else if ( !inputVC.hasSymbolicAlleles() )
|
|
||||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
|
||||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
|
||||||
else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
|
|
||||||
throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
|
||||||
assert needsPadding(vc);
|
|
||||||
|
|
||||||
if ( allele.isSymbolic() )
|
|
||||||
return allele;
|
|
||||||
else {
|
|
||||||
// get bases for current allele and create a new one with trimmed bases
|
|
||||||
final StringBuilder sb = new StringBuilder();
|
|
||||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
|
||||||
sb.append(allele.getDisplayString());
|
|
||||||
final String newBases = sb.toString();
|
|
||||||
return Allele.create(newBases, allele.isReference());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
|
||||||
final boolean padVC = needsPadding(inputVC);
|
|
||||||
|
|
||||||
// nothing to do if we don't need to pad bases
|
|
||||||
if ( padVC ) {
|
|
||||||
if ( !inputVC.hasReferenceBaseForIndel() )
|
|
||||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
|
||||||
|
|
||||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
|
||||||
final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
|
|
||||||
|
|
||||||
boolean paddedAtLeastOne = false;
|
|
||||||
for (final Allele a : inputVC.getAlleles()) {
|
|
||||||
final Allele padded = padAllele(inputVC, a);
|
|
||||||
paddedAtLeastOne = paddedAtLeastOne || padded != a;
|
|
||||||
alleles.add(padded);
|
|
||||||
if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! paddedAtLeastOne )
|
|
||||||
throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
|
|
||||||
|
|
||||||
final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
|
|
||||||
vcb.alleles(alleles);
|
|
||||||
|
|
||||||
// the position of the inputVC is one further, if it doesn't contain symbolic alleles
|
|
||||||
vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
|
|
||||||
|
|
||||||
if ( inputVC.hasGenotypes() ) {
|
|
||||||
assert unpaddedToPadded != null;
|
|
||||||
|
|
||||||
// now we can recreate new genotypes with trimmed alleles
|
|
||||||
final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
|
||||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
|
||||||
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
|
||||||
for (final Allele a : g.getAlleles()) {
|
|
||||||
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
|
||||||
}
|
|
||||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
|
||||||
}
|
|
||||||
vcb.genotypes(genotypes);
|
|
||||||
}
|
|
||||||
|
|
||||||
return vcb.make();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return inputVC;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
|
||||||
// see if we need to trim common reference base from all alleles
|
|
||||||
|
|
||||||
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
|
|
||||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
|
||||||
return inputVC;
|
|
||||||
|
|
||||||
final List<Allele> alleles = new ArrayList<Allele>();
|
|
||||||
final GenotypesContext genotypes = GenotypesContext.create();
|
|
||||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
|
||||||
|
|
||||||
for (final Allele a : inputVC.getAlleles()) {
|
|
||||||
if (a.isSymbolic()) {
|
|
||||||
alleles.add(a);
|
|
||||||
originalToTrimmedAlleleMap.put(a, a);
|
|
||||||
} else {
|
|
||||||
// get bases for current allele and create a new one with trimmed bases
|
|
||||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
|
||||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
|
||||||
alleles.add(trimmedAllele);
|
|
||||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now we can recreate new genotypes with trimmed alleles
|
|
||||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
|
||||||
final List<Allele> originalAlleles = genotype.getAlleles();
|
|
||||||
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
|
||||||
for ( final Allele a : originalAlleles ) {
|
|
||||||
if ( a.isCalled() )
|
|
||||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
|
||||||
else
|
|
||||||
trimmedAlleles.add(Allele.NO_CALL);
|
|
||||||
}
|
|
||||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
|
||||||
}
|
|
||||||
|
|
||||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
|
|
||||||
public static class ClippedAlleles {
|
|
||||||
private final int stop;
|
|
||||||
private final List<Allele> clippedAlleles;
|
|
||||||
private final Byte refBaseForIndel;
|
|
||||||
private final String error;
|
|
||||||
|
|
||||||
@Requires({"stop > 0", "clippedAlleles != null"})
|
|
||||||
private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
|
|
||||||
this.stop = stop;
|
|
||||||
this.clippedAlleles = clippedAlleles;
|
|
||||||
this.error = null;
|
|
||||||
this.refBaseForIndel = refBaseForIndel;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Requires("error != null")
|
|
||||||
private ClippedAlleles(final String error) {
|
|
||||||
this.stop = -1;
|
|
||||||
this.clippedAlleles = null;
|
|
||||||
this.refBaseForIndel = null;
|
|
||||||
this.error = error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get an error if it occurred
|
|
||||||
* @return the error message, or null if no error occurred
|
|
||||||
*/
|
|
||||||
public String getError() {
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the stop position to use after the clipping as been applied, given the
|
|
||||||
* provided position to clipAlleles
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public int getStop() {
|
|
||||||
return stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the clipped alleles themselves
|
|
||||||
* @return the clipped alleles in the order of the input unclipped alleles
|
|
||||||
*/
|
|
||||||
public List<Allele> getClippedAlleles() {
|
|
||||||
return clippedAlleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the reference base we should use for indels, or null if none is appropriate
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Byte getRefBaseForIndel() {
|
|
||||||
return refBaseForIndel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Immutable representation of an allele
|
* Immutable representation of an allele
|
||||||
|
|
@ -77,32 +77,36 @@ public class Allele implements Comparable<Allele> {
|
||||||
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
|
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
|
||||||
|
|
||||||
private boolean isRef = false;
|
private boolean isRef = false;
|
||||||
private boolean isNull = false;
|
|
||||||
private boolean isNoCall = false;
|
private boolean isNoCall = false;
|
||||||
private boolean isSymbolic = false;
|
private boolean isSymbolic = false;
|
||||||
|
|
||||||
private byte[] bases = null;
|
private byte[] bases = null;
|
||||||
|
|
||||||
public final static String NULL_ALLELE_STRING = "-";
|
|
||||||
public final static String NO_CALL_STRING = ".";
|
public final static String NO_CALL_STRING = ".";
|
||||||
/** A generic static NO_CALL allele for use */
|
/** A generic static NO_CALL allele for use */
|
||||||
|
|
||||||
// no public way to create an allele
|
// no public way to create an allele
|
||||||
private Allele(byte[] bases, boolean isRef) {
|
private Allele(byte[] bases, boolean isRef) {
|
||||||
// standardize our representation of null allele and bases
|
// null alleles are no longer allowed
|
||||||
if ( wouldBeNullAllele(bases) ) {
|
if ( wouldBeNullAllele(bases) ) {
|
||||||
bases = EMPTY_ALLELE_BASES;
|
throw new IllegalArgumentException("Null alleles are not supported");
|
||||||
isNull = true;
|
}
|
||||||
} else if ( wouldBeNoCallAllele(bases) ) {
|
|
||||||
bases = EMPTY_ALLELE_BASES;
|
// no-calls are represented as no bases
|
||||||
|
if ( wouldBeNoCallAllele(bases) ) {
|
||||||
|
this.bases = EMPTY_ALLELE_BASES;
|
||||||
isNoCall = true;
|
isNoCall = true;
|
||||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||||
} else if ( wouldBeSymbolicAllele(bases) ) {
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( wouldBeSymbolicAllele(bases) ) {
|
||||||
isSymbolic = true;
|
isSymbolic = true;
|
||||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
|
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
|
||||||
}
|
}
|
||||||
// else
|
else {
|
||||||
// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
bases = BaseUtils.convertToUpperCase(bases);
|
||||||
|
}
|
||||||
|
|
||||||
this.isRef = isRef;
|
this.isRef = isRef;
|
||||||
this.bases = bases;
|
this.bases = bases;
|
||||||
|
|
@ -126,8 +130,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
private final static Allele ALT_T = new Allele("T", false);
|
private final static Allele ALT_T = new Allele("T", false);
|
||||||
private final static Allele REF_N = new Allele("N", true);
|
private final static Allele REF_N = new Allele("N", true);
|
||||||
private final static Allele ALT_N = new Allele("N", false);
|
private final static Allele ALT_N = new Allele("N", false);
|
||||||
private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true);
|
|
||||||
private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false);
|
|
||||||
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
|
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -154,7 +156,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
case '.':
|
case '.':
|
||||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||||
return NO_CALL;
|
return NO_CALL;
|
||||||
case '-': return isRef ? REF_NULL : ALT_NULL;
|
|
||||||
case 'A': case 'a' : return isRef ? REF_A : ALT_A;
|
case 'A': case 'a' : return isRef ? REF_A : ALT_A;
|
||||||
case 'C': case 'c' : return isRef ? REF_C : ALT_C;
|
case 'C': case 'c' : return isRef ? REF_C : ALT_C;
|
||||||
case 'G': case 'g' : return isRef ? REF_G : ALT_G;
|
case 'G': case 'g' : return isRef ? REF_G : ALT_G;
|
||||||
|
|
@ -179,14 +180,9 @@ public class Allele implements Comparable<Allele> {
|
||||||
public static Allele extend(Allele left, byte[] right) {
|
public static Allele extend(Allele left, byte[] right) {
|
||||||
if (left.isSymbolic())
|
if (left.isSymbolic())
|
||||||
throw new IllegalArgumentException("Cannot extend a symbolic allele");
|
throw new IllegalArgumentException("Cannot extend a symbolic allele");
|
||||||
byte[] bases = null;
|
byte[] bases = new byte[left.length() + right.length];
|
||||||
if ( left.length() == 0 )
|
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
|
||||||
bases = right;
|
System.arraycopy(right, 0, bases, left.length(), right.length);
|
||||||
else {
|
|
||||||
bases = new byte[left.length() + right.length];
|
|
||||||
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
|
|
||||||
System.arraycopy(right, 0, bases, left.length(), right.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
return create(bases, left.isReference());
|
return create(bases, left.isReference());
|
||||||
}
|
}
|
||||||
|
|
@ -242,7 +238,10 @@ public class Allele implements Comparable<Allele> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
|
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
|
||||||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
|
if ( wouldBeNullAllele(bases) )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
for (byte base : bases ) {
|
for (byte base : bases ) {
|
||||||
|
|
@ -299,11 +298,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
//Returns true if this is the null allele
|
|
||||||
public boolean isNull() { return isNull; }
|
|
||||||
// Returns true if this is not the null allele
|
|
||||||
public boolean isNonNull() { return ! isNull(); }
|
|
||||||
|
|
||||||
// Returns true if this is the NO_CALL allele
|
// Returns true if this is the NO_CALL allele
|
||||||
public boolean isNoCall() { return isNoCall; }
|
public boolean isNoCall() { return isNoCall; }
|
||||||
// Returns true if this is not the NO_CALL allele
|
// Returns true if this is not the NO_CALL allele
|
||||||
|
|
@ -319,7 +313,7 @@ public class Allele implements Comparable<Allele> {
|
||||||
|
|
||||||
// Returns a nice string representation of this object
|
// Returns a nice string representation of this object
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : "");
|
return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -384,27 +378,27 @@ public class Allele implements Comparable<Allele> {
|
||||||
* @return true if this and other are equal
|
* @return true if this and other are equal
|
||||||
*/
|
*/
|
||||||
public boolean equals(Allele other, boolean ignoreRefState) {
|
public boolean equals(Allele other, boolean ignoreRefState) {
|
||||||
return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
|
return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param test bases to test against
|
* @param test bases to test against
|
||||||
*
|
*
|
||||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||||
*/
|
*/
|
||||||
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
|
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param test bases to test against
|
* @param test bases to test against
|
||||||
*
|
*
|
||||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||||
*/
|
*/
|
||||||
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
|
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param test allele to test against
|
* @param test allele to test against
|
||||||
*
|
*
|
||||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||||
*/
|
*/
|
||||||
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
|
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
|
||||||
|
|
||||||
|
|
@ -421,10 +415,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
|
|
||||||
return getMatchingAllele(allAlleles, alleleBases.getBytes());
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
|
public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
|
||||||
for ( Allele a : allAlleles ) {
|
for ( Allele a : allAlleles ) {
|
||||||
if ( a.basesMatch(alleleBases) ) {
|
if ( a.basesMatch(alleleBases) ) {
|
||||||
|
|
@ -438,26 +428,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
return null; // couldn't find anything
|
return null; // couldn't find anything
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
|
|
||||||
List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());
|
|
||||||
|
|
||||||
for ( String alleleString : alleleStrings ) {
|
|
||||||
Allele allele = getMatchingAllele(possibleAlleles, alleleString);
|
|
||||||
|
|
||||||
if ( allele == null ) {
|
|
||||||
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
|
|
||||||
allele = create(alleleString);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
myAlleles.add(allele);
|
|
||||||
}
|
|
||||||
|
|
||||||
return myAlleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int compareTo(Allele other) {
|
public int compareTo(Allele other) {
|
||||||
if ( isReference() && other.isNonReference() )
|
if ( isReference() && other.isNonReference() )
|
||||||
return -1;
|
return -1;
|
||||||
|
|
@ -468,9 +438,6 @@ public class Allele implements Comparable<Allele> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
|
public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
|
||||||
if ( a1.isNull() || a2.isNull() )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if ( a2.length() >= a1.length() )
|
if ( a2.length() >= a1.length() )
|
||||||
return firstIsPrefixOfSecond(a1, a2);
|
return firstIsPrefixOfSecond(a1, a2);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
@Deprecated // ID is no longer stored in the attributes map
|
@Deprecated // ID is no longer stored in the attributes map
|
||||||
private final static String ID_KEY = "ID";
|
private final static String ID_KEY = "ID";
|
||||||
|
|
||||||
private final Byte REFERENCE_BASE_FOR_INDEL;
|
|
||||||
|
|
||||||
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
|
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
|
||||||
|
|
||||||
/** The location of this VariantContext */
|
/** The location of this VariantContext */
|
||||||
|
|
@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public enum Validation {
|
public enum Validation {
|
||||||
REF_PADDING,
|
|
||||||
ALLELES,
|
ALLELES,
|
||||||
GENOTYPES
|
GENOTYPES
|
||||||
}
|
}
|
||||||
|
|
@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
|
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
|
||||||
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
|
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
|
||||||
other.getFiltersMaybeNull(),
|
other.getFiltersMaybeNull(),
|
||||||
other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL,
|
other.getAttributes(),
|
||||||
other.fullyDecoded, NO_VALIDATION);
|
other.fullyDecoded, NO_VALIDATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
* @param log10PError qual
|
* @param log10PError qual
|
||||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||||
* @param attributes attributes
|
* @param attributes attributes
|
||||||
* @param referenceBaseForIndel padded reference base
|
|
||||||
* @param validationToPerform set of validation steps to take
|
* @param validationToPerform set of validation steps to take
|
||||||
*/
|
*/
|
||||||
protected VariantContext(final String source,
|
protected VariantContext(final String source,
|
||||||
|
|
@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
final double log10PError,
|
final double log10PError,
|
||||||
final Set<String> filters,
|
final Set<String> filters,
|
||||||
final Map<String, Object> attributes,
|
final Map<String, Object> attributes,
|
||||||
final Byte referenceBaseForIndel,
|
|
||||||
final boolean fullyDecoded,
|
final boolean fullyDecoded,
|
||||||
final EnumSet<Validation> validationToPerform ) {
|
final EnumSet<Validation> validationToPerform ) {
|
||||||
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
||||||
|
|
@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
|
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
|
||||||
|
|
||||||
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
||||||
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
|
|
||||||
|
|
||||||
// todo -- remove me when this check is no longer necessary
|
// todo -- remove me when this check is no longer necessary
|
||||||
if ( this.commonInfo.hasAttribute(ID_KEY) )
|
if ( this.commonInfo.hasAttribute(ID_KEY) )
|
||||||
|
|
@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
* in this VC is returned as the set of alleles in the subContext, even if
|
* in this VC is returned as the set of alleles in the subContext, even if
|
||||||
* some of those alleles aren't in the samples
|
* some of those alleles aren't in the samples
|
||||||
*
|
*
|
||||||
* @param sampleNames
|
* @param sampleNames the sample names
|
||||||
* @return
|
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
|
||||||
|
* @return new VariantContext subsetting to just the given samples
|
||||||
*/
|
*/
|
||||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||||
|
|
@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
*/
|
*/
|
||||||
public boolean isSimpleInsertion() {
|
public boolean isSimpleInsertion() {
|
||||||
// can't just call !isSimpleDeletion() because of complex indels
|
// can't just call !isSimpleDeletion() because of complex indels
|
||||||
return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
|
return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
*/
|
*/
|
||||||
public boolean isSimpleDeletion() {
|
public boolean isSimpleDeletion() {
|
||||||
// can't just call !isSimpleInsertion() because of complex indels
|
// can't just call !isSimpleInsertion() because of complex indels
|
||||||
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
|
return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
return ID;
|
return ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasReferenceBaseForIndel() {
|
|
||||||
return REFERENCE_BASE_FOR_INDEL != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// the indel base that gets stripped off for indels
|
|
||||||
public Byte getReferenceBaseForIndel() {
|
|
||||||
return REFERENCE_BASE_FOR_INDEL;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAlleleStringWithRefPadding(final Allele allele) {
|
|
||||||
if ( VCFAlleleClipper.needsPadding(this) )
|
|
||||||
return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
|
|
||||||
else
|
|
||||||
return allele.getDisplayString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
||||||
* for consistency with the multi-get function.
|
* for consistency with the multi-get function.
|
||||||
*
|
*
|
||||||
* @param sampleName
|
* @param sampleName the sample name
|
||||||
* @return
|
* @return mapping from sample name to genotype
|
||||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||||
*/
|
*/
|
||||||
public GenotypesContext getGenotypes(String sampleName) {
|
public GenotypesContext getGenotypes(String sampleName) {
|
||||||
|
|
@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
* For testing convenience only
|
* For testing convenience only
|
||||||
*
|
*
|
||||||
* @param sampleNames a unique list of sample names
|
* @param sampleNames a unique list of sample names
|
||||||
* @return
|
* @return subsetting genotypes context
|
||||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||||
*/
|
*/
|
||||||
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
|
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
|
||||||
|
|
@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
/**
|
/**
|
||||||
* Run all extra-strict validation tests on a Variant Context object
|
* Run all extra-strict validation tests on a Variant Context object
|
||||||
*
|
*
|
||||||
* @param reference the true reference allele
|
* @param reportedReference the reported reference allele
|
||||||
* @param paddedRefBase the reference base used for padding indels
|
* @param observedReference the actual reference allele
|
||||||
* @param rsIDs the true dbSNP IDs
|
* @param rsIDs the true dbSNP IDs
|
||||||
*/
|
*/
|
||||||
public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) {
|
public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
|
||||||
// validate the reference
|
// validate the reference
|
||||||
validateReferenceBases(reference, paddedRefBase);
|
validateReferenceBases(reportedReference, observedReference);
|
||||||
|
|
||||||
// validate the RS IDs
|
// validate the RS IDs
|
||||||
validateRSIDs(rsIDs);
|
validateRSIDs(rsIDs);
|
||||||
|
|
@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
//checkReferenceTrack();
|
//checkReferenceTrack();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
|
public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) {
|
||||||
if ( reference == null )
|
if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) {
|
||||||
return;
|
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString()));
|
||||||
|
|
||||||
// don't validate if we're a complex event
|
|
||||||
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
|
|
||||||
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// we also need to validate the padding base for simple indels
|
|
||||||
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
|
|
||||||
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
for (final Validation val : validationToPerform ) {
|
for (final Validation val : validationToPerform ) {
|
||||||
switch (val) {
|
switch (val) {
|
||||||
case ALLELES: validateAlleles(); break;
|
case ALLELES: validateAlleles(); break;
|
||||||
case REF_PADDING: validateReferencePadding(); break;
|
|
||||||
case GENOTYPES: validateGenotypes(); break;
|
case GENOTYPES: validateGenotypes(); break;
|
||||||
default: throw new IllegalArgumentException("Unexpected validation mode " + val);
|
default: throw new IllegalArgumentException("Unexpected validation mode " + val);
|
||||||
}
|
}
|
||||||
|
|
@ -1151,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
||||||
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
||||||
assert end != -1;
|
assert end != -1;
|
||||||
if ( end != getEnd() && end != getEnd() + 1 ) {
|
if ( end != getEnd() ) {
|
||||||
// the end is allowed to 1 bigger because of the padding
|
|
||||||
final String message = "Badly formed variant context at location " + getChr() + ":"
|
final String message = "Badly formed variant context at location " + getChr() + ":"
|
||||||
+ getStart() + "; getEnd() was " + getEnd()
|
+ getStart() + "; getEnd() was " + getEnd()
|
||||||
+ " but this VariantContext contains an END key with value " + end;
|
+ " but this VariantContext contains an END key with value " + end;
|
||||||
|
|
@ -1161,23 +1129,19 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException(message);
|
throw new ReviewedStingException(message);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
final long length = (stop - start) + 1;
|
||||||
|
if ( ! hasSymbolicAlleles() && length != getReference().length() ) {
|
||||||
|
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateReferencePadding() {
|
|
||||||
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
|
|
||||||
return;
|
|
||||||
|
|
||||||
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
|
|
||||||
|
|
||||||
if ( needsPadding && !hasReferenceBaseForIndel() )
|
|
||||||
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void validateAlleles() {
|
private void validateAlleles() {
|
||||||
// check alleles
|
|
||||||
boolean alreadySeenRef = false, alreadySeenNull = false;
|
boolean alreadySeenRef = false;
|
||||||
for ( Allele allele : alleles ) {
|
|
||||||
|
for ( final Allele allele : alleles ) {
|
||||||
// make sure there's only one reference allele
|
// make sure there's only one reference allele
|
||||||
if ( allele.isReference() ) {
|
if ( allele.isReference() ) {
|
||||||
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
|
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
|
||||||
|
|
@ -1187,26 +1151,11 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
if ( allele.isNoCall() ) {
|
if ( allele.isNoCall() ) {
|
||||||
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
|
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure there's only one null allele
|
|
||||||
if ( allele.isNull() ) {
|
|
||||||
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
|
|
||||||
alreadySeenNull = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure there's one reference allele
|
// make sure there's one reference allele
|
||||||
if ( ! alreadySeenRef )
|
if ( ! alreadySeenRef )
|
||||||
throw new IllegalArgumentException("No reference allele found in VariantContext");
|
throw new IllegalArgumentException("No reference allele found in VariantContext");
|
||||||
|
|
||||||
// if ( getType() == Type.INDEL ) {
|
|
||||||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
|
||||||
long length = (stop - start) + 1;
|
|
||||||
if ( ! hasSymbolicAlleles()
|
|
||||||
&& ((getReference().isNull() && length != 1 )
|
|
||||||
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
|
||||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateGenotypes() {
|
private void validateGenotypes() {
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,6 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
import com.google.java.contract.*;
|
import com.google.java.contract.*;
|
||||||
import org.broad.tribble.Feature;
|
|
||||||
import org.broad.tribble.TribbleException;
|
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -74,7 +71,6 @@ public class VariantContextBuilder {
|
||||||
private Set<String> filters = null;
|
private Set<String> filters = null;
|
||||||
private Map<String, Object> attributes = null;
|
private Map<String, Object> attributes = null;
|
||||||
private boolean attributesCanBeModified = false;
|
private boolean attributesCanBeModified = false;
|
||||||
private Byte referenceBaseForIndel = null;
|
|
||||||
|
|
||||||
/** enum of what must be validated */
|
/** enum of what must be validated */
|
||||||
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
|
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
|
||||||
|
|
@ -117,7 +113,6 @@ public class VariantContextBuilder {
|
||||||
this.genotypes = parent.genotypes;
|
this.genotypes = parent.genotypes;
|
||||||
this.ID = parent.getID();
|
this.ID = parent.getID();
|
||||||
this.log10PError = parent.getLog10PError();
|
this.log10PError = parent.getLog10PError();
|
||||||
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
|
|
||||||
this.source = parent.getSource();
|
this.source = parent.getSource();
|
||||||
this.start = parent.getStart();
|
this.start = parent.getStart();
|
||||||
this.stop = parent.getEnd();
|
this.stop = parent.getEnd();
|
||||||
|
|
@ -132,7 +127,6 @@ public class VariantContextBuilder {
|
||||||
this.genotypes = parent.genotypes;
|
this.genotypes = parent.genotypes;
|
||||||
this.ID = parent.ID;
|
this.ID = parent.ID;
|
||||||
this.log10PError = parent.log10PError;
|
this.log10PError = parent.log10PError;
|
||||||
this.referenceBaseForIndel = parent.referenceBaseForIndel;
|
|
||||||
this.source = parent.source;
|
this.source = parent.source;
|
||||||
this.start = parent.start;
|
this.start = parent.start;
|
||||||
this.stop = parent.stop;
|
this.stop = parent.stop;
|
||||||
|
|
@ -362,21 +356,6 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Tells us that the resulting VariantContext should use this byte for the reference base
|
|
||||||
* Null means no refBase is available
|
|
||||||
* @param referenceBaseForIndel
|
|
||||||
*/
|
|
||||||
public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) {
|
|
||||||
this.referenceBaseForIndel = referenceBaseForIndel;
|
|
||||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
|
|
||||||
return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells us that the resulting VariantContext should have source field set to source
|
* Tells us that the resulting VariantContext should have source field set to source
|
||||||
* @param source
|
* @param source
|
||||||
|
|
@ -401,7 +380,6 @@ public class VariantContextBuilder {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
toValidate.add(VariantContext.Validation.ALLELES);
|
toValidate.add(VariantContext.Validation.ALLELES);
|
||||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -416,7 +394,6 @@ public class VariantContextBuilder {
|
||||||
this.start = loc.getStart();
|
this.start = loc.getStart();
|
||||||
this.stop = loc.getStop();
|
this.stop = loc.getStop();
|
||||||
toValidate.add(VariantContext.Validation.ALLELES);
|
toValidate.add(VariantContext.Validation.ALLELES);
|
||||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -440,7 +417,6 @@ public class VariantContextBuilder {
|
||||||
public VariantContextBuilder start(final long start) {
|
public VariantContextBuilder start(final long start) {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
toValidate.add(VariantContext.Validation.ALLELES);
|
toValidate.add(VariantContext.Validation.ALLELES);
|
||||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -517,6 +493,6 @@ public class VariantContextBuilder {
|
||||||
public VariantContext make() {
|
public VariantContext make() {
|
||||||
return new VariantContext(source, ID, contig, start, stop, alleles,
|
return new VariantContext(source, ID, contig, start, stop, alleles,
|
||||||
genotypes, log10PError, filters, attributes,
|
genotypes, log10PError, filters, attributes,
|
||||||
referenceBaseForIndel, fullyDecoded, toValidate);
|
fullyDecoded, toValidate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,9 +64,9 @@ public class VariantContextUtils {
|
||||||
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
|
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
|
||||||
* the resulting VC with default diploid ./. genotypes
|
* the resulting VC with default diploid ./. genotypes
|
||||||
*
|
*
|
||||||
* @param vc
|
* @param vc the VariantContext
|
||||||
* @param allSamples
|
* @param allSamples all of the samples needed
|
||||||
* @return
|
* @return a new VariantContext with missing samples added
|
||||||
*/
|
*/
|
||||||
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
|
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
|
||||||
// TODO -- what's the fastest way to do this calculation?
|
// TODO -- what's the fastest way to do this calculation?
|
||||||
|
|
@ -376,9 +376,9 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @deprecated use variant context builder version instead
|
* @deprecated use variant context builder version instead
|
||||||
* @param vc
|
* @param vc the variant context
|
||||||
* @param keysToPreserve
|
* @param keysToPreserve the keys to preserve
|
||||||
* @return
|
* @return a pruned version of the original variant context
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
||||||
|
|
@ -486,14 +486,13 @@ public class VariantContextUtils {
|
||||||
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
|
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
|
||||||
verifyUniqueSampleNames(unsortedVCs);
|
verifyUniqueSampleNames(unsortedVCs);
|
||||||
|
|
||||||
final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||||
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
||||||
final List<VariantContext> VCs = new ArrayList<VariantContext>();
|
final List<VariantContext> VCs = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
for (final VariantContext vc : prepaddedVCs) {
|
for (final VariantContext vc : preFilteredVCs) {
|
||||||
// also a reasonable place to remove filtered calls, if needed
|
|
||||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||||
VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc));
|
VCs.add(vc);
|
||||||
}
|
}
|
||||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -547,9 +546,6 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
filters.addAll(vc.getFilters());
|
filters.addAll(vc.getFilters());
|
||||||
|
|
||||||
if ( referenceBaseForIndel == null )
|
|
||||||
referenceBaseForIndel = vc.getReferenceBaseForIndel();
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// add attributes
|
// add attributes
|
||||||
//
|
//
|
||||||
|
|
@ -661,10 +657,9 @@ public class VariantContextUtils {
|
||||||
builder.genotypes(genotypes);
|
builder.genotypes(genotypes);
|
||||||
builder.log10PError(log10PError);
|
builder.log10PError(log10PError);
|
||||||
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
||||||
builder.referenceBaseForIndel(referenceBaseForIndel);
|
|
||||||
|
|
||||||
// Trim the padded bases of all alleles if necessary
|
// Trim the padded bases of all alleles if necessary
|
||||||
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
|
final VariantContext merged = builder.make();
|
||||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
@ -700,73 +695,6 @@ public class VariantContextUtils {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
|
||||||
// see if we need to trim common reference base from all alleles
|
|
||||||
boolean trimVC;
|
|
||||||
|
|
||||||
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
|
|
||||||
Allele refAllele = inputVC.getReference();
|
|
||||||
if (!inputVC.isVariant())
|
|
||||||
trimVC = false;
|
|
||||||
else if (refAllele.isNull())
|
|
||||||
trimVC = false;
|
|
||||||
else {
|
|
||||||
trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
// nothing to do if we don't need to trim bases
|
|
||||||
if (trimVC) {
|
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
|
||||||
GenotypesContext genotypes = GenotypesContext.create();
|
|
||||||
|
|
||||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
|
||||||
|
|
||||||
for (final Allele a : inputVC.getAlleles()) {
|
|
||||||
if (a.isSymbolic()) {
|
|
||||||
alleles.add(a);
|
|
||||||
originalToTrimmedAlleleMap.put(a, a);
|
|
||||||
} else {
|
|
||||||
// get bases for current allele and create a new one with trimmed bases
|
|
||||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
|
|
||||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
|
||||||
alleles.add(trimmedAllele);
|
|
||||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
|
|
||||||
// example: mixed records such as {TA*,TGA,TG}
|
|
||||||
boolean hasNullAlleles = false;
|
|
||||||
|
|
||||||
for (final Allele a: originalToTrimmedAlleleMap.values()) {
|
|
||||||
if (a.isNull())
|
|
||||||
hasNullAlleles = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hasNullAlleles)
|
|
||||||
return inputVC;
|
|
||||||
// now we can recreate new genotypes with trimmed alleles
|
|
||||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
|
||||||
|
|
||||||
List<Allele> originalAlleles = genotype.getAlleles();
|
|
||||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
|
||||||
for ( final Allele a : originalAlleles ) {
|
|
||||||
if ( a.isCalled() )
|
|
||||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
|
||||||
else
|
|
||||||
trimmedAlleles.add(Allele.NO_CALL);
|
|
||||||
}
|
|
||||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
|
|
||||||
return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
return inputVC;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
||||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||||
|
|
||||||
|
|
@ -819,7 +747,7 @@ public class VariantContextUtils {
|
||||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
mappedVCs.get(vc.getType()).add(vc);
|
mappedVCs.get(vc.getType()).add(vc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return mappedVCs;
|
return mappedVCs;
|
||||||
|
|
@ -881,10 +809,10 @@ public class VariantContextUtils {
|
||||||
//
|
//
|
||||||
// refAllele: ACGTGA
|
// refAllele: ACGTGA
|
||||||
// myRef: ACGT
|
// myRef: ACGT
|
||||||
// myAlt: -
|
// myAlt: A
|
||||||
//
|
//
|
||||||
// We need to remap all of the alleles in vc to include the extra GA so that
|
// We need to remap all of the alleles in vc to include the extra GA so that
|
||||||
// myRef => refAllele and myAlt => GA
|
// myRef => refAllele and myAlt => AGA
|
||||||
//
|
//
|
||||||
|
|
||||||
Allele myRef = vc.getReference();
|
Allele myRef = vc.getReference();
|
||||||
|
|
@ -979,7 +907,7 @@ public class VariantContextUtils {
|
||||||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||||
for ( Allele originalAllele : vc.getAlleles() ) {
|
for ( Allele originalAllele : vc.getAlleles() ) {
|
||||||
Allele newAllele;
|
Allele newAllele;
|
||||||
if ( originalAllele.isNoCall() || originalAllele.isNull() )
|
if ( originalAllele.isNoCall() )
|
||||||
newAllele = originalAllele;
|
newAllele = originalAllele;
|
||||||
else
|
else
|
||||||
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
|
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
|
||||||
|
|
@ -1235,13 +1163,14 @@ public class VariantContextUtils {
|
||||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final Allele ref = vc.getReference();
|
final Allele refAllele = vc.getReference();
|
||||||
|
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||||
|
|
||||||
byte[] repeatUnit = null;
|
byte[] repeatUnit = null;
|
||||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||||
|
|
||||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
|
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||||
|
|
||||||
final int[] repetitionCount = result.first;
|
final int[] repetitionCount = result.first;
|
||||||
// repetition count = 0 means allele is not a tandem expansion of context
|
// repetition count = 0 means allele is not a tandem expansion of context
|
||||||
|
|
@ -1256,7 +1185,7 @@ public class VariantContextUtils {
|
||||||
repeatUnit = result.second;
|
repeatUnit = result.second;
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||||
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||||
System.out.println("RU:"+new String(repeatUnit));
|
System.out.println("RU:"+new String(repeatUnit));
|
||||||
}
|
}
|
||||||
|
|
@ -1405,4 +1334,113 @@ public class VariantContextUtils {
|
||||||
return start + Math.max(ref.length() - 1, 0);
|
return start + Math.max(ref.length() - 1, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean requiresPaddingBase(final List<String> alleles) {
|
||||||
|
|
||||||
|
// see whether one of the alleles would be null if trimmed through
|
||||||
|
|
||||||
|
for ( final String allele : alleles ) {
|
||||||
|
if ( allele.isEmpty() )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int clipping = 0;
|
||||||
|
Character currentBase = null;
|
||||||
|
|
||||||
|
while ( true ) {
|
||||||
|
for ( final String allele : alleles ) {
|
||||||
|
if ( allele.length() - clipping == 0 )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
char myBase = allele.charAt(clipping);
|
||||||
|
if ( currentBase == null )
|
||||||
|
currentBase = myBase;
|
||||||
|
else if ( currentBase != myBase )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
clipping++;
|
||||||
|
currentBase = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||||
|
|
||||||
|
// TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
||||||
|
|
||||||
|
// see whether we need to trim common reference base from all alleles
|
||||||
|
|
||||||
|
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
|
||||||
|
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||||
|
return inputVC;
|
||||||
|
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||||
|
|
||||||
|
for (final Allele a : inputVC.getAlleles()) {
|
||||||
|
if (a.isSymbolic()) {
|
||||||
|
alleles.add(a);
|
||||||
|
originalToTrimmedAlleleMap.put(a, a);
|
||||||
|
} else {
|
||||||
|
// get bases for current allele and create a new one with trimmed bases
|
||||||
|
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||||
|
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||||
|
alleles.add(trimmedAllele);
|
||||||
|
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now we can recreate new genotypes with trimmed alleles
|
||||||
|
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||||
|
final List<Allele> originalAlleles = genotype.getAlleles();
|
||||||
|
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||||
|
for ( final Allele a : originalAlleles ) {
|
||||||
|
if ( a.isCalled() )
|
||||||
|
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||||
|
else
|
||||||
|
trimmedAlleles.add(Allele.NO_CALL);
|
||||||
|
}
|
||||||
|
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
||||||
|
final byte[] ref,
|
||||||
|
final int forwardClipping,
|
||||||
|
final boolean allowFullClip) {
|
||||||
|
int clipping = 0;
|
||||||
|
boolean stillClipping = true;
|
||||||
|
|
||||||
|
while ( stillClipping ) {
|
||||||
|
for ( final Allele a : unclippedAlleles ) {
|
||||||
|
if ( a.isSymbolic() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||||
|
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||||
|
if ( a.length() - clipping == 0 )
|
||||||
|
return clipping - (allowFullClip ? 0 : 1);
|
||||||
|
|
||||||
|
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||||
|
stillClipping = false;
|
||||||
|
}
|
||||||
|
else if ( ref.length == clipping ) {
|
||||||
|
if ( allowFullClip )
|
||||||
|
stillClipping = false;
|
||||||
|
else
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||||
|
stillClipping = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( stillClipping )
|
||||||
|
clipping++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return clipping;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||||
final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
|
|
||||||
for ( Allele allele : vc.getAlleles() ) {
|
for ( Allele allele : vc.getAlleles() ) {
|
||||||
if ( needsPadding )
|
|
||||||
allele = VCFAlleleClipper.padAllele(vc, allele);
|
|
||||||
final byte[] s = allele.getDisplayBases();
|
final byte[] s = allele.getDisplayBases();
|
||||||
if ( s == null )
|
if ( s == null )
|
||||||
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);
|
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);
|
||||||
|
|
|
||||||
|
|
@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
|
||||||
super.add(vc);
|
super.add(vc);
|
||||||
|
|
||||||
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
|
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
|
||||||
executeTest("testFastaAlternateReferenceIndels", spec2);
|
executeTest("testFastaAlternateReferenceIndels", spec2);
|
||||||
|
|
||||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||||
|
|
|
||||||
|
|
@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider {
|
||||||
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
||||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||||
|
|
||||||
|
String refAllele, altAllele;
|
||||||
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
if (eventLength == 0) {
|
||||||
Allele refAllele, altAllele;
|
// SNP case
|
||||||
if (eventLength == 0) {// SNP case
|
refAllele = new String(new byte[]{referenceContext.getBase()});
|
||||||
refAllele =Allele.create(referenceContext.getBase(),true);
|
altAllele = new String(altBases.substring(0,1));
|
||||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
|
||||||
|
|
||||||
} else if (eventLength>0){
|
} else if (eventLength>0){
|
||||||
// insertion
|
// insertion
|
||||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
refAllele = "";
|
||||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
altAllele = altBases.substring(0,eventLength);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// deletion
|
// deletion
|
||||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
|
||||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
altAllele = "";
|
||||||
}
|
}
|
||||||
int stop = loc.getStart();
|
|
||||||
vcAlleles.add(refAllele);
|
|
||||||
vcAlleles.add(altAllele);
|
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
|
||||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
|
||||||
builder.alleles(vcAlleles);
|
|
||||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
|
||||||
builder.noGenotypes();
|
|
||||||
|
|
||||||
final VariantContext vc = builder.make();
|
|
||||||
|
|
||||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||||
|
|
||||||
for (String sample: sampleNames) {
|
for (String sample: sampleNames) {
|
||||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
|
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
|
||||||
contexts.put(sample,context);
|
contexts.put(sample,context);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider {
|
||||||
rg.setSample(name);
|
rg.setSample(name);
|
||||||
return rg;
|
return rg;
|
||||||
}
|
}
|
||||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
|
||||||
|
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
|
||||||
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
||||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
int readStart = contigStart;
|
|
||||||
int offset = (contigStop-contigStart+1)/2;
|
int offset = (contigStop-contigStart+1)/2;
|
||||||
int refAlleleLength = 0;
|
int refAlleleLength = refAllele.length();
|
||||||
int readCounter = 0;
|
|
||||||
int alleleCounter = 0;
|
|
||||||
for (Allele allele: vc.getAlleles()) {
|
|
||||||
if (allele.isReference())
|
|
||||||
refAlleleLength = allele.getBases().length;
|
|
||||||
|
|
||||||
int alleleLength = allele.getBases().length;
|
|
||||||
|
|
||||||
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
|
||||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
|
||||||
if (addErrors)
|
|
||||||
addBaseErrors(readBases, phredScaledErrorRate);
|
|
||||||
|
|
||||||
byte[] readQuals = new byte[readBases.length];
|
|
||||||
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
|
|
||||||
|
|
||||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
|
||||||
read.setBaseQualities(readQuals);
|
|
||||||
read.setReadBases(readBases);
|
|
||||||
read.setReadName(artificialReadName+readCounter++);
|
|
||||||
|
|
||||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
|
||||||
if (allele.isReference())
|
|
||||||
read.setCigarString(readBases.length + "M");
|
|
||||||
else {
|
|
||||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
|
||||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
|
||||||
if (isBeforeDeletion || isBeforeInsertion)
|
|
||||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
|
||||||
(readBases.length-offset)+"M");
|
|
||||||
else // SNP case
|
|
||||||
read.setCigarString(readBases.length+"M");
|
|
||||||
}
|
|
||||||
|
|
||||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
|
||||||
read.setReadPairedFlag(false);
|
|
||||||
read.setAlignmentStart(readStart);
|
|
||||||
read.setMappingQuality(artificialMappingQuality);
|
|
||||||
read.setReferenceName(loc.getContig());
|
|
||||||
read.setReadNegativeStrandFlag(false);
|
|
||||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
|
||||||
|
|
||||||
|
|
||||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
|
||||||
}
|
|
||||||
alleleCounter++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
|
||||||
|
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
|
||||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
|
||||||
|
|
||||||
|
int alleleLength = allele.length();
|
||||||
|
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
|
|
||||||
|
int readCounter = 0;
|
||||||
|
for ( int d = 0; d < numReadsPerAllele; d++ ) {
|
||||||
|
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||||
|
if (addErrors)
|
||||||
|
addBaseErrors(readBases, phredScaledErrorRate);
|
||||||
|
|
||||||
|
byte[] readQuals = new byte[readBases.length];
|
||||||
|
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
|
||||||
|
|
||||||
|
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||||
|
read.setBaseQualities(readQuals);
|
||||||
|
read.setReadBases(readBases);
|
||||||
|
read.setReadName(artificialReadName+readCounter++);
|
||||||
|
|
||||||
|
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||||
|
if (isReference)
|
||||||
|
read.setCigarString(readBases.length + "M");
|
||||||
|
else {
|
||||||
|
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||||
|
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||||
|
if (isBeforeDeletion || isBeforeInsertion)
|
||||||
|
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||||
|
(readBases.length-offset)+"M");
|
||||||
|
else // SNP case
|
||||||
|
read.setCigarString(readBases.length+"M");
|
||||||
|
}
|
||||||
|
|
||||||
|
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||||
|
read.setReadPairedFlag(false);
|
||||||
|
read.setAlignmentStart(readStart);
|
||||||
|
read.setMappingQuality(artificialMappingQuality);
|
||||||
|
read.setReferenceName(loc.getContig());
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||||
|
|
||||||
|
|
||||||
|
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||||
|
}
|
||||||
|
|
||||||
|
return pileupElements;
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
|
||||||
// create haplotype based on a particular allele
|
// create haplotype based on a particular allele
|
||||||
String prefix = refBases.substring(offset);
|
String prefix = refBases.substring(0, offset);
|
||||||
String alleleBases = new String(allele.getBases());
|
|
||||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||||
|
|
||||||
return (prefix+alleleBases+postfix).getBytes();
|
return (prefix+allele+postfix).getBytes();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {
|
private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||||
|
|
||||||
Assert.assertEquals(alleles.size(),2);
|
Assert.assertEquals(alleles.size(),2);
|
||||||
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
|
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
eventLength = 3;
|
eventLength = 3;
|
||||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||||
Assert.assertEquals(alleles.size(),2);
|
Assert.assertEquals(alleles.size(),2);
|
||||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||||
|
|
||||||
// same with min Reads = 11
|
// same with min Reads = 11
|
||||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||||
|
|
@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||||
|
|
||||||
Assert.assertEquals(alleles.size(),2);
|
Assert.assertEquals(alleles.size(),2);
|
||||||
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
|
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||||
|
|
||||||
altBases = "CCTCNTGAGA";
|
altBases = "CCTCNTGAGA";
|
||||||
eventLength = 5;
|
eventLength = 5;
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
||||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||||
testArgs += " --virtualPrimerSize 30";
|
testArgs += " --virtualPrimerSize 30";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||||
Arrays.asList("27f9450afa132888a8994167f0035fd7"));
|
Arrays.asList("240d99b58f73985fb114abe9044c0271"));
|
||||||
executeTest("Test probes", spec);
|
executeTest("Test probes", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
||||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||||
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
|
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||||
Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
|
Arrays.asList("6e7789445e29d91979a21e78d3d53295"));
|
||||||
executeTest("Test probes", spec);
|
executeTest("Test probes", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
||||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||||
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
|
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||||
Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
|
Arrays.asList("18d7236208db603e143b40db06ef2aca"));
|
||||||
executeTest("Test probes", spec);
|
executeTest("Test probes", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
|
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
|
||||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
|
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
|
||||||
|
|
||||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
|
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
|
||||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
|
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
|
||||||
|
|
||||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
|
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
|
||||||
|
|
||||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); }
|
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
|
||||||
|
|
||||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
|
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("test bad ref allele in deletion", spec);
|
executeTest("test bad ref allele in deletion", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComplexEvents() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString("complexEvents.vcf", "ALL"),
|
||||||
|
0,
|
||||||
|
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
|
||||||
|
);
|
||||||
|
|
||||||
|
executeTest("test validating complex events", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
||||||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
||||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
||||||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
||||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
||||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
||||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
||||||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
||||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
||||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
||||||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||||
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
||||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
||||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -1,226 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
|
||||||
import org.testng.Assert;
|
|
||||||
import org.testng.SkipException;
|
|
||||||
import org.testng.annotations.DataProvider;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
public class VCFAlleleClipperUnitTest extends BaseTest {
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Test allele clipping
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
private class ClipAllelesTest extends TestDataProvider {
|
|
||||||
final int position;
|
|
||||||
final int stop;
|
|
||||||
final String ref;
|
|
||||||
List<Allele> inputs;
|
|
||||||
List<Allele> expected;
|
|
||||||
|
|
||||||
@Requires("arg.length % 2 == 0")
|
|
||||||
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
|
|
||||||
super(ClipAllelesTest.class);
|
|
||||||
this.position = position;
|
|
||||||
this.stop = stop;
|
|
||||||
this.ref = arg[0];
|
|
||||||
|
|
||||||
int n = arg.length / 2;
|
|
||||||
inputs = new ArrayList<Allele>(n);
|
|
||||||
expected = new ArrayList<Allele>(n);
|
|
||||||
|
|
||||||
for ( int i = 0; i < n; i++ ) {
|
|
||||||
final boolean ref = i % n == 0;
|
|
||||||
inputs.add(Allele.create(arg[i], ref));
|
|
||||||
}
|
|
||||||
for ( int i = n; i < arg.length; i++ ) {
|
|
||||||
final boolean ref = i % n == 0;
|
|
||||||
expected.add(Allele.create(arg[i], ref));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isClipped() {
|
|
||||||
for ( int i = 0; i < inputs.size(); i++ ) {
|
|
||||||
if ( inputs.get(i).length() != expected.get(i).length() )
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@DataProvider(name = "ClipAllelesTest")
|
|
||||||
public Object[][] makeClipAllelesTest() {
|
|
||||||
// do no harm
|
|
||||||
new ClipAllelesTest(10, 10, "A", "A");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
|
|
||||||
|
|
||||||
// insertions
|
|
||||||
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
|
|
||||||
|
|
||||||
// deletions
|
|
||||||
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
|
|
||||||
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
|
|
||||||
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
|
|
||||||
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
|
|
||||||
|
|
||||||
// multi-allelic insertion and deletions
|
|
||||||
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
|
|
||||||
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
|
|
||||||
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
|
|
||||||
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
|
|
||||||
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
|
|
||||||
|
|
||||||
// cannot be clipped
|
|
||||||
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
|
|
||||||
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
|
|
||||||
|
|
||||||
// symbolic
|
|
||||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
|
|
||||||
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
|
|
||||||
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
|
|
||||||
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
|
|
||||||
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
|
|
||||||
|
|
||||||
// symbolic with alleles that should be clipped
|
|
||||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
|
|
||||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
|
|
||||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
|
|
||||||
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
|
|
||||||
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
|
|
||||||
|
|
||||||
// clipping from both ends
|
|
||||||
//
|
|
||||||
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES NOT WORK WITH ALLELES CLIPPED FROM THE END
|
|
||||||
//
|
|
||||||
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
|
|
||||||
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
|
|
||||||
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
|
|
||||||
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
|
|
||||||
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
|
|
||||||
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
|
|
||||||
|
|
||||||
// complex substitutions
|
|
||||||
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
|
|
||||||
|
|
||||||
return ClipAllelesTest.getTests(ClipAllelesTest.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(dataProvider = "ClipAllelesTest")
|
|
||||||
public void testClipAllelesTest(ClipAllelesTest cfg) {
|
|
||||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
|
||||||
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
|
|
||||||
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
|
|
||||||
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
|
|
||||||
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
|
|
||||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
|
||||||
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
|
|
||||||
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
|
|
||||||
|
|
||||||
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
|
|
||||||
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
|
|
||||||
|
|
||||||
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
|
|
||||||
|
|
||||||
if ( cfg.isClipped() ) {
|
|
||||||
// TODO
|
|
||||||
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
|
|
||||||
// TODO actually the original stop, as the original stop is +1 its true size.
|
|
||||||
// TODO
|
|
||||||
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
|
|
||||||
|
|
||||||
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
|
||||||
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
|
|
||||||
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
|
|
||||||
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
|
|
||||||
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// basic allele clipping test
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
|
||||||
final String ref;
|
|
||||||
final List<Allele> alleles = new ArrayList<Allele>();
|
|
||||||
final int expectedClip;
|
|
||||||
|
|
||||||
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
|
||||||
super(ReverseClippingPositionTestProvider.class);
|
|
||||||
this.ref = ref;
|
|
||||||
for ( final String allele : alleles )
|
|
||||||
this.alleles.add(Allele.create(allele));
|
|
||||||
this.expectedClip = expectedClip;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
|
||||||
public Object[][] makeReverseClippingPositionTestProvider() {
|
|
||||||
// pair clipping
|
|
||||||
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
|
||||||
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
|
||||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
|
||||||
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
|
||||||
|
|
||||||
// triplets
|
|
||||||
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
|
||||||
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
|
||||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
|
||||||
|
|
||||||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
|
||||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
|
||||||
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
|
||||||
Assert.assertEquals(result, cfg.expectedClip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testReadingLowerCaseBases() {
|
||||||
|
String testVCF = privateTestDir + "lowercaseBases.vcf";
|
||||||
|
|
||||||
|
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
|
|
||||||
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d"));
|
||||||
|
executeTest("Test reading VCF with lower-case bases", spec1);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testReadingAndWriting1000GSVs() {
|
public void testReadingAndWriting1000GSVs() {
|
||||||
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
|
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
|
||||||
|
|
@ -57,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
|
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284"));
|
||||||
executeTest("Test reading and writing samtools vcf", spec1);
|
executeTest("Test reading and writing samtools vcf", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -66,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
String testVCF = privateTestDir + "ex2.vcf";
|
String testVCF = privateTestDir + "ex2.vcf";
|
||||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
|
||||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,8 +37,6 @@ import org.testng.annotations.Test;
|
||||||
// public Allele(byte[] bases, boolean isRef) {
|
// public Allele(byte[] bases, boolean isRef) {
|
||||||
// public Allele(boolean isRef) {
|
// public Allele(boolean isRef) {
|
||||||
// public Allele(String bases, boolean isRef) {
|
// public Allele(String bases, boolean isRef) {
|
||||||
// public boolean isNullAllele() { return length() == 0; }
|
|
||||||
// public boolean isNonNullAllele() { return ! isNullAllele(); }
|
|
||||||
// public boolean isReference() { return isRef; }
|
// public boolean isReference() { return isRef; }
|
||||||
// public boolean isNonReference() { return ! isReference(); }
|
// public boolean isNonReference() { return ! isReference(); }
|
||||||
// public byte[] getBases() { return bases; }
|
// public byte[] getBases() { return bases; }
|
||||||
|
|
@ -49,13 +47,10 @@ import org.testng.annotations.Test;
|
||||||
* Basic unit test for Allele
|
* Basic unit test for Allele
|
||||||
*/
|
*/
|
||||||
public class AlleleUnitTest {
|
public class AlleleUnitTest {
|
||||||
Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall;
|
Allele ARef, A, T, ATIns, ATCIns, NoCall;
|
||||||
|
|
||||||
@BeforeSuite
|
@BeforeSuite
|
||||||
public void before() {
|
public void before() {
|
||||||
del = Allele.create("-");
|
|
||||||
delRef = Allele.create("-", true);
|
|
||||||
|
|
||||||
A = Allele.create("A");
|
A = Allele.create("A");
|
||||||
ARef = Allele.create("A", true);
|
ARef = Allele.create("A", true);
|
||||||
T = Allele.create("T");
|
T = Allele.create("T");
|
||||||
|
|
@ -72,8 +67,6 @@ public class AlleleUnitTest {
|
||||||
Assert.assertFalse(A.isReference());
|
Assert.assertFalse(A.isReference());
|
||||||
Assert.assertTrue(A.basesMatch("A"));
|
Assert.assertTrue(A.basesMatch("A"));
|
||||||
Assert.assertEquals(A.length(), 1);
|
Assert.assertEquals(A.length(), 1);
|
||||||
Assert.assertTrue(A.isNonNull());
|
|
||||||
Assert.assertFalse(A.isNull());
|
|
||||||
|
|
||||||
Assert.assertTrue(ARef.isReference());
|
Assert.assertTrue(ARef.isReference());
|
||||||
Assert.assertFalse(ARef.isNonReference());
|
Assert.assertFalse(ARef.isNonReference());
|
||||||
|
|
@ -92,8 +85,8 @@ public class AlleleUnitTest {
|
||||||
Assert.assertFalse(NoCall.isReference());
|
Assert.assertFalse(NoCall.isReference());
|
||||||
Assert.assertFalse(NoCall.basesMatch("."));
|
Assert.assertFalse(NoCall.basesMatch("."));
|
||||||
Assert.assertEquals(NoCall.length(), 0);
|
Assert.assertEquals(NoCall.length(), 0);
|
||||||
Assert.assertTrue(NoCall.isNonNull());
|
Assert.assertTrue(NoCall.isNoCall());
|
||||||
Assert.assertFalse(NoCall.isNull());
|
Assert.assertFalse(NoCall.isCalled());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -103,16 +96,6 @@ public class AlleleUnitTest {
|
||||||
Assert.assertEquals(ATCIns.length(), 3);
|
Assert.assertEquals(ATCIns.length(), 3);
|
||||||
Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
|
Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
|
||||||
Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
|
Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
|
||||||
|
|
||||||
Assert.assertTrue(del.isNonReference());
|
|
||||||
Assert.assertFalse(delRef.isNonReference());
|
|
||||||
Assert.assertFalse(del.isReference());
|
|
||||||
Assert.assertTrue(delRef.isReference());
|
|
||||||
Assert.assertFalse(del.basesMatch("-"));
|
|
||||||
Assert.assertTrue(del.basesMatch(""));
|
|
||||||
Assert.assertEquals(del.length(), 0);
|
|
||||||
Assert.assertFalse(del.isNonNull());
|
|
||||||
Assert.assertTrue(del.isNull());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -128,18 +111,6 @@ public class AlleleUnitTest {
|
||||||
Assert.assertFalse(a1.equals(a4));
|
Assert.assertFalse(a1.equals(a4));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDelConstructors() {
|
|
||||||
Allele a1 = Allele.create("-");
|
|
||||||
Allele a2 = Allele.create("-".getBytes());
|
|
||||||
Allele a3 = Allele.create("");
|
|
||||||
Allele a4 = Allele.create("", true);
|
|
||||||
|
|
||||||
Assert.assertTrue(a1.equals(a2));
|
|
||||||
Assert.assertTrue(a1.equals(a3));
|
|
||||||
Assert.assertFalse(a1.equals(a4));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testInsConstructors() {
|
public void testInsConstructors() {
|
||||||
Allele a1 = Allele.create("AC");
|
Allele a1 = Allele.create("AC");
|
||||||
|
|
@ -156,7 +127,6 @@ public class AlleleUnitTest {
|
||||||
public void testEquals() {
|
public void testEquals() {
|
||||||
Assert.assertTrue(ARef.basesMatch(A));
|
Assert.assertTrue(ARef.basesMatch(A));
|
||||||
Assert.assertFalse(ARef.equals(A));
|
Assert.assertFalse(ARef.equals(A));
|
||||||
Assert.assertFalse(ARef.equals(del));
|
|
||||||
Assert.assertFalse(ARef.equals(ATIns));
|
Assert.assertFalse(ARef.equals(ATIns));
|
||||||
Assert.assertFalse(ARef.equals(ATCIns));
|
Assert.assertFalse(ARef.equals(ATCIns));
|
||||||
|
|
||||||
|
|
@ -164,11 +134,6 @@ public class AlleleUnitTest {
|
||||||
Assert.assertFalse(T.basesMatch(A));
|
Assert.assertFalse(T.basesMatch(A));
|
||||||
Assert.assertFalse(T.equals(A));
|
Assert.assertFalse(T.equals(A));
|
||||||
|
|
||||||
Assert.assertTrue(del.basesMatch(del));
|
|
||||||
Assert.assertTrue(del.basesMatch(delRef));
|
|
||||||
Assert.assertTrue(del.equals(del));
|
|
||||||
Assert.assertFalse(del.equals(delRef));
|
|
||||||
|
|
||||||
Assert.assertTrue(ATIns.equals(ATIns));
|
Assert.assertTrue(ATIns.equals(ATIns));
|
||||||
Assert.assertFalse(ATIns.equals(ATCIns));
|
Assert.assertFalse(ATIns.equals(ATCIns));
|
||||||
Assert.assertTrue(ATIns.basesMatch("AT"));
|
Assert.assertTrue(ATIns.basesMatch("AT"));
|
||||||
|
|
@ -209,7 +174,6 @@ public class AlleleUnitTest {
|
||||||
public void testExtend() {
|
public void testExtend() {
|
||||||
Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
|
Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
|
||||||
Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
|
Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
|
||||||
Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString());
|
|
||||||
Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
|
Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
|
||||||
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
|
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
|
||||||
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());
|
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());
|
||||||
|
|
|
||||||
|
|
@ -225,10 +225,10 @@ public class VariantContextTestProvider {
|
||||||
add(builder());
|
add(builder());
|
||||||
add(builder().alleles("A"));
|
add(builder().alleles("A"));
|
||||||
add(builder().alleles("A", "C", "T"));
|
add(builder().alleles("A", "C", "T"));
|
||||||
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
|
add(builder().alleles("A", "AC"));
|
||||||
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
|
add(builder().alleles("A", "ACAGT"));
|
||||||
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
|
add(builder().loc("1", 10, 11).alleles("AC", "A"));
|
||||||
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
|
add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
|
||||||
|
|
||||||
// make sure filters work
|
// make sure filters work
|
||||||
add(builder().unfiltered());
|
add(builder().unfiltered());
|
||||||
|
|
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
|
||||||
|
|
||||||
sites.add(builder().alleles("A").make());
|
sites.add(builder().alleles("A").make());
|
||||||
sites.add(builder().alleles("A", "C", "T").make());
|
sites.add(builder().alleles("A", "C", "T").make());
|
||||||
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
|
sites.add(builder().alleles("A", "AC").make());
|
||||||
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
|
sites.add(builder().alleles("A", "ACAGT").make());
|
||||||
|
|
||||||
for ( VariantContext site : sites ) {
|
for ( VariantContext site : sites ) {
|
||||||
addGenotypes(site);
|
addGenotypes(site);
|
||||||
|
|
|
||||||
|
|
@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
int snpLocStart = 10;
|
int snpLocStart = 10;
|
||||||
int snpLocStop = 10;
|
int snpLocStop = 10;
|
||||||
|
|
||||||
// - / ATC [ref] from 20-23
|
// - / ATC [ref] from 20-22
|
||||||
String delLoc = "chr1";
|
String delLoc = "chr1";
|
||||||
int delLocStart = 20;
|
int delLocStart = 20;
|
||||||
int delLocStop = 23;
|
int delLocStop = 22;
|
||||||
|
|
||||||
// - [ref] / ATC from 20-20
|
// - [ref] / ATC from 20-20
|
||||||
String insLoc = "chr1";
|
String insLoc = "chr1";
|
||||||
int insLocStart = 20;
|
int insLocStart = 20;
|
||||||
int insLocStop = 20;
|
int insLocStop = 20;
|
||||||
|
|
||||||
// - / A / T / ATC [ref] from 20-23
|
|
||||||
String mixedLoc = "chr1";
|
|
||||||
int mixedLocStart = 20;
|
|
||||||
int mixedLocStop = 23;
|
|
||||||
|
|
||||||
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
|
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
|
||||||
|
|
||||||
@BeforeSuite
|
@BeforeSuite
|
||||||
public void before() {
|
public void before() {
|
||||||
del = Allele.create("-");
|
del = Allele.create("A");
|
||||||
delRef = Allele.create("-", true);
|
delRef = Allele.create("A", true);
|
||||||
|
|
||||||
A = Allele.create("A");
|
A = Allele.create("A");
|
||||||
C = Allele.create("C");
|
C = Allele.create("C");
|
||||||
|
|
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void beforeTest() {
|
public void beforeTest() {
|
||||||
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||||
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||||
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
|
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCreatingDeletionVariantContext() {
|
public void testCreatingDeletionVariantContext() {
|
||||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||||
|
|
||||||
Assert.assertEquals(vc.getChr(), delLoc);
|
Assert.assertEquals(vc.getChr(), delLoc);
|
||||||
Assert.assertEquals(vc.getStart(), delLocStart);
|
Assert.assertEquals(vc.getStart(), delLocStart);
|
||||||
|
|
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testMatchingAlleles() {
|
public void testMatchingAlleles() {
|
||||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||||
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
|
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
|
||||||
|
|
||||||
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
|
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
|
||||||
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
|
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
|
||||||
|
|
@ -386,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAccessingCompleteGenotypes() {
|
public void testAccessingCompleteGenotypes() {
|
||||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
List<Allele> alleles = Arrays.asList(Aref, T, ATC);
|
||||||
|
|
||||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||||
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
|
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
|
||||||
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
|
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
|
||||||
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||||
|
|
||||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
||||||
|
|
@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(10, vc.getCalledChrCount());
|
Assert.assertEquals(10, vc.getCalledChrCount());
|
||||||
Assert.assertEquals(3, vc.getCalledChrCount(Aref));
|
Assert.assertEquals(3, vc.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(4, vc.getCalledChrCount(T));
|
Assert.assertEquals(4, vc.getCalledChrCount(T));
|
||||||
Assert.assertEquals(3, vc.getCalledChrCount(del));
|
Assert.assertEquals(3, vc.getCalledChrCount(ATC));
|
||||||
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
|
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
public void testAccessingRefGenotypes() {
|
public void testAccessingRefGenotypes() {
|
||||||
List<Allele> alleles1 = Arrays.asList(Aref, T);
|
List<Allele> alleles1 = Arrays.asList(Aref, T);
|
||||||
List<Allele> alleles2 = Arrays.asList(Aref);
|
List<Allele> alleles2 = Arrays.asList(Aref);
|
||||||
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
|
List<Allele> alleles3 = Arrays.asList(Aref, T);
|
||||||
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
|
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
|
||||||
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
|
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
|
||||||
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
|
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
|
||||||
|
|
@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFilters() {
|
public void testFilters() {
|
||||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||||
|
|
||||||
|
|
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRepeatAllele() {
|
public void testRepeatAllele() {
|
||||||
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
Allele nullR = Allele.create("A", true);
|
||||||
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
Allele nullA = Allele.create("A", false);
|
||||||
Allele atc = Allele.create("ATC", false);
|
Allele atc = Allele.create("AATC", false);
|
||||||
Allele atcatc = Allele.create("ATCATC", false);
|
Allele atcatc = Allele.create("AATCATC", false);
|
||||||
Allele ccccR = Allele.create("CCCC", true);
|
Allele ccccR = Allele.create("ACCCC", true);
|
||||||
Allele cc = Allele.create("CC", false);
|
Allele cc = Allele.create("ACC", false);
|
||||||
Allele cccccc = Allele.create("CCCCCC", false);
|
Allele cccccc = Allele.create("ACCCCCC", false);
|
||||||
Allele gagaR = Allele.create("GAGA", true);
|
Allele gagaR = Allele.create("AGAGA", true);
|
||||||
Allele gagagaga = Allele.create("GAGAGAGA", false);
|
Allele gagagaga = Allele.create("AGAGAGAGA", false);
|
||||||
|
|
||||||
Pair<List<Integer>,byte[]> result;
|
Pair<List<Integer>,byte[]> result;
|
||||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||||
|
|
@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||||
|
|
||||||
|
|
||||||
// -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||||
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
||||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||||
Assert.assertEquals(result.getSecond().length,3);
|
Assert.assertEquals(result.getSecond().length,3);
|
||||||
|
|
||||||
// ATC*,-,ATCATC
|
// ATC*,A,ATCATC
|
||||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make();
|
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
|
||||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
||||||
|
|
@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
||||||
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
||||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
||||||
|
|
@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
// GAGA*,-,GAGAGAGA
|
// GAGA*,-,GAGAGAGA
|
||||||
refBytes = "TGAGAGAGAGATTT".getBytes();
|
refBytes = "TGAGAGAGAGATTT".getBytes();
|
||||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
||||||
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
||||||
|
|
@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVCFfromGenotypes() {
|
public void testVCFfromGenotypes() {
|
||||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||||
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||||
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
|
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make();
|
||||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
|
||||||
|
|
||||||
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
|
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
|
||||||
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
|
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
|
||||||
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
|
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
|
||||||
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
|
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
|
||||||
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
|
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
|
||||||
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
|
|
||||||
|
|
||||||
Assert.assertTrue(vc12.isPolymorphicInSamples());
|
Assert.assertTrue(vc12.isPolymorphicInSamples());
|
||||||
Assert.assertTrue(vc23.isPolymorphicInSamples());
|
Assert.assertTrue(vc23.isPolymorphicInSamples());
|
||||||
Assert.assertTrue(vc1.isMonomorphicInSamples());
|
Assert.assertTrue(vc1.isMonomorphicInSamples());
|
||||||
Assert.assertTrue(vc4.isMonomorphicInSamples());
|
Assert.assertTrue(vc4.isMonomorphicInSamples());
|
||||||
Assert.assertTrue(vc14.isMonomorphicInSamples());
|
Assert.assertTrue(vc14.isMonomorphicInSamples());
|
||||||
Assert.assertTrue(vc5.isPolymorphicInSamples());
|
|
||||||
|
|
||||||
Assert.assertTrue(vc12.isSNP());
|
Assert.assertTrue(vc12.isSNP());
|
||||||
Assert.assertTrue(vc12.isVariant());
|
Assert.assertTrue(vc12.isVariant());
|
||||||
|
|
@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertFalse(vc14.isVariant());
|
Assert.assertFalse(vc14.isVariant());
|
||||||
Assert.assertFalse(vc14.isBiallelic());
|
Assert.assertFalse(vc14.isBiallelic());
|
||||||
|
|
||||||
Assert.assertTrue(vc5.isIndel());
|
|
||||||
Assert.assertTrue(vc5.isSimpleDeletion());
|
|
||||||
Assert.assertTrue(vc5.isVariant());
|
|
||||||
Assert.assertTrue(vc5.isBiallelic());
|
|
||||||
|
|
||||||
Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
|
Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
|
Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
|
Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
|
Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
|
Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
|
||||||
Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetGenotypeMethods() {
|
public void testGetGenotypeMethods() {
|
||||||
|
|
@ -664,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
@DataProvider(name = "getAlleles")
|
@DataProvider(name = "getAlleles")
|
||||||
public Object[][] mergeAllelesData() {
|
public Object[][] mergeAllelesData() {
|
||||||
new GetAllelesTest("A*", Aref);
|
new GetAllelesTest("A*", Aref);
|
||||||
new GetAllelesTest("-*", delRef);
|
|
||||||
new GetAllelesTest("A*/C", Aref, C);
|
new GetAllelesTest("A*/C", Aref, C);
|
||||||
new GetAllelesTest("A*/C/T", Aref, C, T);
|
new GetAllelesTest("A*/C/T", Aref, C, T);
|
||||||
new GetAllelesTest("A*/T/C", Aref, T, C);
|
new GetAllelesTest("A*/T/C", Aref, T, C);
|
||||||
new GetAllelesTest("A*/C/T/-", Aref, C, T, del);
|
new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC);
|
||||||
new GetAllelesTest("A*/T/C/-", Aref, T, C, del);
|
new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC);
|
||||||
new GetAllelesTest("A*/-/T/C", Aref, del, T, C);
|
new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C);
|
||||||
|
|
||||||
return GetAllelesTest.getTests(GetAllelesTest.class);
|
return GetAllelesTest.getTests(GetAllelesTest.class);
|
||||||
}
|
}
|
||||||
|
|
@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
@Test(dataProvider = "getAlleles")
|
@Test(dataProvider = "getAlleles")
|
||||||
public void testMergeAlleles(GetAllelesTest cfg) {
|
public void testMergeAlleles(GetAllelesTest cfg) {
|
||||||
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
|
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
|
||||||
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
|
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
|
||||||
|
|
||||||
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
|
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
|
||||||
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
|
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
|
||||||
|
|
@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
|
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
|
||||||
Assert.assertEquals(sub.getFilters(), vc.getFilters());
|
Assert.assertEquals(sub.getFilters(), vc.getFilters());
|
||||||
Assert.assertEquals(sub.getID(), vc.getID());
|
Assert.assertEquals(sub.getID(), vc.getID());
|
||||||
Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel());
|
|
||||||
Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
|
Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
|
||||||
|
|
||||||
Set<Genotype> expectedGenotypes = new HashSet<Genotype>();
|
Set<Genotype> expectedGenotypes = new HashSet<Genotype>();
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
|
Allele Aref, T, C, Cref, ATC, ATCATC;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
@BeforeSuite
|
@BeforeSuite
|
||||||
|
|
@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
// alleles
|
// alleles
|
||||||
Aref = Allele.create("A", true);
|
Aref = Allele.create("A", true);
|
||||||
Cref = Allele.create("C", true);
|
Cref = Allele.create("C", true);
|
||||||
delRef = Allele.create("-", true);
|
|
||||||
T = Allele.create("T");
|
T = Allele.create("T");
|
||||||
C = Allele.create("C");
|
C = Allele.create("C");
|
||||||
ATC = Allele.create("ATC");
|
ATC = Allele.create("ATC");
|
||||||
|
|
@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||||
int start = 10;
|
int start = 10;
|
||||||
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||||
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
|
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
Arrays.asList(Aref, C),
|
Arrays.asList(Aref, C),
|
||||||
Arrays.asList(Aref, T, C)); // in order of appearence
|
Arrays.asList(Aref, T, C)); // in order of appearence
|
||||||
|
|
||||||
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
|
new MergeAllelesTest(Arrays.asList(Aref),
|
||||||
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
|
Arrays.asList(Aref, ATC),
|
||||||
new MergeAllelesTest(Arrays.asList(delRef),
|
Arrays.asList(Aref, ATC));
|
||||||
Arrays.asList(Cref));
|
|
||||||
|
|
||||||
new MergeAllelesTest(Arrays.asList(delRef),
|
new MergeAllelesTest(Arrays.asList(Aref),
|
||||||
Arrays.asList(delRef, ATC),
|
Arrays.asList(Aref, ATC, ATCATC),
|
||||||
Arrays.asList(delRef, ATC));
|
Arrays.asList(Aref, ATC, ATCATC));
|
||||||
|
|
||||||
new MergeAllelesTest(Arrays.asList(delRef),
|
|
||||||
Arrays.asList(delRef, ATC, ATCATC),
|
|
||||||
Arrays.asList(delRef, ATC, ATCATC));
|
|
||||||
|
|
||||||
// alleles in the order we see them
|
// alleles in the order we see them
|
||||||
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
|
new MergeAllelesTest(Arrays.asList(Aref, ATCATC),
|
||||||
Arrays.asList(delRef, ATC, ATCATC),
|
Arrays.asList(Aref, ATC, ATCATC),
|
||||||
Arrays.asList(delRef, ATCATC, ATC));
|
Arrays.asList(Aref, ATCATC, ATC));
|
||||||
|
|
||||||
// same
|
// same
|
||||||
new MergeAllelesTest(Arrays.asList(delRef, ATC),
|
new MergeAllelesTest(Arrays.asList(Aref, ATC),
|
||||||
Arrays.asList(delRef, ATCATC),
|
Arrays.asList(Aref, ATCATC),
|
||||||
Arrays.asList(delRef, ATC, ATCATC));
|
Arrays.asList(Aref, ATC, ATCATC));
|
||||||
|
|
||||||
return MergeAllelesTest.getTests(MergeAllelesTest.class);
|
return MergeAllelesTest.getTests(MergeAllelesTest.class);
|
||||||
}
|
}
|
||||||
|
|
@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
// test alleles are equal
|
// test alleles are equal
|
||||||
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// basic allele clipping test
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
||||||
|
final String ref;
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final int expectedClip;
|
||||||
|
|
||||||
|
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
||||||
|
super(ReverseClippingPositionTestProvider.class);
|
||||||
|
this.ref = ref;
|
||||||
|
for ( final String allele : alleles )
|
||||||
|
this.alleles.add(Allele.create(allele));
|
||||||
|
this.expectedClip = expectedClip;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
||||||
|
public Object[][] makeReverseClippingPositionTestProvider() {
|
||||||
|
// pair clipping
|
||||||
|
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
||||||
|
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||||
|
|
||||||
|
// triplets
|
||||||
|
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
||||||
|
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||||
|
|
||||||
|
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||||
|
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||||
|
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||||
|
Assert.assertEquals(result, cfg.expectedClip);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
Allele A, Aref, T, Tref;
|
Allele A, Aref, T, Tref;
|
||||||
|
|
||||||
Allele del, delRef, ATC, ATCref;
|
Allele ATC, ATCref;
|
||||||
// A [ref] / T at 10
|
// A [ref] / T at 10
|
||||||
|
|
||||||
GenomeLoc snpLoc;
|
GenomeLoc snpLoc;
|
||||||
|
|
@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void before() {
|
public void before() {
|
||||||
del = Allele.create("-");
|
|
||||||
delRef = Allele.create("-", true);
|
|
||||||
|
|
||||||
A = Allele.create("A");
|
A = Allele.create("A");
|
||||||
Aref = Allele.create("A", true);
|
Aref = Allele.create("A", true);
|
||||||
T = Allele.create("T");
|
T = Allele.create("T");
|
||||||
|
|
|
||||||
|
|
@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
Map<String, Object> attributes = new HashMap<String,Object>();
|
Map<String, Object> attributes = new HashMap<String,Object>();
|
||||||
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
|
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
|
||||||
|
|
||||||
alleles.add(Allele.create("-",true));
|
alleles.add(Allele.create("A",true));
|
||||||
alleles.add(Allele.create("CC",false));
|
alleles.add(Allele.create("ACC",false));
|
||||||
|
|
||||||
attributes.put("DP","50");
|
attributes.put("DP","50");
|
||||||
for (String name : header.getGenotypeSamples()) {
|
for (String name : header.getGenotypeSamples()) {
|
||||||
|
|
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
genotypes.add(gt);
|
genotypes.add(gt);
|
||||||
}
|
}
|
||||||
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||||
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
|
.genotypes(genotypes).attributes(attributes).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue