Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-07-30 12:13:24 -04:00
commit 13591b169f
65 changed files with 740 additions and 1517 deletions

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads; package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -39,6 +41,7 @@ import java.util.Map;
* @since 10/30/11 * @since 10/30/11
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> { public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID; @Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;

View File

@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(), return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
referenceSampleVC.getAlleles()) referenceSampleVC.getAlleles())
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make()) .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
.make(); .make();
} }

View File

@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel; private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
/* /*
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap = private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() { new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final List<Allele> allAllelesToUse){ final List<Allele> allAllelesToUse){
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
List<Allele> alleles = pair.first;
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
haplotypeMap.clear(); haplotypeMap.clear();
@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
protected int getEndLocation(final RefMetaDataTracker tracker, protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final List<Allele> allelesToUse) { final List<Allele> allelesToUse) {
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded); return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
} }
} }

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -184,8 +183,13 @@ public class GenotypingEngine {
} }
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc, public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) { final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>(); final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
@ -423,24 +427,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart(); final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart(); final int nextStart = nextVC.getStart();
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); byte[] refBases = new byte[]{};
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()]; final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte); refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte); altBases = ArrayUtils.add(altBases, refByte);
} }
if( nextVC.hasReferenceBaseForIndel() ) { refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0; int iii = 0;
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
} }
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>(); final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -533,10 +534,10 @@ public class GenotypingEngine {
final int elementLength = ce.getLength(); final int elementLength = ce.getLength();
switch( ce.getOperator() ) { switch( ce.getOperator() ) {
case I: case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ); final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
boolean allN = true; boolean allN = true;
for( final byte b : insertionBases ) { for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
if( b != (byte) 'N' ) { if( insertionBases[i] != (byte) 'N' ) {
allN = false; allN = false;
break; break;
} }
@ -544,14 +545,13 @@ public class GenotypingEngine {
if( !allN ) { if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>(); final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1; final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else { } else {
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
insertionAlleles.add( Allele.create(insertionBases, false) ); insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} }
} }
@ -561,7 +561,7 @@ public class GenotypingEngine {
alignmentPos += elementLength; alignmentPos += elementLength;
break; break;
case D: case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength ); final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>(); final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1; final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen? // BUGBUG: how often does this symbolic deletion allele case happen?
@ -572,8 +572,8 @@ public class GenotypingEngine {
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else { //} else {
deletionAlleles.add( Allele.create(deletionBases, true) ); deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) ); deletionAlleles.add( Allele.create(ref[refPos-1], false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
//} //}
refPos += elementLength; refPos += elementLength;
break; break;

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -80,6 +82,7 @@ import java.util.*;
* </pre> * </pre>
* *
*/ */
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW)) @Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
public class HaplotypeResolver extends RodWalker<Integer, Integer> { public class HaplotypeResolver extends RodWalker<Integer, Integer> {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils; import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
@ -290,20 +289,22 @@ public class PoolGenotypeLikelihoodsUnitTest {
} }
@Test // TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
@Test (enabled = false)
public void testIndelErrorModel() { public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refByte = refPileupTestProvider.getRefByte(); final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA"; final String altBases = (char)refByte + "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>(); final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true)); trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create("TC", false)); trueAlleles.add(Allele.create((char)refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make(); genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000}; final int[] matchArray = {95, 995, 9995, 10000};
@ -333,12 +334,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC // create deletion VC
final int delLength = 4; final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>(); final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true)); delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false)); delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make(); genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) { for (int matches: matchArray) {
for (int mismatches: mismatchArray) { for (int mismatches: mismatchArray) {
@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte(); final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>(); final Set<String> laneIDs = new TreeSet<String>();
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE); laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs) for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel); noisyErrorModels.put(laneID, Q30ErrorModel);
final int refIdx = 0;
int altIdx = 2;
// ref allele must be first
allAlleles.add(Allele.create(refByte, true));
for (byte b: BaseUtils.BASES) { for (byte b: BaseUtils.BASES) {
if (refByte == b) if (refByte != b) {
allAlleles.add(Allele.create(b,true)); if (b == altByte)
else altIdx = allAlleles.size();
allAlleles.add(Allele.create(b, false)); allAlleles.add(Allele.create(b, false));
}
} }
PrintStream out = null; PrintStream out = null;

View File

@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + ref + SNP = MNP with ref base gap // SNP + ref + SNP = MNP with ref base gap
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + SNP // insertion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + insertion // SNP + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + SNP // deletion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + deletion // SNP + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion = MNP // insertion + deletion = MNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion // insertion + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + insertion // insertion + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + deletion // deletion + deletion
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make(); thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make(); truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC); logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex // complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
} }
/** /**

View File

@ -30,7 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testHaplotypeCallerMultiSampleGGA() { public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6"); HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88");
} }
private void HCTestComplexVariants(String bam, String args, String md5) { private void HCTestComplexVariants(String bam, String args, String md5) {
@ -43,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleComplex() { public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
} }
} }

View File

@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator; import java.util.Iterator;
@ -46,6 +48,7 @@ import java.util.Iterator;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class AlignmentValidation extends ReadWalker<Integer,Integer> { public class AlignmentValidation extends ReadWalker<Integer,Integer> {
/** /**
* The supporting BWT index generated using BWT. * The supporting BWT index generated using BWT.

View File

@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
@ -50,6 +52,7 @@ import java.io.File;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("Align") @WalkerName("Align")
public class AlignmentWalker extends ReadWalker<Integer,Integer> { public class AlignmentWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +

View File

@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream; import java.io.PrintStream;
@ -48,6 +50,7 @@ import java.util.TreeMap;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountBestAlignments extends ReadWalker<Integer,Integer> { public class CountBestAlignments extends ReadWalker<Integer,Integer> {
/** /**
* The supporting BWT index generated using BWT. * The supporting BWT index generated using BWT.

View File

@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing // can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 ) if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage())); exitSystemWithUserError(new UserException(t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
} }
/** /**

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -17,8 +19,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Computes the coverage per sample. * Computes the coverage per sample for every position (use with -L argument!).
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CoverageBySample extends LocusWalker<Integer, Integer> { public class CoverageBySample extends LocusWalker<Integer, Integer> {
@Output @Output
protected PrintStream out; protected PrintStream out;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream; import java.io.PrintStream;
@ -46,6 +48,7 @@ import java.io.PrintStream;
* *
* @author aaron * @author aaron
*/ */
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> { public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
// the possible diploid genotype strings // the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override @Override
public VariantContext convert(String name, Object input, ReferenceContext ref) { public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
// add the reference allele if ( index < 0 )
List<Allele> alleles = new ArrayList<Allele>(); return null; // we weren't given enough reference context to create the VariantContext
alleles.add(refAllele);
// add all of the alt alleles final byte refBaseForIndel = ref.getBases()[index];
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
Map<String, Object> attributes = new HashMap<String, Object>(); boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; addPaddingBase = false;
if ( index < 0 ) else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
return null; // we weren't given enough reference context to create the VariantContext addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
Byte refBaseForIndel = new Byte(ref.getBases()[index]); else
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make();
} else
return null; // can't handle anything else return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles);
return builder.make();
}
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
} }
} }
@ -351,7 +366,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd(); long end = hapmap.getEnd();
if ( deletionLength > 0 ) if ( deletionLength > 0 )
end += deletionLength; end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make(); VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc; return vc;
} }
} }

View File

@ -42,10 +42,6 @@ import java.util.List;
*/ */
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() ) if ( g == null || !g.isCalled() )
return; return;
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( vc.isSNP() ) if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb); annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() ) else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb); annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
} }
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>(); HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() ) for ( Allele allele : vc.getAlleles() )
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts); gb.AD(counts);
} }
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup(); ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null ) if ( pileup == null )
return; return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>(); final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final Allele refAllele = vc.getReference(); final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) { for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
} }
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) { if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases(); final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(b) ) { if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(b, alleleCounts.get(b)+1); alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
} }
} else if ( p.isBeforeDeletionStart() ) { } else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) { if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC // this is indeed the deletion allele recorded in VC
final String b = DEL; final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(b) ) { if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(b, alleleCounts.get(b)+1); alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
}
} }
}
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) { } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1); alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
} }
} }
int[] counts = new int[alleleCounts.size()]; final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE); counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) ); counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts); gb.AD(counts);
} }
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases() // public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); } public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

View File

@ -250,8 +250,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
// Beagle always produces genotype strings based on the strings we input in the likelihood file. // Beagle always produces genotype strings based on the strings we input in the likelihood file.
String refString = vc_input.getReference().getDisplayString(); String refString = vc_input.getReference().getDisplayString();
if (refString.length() == 0) // ref was null
refString = Allele.NULL_ALLELE_STRING;
Allele bglAlleleA, bglAlleleB; Allele bglAlleleA, bglAlleleB;

View File

@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
for ( Allele allele : preferredVC.getAlleles() ) { for ( Allele allele : preferredVC.getAlleles() ) {
String bglPrintString; String bglPrintString;
if (allele.isNoCall() || allele.isNull()) if (allele.isNoCall())
bglPrintString = "-"; bglPrintString = "-";
else else
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

View File

@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
// write out the alleles at this site // write out the alleles at this site
for ( Allele allele : vc.getAlleles() ) { for ( Allele allele : vc.getAlleles() ) {
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" "); beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
} }
// write out sample level genotypes // write out sample level genotypes

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator; import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
@ -80,6 +82,7 @@ import java.util.*;
* @author Mauricio Carneiro, Roger Zurawicki * @author Mauricio Carneiro, Roger Zurawicki
* @since 5/8/12 * @since 5/8/12
*/ */
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@By(value = DataSource.READS) @By(value = DataSource.READS)
@PartitionBy(PartitionType.INTERVAL) @PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> { public class DiagnoseTargets extends LocusWalker<Long, Long> {

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -33,9 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.CONTIG) @PartitionBy(PartitionType.CONTIG)
@ActiveRegionExtension(extension = 0, maxRegion = 50000) @ActiveRegionExtension(extension = 0, maxRegion = 50000)
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> { public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {

View File

@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
continue; continue;
if ( vc.isSimpleDeletion()) { if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length(); deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one // delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase); return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) { } else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) { } else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString()); return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} }

View File

@ -26,17 +26,20 @@
package org.broadinstitute.sting.gatk.walkers.fasta; package org.broadinstitute.sting.gatk.walkers.fasta;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;
/** /**
* Calculates basic statistics about the reference sequence itself * Calculates basic statistics about the reference sequence itself
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> { public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
@Output PrintStream out; @Output PrintStream out;

View File

@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
// get ref bases of accurate deletion // get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen; stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
if (Allele.acceptableAlleleBases(refBases, false)) { if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true); refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); altAllele = Allele.create(ref.getBase(), false);
} }
else continue; // don't go on with this allele if refBases are non-standard else continue; // don't go on with this allele if refBases are non-standard
} else { } else {
// insertion case // insertion case
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions final String insertionBases = (char)ref.getBase() + s; // add reference padding
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
altAllele = Allele.create(s, false); refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);
stop = loc.getStart(); stop = loc.getStart();
} }
else continue; // go on to next allele if consensus insertion has any non-standard base. else continue; // go on to next allele if consensus insertion has any non-standard base.
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
final VariantContextBuilder builder = new VariantContextBuilder().source(""); final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop); builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele)); builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes(); builder.noGenotypes();
if (doMultiAllelicCalls) { if (doMultiAllelicCalls) {
vcs.add(builder.make()); vcs.add(builder.make());

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false; private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel; private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap = private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() { new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() { protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>()); indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear(); haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
if (alleleList.isEmpty()) if (alleleList.isEmpty())
return null; return null;
} }
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty()) if (haplotypeMap == null || haplotypeMap.isEmpty())
return null; return null;
// start making the VariantContext // start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int eventLength = getEventLength(alleleList); final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create(); GenotypesContext genotypes = GenotypesContext.create();
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get(); return indelLikelihoodMap.get();
} }
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0);
int endLoc = loc.getStart() + refAllele.length()-1;
if (allelesArePadded)
endLoc++;
return endLoc;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList, public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref, final ReferenceContext ref,
final GenomeLoc loc, final GenomeLoc loc,
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref, final ReferenceContext ref,
final Map<String, AlignmentContext> contexts, final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType, final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser, final GenomeLocParser locParser,
final UnifiedArgumentCollection UAC, final UnifiedArgumentCollection UAC,
final boolean ignoreSNPAllelesWhenGenotypingIndels) { final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null; VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
// ignore places where we don't have a variant // ignore places where we don't have a variant
if (vc == null) if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false); return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) { if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else { } else {
alleles.addAll(vc.getAlleles()); alleles.addAll(vc.getAlleles());
} }
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else { } else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
} }
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded); return alleles;
} }
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine {
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null ) if ( vcInput == null )
return null; return null;
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make(); vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make();
} else { } else {
// deal with bad/non-standard reference bases // deal with bad/non-standard reference bases
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine {
builder.log10PError(phredScaledConfidence/-10.0); builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) ) if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter); builder.filters(filter);
if ( limitedContext ) {
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
} else {
builder.referenceBaseForIndel(refContext.getBase());
}
// create the genotypes // create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy); final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
@ -493,8 +487,8 @@ public class UnifiedGenotyperEngine {
// if we are subsetting alleles (either because there were too many or because some were not polymorphic) // if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall); vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
if ( annotationEngine != null && !limitedContext ) { if ( annotationEngine != null && !limitedContext ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations

View File

@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for ( VariantContext knownIndel : knownIndelsToTry ) { for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue; continue;
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); final byte[] indelStr;
if ( knownIndel.isSimpleInsertion() ) {
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
} else {
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
}
int start = knownIndel.getStart() - leftmostIndex + 1; int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null ) if ( c != null )

View File

@ -1131,12 +1131,13 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
final byte referencePaddingBase = refBases[(int)start-1];
if ( call.getVariant() == null ) { if ( call.getVariant() == null ) {
// we will need to cteate genotype with two (hom) ref alleles (below). // we will need to create genotype with two (hom) ref alleles (below).
// we can not use 'alleles' list here, since that list is supposed to contain // we can not use 'alleles' list here, since that list is supposed to contain
// only *distinct* alleles observed at the site or VCFContext will frown upon us... // only *distinct* alleles observed at the site or VCFContext will frown upon us...
alleles.add( Allele.create(refBases[(int)start-1],true) ); alleles.add( Allele.create(referencePaddingBase,true) );
homref_alleles = new ArrayList<Allele>(2); homref_alleles = new ArrayList<Allele>(2);
homref_alleles.add( alleles.get(0)); homref_alleles.add( alleles.get(0));
homref_alleles.add( alleles.get(0)); homref_alleles.add( alleles.get(0));
@ -1145,7 +1146,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
// (Genotype will tell us whether it is an actual call or not!) // (Genotype will tell us whether it is an actual call or not!)
int event_length = call.getVariant().lengthOnRef(); int event_length = call.getVariant().lengthOnRef();
if ( event_length < 0 ) event_length = 0; if ( event_length < 0 ) event_length = 0;
fillAlleleList(alleles,call); fillAlleleList(alleles,call,referencePaddingBase);
stop += event_length; stop += event_length;
} }
@ -1165,7 +1166,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
filters.add("NoCall"); filters.add("NoCall");
} }
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
.genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make(); .genotypes(genotypes).filters(filters).make();
vcf.add(vc); vcf.add(vc);
} }
@ -1175,16 +1176,16 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
* @param l * @param l
* @param call * @param call
*/ */
private void fillAlleleList(List<Allele> l, IndelPrecall call) { private void fillAlleleList(List<Allele> l, IndelPrecall call, byte referencePaddingBase) {
int event_length = call.getVariant().lengthOnRef(); int event_length = call.getVariant().lengthOnRef();
if ( event_length == 0 ) { // insertion if ( event_length == 0 ) { // insertion
l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) ); l.add( Allele.create(referencePaddingBase,true) );
l.add( Allele.create(call.getVariant().getBases(), false )); l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
} else { //deletion: } else { //deletion:
l.add( Allele.create(call.getVariant().getBases(), true )); l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); l.add( Allele.create(referencePaddingBase,false) );
} }
} }
@ -1218,19 +1219,20 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
// } // }
boolean homRefT = ( tCall.getVariant() == null ); boolean homRefT = ( tCall.getVariant() == null );
boolean homRefN = ( nCall.getVariant() == null ); boolean homRefN = ( nCall.getVariant() == null );
final byte referencePaddingBase = refBases[(int)start-1];
if ( tCall.getVariant() == null && nCall.getVariant() == null) { if ( tCall.getVariant() == null && nCall.getVariant() == null) {
// no indel at all ; create base-representation ref/ref alleles for genotype construction // no indel at all ; create base-representation ref/ref alleles for genotype construction
alleles.add( Allele.create(refBases[(int)start-1],true) ); alleles.add( Allele.create(referencePaddingBase,true) );
} else { } else {
// we got indel(s) // we got indel(s)
int event_length = 0; int event_length = 0;
if ( tCall.getVariant() != null ) { if ( tCall.getVariant() != null ) {
// indel in tumor // indel in tumor
event_length = tCall.getVariant().lengthOnRef(); event_length = tCall.getVariant().lengthOnRef();
fillAlleleList(alleles, tCall); fillAlleleList(alleles, tCall, referencePaddingBase);
} else { } else {
event_length = nCall.getVariant().lengthOnRef(); event_length = nCall.getVariant().lengthOnRef();
fillAlleleList(alleles, nCall); fillAlleleList(alleles, nCall, referencePaddingBase);
} }
if ( event_length > 0 ) stop += event_length; if ( event_length > 0 ) stop += event_length;
} }
@ -1262,7 +1264,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
} }
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
.genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make(); .genotypes(genotypes).filters(filters).attributes(attrs).make();
vcf.add(vc); vcf.add(vc);
} }

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -70,6 +72,7 @@ import java.util.*;
* </pre> * </pre>
* *
*/ */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> { public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
@Output @Output
public PrintStream out; public PrintStream out;

View File

@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
sequenceInvalid = true; sequenceInvalid = true;
invReason.add("SITE_IS_FILTERED"); invReason.add("SITE_IS_FILTERED");
} }
String refString = validate.getReference().getDisplayString();
String altString = validate.getAlternateAllele(0).getDisplayString();
if ( validate.isIndel() ) { if ( validate.isIndel() ) {
sequence.append(Character.toUpperCase((char)ref.getBase())); sequence.append(Character.toUpperCase((char)ref.getBase()));
rawSequence.append(Character.toUpperCase((char)ref.getBase())); rawSequence.append(Character.toUpperCase((char)ref.getBase()));
final byte[] refAllele = validate.getReference().getBases();
refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length));
if ( refString.isEmpty() )
refString = "-";
final byte[] altAllele = validate.getAlternateAllele(0).getBases();
altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length));
if ( altString.isEmpty() )
altString = "-";
} }
sequence.append('['); sequence.append('[');
sequence.append(validate.getAlternateAllele(0).toString()); sequence.append(altString);
sequence.append('/'); sequence.append('/');
sequence.append(validate.getReference().toString()); sequence.append(refString);
sequence.append(']'); sequence.append(']');
// do this to the raw sequence to -- the indeces will line up that way // do this to the raw sequence to -- the indeces will line up that way
rawSequence.append('['); rawSequence.append('[');
rawSequence.append(validate.getAlternateAllele(0).getBaseString()); rawSequence.append(altString);
rawSequence.append('/'); rawSequence.append('/');
rawSequence.append(validate.getReference().getBaseString()); rawSequence.append(refString);
rawSequence.append(']'); rawSequence.append(']');
allelePos = ref.getLocus(); allelePos = ref.getLocus();
if ( indelCounter > 0 ) { if ( indelCounter > 0 ) {

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable {
final protected GenomeLoc loc; final protected GenomeLoc loc;
/** A set of the alleles segregating in this context */ /** A set of the alleles segregating in this context */
final protected List<Allele> alleles; final protected List<Allele> alleles;
final protected Byte refBase;
// final protected HashMap<String, Object> attributes; // final protected HashMap<String, Object> attributes;
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes, public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes) {
byte base) {
this.loc = parser.createGenomeLoc(contig, start, stop); this.loc = parser.createGenomeLoc(contig, start, stop);
this.alleles = alleles; this.alleles = alleles;
this.refBase = base;
// this.attributes = attributes; // this.attributes = attributes;
} }
@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable {
public VariantContext createVariantContextFromEvent() { public VariantContext createVariantContextFromEvent() {
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles) return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.log10PError(0.0).referenceBaseForIndel(refBase).make(); .log10PError(0.0).make();
} }
} }

View File

@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
// create bare-bones event and log in corresponding bin // create bare-bones event and log in corresponding bin
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
binnedEventArray[binIndex].add(event); binnedEventArray[binIndex].add(event);

View File

@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
} }
// create bare-bones event and log in corresponding bin // create bare-bones event and log in corresponding bin
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
binnedEventArray.add(event); binnedEventArray.add(event);
} }

View File

@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
//increment stats for pairwise mismatches //increment stats for pairwise mismatches
for (Allele allele : genotype.getAlleles()) { for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) { if (allele.isCalled()) {
String alleleString = allele.toString(); String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0); alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);

View File

@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
final byte[] refSeq = ref.getBases(); final byte[] refSeq = ref.getBases();
// get the indel length // get the indel length
int indelLength; final int indelLength;
if ( vc.isSimpleDeletion() ) if ( vc.isSimpleDeletion() )
indelLength = vc.getReference().length(); indelLength = vc.getReference().length() - 1;
else else
indelLength = vc.getAlternateAllele(0).length(); indelLength = vc.getAlternateAllele(0).length() - 1;
if ( indelLength > 200 ) { if ( indelLength > 200 ) {
writer.add(vc); writer.add(vc);
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
} }
// create an indel haplotype // create an indel haplotype
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength); final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
// create a CIGAR string to represent the event // create a CIGAR string to represent the event
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
int indelIndex = originalIndex-difference; final int indelIndex = originalIndex-difference;
byte[] newBases = new byte[indelLength]; final byte[] newBases = new byte[indelLength + 1];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); newBases[0] = refSeq[indelIndex-1];
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele);
writer.add(newVC); writer.add(newVC);
return 1; return 1;
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
if ( vc.isSimpleDeletion() ) { if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength; indexOfRef += indelLength;
} else { } else {
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
currentPos += indelLength; currentPos += indelLength;
} }
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
return hap; return hap;
} }
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
// create a mapping from original allele to new allele // create a mapping from original allele to new allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
if ( newAllele.isReference() ) { if ( newAllele.isReference() ) {
alleleMap.put(vc.getReference(), newAllele); alleleMap.put(vc.getReference(), newAllele);
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0)); alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
} else { } else {
alleleMap.put(vc.getReference(), vc.getReference()); alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
alleleMap.put(vc.getAlternateAllele(0), newAllele); alleleMap.put(vc.getAlternateAllele(0), newAllele);
} }
@ -229,6 +230,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
} }
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make(); return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
} }
} }

View File

@ -119,7 +119,6 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( toInterval != null ) { if ( toInterval != null ) {
// check whether the strand flips, and if so reverse complement everything // check whether the strand flips, and if so reverse complement everything
// TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) { if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
vc = VariantContextUtils.reverseComplement(vc); vc = VariantContextUtils.reverseComplement(vc);
} }
@ -132,11 +131,10 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
.attribute("OriginalStart", fromInterval.getStart()).make(); .attribute("OriginalStart", fromInterval.getStart()).make();
} }
VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0))); originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
} }
writer.add(vc); writer.add(vc);

View File

@ -130,35 +130,16 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
return; return;
// get the true reference allele // get the true reference allele
Allele reportedRefAllele = vc.getReference(); final Allele reportedRefAllele = vc.getReference();
Allele observedRefAllele = null; final int refLength = reportedRefAllele.length();
// insertions if ( refLength > 100 ) {
if ( vc.isSimpleInsertion() ) { logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); return;
} }
// deletions
else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
return;
}
// deletions are associated with the (position of) the last (preceding) non-deleted base; final byte[] observedRefBases = new byte[refLength];
// hence to get actually deleted bases we need offset = 1 System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
int offset = vc.isMNP() ? 0 : 1; final Allele observedRefAllele = Allele.create(observedRefBases);
byte[] refBytes = ref.getBases();
byte[] trueRef = new byte[reportedRefAllele.length()];
for (int i = 0; i < reportedRefAllele.length(); i++)
trueRef[i] = refBytes[i+offset];
observedRefAllele = Allele.create(trueRef, true);
}
// SNPs, etc. but not mixed types because they are too difficult
else if ( !vc.isMixed() ) {
byte[] refByte = new byte[1];
refByte[0] = ref.getBase();
observedRefAllele = Allele.create(refByte, true);
}
// get the RS IDs // get the RS IDs
Set<String> rsIDs = null; Set<String> rsIDs = null;
@ -171,10 +152,10 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
try { try {
switch( type ) { switch( type ) {
case ALL: case ALL:
vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
break; break;
case REF: case REF:
vc.validateReferenceBases(observedRefAllele, ref.getBase()); vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
break; break;
case IDS: case IDS:
vc.validateRSIDs(rsIDs); vc.validateRSIDs(rsIDs);

View File

@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
getters.put("REF", new Getter() { getters.put("REF", new Getter() {
public String get(VariantContext vc) { public String get(VariantContext vc) {
StringBuilder x = new StringBuilder(); StringBuilder x = new StringBuilder();
x.append(vc.getAlleleStringWithRefPadding(vc.getReference())); x.append(vc.getReference().getDisplayString());
return x.toString(); return x.toString();
} }
}); });
@ -393,7 +393,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
for ( int i = 0; i < n; i++ ) { for ( int i = 0; i < n; i++ ) {
if ( i != 0 ) x.append(","); if ( i != 0 ) x.append(",");
x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i))); x.append(vc.getAlternateAllele(i));
} }
return x.toString(); return x.toString();
} }
@ -435,11 +435,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
private static Object splitAltAlleles(VariantContext vc) { private static Object splitAltAlleles(VariantContext vc) {
final int numAltAlleles = vc.getAlternateAlleles().size(); final int numAltAlleles = vc.getAlternateAlleles().size();
if ( numAltAlleles == 1 ) if ( numAltAlleles == 1 )
return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0)); return vc.getAlternateAllele(0);
final List<String> alleles = new ArrayList<String>(numAltAlleles); return vc.getAlternateAlleles();
for ( Allele allele : vc.getAlternateAlleles() )
alleles.add(vc.getAlleleStringWithRefPadding(allele));
return alleles;
} }
} }

View File

@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false) @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
protected String sampleName = null; protected String sampleName = null;
/**
* This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
*/
@Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
protected boolean fixReferenceBase = false;
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>(); private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
private boolean wroteHeader = false; private boolean wroteHeader = false;
private Set<String> samples; private Set<String> samples;
@ -140,10 +134,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
builder.genotypes(g); builder.genotypes(g);
} }
if ( fixReferenceBase ) {
builder.referenceBaseForIndel(ref.getBase());
}
writeRecord(builder.make(), tracker, ref.getLocus()); writeRecord(builder.make(), tracker, ref.getLocus());
} }
@ -169,8 +159,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
continue; continue;
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2); Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion())); alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
hapmap.setActualAlleles(alleleMap); hapmap.setActualAlleles(alleleMap);
// also, use the correct positioning for insertions // also, use the correct positioning for insertions

View File

@ -431,6 +431,37 @@ public class BaseUtils {
return new String(simpleComplement(bases.getBytes())); return new String(simpleComplement(bases.getBytes()));
} }
/**
* Returns the uppercased version of the bases
*
* @param bases the bases
* @return the upper cased version
*/
static public byte[] convertToUpperCase(final byte[] bases) {
for ( int i = 0; i < bases.length; i++ ) {
if ( (char)bases[i] >= 'a' )
bases[i] = toUpperCaseBase(bases[i]);
}
return bases;
}
static public byte toUpperCaseBase(final byte base) {
switch (base) {
case 'a':
return 'A';
case 'c':
return 'C';
case 'g':
return 'G';
case 't':
return 'T';
case 'n':
return 'N';
default:
return base;
}
}
/** /**
* Returns the index of the most common base in the basecounts array. To be used with * Returns the index of the most common base in the basecounts array. To be used with
* pileup.getBaseCounts. * pileup.getBaseCounts.

View File

@ -176,7 +176,7 @@ public class Haplotype {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
} }
} else if( refAllele.length() < altAllele.length() ) { // insertion } else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length(); final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length + altAlleleLength]; newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) { for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii]; newHaplotype[iii] = bases[iii];
@ -185,15 +185,16 @@ public class Haplotype {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
} }
for( int iii = 0; iii < altAlleleLength; iii++ ) { for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
} }
} else { // deletion } else { // deletion
final int shift = refAllele.length() - altAllele.length(); final int shift = refAllele.length() - altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length - shift]; newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
newHaplotype[iii] = bases[iii]; newHaplotype[iii] = bases[iii];
} }
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift]; newHaplotype[iii] = bases[iii+shift];
} }
} }
@ -204,8 +205,11 @@ public class Haplotype {
return new Haplotype(newHaplotype); return new Haplotype(newHaplotype);
} }
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref, public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
final int haplotypeSize, final int numPrefBases) { final int startPos,
final ReferenceContext ref,
final int haplotypeSize,
final int numPrefBases) {
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>(); LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
@ -216,7 +220,6 @@ public class Haplotype {
refAllele = a; refAllele = a;
break; break;
} }
} }
if (refAllele == null) if (refAllele == null)
@ -224,19 +227,12 @@ public class Haplotype {
byte[] refBases = ref.getBases(); byte[] refBases = ref.getBases();
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
// protect against long events that overrun available reference context // protect against long events that overrun available reference context
if (startAfter > refBases.length) final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
startAfter = refBases.length; final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
startAfter, refBases.length);
// Create location for all haplotypes // Create location for all haplotypes
final int startLoc = ref.getWindow().getStart() + startIdxInReference; final int startLoc = ref.getWindow().getStart() + startIdxInReference;
@ -244,16 +240,14 @@ public class Haplotype {
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
for (final Allele a : alleleList) { for (final Allele a : alleleList) {
byte[] alleleBases = a.getBases(); final byte[] alleleBases = a.getBases();
// use string concatenation // use string concatenation
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant); String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
haplotypeString = haplotypeString.substring(0,haplotypeSize); haplotypeString = haplotypeString.substring(0,haplotypeSize);
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
} }
return haplotypeMap; return haplotypeMap;

View File

@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
builder.id(id); builder.id(id);
} }
/**
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
* GATK encoding.
*
* @param position
* @param ref
* @param unclippedAlleles
* @return
*/
@Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
@Ensures("result.size() == unclippedAlleles.size()")
protected List<Allele> clipAllelesIfNecessary(final int position,
final String ref,
final List<Allele> unclippedAlleles) {
// the last argument of 1 allows us to safely ignore the end, because we are
// ultimately going to use the end in the record itself
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
if ( clipped.getError() != null ) error(clipped.getError());
return clipped.getClippedAlleles();
}
/** /**
* Decode the alleles from this BCF2 file and put the results in builder * Decode the alleles from this BCF2 file and put the results in builder
* @param builder * @param builder
@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
} }
assert ref != null; assert ref != null;
alleles = clipAllelesIfNecessary(pos, ref, alleles);
builder.alleles(alleles); builder.alleles(alleles);
assert ref.length() > 0; assert ref.length() > 0;
builder.referenceBaseForIndel(ref.getBytes()[0]);
return alleles; return alleles;
} }

View File

@ -256,9 +256,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
final Map<String, Object> attrs = parseInfo(parts[7]); final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs); builder.attributes(attrs);
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
} else {
builder.stop(pos + ref.length() - 1);
}
// get our alleles, filters, and setup an attribute map // get our alleles, filters, and setup an attribute map
final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo); final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs); builder.alleles(alleles);
// do we have genotyping data // do we have genotyping data
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) { if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
@ -275,7 +286,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
VariantContext vc = null; VariantContext vc = null;
try { try {
builder.referenceBaseForIndel(ref.getBytes()[0]);
vc = builder.make(); vc = builder.make();
} catch (Exception e) { } catch (Exception e) {
generateException(e.getMessage()); generateException(e.getMessage());
@ -284,31 +294,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
return vc; return vc;
} }
private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
final String ref,
final int pos,
final List<Allele> rawAlleles,
final Map<String, Object> attrs) {
int endForSymbolicAlleles = pos; // by default we use the pos
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
}
// find out our current location, and clip the alleles down to their minimum length
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
if ( clipped.getError() != null )
generateException(clipped.getError(), lineNo);
builder.stop(clipped.getStop());
builder.alleles(clipped.getClippedAlleles());
return clipped.getClippedAlleles();
}
/** /**
* get the name of this codec * get the name of this codec
* @return our set name * @return our set name

View File

@ -1,434 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
/**
* All of the gross allele clipping and padding routines in one place
*
* Having attempted to understand / fix / document this code myself
* I can only conclude that this entire approach needs to be rethought. This
* code just doesn't work robustly with symbolic alleles, with multiple alleles,
* requires a special "reference base for indels" stored in the VariantContext
* whose correctness isn't enforced, and overall has strange special cases
* all over the place.
*
* The reason this code is so complex is due to symbolics and multi-alleleic
* variation, which frequently occur when combining variants from multiple
* VCF files.
*
* TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
* TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
*
* @author Mark DePristo
* @since 6/12
*/
public final class VCFAlleleClipper {
private VCFAlleleClipper() { }
/**
* Determine whether we should clip off the first base of all unclippped alleles or not
*
* Returns true if all of the alleles in unclippedAlleles share a common first base with
* ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may
* contain the reference allele itself, or just the alternate alleles, it doesn't matter.
*
* The algorithm returns true if the first base should be clipped off, or false otherwise
*
* This algorithm works even in the presence of symbolic alleles, logically ignoring these
* values. It
*
* @param unclippedAlleles list of unclipped alleles to assay
* @param ref0 the first base of the reference allele
* @return true if we should clip the first base of unclippedAlleles
*/
@Requires("unclippedAlleles != null")
public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
final byte ref0) {
boolean allSymbolicAlt = true;
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() ) {
continue;
}
// already know we aren't symbolic, so we only need to decide if we have only seen a ref
if ( ! a.isReference() )
allSymbolicAlt = false;
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
return false;
}
}
// to reach here all alleles are consistent with clipping the first base matching ref0
// but we don't clip if all ALT alleles are symbolic
return ! allSymbolicAlt;
}
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
final byte[] ref,
final int forwardClipping,
final boolean allowFullClip) {
int clipping = 0;
boolean stillClipping = true;
while ( stillClipping ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
if ( a.length() - clipping == 0 )
return clipping - (allowFullClip ? 0 : 1);
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
stillClipping = false;
}
else if ( ref.length == clipping ) {
if ( allowFullClip )
stillClipping = false;
else
return -1;
}
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
stillClipping = false;
}
}
if ( stillClipping )
clipping++;
}
return clipping;
}
/**
* Are the alleles describing a polymorphism substitution one base for another?
*
* @param alleles a list of alleles, must not be empty
* @return Return true if the length of any allele in alleles isn't 1
*/
@Requires("!alleles.isEmpty()")
private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
for ( final Allele a : alleles ) {
if ( a.length() != 1 )
return false;
}
return true;
}
/**
* clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
*
* The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
* position, after clipping. It also contains the list of alleles, in the same order as the provided
* unclipped ones, that are the fully clipped version of the input alleles. If an error occurs
* during this option the getError() function returns a string describing the problem (for use in parsers).
*
* The basic operation are:
*
* single allele
* => stop == start and clipped == unclipped
* any number of single nucleotide events
* => stop == start and clipped == unclipped
* two alleles, second being symbolic
* => stop == start and clipped == unclipped
* Note in this case that the STOP should be computed by other means (from END in VCF, for example)
* Note that if there's more than two alleles and the second is a symbolic the code produces an error
* Any other case:
* The alleles are trimmed of any sequence shared at the end of the alleles. If N bases
* are common then the alleles will all be at least N bases shorter.
* The stop position returned is the start position + the length of the
* reverse trimmed only reference allele - 1.
* If the alleles all share a single common starting sequence (just one base is considered)
* then the alleles have this leading common base removed as well.
*
* TODO This code is gross and brittle and needs to be rethought from scratch
*
* @param start the unadjusted start position (pre-clipping)
* @param ref the reference string
* @param unclippedAlleles the list of unclipped alleles, including the reference allele
* @return the new reference end position of this event
*/
@Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
@Ensures("result != null")
public static ClippedAlleles clipAlleles(final int start,
final String ref,
final List<Allele> unclippedAlleles,
final int endForSymbolicAllele ) {
// no variation or single nucleotide events are by definition fully clipped
if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
return new ClippedAlleles(start, unclippedAlleles, null);
// we've got to sort out the clipping by looking at the alleles themselves
final byte firstRefBase = (byte) ref.charAt(0);
final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
final int forwardClipping = firstBaseIsClipped ? 1 : 0;
final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
if ( reverseClipping == -1 )
return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
boolean sawSymbolic = false;
List<Allele> clippedAlleles;
if ( ! needsClipping ) {
// there's nothing to clip, so clippedAlleles are the original alleles
clippedAlleles = unclippedAlleles;
} else {
clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() ) {
sawSymbolic = true;
clippedAlleles.add(a);
} else {
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
if ( !Allele.acceptableAlleleBases(allele) )
return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
clippedAlleles.add(Allele.create(allele, a.isReference()));
}
}
}
int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
// TODO
// TODO
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
// TODO
// TODO
if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
// TODO
// TODO
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
// TODO
// TODO
final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
}
/**
* Returns true if the alleles in inputVC should have reference bases added for padding
*
* We need to pad a VC with a common base if the length of the reference allele is
* less than the length of the VariantContext. This happens because the position of
* e.g. an indel is always one before the actual event (as per VCF convention).
*
* @param inputVC the VC to evaluate, cannot be null
* @return true if
*/
public static boolean needsPadding(final VariantContext inputVC) {
// biallelic sites with only symbolic never need padding
if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
return false;
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
final int referenceLength = inputVC.getReference().length();
if ( referenceLength == recordLength )
return false;
else if ( referenceLength == recordLength - 1 )
return true;
else if ( !inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
return false;
}
public static Allele padAllele(final VariantContext vc, final Allele allele) {
assert needsPadding(vc);
if ( allele.isSymbolic() )
return allele;
else {
// get bases for current allele and create a new one with trimmed bases
final StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
final String newBases = sb.toString();
return Allele.create(newBases, allele.isReference());
}
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
final boolean padVC = needsPadding(inputVC);
// nothing to do if we don't need to pad bases
if ( padVC ) {
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
boolean paddedAtLeastOne = false;
for (final Allele a : inputVC.getAlleles()) {
final Allele padded = padAllele(inputVC, a);
paddedAtLeastOne = paddedAtLeastOne || padded != a;
alleles.add(padded);
if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
}
if ( ! paddedAtLeastOne )
throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
vcb.alleles(alleles);
// the position of the inputVC is one further, if it doesn't contain symbolic alleles
vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
if ( inputVC.hasGenotypes() ) {
assert unpaddedToPadded != null;
// now we can recreate new genotypes with trimmed alleles
final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
for (final Genotype g : inputVC.getGenotypes() ) {
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (final Allele a : g.getAlleles()) {
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
}
vcb.genotypes(genotypes);
}
return vcb.make();
}
else
return inputVC;
}
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// see if we need to trim common reference base from all alleles
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
}
@Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
public static class ClippedAlleles {
private final int stop;
private final List<Allele> clippedAlleles;
private final Byte refBaseForIndel;
private final String error;
@Requires({"stop > 0", "clippedAlleles != null"})
private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
this.stop = stop;
this.clippedAlleles = clippedAlleles;
this.error = null;
this.refBaseForIndel = refBaseForIndel;
}
@Requires("error != null")
private ClippedAlleles(final String error) {
this.stop = -1;
this.clippedAlleles = null;
this.refBaseForIndel = null;
this.error = error;
}
/**
* Get an error if it occurred
* @return the error message, or null if no error occurred
*/
public String getError() {
return error;
}
/**
* Get the stop position to use after the clipping as been applied, given the
* provided position to clipAlleles
* @return
*/
public int getStop() {
return stop;
}
/**
* Get the clipped alleles themselves
* @return the clipped alleles in the order of the input unclipped alleles
*/
public List<Allele> getClippedAlleles() {
return clippedAlleles;
}
/**
* Returns the reference base we should use for indels, or null if none is appropriate
* @return
*/
public Byte getRefBaseForIndel() {
return refBaseForIndel;
}
}
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.utils.variantcontext; package org.broadinstitute.sting.utils.variantcontext;
import java.util.ArrayList; import org.broadinstitute.sting.utils.BaseUtils;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.List;
/** /**
* Immutable representation of an allele * Immutable representation of an allele
@ -77,32 +77,36 @@ public class Allele implements Comparable<Allele> {
private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
private boolean isRef = false; private boolean isRef = false;
private boolean isNull = false;
private boolean isNoCall = false; private boolean isNoCall = false;
private boolean isSymbolic = false; private boolean isSymbolic = false;
private byte[] bases = null; private byte[] bases = null;
public final static String NULL_ALLELE_STRING = "-";
public final static String NO_CALL_STRING = "."; public final static String NO_CALL_STRING = ".";
/** A generic static NO_CALL allele for use */ /** A generic static NO_CALL allele for use */
// no public way to create an allele // no public way to create an allele
private Allele(byte[] bases, boolean isRef) { private Allele(byte[] bases, boolean isRef) {
// standardize our representation of null allele and bases // null alleles are no longer allowed
if ( wouldBeNullAllele(bases) ) { if ( wouldBeNullAllele(bases) ) {
bases = EMPTY_ALLELE_BASES; throw new IllegalArgumentException("Null alleles are not supported");
isNull = true; }
} else if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES; // no-calls are represented as no bases
if ( wouldBeNoCallAllele(bases) ) {
this.bases = EMPTY_ALLELE_BASES;
isNoCall = true; isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
} else if ( wouldBeSymbolicAllele(bases) ) { return;
}
if ( wouldBeSymbolicAllele(bases) ) {
isSymbolic = true; isSymbolic = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
} }
// else else {
// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance bases = BaseUtils.convertToUpperCase(bases);
}
this.isRef = isRef; this.isRef = isRef;
this.bases = bases; this.bases = bases;
@ -126,8 +130,6 @@ public class Allele implements Comparable<Allele> {
private final static Allele ALT_T = new Allele("T", false); private final static Allele ALT_T = new Allele("T", false);
private final static Allele REF_N = new Allele("N", true); private final static Allele REF_N = new Allele("N", true);
private final static Allele ALT_N = new Allele("N", false); private final static Allele ALT_N = new Allele("N", false);
private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true);
private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false);
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
@ -154,7 +156,6 @@ public class Allele implements Comparable<Allele> {
case '.': case '.':
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
return NO_CALL; return NO_CALL;
case '-': return isRef ? REF_NULL : ALT_NULL;
case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'A': case 'a' : return isRef ? REF_A : ALT_A;
case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'C': case 'c' : return isRef ? REF_C : ALT_C;
case 'G': case 'g' : return isRef ? REF_G : ALT_G; case 'G': case 'g' : return isRef ? REF_G : ALT_G;
@ -179,14 +180,9 @@ public class Allele implements Comparable<Allele> {
public static Allele extend(Allele left, byte[] right) { public static Allele extend(Allele left, byte[] right) {
if (left.isSymbolic()) if (left.isSymbolic())
throw new IllegalArgumentException("Cannot extend a symbolic allele"); throw new IllegalArgumentException("Cannot extend a symbolic allele");
byte[] bases = null; byte[] bases = new byte[left.length() + right.length];
if ( left.length() == 0 ) System.arraycopy(left.getBases(), 0, bases, 0, left.length());
bases = right; System.arraycopy(right, 0, bases, left.length(), right.length);
else {
bases = new byte[left.length() + right.length];
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
System.arraycopy(right, 0, bases, left.length(), right.length);
}
return create(bases, left.isReference()); return create(bases, left.isReference());
} }
@ -242,7 +238,10 @@ public class Allele implements Comparable<Allele> {
} }
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) if ( wouldBeNullAllele(bases) )
return false;
if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
return true; return true;
for (byte base : bases ) { for (byte base : bases ) {
@ -299,11 +298,6 @@ public class Allele implements Comparable<Allele> {
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
//Returns true if this is the null allele
public boolean isNull() { return isNull; }
// Returns true if this is not the null allele
public boolean isNonNull() { return ! isNull(); }
// Returns true if this is the NO_CALL allele // Returns true if this is the NO_CALL allele
public boolean isNoCall() { return isNoCall; } public boolean isNoCall() { return isNoCall; }
// Returns true if this is not the NO_CALL allele // Returns true if this is not the NO_CALL allele
@ -319,7 +313,7 @@ public class Allele implements Comparable<Allele> {
// Returns a nice string representation of this object // Returns a nice string representation of this object
public String toString() { public String toString() {
return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : ""); return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : "");
} }
/** /**
@ -384,27 +378,27 @@ public class Allele implements Comparable<Allele> {
* @return true if this and other are equal * @return true if this and other are equal
*/ */
public boolean equals(Allele other, boolean ignoreRefState) { public boolean equals(Allele other, boolean ignoreRefState) {
return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
} }
/** /**
* @param test bases to test against * @param test bases to test against
* *
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/ */
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
/** /**
* @param test bases to test against * @param test bases to test against
* *
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/ */
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
/** /**
* @param test allele to test against * @param test allele to test against
* *
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/ */
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
@ -421,10 +415,6 @@ public class Allele implements Comparable<Allele> {
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
return getMatchingAllele(allAlleles, alleleBases.getBytes());
}
public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) { public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
for ( Allele a : allAlleles ) { for ( Allele a : allAlleles ) {
if ( a.basesMatch(alleleBases) ) { if ( a.basesMatch(alleleBases) ) {
@ -438,26 +428,6 @@ public class Allele implements Comparable<Allele> {
return null; // couldn't find anything return null; // couldn't find anything
} }
public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());
for ( String alleleString : alleleStrings ) {
Allele allele = getMatchingAllele(possibleAlleles, alleleString);
if ( allele == null ) {
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
allele = create(alleleString);
} else {
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
}
}
myAlleles.add(allele);
}
return myAlleles;
}
public int compareTo(Allele other) { public int compareTo(Allele other) {
if ( isReference() && other.isNonReference() ) if ( isReference() && other.isNonReference() )
return -1; return -1;
@ -468,9 +438,6 @@ public class Allele implements Comparable<Allele> {
} }
public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) { public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
if ( a1.isNull() || a2.isNull() )
return true;
if ( a2.length() >= a1.length() ) if ( a2.length() >= a1.length() )
return firstIsPrefixOfSecond(a1, a2); return firstIsPrefixOfSecond(a1, a2);
else else

View File

@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
@Deprecated // ID is no longer stored in the attributes map @Deprecated // ID is no longer stored in the attributes map
private final static String ID_KEY = "ID"; private final static String ID_KEY = "ID";
private final Byte REFERENCE_BASE_FOR_INDEL;
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>()); public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
/** The location of this VariantContext */ /** The location of this VariantContext */
@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
public enum Validation { public enum Validation {
REF_PADDING,
ALLELES, ALLELES,
GENOTYPES GENOTYPES
} }
@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
other.getFiltersMaybeNull(), other.getFiltersMaybeNull(),
other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, other.getAttributes(),
other.fullyDecoded, NO_VALIDATION); other.fullyDecoded, NO_VALIDATION);
} }
@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @param log10PError qual * @param log10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters * @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes * @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param validationToPerform set of validation steps to take * @param validationToPerform set of validation steps to take
*/ */
protected VariantContext(final String source, protected VariantContext(final String source,
@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
final double log10PError, final double log10PError,
final Set<String> filters, final Set<String> filters,
final Map<String, Object> attributes, final Map<String, Object> attributes,
final Byte referenceBaseForIndel,
final boolean fullyDecoded, final boolean fullyDecoded,
final EnumSet<Validation> validationToPerform ) { final EnumSet<Validation> validationToPerform ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID; this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
// todo -- remove me when this check is no longer necessary // todo -- remove me when this check is no longer necessary
if ( this.commonInfo.hasAttribute(ID_KEY) ) if ( this.commonInfo.hasAttribute(ID_KEY) )
@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
* in this VC is returned as the set of alleles in the subContext, even if * in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples * some of those alleles aren't in the samples
* *
* @param sampleNames * @param sampleNames the sample names
* @return * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
* @return new VariantContext subsetting to just the given samples
*/ */
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) { public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( sampleNames.containsAll(getSampleNames()) ) { if ( sampleNames.containsAll(getSampleNames()) ) {
@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/ */
public boolean isSimpleInsertion() { public boolean isSimpleInsertion() {
// can't just call !isSimpleDeletion() because of complex indels // can't just call !isSimpleDeletion() because of complex indels
return getType() == Type.INDEL && getReference().isNull() && isBiallelic(); return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
} }
/** /**
@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/ */
public boolean isSimpleDeletion() { public boolean isSimpleDeletion() {
// can't just call !isSimpleInsertion() because of complex indels // can't just call !isSimpleInsertion() because of complex indels
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic(); return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
} }
/** /**
@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
return ID; return ID;
} }
public boolean hasReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL != null;
}
// the indel base that gets stripped off for indels
public Byte getReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL;
}
public String getAlleleStringWithRefPadding(final Allele allele) {
if ( VCFAlleleClipper.needsPadding(this) )
return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
else
return allele.getDisplayString();
}
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
* for consistency with the multi-get function. * for consistency with the multi-get function.
* *
* @param sampleName * @param sampleName the sample name
* @return * @return mapping from sample name to genotype
* @throws IllegalArgumentException if sampleName isn't bound to a genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/ */
public GenotypesContext getGenotypes(String sampleName) { public GenotypesContext getGenotypes(String sampleName) {
@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* For testing convenience only * For testing convenience only
* *
* @param sampleNames a unique list of sample names * @param sampleNames a unique list of sample names
* @return * @return subsetting genotypes context
* @throws IllegalArgumentException if sampleName isn't bound to a genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/ */
protected GenotypesContext getGenotypes(Collection<String> sampleNames) { protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio
/** /**
* Run all extra-strict validation tests on a Variant Context object * Run all extra-strict validation tests on a Variant Context object
* *
* @param reference the true reference allele * @param reportedReference the reported reference allele
* @param paddedRefBase the reference base used for padding indels * @param observedReference the actual reference allele
* @param rsIDs the true dbSNP IDs * @param rsIDs the true dbSNP IDs
*/ */
public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) { public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
// validate the reference // validate the reference
validateReferenceBases(reference, paddedRefBase); validateReferenceBases(reportedReference, observedReference);
// validate the RS IDs // validate the RS IDs
validateRSIDs(rsIDs); validateRSIDs(rsIDs);
@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
//checkReferenceTrack(); //checkReferenceTrack();
} }
public void validateReferenceBases(Allele reference, Byte paddedRefBase) { public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) {
if ( reference == null ) if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) {
return; throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString()));
// don't validate if we're a complex event
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
}
// we also need to validate the padding base for simple indels
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
} }
} }
@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
for (final Validation val : validationToPerform ) { for (final Validation val : validationToPerform ) {
switch (val) { switch (val) {
case ALLELES: validateAlleles(); break; case ALLELES: validateAlleles(); break;
case REF_PADDING: validateReferencePadding(); break;
case GENOTYPES: validateGenotypes(); break; case GENOTYPES: validateGenotypes(); break;
default: throw new IllegalArgumentException("Unexpected validation mode " + val); default: throw new IllegalArgumentException("Unexpected validation mode " + val);
} }
@ -1151,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( hasAttribute(VCFConstants.END_KEY) ) { if ( hasAttribute(VCFConstants.END_KEY) ) {
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
assert end != -1; assert end != -1;
if ( end != getEnd() && end != getEnd() + 1 ) { if ( end != getEnd() ) {
// the end is allowed to 1 bigger because of the padding
final String message = "Badly formed variant context at location " + getChr() + ":" final String message = "Badly formed variant context at location " + getChr() + ":"
+ getStart() + "; getEnd() was " + getEnd() + getStart() + "; getEnd() was " + getEnd()
+ " but this VariantContext contains an END key with value " + end; + " but this VariantContext contains an END key with value " + end;
@ -1161,23 +1129,19 @@ public class VariantContext implements Feature { // to enable tribble integratio
else else
throw new ReviewedStingException(message); throw new ReviewedStingException(message);
} }
} else {
final long length = (stop - start) + 1;
if ( ! hasSymbolicAlleles() && length != getReference().length() ) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
} }
} }
private void validateReferencePadding() {
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
return;
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
if ( needsPadding && !hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
}
private void validateAlleles() { private void validateAlleles() {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false; boolean alreadySeenRef = false;
for ( Allele allele : alleles ) {
for ( final Allele allele : alleles ) {
// make sure there's only one reference allele // make sure there's only one reference allele
if ( allele.isReference() ) { if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
@ -1187,26 +1151,11 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( allele.isNoCall() ) { if ( allele.isNoCall() ) {
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
} }
// make sure there's only one null allele
if ( allele.isNull() ) {
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
alreadySeenNull = true;
}
} }
// make sure there's one reference allele // make sure there's one reference allele
if ( ! alreadySeenRef ) if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext"); throw new IllegalArgumentException("No reference allele found in VariantContext");
// if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1;
if ( ! hasSymbolicAlleles()
&& ((getReference().isNull() && length != 1 )
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
} }
private void validateGenotypes() { private void validateGenotypes() {

View File

@ -25,9 +25,6 @@
package org.broadinstitute.sting.utils.variantcontext; package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.*; import com.google.java.contract.*;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -74,7 +71,6 @@ public class VariantContextBuilder {
private Set<String> filters = null; private Set<String> filters = null;
private Map<String, Object> attributes = null; private Map<String, Object> attributes = null;
private boolean attributesCanBeModified = false; private boolean attributesCanBeModified = false;
private Byte referenceBaseForIndel = null;
/** enum of what must be validated */ /** enum of what must be validated */
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class); final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
@ -117,7 +113,6 @@ public class VariantContextBuilder {
this.genotypes = parent.genotypes; this.genotypes = parent.genotypes;
this.ID = parent.getID(); this.ID = parent.getID();
this.log10PError = parent.getLog10PError(); this.log10PError = parent.getLog10PError();
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
this.source = parent.getSource(); this.source = parent.getSource();
this.start = parent.getStart(); this.start = parent.getStart();
this.stop = parent.getEnd(); this.stop = parent.getEnd();
@ -132,7 +127,6 @@ public class VariantContextBuilder {
this.genotypes = parent.genotypes; this.genotypes = parent.genotypes;
this.ID = parent.ID; this.ID = parent.ID;
this.log10PError = parent.log10PError; this.log10PError = parent.log10PError;
this.referenceBaseForIndel = parent.referenceBaseForIndel;
this.source = parent.source; this.source = parent.source;
this.start = parent.start; this.start = parent.start;
this.stop = parent.stop; this.stop = parent.stop;
@ -362,21 +356,6 @@ public class VariantContextBuilder {
return this; return this;
} }
/**
* Tells us that the resulting VariantContext should use this byte for the reference base
* Null means no refBase is available
* @param referenceBaseForIndel
*/
public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) {
this.referenceBaseForIndel = referenceBaseForIndel;
toValidate.add(VariantContext.Validation.REF_PADDING);
return this;
}
public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
}
/** /**
* Tells us that the resulting VariantContext should have source field set to source * Tells us that the resulting VariantContext should have source field set to source
* @param source * @param source
@ -401,7 +380,6 @@ public class VariantContextBuilder {
this.start = start; this.start = start;
this.stop = stop; this.stop = stop;
toValidate.add(VariantContext.Validation.ALLELES); toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this; return this;
} }
@ -416,7 +394,6 @@ public class VariantContextBuilder {
this.start = loc.getStart(); this.start = loc.getStart();
this.stop = loc.getStop(); this.stop = loc.getStop();
toValidate.add(VariantContext.Validation.ALLELES); toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this; return this;
} }
@ -440,7 +417,6 @@ public class VariantContextBuilder {
public VariantContextBuilder start(final long start) { public VariantContextBuilder start(final long start) {
this.start = start; this.start = start;
toValidate.add(VariantContext.Validation.ALLELES); toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this; return this;
} }
@ -517,6 +493,6 @@ public class VariantContextBuilder {
public VariantContext make() { public VariantContext make() {
return new VariantContext(source, ID, contig, start, stop, alleles, return new VariantContext(source, ID, contig, start, stop, alleles,
genotypes, log10PError, filters, attributes, genotypes, log10PError, filters, attributes,
referenceBaseForIndel, fullyDecoded, toValidate); fullyDecoded, toValidate);
} }
} }

View File

@ -64,9 +64,9 @@ public class VariantContextUtils {
* Ensures that VC contains all of the samples in allSamples by adding missing samples to * Ensures that VC contains all of the samples in allSamples by adding missing samples to
* the resulting VC with default diploid ./. genotypes * the resulting VC with default diploid ./. genotypes
* *
* @param vc * @param vc the VariantContext
* @param allSamples * @param allSamples all of the samples needed
* @return * @return a new VariantContext with missing samples added
*/ */
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) { public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
// TODO -- what's the fastest way to do this calculation? // TODO -- what's the fastest way to do this calculation?
@ -376,9 +376,9 @@ public class VariantContextUtils {
/** /**
* @deprecated use variant context builder version instead * @deprecated use variant context builder version instead
* @param vc * @param vc the variant context
* @param keysToPreserve * @param keysToPreserve the keys to preserve
* @return * @return a pruned version of the original variant context
*/ */
@Deprecated @Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) { public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
@ -486,14 +486,13 @@ public class VariantContextUtils {
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE ) if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
verifyUniqueSampleNames(unsortedVCs); verifyUniqueSampleNames(unsortedVCs);
final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// Make sure all variant contexts are padded with reference base in case of indels if necessary // Make sure all variant contexts are padded with reference base in case of indels if necessary
final List<VariantContext> VCs = new ArrayList<VariantContext>(); final List<VariantContext> VCs = new ArrayList<VariantContext>();
for (final VariantContext vc : prepaddedVCs) { for (final VariantContext vc : preFilteredVCs) {
// also a reasonable place to remove filtered calls, if needed
if ( ! filteredAreUncalled || vc.isNotFiltered() ) if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc)); VCs.add(vc);
} }
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null; return null;
@ -547,9 +546,6 @@ public class VariantContextUtils {
filters.addAll(vc.getFilters()); filters.addAll(vc.getFilters());
if ( referenceBaseForIndel == null )
referenceBaseForIndel = vc.getReferenceBaseForIndel();
// //
// add attributes // add attributes
// //
@ -661,10 +657,9 @@ public class VariantContextUtils {
builder.genotypes(genotypes); builder.genotypes(genotypes);
builder.log10PError(log10PError); builder.log10PError(log10PError);
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
builder.referenceBaseForIndel(referenceBaseForIndel);
// Trim the padded bases of all alleles if necessary // Trim the padded bases of all alleles if necessary
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); final VariantContext merged = builder.make();
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged; return merged;
} }
@ -700,73 +695,6 @@ public class VariantContextUtils {
return true; return true;
} }
private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC;
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
Allele refAllele = inputVC.getReference();
if (!inputVC.isVariant())
trimVC = false;
else if (refAllele.isNull())
trimVC = false;
else {
trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
}
// nothing to do if we don't need to trim bases
if (trimVC) {
List<Allele> alleles = new ArrayList<Allele>();
GenotypesContext genotypes = GenotypesContext.create();
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
// example: mixed records such as {TA*,TGA,TG}
boolean hasNullAlleles = false;
for (final Allele a: originalToTrimmedAlleleMap.values()) {
if (a.isNull())
hasNullAlleles = true;
}
if (!hasNullAlleles)
return inputVC;
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
List<Allele> originalAlleles = genotype.getAlleles();
List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
}
return inputVC;
}
public static GenotypesContext stripPLs(GenotypesContext genotypes) { public static GenotypesContext stripPLs(GenotypesContext genotypes) {
GenotypesContext newGs = GenotypesContext.create(genotypes.size()); GenotypesContext newGs = GenotypesContext.create(genotypes.size());
@ -819,7 +747,7 @@ public class VariantContextUtils {
if ( !mappedVCs.containsKey(vc.getType()) ) if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>()); mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc); mappedVCs.get(vc.getType()).add(vc);
} }
} }
return mappedVCs; return mappedVCs;
@ -881,10 +809,10 @@ public class VariantContextUtils {
// //
// refAllele: ACGTGA // refAllele: ACGTGA
// myRef: ACGT // myRef: ACGT
// myAlt: - // myAlt: A
// //
// We need to remap all of the alleles in vc to include the extra GA so that // We need to remap all of the alleles in vc to include the extra GA so that
// myRef => refAllele and myAlt => GA // myRef => refAllele and myAlt => AGA
// //
Allele myRef = vc.getReference(); Allele myRef = vc.getReference();
@ -979,7 +907,7 @@ public class VariantContextUtils {
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
for ( Allele originalAllele : vc.getAlleles() ) { for ( Allele originalAllele : vc.getAlleles() ) {
Allele newAllele; Allele newAllele;
if ( originalAllele.isNoCall() || originalAllele.isNull() ) if ( originalAllele.isNoCall() )
newAllele = originalAllele; newAllele = originalAllele;
else else
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
@ -1235,13 +1163,14 @@ public class VariantContextUtils {
if ( ! vc.isIndel() ) // only indels are tandem repeats if ( ! vc.isIndel() ) // only indels are tandem repeats
return null; return null;
final Allele ref = vc.getReference(); final Allele refAllele = vc.getReference();
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
byte[] repeatUnit = null; byte[] repeatUnit = null;
final ArrayList<Integer> lengths = new ArrayList<Integer>(); final ArrayList<Integer> lengths = new ArrayList<Integer>();
for ( final Allele allele : vc.getAlternateAlleles() ) { for ( final Allele allele : vc.getAlternateAlleles() ) {
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes()); Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
final int[] repetitionCount = result.first; final int[] repetitionCount = result.first;
// repetition count = 0 means allele is not a tandem expansion of context // repetition count = 0 means allele is not a tandem expansion of context
@ -1256,7 +1185,7 @@ public class VariantContextUtils {
repeatUnit = result.second; repeatUnit = result.second;
if (VERBOSE) { if (VERBOSE) {
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0])); System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
System.out.println("RU:"+new String(repeatUnit)); System.out.println("RU:"+new String(repeatUnit));
} }
@ -1405,4 +1334,113 @@ public class VariantContextUtils {
return start + Math.max(ref.length() - 1, 0); return start + Math.max(ref.length() - 1, 0);
} }
} }
public static boolean requiresPaddingBase(final List<String> alleles) {
// see whether one of the alleles would be null if trimmed through
for ( final String allele : alleles ) {
if ( allele.isEmpty() )
return true;
}
int clipping = 0;
Character currentBase = null;
while ( true ) {
for ( final String allele : alleles ) {
if ( allele.length() - clipping == 0 )
return true;
char myBase = allele.charAt(clipping);
if ( currentBase == null )
currentBase = myBase;
else if ( currentBase != myBase )
return false;
}
clipping++;
currentBase = null;
}
}
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
// see whether we need to trim common reference base from all alleles
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
final byte[] ref,
final int forwardClipping,
final boolean allowFullClip) {
int clipping = 0;
boolean stillClipping = true;
while ( stillClipping ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
if ( a.length() - clipping == 0 )
return clipping - (allowFullClip ? 0 : 1);
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
stillClipping = false;
}
else if ( ref.length == clipping ) {
if ( allowFullClip )
stillClipping = false;
else
return -1;
}
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
stillClipping = false;
}
}
if ( stillClipping )
clipping++;
}
return clipping;
}
} }

View File

@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
} }
private void buildAlleles( VariantContext vc ) throws IOException { private void buildAlleles( VariantContext vc ) throws IOException {
final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
for ( Allele allele : vc.getAlleles() ) { for ( Allele allele : vc.getAlleles() ) {
if ( needsPadding )
allele = VCFAlleleClipper.padAllele(vc, allele);
final byte[] s = allele.getDisplayBases(); final byte[] s = allele.getDisplayBases();
if ( s == null ) if ( s == null )
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele); throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);

View File

@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter {
vc = new VariantContextBuilder(vc).noGenotypes().make(); vc = new VariantContextBuilder(vc).noGenotypes().make();
try { try {
vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
super.add(vc); super.add(vc);
Map<Allele, String> alleleMap = buildAlleleMap(vc); Map<Allele, String> alleleMap = buildAlleleMap(vc);

View File

@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec( WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1, 1,
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
executeTest("testFastaAlternateReferenceIndels", spec2); executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec( WalkerTestSpec spec3 = new WalkerTestSpec(

View File

@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.util.*; import java.util.*;
@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider {
boolean addBaseErrors, int phredScaledBaseErrorRate) { boolean addBaseErrors, int phredScaledBaseErrorRate) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
String refAllele, altAllele;
ArrayList<Allele> vcAlleles = new ArrayList<Allele>(); if (eventLength == 0) {
Allele refAllele, altAllele; // SNP case
if (eventLength == 0) {// SNP case refAllele = new String(new byte[]{referenceContext.getBase()});
refAllele =Allele.create(referenceContext.getBase(),true); altAllele = new String(altBases.substring(0,1));
altAllele = Allele.create(altBases.substring(0,1), false);
} else if (eventLength>0){ } else if (eventLength>0){
// insertion // insertion
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); refAllele = "";
altAllele = Allele.create(altBases.substring(0,eventLength), false); altAllele = altBases.substring(0,eventLength);
} }
else { else {
// deletion // deletion
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); altAllele = "";
} }
int stop = loc.getStart();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(referenceContext.getBase());
builder.noGenotypes();
final VariantContext vc = builder.make();
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>(); Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
for (String sample: sampleNames) { for (String sample: sampleNames) {
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
contexts.put(sample,context); contexts.put(sample,context);
} }
@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider {
rg.setSample(name); rg.setSample(name);
return rg; return rg;
} }
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
List<PileupElement> pileupElements = new ArrayList<PileupElement>(); List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readStart = contigStart;
int offset = (contigStop-contigStart+1)/2; int offset = (contigStop-contigStart+1)/2;
int refAlleleLength = 0; int refAlleleLength = refAllele.length();
int readCounter = 0;
int alleleCounter = 0;
for (Allele allele: vc.getAlleles()) {
if (allele.isReference())
refAlleleLength = allele.getBases().length;
int alleleLength = allele.getBases().length;
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
if (addErrors)
addBaseErrors(readBases, phredScaledErrorRate);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (allele.isReference())
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
alleleCounter++;
}
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
return new ReadBackedPileupImpl(loc,pileupElements); return new ReadBackedPileupImpl(loc,pileupElements);
} }
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
int alleleLength = allele.length();
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readCounter = 0;
for ( int d = 0; d < numReadsPerAllele; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
if (addErrors)
addBaseErrors(readBases, phredScaledErrorRate);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (isReference)
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
return pileupElements;
}
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
// create haplotype based on a particular allele // create haplotype based on a particular allele
String prefix = refBases.substring(offset); String prefix = refBases.substring(0, offset);
String alleleBases = new String(allele.getBases());
String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
return (prefix+alleleBases+postfix).getBytes(); return (prefix+allele+postfix).getBytes();
} }
private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) { private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {

View File

@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2); Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
eventLength = 3; eventLength = 3;
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases); alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2); Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
// same with min Reads = 11 // same with min Reads = 11
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases); alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2); Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
altBases = "CCTCNTGAGA"; altBases = "CCTCNTGAGA";
eventLength = 5; eventLength = 5;

View File

@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30"; testArgs += " --virtualPrimerSize 30";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("27f9450afa132888a8994167f0035fd7")); Arrays.asList("240d99b58f73985fb114abe9044c0271"));
executeTest("Test probes", spec); executeTest("Test probes", spec);
} }
@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); Arrays.asList("6e7789445e29d91979a21e78d3d53295"));
executeTest("Test probes", spec); executeTest("Test probes", spec);
} }
@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); Arrays.asList("18d7236208db603e143b40db06ef2aca"));
executeTest("Test probes", spec); executeTest("Test probes", spec);
} }

View File

@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); } @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); } @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); } @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }

View File

@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
executeTest("test bad ref allele in deletion", spec); executeTest("test bad ref allele in deletion", spec);
} }
@Test
public void testComplexEvents() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("complexEvents.vcf", "ALL"),
0,
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
);
executeTest("test validating complex events", spec);
}
} }

View File

@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
} }
@Test @Test

View File

@ -1,226 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
public class VCFAlleleClipperUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
//
// Test allele clipping
//
// --------------------------------------------------------------------------------
private class ClipAllelesTest extends TestDataProvider {
final int position;
final int stop;
final String ref;
List<Allele> inputs;
List<Allele> expected;
@Requires("arg.length % 2 == 0")
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
super(ClipAllelesTest.class);
this.position = position;
this.stop = stop;
this.ref = arg[0];
int n = arg.length / 2;
inputs = new ArrayList<Allele>(n);
expected = new ArrayList<Allele>(n);
for ( int i = 0; i < n; i++ ) {
final boolean ref = i % n == 0;
inputs.add(Allele.create(arg[i], ref));
}
for ( int i = n; i < arg.length; i++ ) {
final boolean ref = i % n == 0;
expected.add(Allele.create(arg[i], ref));
}
}
public boolean isClipped() {
for ( int i = 0; i < inputs.size(); i++ ) {
if ( inputs.get(i).length() != expected.get(i).length() )
return true;
}
return false;
}
public String toString() {
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
}
}
@DataProvider(name = "ClipAllelesTest")
public Object[][] makeClipAllelesTest() {
// do no harm
new ClipAllelesTest(10, 10, "A", "A");
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
// insertions
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
// deletions
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
// multi-allelic insertion and deletions
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
// cannot be clipped
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
// symbolic
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
// symbolic with alleles that should be clipped
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
// clipping from both ends
//
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
//
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
// complex substitutions
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
return ClipAllelesTest.getTests(ClipAllelesTest.class);
}
@Test(dataProvider = "ClipAllelesTest")
public void testClipAllelesTest(ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
}
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
if ( cfg.isClipped() ) {
// TODO
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
// TODO actually the original stop, as the original stop is +1 its true size.
// TODO
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
}
}
// --------------------------------------------------------------------------------
//
// basic allele clipping test
//
// --------------------------------------------------------------------------------
private class ReverseClippingPositionTestProvider extends TestDataProvider {
final String ref;
final List<Allele> alleles = new ArrayList<Allele>();
final int expectedClip;
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
super(ReverseClippingPositionTestProvider.class);
this.ref = ref;
for ( final String allele : alleles )
this.alleles.add(Allele.create(allele));
this.expectedClip = expectedClip;
}
@Override
public String toString() {
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
}
}
@DataProvider(name = "ReverseClippingPositionTestProvider")
public Object[][] makeReverseClippingPositionTestProvider() {
// pair clipping
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
// triplets
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}

View File

@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
executeTest("Test reading and writing breakpoint VCF", spec1); executeTest("Test reading and writing breakpoint VCF", spec1);
} }
@Test(enabled = true)
public void testReadingLowerCaseBases() {
String testVCF = privateTestDir + "lowercaseBases.vcf";
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d"));
executeTest("Test reading VCF with lower-case bases", spec1);
}
@Test(enabled = true) @Test(enabled = true)
public void testReadingAndWriting1000GSVs() { public void testReadingAndWriting1000GSVs() {
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf"; String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
@ -57,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF; String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae")); WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284"));
executeTest("Test reading and writing samtools vcf", spec1); executeTest("Test reading and writing samtools vcf", spec1);
} }
@ -66,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
String testVCF = privateTestDir + "ex2.vcf"; String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF; String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120")); WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
executeTest("Test writing samtools WEx BCF example", spec1); executeTest("Test writing samtools WEx BCF example", spec1);
} }

View File

@ -37,8 +37,6 @@ import org.testng.annotations.Test;
// public Allele(byte[] bases, boolean isRef) { // public Allele(byte[] bases, boolean isRef) {
// public Allele(boolean isRef) { // public Allele(boolean isRef) {
// public Allele(String bases, boolean isRef) { // public Allele(String bases, boolean isRef) {
// public boolean isNullAllele() { return length() == 0; }
// public boolean isNonNullAllele() { return ! isNullAllele(); }
// public boolean isReference() { return isRef; } // public boolean isReference() { return isRef; }
// public boolean isNonReference() { return ! isReference(); } // public boolean isNonReference() { return ! isReference(); }
// public byte[] getBases() { return bases; } // public byte[] getBases() { return bases; }
@ -49,13 +47,10 @@ import org.testng.annotations.Test;
* Basic unit test for RecalData * Basic unit test for RecalData
*/ */
public class AlleleUnitTest { public class AlleleUnitTest {
Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall; Allele ARef, A, T, ATIns, ATCIns, NoCall;
@BeforeSuite @BeforeSuite
public void before() { public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
A = Allele.create("A"); A = Allele.create("A");
ARef = Allele.create("A", true); ARef = Allele.create("A", true);
T = Allele.create("T"); T = Allele.create("T");
@ -72,8 +67,6 @@ public class AlleleUnitTest {
Assert.assertFalse(A.isReference()); Assert.assertFalse(A.isReference());
Assert.assertTrue(A.basesMatch("A")); Assert.assertTrue(A.basesMatch("A"));
Assert.assertEquals(A.length(), 1); Assert.assertEquals(A.length(), 1);
Assert.assertTrue(A.isNonNull());
Assert.assertFalse(A.isNull());
Assert.assertTrue(ARef.isReference()); Assert.assertTrue(ARef.isReference());
Assert.assertFalse(ARef.isNonReference()); Assert.assertFalse(ARef.isNonReference());
@ -92,8 +85,8 @@ public class AlleleUnitTest {
Assert.assertFalse(NoCall.isReference()); Assert.assertFalse(NoCall.isReference());
Assert.assertFalse(NoCall.basesMatch(".")); Assert.assertFalse(NoCall.basesMatch("."));
Assert.assertEquals(NoCall.length(), 0); Assert.assertEquals(NoCall.length(), 0);
Assert.assertTrue(NoCall.isNonNull()); Assert.assertTrue(NoCall.isNoCall());
Assert.assertFalse(NoCall.isNull()); Assert.assertFalse(NoCall.isCalled());
} }
@ -103,16 +96,6 @@ public class AlleleUnitTest {
Assert.assertEquals(ATCIns.length(), 3); Assert.assertEquals(ATCIns.length(), 3);
Assert.assertEquals(ATIns.getBases(), "AT".getBytes()); Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes()); Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
Assert.assertTrue(del.isNonReference());
Assert.assertFalse(delRef.isNonReference());
Assert.assertFalse(del.isReference());
Assert.assertTrue(delRef.isReference());
Assert.assertFalse(del.basesMatch("-"));
Assert.assertTrue(del.basesMatch(""));
Assert.assertEquals(del.length(), 0);
Assert.assertFalse(del.isNonNull());
Assert.assertTrue(del.isNull());
} }
@ -128,18 +111,6 @@ public class AlleleUnitTest {
Assert.assertFalse(a1.equals(a4)); Assert.assertFalse(a1.equals(a4));
} }
@Test
public void testDelConstructors() {
Allele a1 = Allele.create("-");
Allele a2 = Allele.create("-".getBytes());
Allele a3 = Allele.create("");
Allele a4 = Allele.create("", true);
Assert.assertTrue(a1.equals(a2));
Assert.assertTrue(a1.equals(a3));
Assert.assertFalse(a1.equals(a4));
}
@Test @Test
public void testInsConstructors() { public void testInsConstructors() {
Allele a1 = Allele.create("AC"); Allele a1 = Allele.create("AC");
@ -156,7 +127,6 @@ public class AlleleUnitTest {
public void testEquals() { public void testEquals() {
Assert.assertTrue(ARef.basesMatch(A)); Assert.assertTrue(ARef.basesMatch(A));
Assert.assertFalse(ARef.equals(A)); Assert.assertFalse(ARef.equals(A));
Assert.assertFalse(ARef.equals(del));
Assert.assertFalse(ARef.equals(ATIns)); Assert.assertFalse(ARef.equals(ATIns));
Assert.assertFalse(ARef.equals(ATCIns)); Assert.assertFalse(ARef.equals(ATCIns));
@ -164,11 +134,6 @@ public class AlleleUnitTest {
Assert.assertFalse(T.basesMatch(A)); Assert.assertFalse(T.basesMatch(A));
Assert.assertFalse(T.equals(A)); Assert.assertFalse(T.equals(A));
Assert.assertTrue(del.basesMatch(del));
Assert.assertTrue(del.basesMatch(delRef));
Assert.assertTrue(del.equals(del));
Assert.assertFalse(del.equals(delRef));
Assert.assertTrue(ATIns.equals(ATIns)); Assert.assertTrue(ATIns.equals(ATIns));
Assert.assertFalse(ATIns.equals(ATCIns)); Assert.assertFalse(ATIns.equals(ATCIns));
Assert.assertTrue(ATIns.basesMatch("AT")); Assert.assertTrue(ATIns.basesMatch("AT"));
@ -209,7 +174,6 @@ public class AlleleUnitTest {
public void testExtend() { public void testExtend() {
Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString()); Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString()); Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString());
Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString()); Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());

View File

@ -225,10 +225,10 @@ public class VariantContextTestProvider {
add(builder()); add(builder());
add(builder().alleles("A")); add(builder().alleles("A"));
add(builder().alleles("A", "C", "T")); add(builder().alleles("A", "C", "T"));
add(builder().alleles("-", "C").referenceBaseForIndel("A")); add(builder().alleles("A", "AC"));
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A")); add(builder().alleles("A", "ACAGT"));
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A")); add(builder().loc("1", 10, 11).alleles("AC", "A"));
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A")); add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
// make sure filters work // make sure filters work
add(builder().unfiltered()); add(builder().unfiltered());
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A").make());
sites.add(builder().alleles("A", "C", "T").make()); sites.add(builder().alleles("A", "C", "T").make());
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); sites.add(builder().alleles("A", "AC").make());
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); sites.add(builder().alleles("A", "ACAGT").make());
for ( VariantContext site : sites ) { for ( VariantContext site : sites ) {
addGenotypes(site); addGenotypes(site);

View File

@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
int snpLocStart = 10; int snpLocStart = 10;
int snpLocStop = 10; int snpLocStop = 10;
// - / ATC [ref] from 20-23 // - / ATC [ref] from 20-22
String delLoc = "chr1"; String delLoc = "chr1";
int delLocStart = 20; int delLocStart = 20;
int delLocStop = 23; int delLocStop = 22;
// - [ref] / ATC from 20-20 // - [ref] / ATC from 20-20
String insLoc = "chr1"; String insLoc = "chr1";
int insLocStart = 20; int insLocStart = 20;
int insLocStop = 20; int insLocStop = 20;
// - / A / T / ATC [ref] from 20-23
String mixedLoc = "chr1";
int mixedLocStart = 20;
int mixedLocStop = 23;
VariantContextBuilder basicBuilder, snpBuilder, insBuilder; VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
@BeforeSuite @BeforeSuite
public void before() { public void before() {
del = Allele.create("-"); del = Allele.create("A");
delRef = Allele.create("-", true); delRef = Allele.create("A", true);
A = Allele.create("A"); A = Allele.create("A");
C = Allele.create("C"); C = Allele.create("C");
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
@BeforeMethod @BeforeMethod
public void beforeTest() { public void beforeTest() {
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A'); insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
} }
@Test @Test
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testCreatingDeletionVariantContext() { public void testCreatingDeletionVariantContext() {
List<Allele> alleles = Arrays.asList(ATCref, del); List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getChr(), delLoc);
Assert.assertEquals(vc.getStart(), delLocStart); Assert.assertEquals(vc.getStart(), delLocStart);
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testMatchingAlleles() { public void testMatchingAlleles() {
List<Allele> alleles = Arrays.asList(ATCref, del); List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make(); VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
Assert.assertTrue(vc.hasSameAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAllelesAs(vc2));
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
@ -386,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testAccessingCompleteGenotypes() { public void testAccessingCompleteGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del); List<Allele> alleles = Arrays.asList(Aref, T, ATC);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del)); Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del)); Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(10, vc.getCalledChrCount()); Assert.assertEquals(10, vc.getCalledChrCount());
Assert.assertEquals(3, vc.getCalledChrCount(Aref)); Assert.assertEquals(3, vc.getCalledChrCount(Aref));
Assert.assertEquals(4, vc.getCalledChrCount(T)); Assert.assertEquals(4, vc.getCalledChrCount(T));
Assert.assertEquals(3, vc.getCalledChrCount(del)); Assert.assertEquals(3, vc.getCalledChrCount(ATC));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL)); Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
} }
@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest {
public void testAccessingRefGenotypes() { public void testAccessingRefGenotypes() {
List<Allele> alleles1 = Arrays.asList(Aref, T); List<Allele> alleles1 = Arrays.asList(Aref, T);
List<Allele> alleles2 = Arrays.asList(Aref); List<Allele> alleles2 = Arrays.asList(Aref);
List<Allele> alleles3 = Arrays.asList(Aref, T, del); List<Allele> alleles3 = Arrays.asList(Aref, T);
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) { for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref)); Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testFilters() { public void testFilters() {
List<Allele> alleles = Arrays.asList(Aref, T, del); List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testRepeatAllele() { public void testRepeatAllele() {
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true); Allele nullR = Allele.create("A", true);
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false); Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("ATC", false); Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("ATCATC", false); Allele atcatc = Allele.create("AATCATC", false);
Allele ccccR = Allele.create("CCCC", true); Allele ccccR = Allele.create("ACCCC", true);
Allele cc = Allele.create("CC", false); Allele cc = Allele.create("ACC", false);
Allele cccccc = Allele.create("CCCCCC", false); Allele cccccc = Allele.create("ACCCCCC", false);
Allele gagaR = Allele.create("GAGA", true); Allele gagaR = Allele.create("AGAGA", true);
Allele gagagaga = Allele.create("GAGAGAGA", false); Allele gagagaga = Allele.create("AGAGAGAGA", false);
Pair<List<Integer>,byte[]> result; Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes(); byte[] refBytes = "TATCATCATCGGA".getBytes();
@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
// -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,3); Assert.assertEquals(result.getSecond().length,3);
// ATC*,-,ATCATC // ATC*,A,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make(); vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],2); Assert.assertEquals(result.getFirst().toArray()[1],2);
@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest {
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
refBytes = "TCCCCCCCAGAGAGAG".getBytes(); refBytes = "TCCCCCCCAGAGAGAG".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],7); Assert.assertEquals(result.getFirst().toArray()[0],7);
Assert.assertEquals(result.getFirst().toArray()[1],5); Assert.assertEquals(result.getFirst().toArray()[1],5);
@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest {
// GAGA*,-,GAGAGAGA // GAGA*,-,GAGAGAGA
refBytes = "TGAGAGAGAGATTT".getBytes(); refBytes = "TGAGAGAGAGATTT".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make(); vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],5); Assert.assertEquals(result.getFirst().toArray()[0],5);
Assert.assertEquals(result.getFirst().toArray()[1],3); Assert.assertEquals(result.getFirst().toArray()[1],3);
@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest {
@Test @Test
public void testVCFfromGenotypes() { public void testVCFfromGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del); List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del)); VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make();
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true); VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true); VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
Assert.assertTrue(vc12.isPolymorphicInSamples()); Assert.assertTrue(vc12.isPolymorphicInSamples());
Assert.assertTrue(vc23.isPolymorphicInSamples()); Assert.assertTrue(vc23.isPolymorphicInSamples());
Assert.assertTrue(vc1.isMonomorphicInSamples()); Assert.assertTrue(vc1.isMonomorphicInSamples());
Assert.assertTrue(vc4.isMonomorphicInSamples()); Assert.assertTrue(vc4.isMonomorphicInSamples());
Assert.assertTrue(vc14.isMonomorphicInSamples()); Assert.assertTrue(vc14.isMonomorphicInSamples());
Assert.assertTrue(vc5.isPolymorphicInSamples());
Assert.assertTrue(vc12.isSNP()); Assert.assertTrue(vc12.isSNP());
Assert.assertTrue(vc12.isVariant()); Assert.assertTrue(vc12.isVariant());
@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertFalse(vc14.isVariant()); Assert.assertFalse(vc14.isVariant());
Assert.assertFalse(vc14.isBiallelic()); Assert.assertFalse(vc14.isBiallelic());
Assert.assertTrue(vc5.isIndel());
Assert.assertTrue(vc5.isSimpleDeletion());
Assert.assertTrue(vc5.isVariant());
Assert.assertTrue(vc5.isBiallelic());
Assert.assertEquals(3, vc12.getCalledChrCount(Aref)); Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
Assert.assertEquals(1, vc23.getCalledChrCount(Aref)); Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
Assert.assertEquals(2, vc1.getCalledChrCount(Aref)); Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
Assert.assertEquals(0, vc4.getCalledChrCount(Aref)); Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
Assert.assertEquals(2, vc14.getCalledChrCount(Aref)); Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
} }
public void testGetGenotypeMethods() { public void testGetGenotypeMethods() {
@ -664,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest {
@DataProvider(name = "getAlleles") @DataProvider(name = "getAlleles")
public Object[][] mergeAllelesData() { public Object[][] mergeAllelesData() {
new GetAllelesTest("A*", Aref); new GetAllelesTest("A*", Aref);
new GetAllelesTest("-*", delRef);
new GetAllelesTest("A*/C", Aref, C); new GetAllelesTest("A*/C", Aref, C);
new GetAllelesTest("A*/C/T", Aref, C, T); new GetAllelesTest("A*/C/T", Aref, C, T);
new GetAllelesTest("A*/T/C", Aref, T, C); new GetAllelesTest("A*/T/C", Aref, T, C);
new GetAllelesTest("A*/C/T/-", Aref, C, T, del); new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC);
new GetAllelesTest("A*/T/C/-", Aref, T, C, del); new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC);
new GetAllelesTest("A*/-/T/C", Aref, del, T, C); new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C);
return GetAllelesTest.getTests(GetAllelesTest.class); return GetAllelesTest.getTests(GetAllelesTest.class);
} }
@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test(dataProvider = "getAlleles") @Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) { public void testMergeAlleles(GetAllelesTest cfg) {
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make(); final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError()); Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
Assert.assertEquals(sub.getFilters(), vc.getFilters()); Assert.assertEquals(sub.getFilters(), vc.getFilters());
Assert.assertEquals(sub.getID(), vc.getID()); Assert.assertEquals(sub.getID(), vc.getID());
Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel());
Assert.assertEquals(sub.getAttributes(), vc.getAttributes()); Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
Set<Genotype> expectedGenotypes = new HashSet<Genotype>(); Set<Genotype> expectedGenotypes = new HashSet<Genotype>();

View File

@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest { public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, delRef, Cref, ATC, ATCATC; Allele Aref, T, C, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
@BeforeSuite @BeforeSuite
@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest {
// alleles // alleles
Aref = Allele.create("A", true); Aref = Allele.create("A", true);
Cref = Allele.create("C", true); Cref = Allele.create("C", true);
delRef = Allele.create("-", true);
T = Allele.create("T"); T = Allele.create("T");
C = Allele.create("C"); C = Allele.create("C");
ATC = Allele.create("ATC"); ATC = Allele.create("ATC");
@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) { private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
int start = 10; int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start; int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make(); return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
} }
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Arrays.asList(Aref, C), Arrays.asList(Aref, C),
Arrays.asList(Aref, T, C)); // in order of appearence Arrays.asList(Aref, T, C)); // in order of appearence
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. new MergeAllelesTest(Arrays.asList(Aref),
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. Arrays.asList(Aref, ATC),
new MergeAllelesTest(Arrays.asList(delRef), Arrays.asList(Aref, ATC));
Arrays.asList(Cref));
new MergeAllelesTest(Arrays.asList(delRef), new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(delRef, ATC), Arrays.asList(Aref, ATC, ATCATC),
Arrays.asList(delRef, ATC)); Arrays.asList(Aref, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
// alleles in the order we see them // alleles in the order we see them
new MergeAllelesTest(Arrays.asList(delRef, ATCATC), new MergeAllelesTest(Arrays.asList(Aref, ATCATC),
Arrays.asList(delRef, ATC, ATCATC), Arrays.asList(Aref, ATC, ATCATC),
Arrays.asList(delRef, ATCATC, ATC)); Arrays.asList(Aref, ATCATC, ATC));
// same // same
new MergeAllelesTest(Arrays.asList(delRef, ATC), new MergeAllelesTest(Arrays.asList(Aref, ATC),
Arrays.asList(delRef, ATCATC), Arrays.asList(Aref, ATCATC),
Arrays.asList(delRef, ATC, ATCATC)); Arrays.asList(Aref, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class); return MergeAllelesTest.getTests(MergeAllelesTest.class);
} }
@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest {
// test alleles are equal // test alleles are equal
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
} }
// --------------------------------------------------------------------------------
//
// basic allele clipping test
//
// --------------------------------------------------------------------------------
private class ReverseClippingPositionTestProvider extends TestDataProvider {
final String ref;
final List<Allele> alleles = new ArrayList<Allele>();
final int expectedClip;
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
super(ReverseClippingPositionTestProvider.class);
this.ref = ref;
for ( final String allele : alleles )
this.alleles.add(Allele.create(allele));
this.expectedClip = expectedClip;
}
@Override
public String toString() {
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
}
}
@DataProvider(name = "ReverseClippingPositionTestProvider")
public Object[][] makeReverseClippingPositionTestProvider() {
// pair clipping
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
// triplets
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
} }

View File

@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
Allele A, Aref, T, Tref; Allele A, Aref, T, Tref;
Allele del, delRef, ATC, ATCref; Allele ATC, ATCref;
// A [ref] / T at 10 // A [ref] / T at 10
GenomeLoc snpLoc; GenomeLoc snpLoc;
@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest {
@BeforeMethod @BeforeMethod
public void before() { public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
A = Allele.create("A"); A = Allele.create("A");
Aref = Allele.create("A", true); Aref = Allele.create("A", true);
T = Allele.create("T"); T = Allele.create("T");

View File

@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
Map<String, Object> attributes = new HashMap<String,Object>(); Map<String, Object> attributes = new HashMap<String,Object>();
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
alleles.add(Allele.create("-",true)); alleles.add(Allele.create("A",true));
alleles.add(Allele.create("CC",false)); alleles.add(Allele.create("ACC",false));
attributes.put("DP","50"); attributes.put("DP","50");
for (String name : header.getGenotypeSamples()) { for (String name : header.getGenotypeSamples()) {
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
genotypes.add(gt); genotypes.add(gt);
} }
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles) return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make(); .genotypes(genotypes).attributes(attributes).make();
} }