Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-07-30 12:13:24 -04:00
commit 13591b169f
65 changed files with 740 additions and 1517 deletions

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.HashMap;
import java.util.Map;
@ -39,6 +41,7 @@ import java.util.Map;
* @since 10/30/11
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;

View File

@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
referenceSampleVC.getAlleles())
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
.make();
}

View File

@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
/*
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final List<Allele> allAllelesToUse){
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
List<Allele> alleles = pair.first;
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
haplotypeMap.clear();
@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final List<Allele> allelesToUse) {
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
}
}

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -184,8 +183,13 @@ public class GenotypingEngine {
}
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
@ -423,24 +427,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
if( nextVC.hasReferenceBaseForIndel() ) {
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -533,10 +534,10 @@ public class GenotypingEngine {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
boolean allN = true;
for( final byte b : insertionBases ) {
if( b != (byte) 'N' ) {
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
if( insertionBases[i] != (byte) 'N' ) {
allN = false;
break;
}
@ -544,14 +545,13 @@ public class GenotypingEngine {
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
}
@ -561,7 +561,7 @@ public class GenotypingEngine {
alignmentPos += elementLength;
break;
case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
@ -572,8 +572,8 @@ public class GenotypingEngine {
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
deletionAlleles.add( Allele.create(ref[refPos-1], false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
//}
refPos += elementLength;
break;

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -80,6 +82,7 @@ import java.util.*;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
public class HaplotypeResolver extends RodWalker<Integer, Integer> {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
@ -290,20 +289,22 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
@Test
// TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
@Test (enabled = false)
public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA";
final String altBases = (char)refByte + "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
trueAlleles.add(Allele.create("TC", false));
trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create((char)refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000};
@ -333,12 +334,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC
final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) {
for (int mismatches: mismatchArray) {
@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>();
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel);
final int refIdx = 0;
int altIdx = 2;
// ref allele must be first
allAlleles.add(Allele.create(refByte, true));
for (byte b: BaseUtils.BASES) {
if (refByte == b)
allAlleles.add(Allele.create(b,true));
else
if (refByte != b) {
if (b == altByte)
altIdx = allAlleles.size();
allAlleles.add(Allele.create(b, false));
}
}
PrintStream out = null;

View File

@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + ref + SNP = MNP with ref base gap
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion = MNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + deletion
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
}
/**

View File

@ -30,7 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6");
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -43,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
}
}

View File

@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
@ -46,6 +48,7 @@ import java.util.Iterator;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.

View File

@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -50,6 +52,7 @@ import java.io.File;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("Align")
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +

View File

@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -48,6 +50,7 @@ import java.util.TreeMap;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.

View File

@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}
/**

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -17,8 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Computes the coverage per sample.
* Computes the coverage per sample for every position (use with -L argument!).
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CoverageBySample extends LocusWalker<Integer, Integer> {
@Output
protected PrintStream out;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
@ -46,6 +48,7 @@ import java.io.PrintStream;
*
* @author aaron
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
// the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override
public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
// add all of the alt alleles
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
final byte refBaseForIndel = ref.getBases()[index];
Map<String, Object> attributes = new HashMap<String, Object>();
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make();
} else
boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else
return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles);
return builder.make();
}
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
}
}
@ -351,7 +366,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd();
if ( deletionLength > 0 )
end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc;
}
}

View File

@ -42,10 +42,6 @@ import java.util.List;
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() )
return;
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb);
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
}
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() )
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts);
}
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null )
return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
}
} else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC
final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
}
}
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
}
}
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE);
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts);
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

View File

@ -250,8 +250,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
String refString = vc_input.getReference().getDisplayString();
if (refString.length() == 0) // ref was null
refString = Allele.NULL_ALLELE_STRING;
Allele bglAlleleA, bglAlleleB;

View File

@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
for ( Allele allele : preferredVC.getAlleles() ) {
String bglPrintString;
if (allele.isNoCall() || allele.isNull())
if (allele.isNoCall())
bglPrintString = "-";
else
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

View File

@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
// write out the alleles at this site
for ( Allele allele : vc.getAlleles() ) {
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
}
// write out sample level genotypes

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
@ -80,6 +82,7 @@ import java.util.*;
* @author Mauricio Carneiro, Roger Zurawicki
* @since 5/8/12
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@By(value = DataSource.READS)
@PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> {

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -33,9 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.CONTIG)
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {

View File

@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
continue;
if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length();
deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
}

View File

@ -26,17 +26,20 @@
package org.broadinstitute.sting.gatk.walkers.fasta;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
/**
* Calculates basic statistics about the reference sequence itself
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
@Output PrintStream out;

View File

@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
// get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
altAllele = Allele.create(ref.getBase(), false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
final String insertionBases = (char)ref.getBase() + s; // add reference padding
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls) {
vcs.add(builder.make());

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
if (alleleList.isEmpty())
return null;
}
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty())
return null;
// start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0);
int endLoc = loc.getStart() + refAllele.length()-1;
if (allelesArePadded)
endLoc++;
return endLoc;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref,
final GenomeLoc loc,
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser,
final UnifiedArgumentCollection UAC,
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
// ignore places where we don't have a variant
if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false);
return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else {
alleles.addAll(vc.getAlleles());
}
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
}
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
return alleles;
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine {
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null )
return null;
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make();
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make();
} else {
// deal with bad/non-standard reference bases
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine {
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter);
if ( limitedContext ) {
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
} else {
builder.referenceBaseForIndel(refContext.getBase());
}
// create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
@ -493,8 +487,8 @@ public class UnifiedGenotyperEngine {
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall);
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
if ( annotationEngine != null && !limitedContext ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations

View File

@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
for ( VariantContext knownIndel : knownIndelsToTry ) {
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
continue;
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
final byte[] indelStr;
if ( knownIndel.isSimpleInsertion() ) {
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
} else {
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
}
int start = knownIndel.getStart() - leftmostIndex + 1;
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
if ( c != null )

View File

@ -1131,12 +1131,13 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
final byte referencePaddingBase = refBases[(int)start-1];
if ( call.getVariant() == null ) {
// we will need to cteate genotype with two (hom) ref alleles (below).
// we will need to create genotype with two (hom) ref alleles (below).
// we can not use 'alleles' list here, since that list is supposed to contain
// only *distinct* alleles observed at the site or VCFContext will frown upon us...
alleles.add( Allele.create(refBases[(int)start-1],true) );
alleles.add( Allele.create(referencePaddingBase,true) );
homref_alleles = new ArrayList<Allele>(2);
homref_alleles.add( alleles.get(0));
homref_alleles.add( alleles.get(0));
@ -1145,7 +1146,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
// (Genotype will tell us whether it is an actual call or not!)
int event_length = call.getVariant().lengthOnRef();
if ( event_length < 0 ) event_length = 0;
fillAlleleList(alleles,call);
fillAlleleList(alleles,call,referencePaddingBase);
stop += event_length;
}
@ -1165,7 +1166,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
filters.add("NoCall");
}
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
.genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make();
.genotypes(genotypes).filters(filters).make();
vcf.add(vc);
}
@ -1175,16 +1176,16 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
* @param l
* @param call
*/
private void fillAlleleList(List<Allele> l, IndelPrecall call) {
private void fillAlleleList(List<Allele> l, IndelPrecall call, byte referencePaddingBase) {
int event_length = call.getVariant().lengthOnRef();
if ( event_length == 0 ) { // insertion
l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
l.add( Allele.create(call.getVariant().getBases(), false ));
l.add( Allele.create(referencePaddingBase,true) );
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
} else { //deletion:
l.add( Allele.create(call.getVariant().getBases(), true ));
l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
l.add( Allele.create(referencePaddingBase,false) );
}
}
@ -1218,19 +1219,20 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
// }
boolean homRefT = ( tCall.getVariant() == null );
boolean homRefN = ( nCall.getVariant() == null );
final byte referencePaddingBase = refBases[(int)start-1];
if ( tCall.getVariant() == null && nCall.getVariant() == null) {
// no indel at all ; create base-representation ref/ref alleles for genotype construction
alleles.add( Allele.create(refBases[(int)start-1],true) );
alleles.add( Allele.create(referencePaddingBase,true) );
} else {
// we got indel(s)
int event_length = 0;
if ( tCall.getVariant() != null ) {
// indel in tumor
event_length = tCall.getVariant().lengthOnRef();
fillAlleleList(alleles, tCall);
fillAlleleList(alleles, tCall, referencePaddingBase);
} else {
event_length = nCall.getVariant().lengthOnRef();
fillAlleleList(alleles, nCall);
fillAlleleList(alleles, nCall, referencePaddingBase);
}
if ( event_length > 0 ) stop += event_length;
}
@ -1262,7 +1264,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
}
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
.genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make();
.genotypes(genotypes).filters(filters).attributes(attrs).make();
vcf.add(vc);
}

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
import java.util.*;
@ -70,6 +72,7 @@ import java.util.*;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
@Output
public PrintStream out;

View File

@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
sequenceInvalid = true;
invReason.add("SITE_IS_FILTERED");
}
String refString = validate.getReference().getDisplayString();
String altString = validate.getAlternateAllele(0).getDisplayString();
if ( validate.isIndel() ) {
sequence.append(Character.toUpperCase((char)ref.getBase()));
rawSequence.append(Character.toUpperCase((char)ref.getBase()));
final byte[] refAllele = validate.getReference().getBases();
refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length));
if ( refString.isEmpty() )
refString = "-";
final byte[] altAllele = validate.getAlternateAllele(0).getBases();
altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length));
if ( altString.isEmpty() )
altString = "-";
}
sequence.append('[');
sequence.append(validate.getAlternateAllele(0).toString());
sequence.append(altString);
sequence.append('/');
sequence.append(validate.getReference().toString());
sequence.append(refString);
sequence.append(']');
// do this to the raw sequence to -- the indeces will line up that way
rawSequence.append('[');
rawSequence.append(validate.getAlternateAllele(0).getBaseString());
rawSequence.append(altString);
rawSequence.append('/');
rawSequence.append(validate.getReference().getBaseString());
rawSequence.append(refString);
rawSequence.append(']');
allelePos = ref.getLocus();
if ( indelCounter > 0 ) {

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable {
final protected GenomeLoc loc;
/** A set of the alleles segregating in this context */
final protected List<Allele> alleles;
final protected Byte refBase;
// final protected HashMap<String, Object> attributes;
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes,
byte base) {
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes) {
this.loc = parser.createGenomeLoc(contig, start, stop);
this.alleles = alleles;
this.refBase = base;
// this.attributes = attributes;
}
@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable {
public VariantContext createVariantContextFromEvent() {
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.log10PError(0.0).referenceBaseForIndel(refBase).make();
.log10PError(0.0).make();
}
}

View File

@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
// create bare-bones event and log in corresponding bin
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
binnedEventArray[binIndex].add(event);

View File

@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
}
// create bare-bones event and log in corresponding bin
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
binnedEventArray.add(event);
}

View File

@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
//increment stats for pairwise mismatches
for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) {
if (allele.isCalled()) {
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);

View File

@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
final byte[] refSeq = ref.getBases();
// get the indel length
int indelLength;
final int indelLength;
if ( vc.isSimpleDeletion() )
indelLength = vc.getReference().length();
indelLength = vc.getReference().length() - 1;
else
indelLength = vc.getAlternateAllele(0).length();
indelLength = vc.getAlternateAllele(0).length() - 1;
if ( indelLength > 200 ) {
writer.add(vc);
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
}
// create an indel haplotype
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
// create a CIGAR string to represent the event
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
int indelIndex = originalIndex-difference;
byte[] newBases = new byte[indelLength];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
final int indelIndex = originalIndex-difference;
final byte[] newBases = new byte[indelLength + 1];
newBases[0] = refSeq[indelIndex-1];
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
newVC = updateAllele(newVC, newAllele);
writer.add(newVC);
return 1;
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
if ( vc.isSimpleDeletion() ) {
indexOfRef += indelLength;
} else {
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
currentPos += indelLength;
}
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
return hap;
}
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
// create a mapping from original allele to new allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
if ( newAllele.isReference() ) {
alleleMap.put(vc.getReference(), newAllele);
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
} else {
alleleMap.put(vc.getReference(), vc.getReference());
alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
alleleMap.put(vc.getAlternateAllele(0), newAllele);
}
@ -229,6 +230,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
}
}

View File

@ -119,7 +119,6 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( toInterval != null ) {
// check whether the strand flips, and if so reverse complement everything
// TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
vc = VariantContextUtils.reverseComplement(vc);
}
@ -132,11 +131,10 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
.attribute("OriginalStart", fromInterval.getStart()).make();
}
VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
}
writer.add(vc);

View File

@ -130,35 +130,16 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
return;
// get the true reference allele
Allele reportedRefAllele = vc.getReference();
Allele observedRefAllele = null;
// insertions
if ( vc.isSimpleInsertion() ) {
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
final Allele reportedRefAllele = vc.getReference();
final int refLength = reportedRefAllele.length();
if ( refLength > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
return;
}
// deletions
else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
return;
}
// deletions are associated with the (position of) the last (preceding) non-deleted base;
// hence to get actually deleted bases we need offset = 1
int offset = vc.isMNP() ? 0 : 1;
byte[] refBytes = ref.getBases();
byte[] trueRef = new byte[reportedRefAllele.length()];
for (int i = 0; i < reportedRefAllele.length(); i++)
trueRef[i] = refBytes[i+offset];
observedRefAllele = Allele.create(trueRef, true);
}
// SNPs, etc. but not mixed types because they are too difficult
else if ( !vc.isMixed() ) {
byte[] refByte = new byte[1];
refByte[0] = ref.getBase();
observedRefAllele = Allele.create(refByte, true);
}
final byte[] observedRefBases = new byte[refLength];
System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
final Allele observedRefAllele = Allele.create(observedRefBases);
// get the RS IDs
Set<String> rsIDs = null;
@ -171,10 +152,10 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
try {
switch( type ) {
case ALL:
vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
break;
case REF:
vc.validateReferenceBases(observedRefAllele, ref.getBase());
vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
break;
case IDS:
vc.validateRSIDs(rsIDs);

View File

@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
getters.put("REF", new Getter() {
public String get(VariantContext vc) {
StringBuilder x = new StringBuilder();
x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
x.append(vc.getReference().getDisplayString());
return x.toString();
}
});
@ -393,7 +393,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
for ( int i = 0; i < n; i++ ) {
if ( i != 0 ) x.append(",");
x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
x.append(vc.getAlternateAllele(i));
}
return x.toString();
}
@ -435,11 +435,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
private static Object splitAltAlleles(VariantContext vc) {
final int numAltAlleles = vc.getAlternateAlleles().size();
if ( numAltAlleles == 1 )
return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
return vc.getAlternateAllele(0);
final List<String> alleles = new ArrayList<String>(numAltAlleles);
for ( Allele allele : vc.getAlternateAlleles() )
alleles.add(vc.getAlleleStringWithRefPadding(allele));
return alleles;
return vc.getAlternateAlleles();
}
}

View File

@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
protected String sampleName = null;
/**
* This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
*/
@Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
protected boolean fixReferenceBase = false;
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
private boolean wroteHeader = false;
private Set<String> samples;
@ -140,10 +134,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
builder.genotypes(g);
}
if ( fixReferenceBase ) {
builder.referenceBaseForIndel(ref.getBase());
}
writeRecord(builder.make(), tracker, ref.getLocus());
}
@ -169,8 +159,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
continue;
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
hapmap.setActualAlleles(alleleMap);
// also, use the correct positioning for insertions

View File

@ -431,6 +431,37 @@ public class BaseUtils {
return new String(simpleComplement(bases.getBytes()));
}
/**
* Returns the uppercased version of the bases
*
* @param bases the bases
* @return the upper cased version
*/
static public byte[] convertToUpperCase(final byte[] bases) {
for ( int i = 0; i < bases.length; i++ ) {
if ( (char)bases[i] >= 'a' )
bases[i] = toUpperCaseBase(bases[i]);
}
return bases;
}
static public byte toUpperCaseBase(final byte base) {
switch (base) {
case 'a':
return 'A';
case 'c':
return 'C';
case 'g':
return 'G';
case 't':
return 'T';
case 'n':
return 'N';
default:
return base;
}
}
/**
* Returns the index of the most common base in the basecounts array. To be used with
* pileup.getBaseCounts.

View File

@ -176,7 +176,7 @@ public class Haplotype {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
@ -185,15 +185,16 @@ public class Haplotype {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
}
} else { // deletion
final int shift = refAllele.length() - altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
@ -204,8 +205,11 @@ public class Haplotype {
return new Haplotype(newHaplotype);
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
final int haplotypeSize, final int numPrefBases) {
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
final int startPos,
final ReferenceContext ref,
final int haplotypeSize,
final int numPrefBases) {
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
@ -216,7 +220,6 @@ public class Haplotype {
refAllele = a;
break;
}
}
if (refAllele == null)
@ -224,19 +227,12 @@ public class Haplotype {
byte[] refBases = ref.getBases();
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
// protect against long events that overrun available reference context
if (startAfter > refBases.length)
startAfter = refBases.length;
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
startAfter, refBases.length);
final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
// Create location for all haplotypes
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
@ -244,16 +240,14 @@ public class Haplotype {
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
for (final Allele a : alleleList) {
byte[] alleleBases = a.getBases();
final byte[] alleleBases = a.getBases();
// use string concatenation
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
haplotypeString = haplotypeString.substring(0,haplotypeSize);
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
}
return haplotypeMap;

View File

@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
builder.id(id);
}
/**
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
* GATK encoding.
*
* @param position
* @param ref
* @param unclippedAlleles
* @return
*/
@Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
@Ensures("result.size() == unclippedAlleles.size()")
protected List<Allele> clipAllelesIfNecessary(final int position,
final String ref,
final List<Allele> unclippedAlleles) {
// the last argument of 1 allows us to safely ignore the end, because we are
// ultimately going to use the end in the record itself
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
if ( clipped.getError() != null ) error(clipped.getError());
return clipped.getClippedAlleles();
}
/**
* Decode the alleles from this BCF2 file and put the results in builder
* @param builder
@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
}
assert ref != null;
alleles = clipAllelesIfNecessary(pos, ref, alleles);
builder.alleles(alleles);
assert ref.length() > 0;
builder.referenceBaseForIndel(ref.getBytes()[0]);
return alleles;
}

View File

@ -256,9 +256,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
final Map<String, Object> attrs = parseInfo(parts[7]);
builder.attributes(attrs);
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
} else {
builder.stop(pos + ref.length() - 1);
}
// get our alleles, filters, and setup an attribute map
final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo);
final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs);
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
builder.alleles(alleles);
// do we have genotyping data
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
@ -275,7 +286,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
VariantContext vc = null;
try {
builder.referenceBaseForIndel(ref.getBytes()[0]);
vc = builder.make();
} catch (Exception e) {
generateException(e.getMessage());
@ -284,31 +294,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
return vc;
}
private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
final String ref,
final int pos,
final List<Allele> rawAlleles,
final Map<String, Object> attrs) {
int endForSymbolicAlleles = pos; // by default we use the pos
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
// update stop with the end key if provided
try {
endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
}
// find out our current location, and clip the alleles down to their minimum length
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
if ( clipped.getError() != null )
generateException(clipped.getError(), lineNo);
builder.stop(clipped.getStop());
builder.alleles(clipped.getClippedAlleles());
return clipped.getClippedAlleles();
}
/**
* get the name of this codec
* @return our set name

View File

@ -1,434 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
/**
* All of the gross allele clipping and padding routines in one place
*
* Having attempted to understand / fix / document this code myself
* I can only conclude that this entire approach needs to be rethought. This
* code just doesn't work robustly with symbolic alleles, with multiple alleles,
* requires a special "reference base for indels" stored in the VariantContext
* whose correctness isn't enforced, and overall has strange special cases
* all over the place.
*
* The reason this code is so complex is due to symbolics and multi-alleleic
* variation, which frequently occur when combining variants from multiple
* VCF files.
*
* TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
* TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
*
* @author Mark DePristo
* @since 6/12
*/
public final class VCFAlleleClipper {
private VCFAlleleClipper() { }
/**
* Determine whether we should clip off the first base of all unclippped alleles or not
*
* Returns true if all of the alleles in unclippedAlleles share a common first base with
* ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may
* contain the reference allele itself, or just the alternate alleles, it doesn't matter.
*
* The algorithm returns true if the first base should be clipped off, or false otherwise
*
* This algorithm works even in the presence of symbolic alleles, logically ignoring these
* values. It
*
* @param unclippedAlleles list of unclipped alleles to assay
* @param ref0 the first base of the reference allele
* @return true if we should clip the first base of unclippedAlleles
*/
@Requires("unclippedAlleles != null")
public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
final byte ref0) {
boolean allSymbolicAlt = true;
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() ) {
continue;
}
// already know we aren't symbolic, so we only need to decide if we have only seen a ref
if ( ! a.isReference() )
allSymbolicAlt = false;
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
return false;
}
}
// to reach here all alleles are consistent with clipping the first base matching ref0
// but we don't clip if all ALT alleles are symbolic
return ! allSymbolicAlt;
}
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
final byte[] ref,
final int forwardClipping,
final boolean allowFullClip) {
int clipping = 0;
boolean stillClipping = true;
while ( stillClipping ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
if ( a.length() - clipping == 0 )
return clipping - (allowFullClip ? 0 : 1);
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
stillClipping = false;
}
else if ( ref.length == clipping ) {
if ( allowFullClip )
stillClipping = false;
else
return -1;
}
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
stillClipping = false;
}
}
if ( stillClipping )
clipping++;
}
return clipping;
}
/**
* Are the alleles describing a polymorphism substitution one base for another?
*
* @param alleles a list of alleles, must not be empty
* @return Return true if the length of any allele in alleles isn't 1
*/
@Requires("!alleles.isEmpty()")
private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
for ( final Allele a : alleles ) {
if ( a.length() != 1 )
return false;
}
return true;
}
/**
* clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
*
* The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
* position, after clipping. It also contains the list of alleles, in the same order as the provided
* unclipped ones, that are the fully clipped version of the input alleles. If an error occurs
* during this option the getError() function returns a string describing the problem (for use in parsers).
*
* The basic operation are:
*
* single allele
* => stop == start and clipped == unclipped
* any number of single nucleotide events
* => stop == start and clipped == unclipped
* two alleles, second being symbolic
* => stop == start and clipped == unclipped
* Note in this case that the STOP should be computed by other means (from END in VCF, for example)
* Note that if there's more than two alleles and the second is a symbolic the code produces an error
* Any other case:
* The alleles are trimmed of any sequence shared at the end of the alleles. If N bases
* are common then the alleles will all be at least N bases shorter.
* The stop position returned is the start position + the length of the
* reverse trimmed only reference allele - 1.
* If the alleles all share a single common starting sequence (just one base is considered)
* then the alleles have this leading common base removed as well.
*
* TODO This code is gross and brittle and needs to be rethought from scratch
*
* @param start the unadjusted start position (pre-clipping)
* @param ref the reference string
* @param unclippedAlleles the list of unclipped alleles, including the reference allele
* @return the new reference end position of this event
*/
@Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
@Ensures("result != null")
public static ClippedAlleles clipAlleles(final int start,
final String ref,
final List<Allele> unclippedAlleles,
final int endForSymbolicAllele ) {
// no variation or single nucleotide events are by definition fully clipped
if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
return new ClippedAlleles(start, unclippedAlleles, null);
// we've got to sort out the clipping by looking at the alleles themselves
final byte firstRefBase = (byte) ref.charAt(0);
final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
final int forwardClipping = firstBaseIsClipped ? 1 : 0;
final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
if ( reverseClipping == -1 )
return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
boolean sawSymbolic = false;
List<Allele> clippedAlleles;
if ( ! needsClipping ) {
// there's nothing to clip, so clippedAlleles are the original alleles
clippedAlleles = unclippedAlleles;
} else {
clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() ) {
sawSymbolic = true;
clippedAlleles.add(a);
} else {
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
if ( !Allele.acceptableAlleleBases(allele) )
return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
clippedAlleles.add(Allele.create(allele, a.isReference()));
}
}
}
int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
// TODO
// TODO
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
// TODO
// TODO
if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
// TODO
// TODO
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
// TODO
// TODO
final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
}
/**
* Returns true if the alleles in inputVC should have reference bases added for padding
*
* We need to pad a VC with a common base if the length of the reference allele is
* less than the length of the VariantContext. This happens because the position of
* e.g. an indel is always one before the actual event (as per VCF convention).
*
* @param inputVC the VC to evaluate, cannot be null
* @return true if
*/
public static boolean needsPadding(final VariantContext inputVC) {
// biallelic sites with only symbolic never need padding
if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
return false;
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
final int referenceLength = inputVC.getReference().length();
if ( referenceLength == recordLength )
return false;
else if ( referenceLength == recordLength - 1 )
return true;
else if ( !inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
return false;
}
public static Allele padAllele(final VariantContext vc, final Allele allele) {
assert needsPadding(vc);
if ( allele.isSymbolic() )
return allele;
else {
// get bases for current allele and create a new one with trimmed bases
final StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
final String newBases = sb.toString();
return Allele.create(newBases, allele.isReference());
}
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
final boolean padVC = needsPadding(inputVC);
// nothing to do if we don't need to pad bases
if ( padVC ) {
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
boolean paddedAtLeastOne = false;
for (final Allele a : inputVC.getAlleles()) {
final Allele padded = padAllele(inputVC, a);
paddedAtLeastOne = paddedAtLeastOne || padded != a;
alleles.add(padded);
if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
}
if ( ! paddedAtLeastOne )
throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
vcb.alleles(alleles);
// the position of the inputVC is one further, if it doesn't contain symbolic alleles
vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
if ( inputVC.hasGenotypes() ) {
assert unpaddedToPadded != null;
// now we can recreate new genotypes with trimmed alleles
final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
for (final Genotype g : inputVC.getGenotypes() ) {
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (final Allele a : g.getAlleles()) {
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
}
vcb.genotypes(genotypes);
}
return vcb.make();
}
else
return inputVC;
}
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// see if we need to trim common reference base from all alleles
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
}
@Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
public static class ClippedAlleles {
private final int stop;
private final List<Allele> clippedAlleles;
private final Byte refBaseForIndel;
private final String error;
@Requires({"stop > 0", "clippedAlleles != null"})
private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
this.stop = stop;
this.clippedAlleles = clippedAlleles;
this.error = null;
this.refBaseForIndel = refBaseForIndel;
}
@Requires("error != null")
private ClippedAlleles(final String error) {
this.stop = -1;
this.clippedAlleles = null;
this.refBaseForIndel = null;
this.error = error;
}
/**
* Get an error if it occurred
* @return the error message, or null if no error occurred
*/
public String getError() {
return error;
}
/**
* Get the stop position to use after the clipping as been applied, given the
* provided position to clipAlleles
* @return
*/
public int getStop() {
return stop;
}
/**
* Get the clipped alleles themselves
* @return the clipped alleles in the order of the input unclipped alleles
*/
public List<Allele> getClippedAlleles() {
return clippedAlleles;
}
/**
* Returns the reference base we should use for indels, or null if none is appropriate
* @return
*/
public Byte getRefBaseForIndel() {
return refBaseForIndel;
}
}
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.utils.variantcontext;
import java.util.ArrayList;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
* Immutable representation of an allele
@ -77,32 +77,36 @@ public class Allele implements Comparable<Allele> {
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
private boolean isRef = false;
private boolean isNull = false;
private boolean isNoCall = false;
private boolean isSymbolic = false;
private byte[] bases = null;
public final static String NULL_ALLELE_STRING = "-";
public final static String NO_CALL_STRING = ".";
/** A generic static NO_CALL allele for use */
// no public way to create an allele
private Allele(byte[] bases, boolean isRef) {
// standardize our representation of null allele and bases
// null alleles are no longer allowed
if ( wouldBeNullAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
isNull = true;
} else if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
throw new IllegalArgumentException("Null alleles are not supported");
}
// no-calls are represented as no bases
if ( wouldBeNoCallAllele(bases) ) {
this.bases = EMPTY_ALLELE_BASES;
isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
} else if ( wouldBeSymbolicAllele(bases) ) {
return;
}
if ( wouldBeSymbolicAllele(bases) ) {
isSymbolic = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
}
// else
// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
else {
bases = BaseUtils.convertToUpperCase(bases);
}
this.isRef = isRef;
this.bases = bases;
@ -126,8 +130,6 @@ public class Allele implements Comparable<Allele> {
private final static Allele ALT_T = new Allele("T", false);
private final static Allele REF_N = new Allele("N", true);
private final static Allele ALT_N = new Allele("N", false);
private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true);
private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false);
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
// ---------------------------------------------------------------------------------------------------------
@ -154,7 +156,6 @@ public class Allele implements Comparable<Allele> {
case '.':
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
return NO_CALL;
case '-': return isRef ? REF_NULL : ALT_NULL;
case 'A': case 'a' : return isRef ? REF_A : ALT_A;
case 'C': case 'c' : return isRef ? REF_C : ALT_C;
case 'G': case 'g' : return isRef ? REF_G : ALT_G;
@ -179,14 +180,9 @@ public class Allele implements Comparable<Allele> {
public static Allele extend(Allele left, byte[] right) {
if (left.isSymbolic())
throw new IllegalArgumentException("Cannot extend a symbolic allele");
byte[] bases = null;
if ( left.length() == 0 )
bases = right;
else {
bases = new byte[left.length() + right.length];
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
System.arraycopy(right, 0, bases, left.length(), right.length);
}
byte[] bases = new byte[left.length() + right.length];
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
System.arraycopy(right, 0, bases, left.length(), right.length);
return create(bases, left.isReference());
}
@ -242,7 +238,10 @@ public class Allele implements Comparable<Allele> {
}
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
if ( wouldBeNullAllele(bases) )
return false;
if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
return true;
for (byte base : bases ) {
@ -299,11 +298,6 @@ public class Allele implements Comparable<Allele> {
//
// ---------------------------------------------------------------------------------------------------------
//Returns true if this is the null allele
public boolean isNull() { return isNull; }
// Returns true if this is not the null allele
public boolean isNonNull() { return ! isNull(); }
// Returns true if this is the NO_CALL allele
public boolean isNoCall() { return isNoCall; }
// Returns true if this is not the NO_CALL allele
@ -319,7 +313,7 @@ public class Allele implements Comparable<Allele> {
// Returns a nice string representation of this object
public String toString() {
return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : "");
return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : "");
}
/**
@ -384,27 +378,27 @@ public class Allele implements Comparable<Allele> {
* @return true if this and other are equal
*/
public boolean equals(Allele other, boolean ignoreRefState) {
return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
}
/**
* @param test bases to test against
*
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
/**
* @param test bases to test against
*
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
/**
* @param test allele to test against
*
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
*/
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
@ -421,10 +415,6 @@ public class Allele implements Comparable<Allele> {
//
// ---------------------------------------------------------------------------------------------------------
public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
return getMatchingAllele(allAlleles, alleleBases.getBytes());
}
public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
for ( Allele a : allAlleles ) {
if ( a.basesMatch(alleleBases) ) {
@ -438,26 +428,6 @@ public class Allele implements Comparable<Allele> {
return null; // couldn't find anything
}
public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());
for ( String alleleString : alleleStrings ) {
Allele allele = getMatchingAllele(possibleAlleles, alleleString);
if ( allele == null ) {
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
allele = create(alleleString);
} else {
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
}
}
myAlleles.add(allele);
}
return myAlleles;
}
public int compareTo(Allele other) {
if ( isReference() && other.isNonReference() )
return -1;
@ -468,9 +438,6 @@ public class Allele implements Comparable<Allele> {
}
public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
if ( a1.isNull() || a2.isNull() )
return true;
if ( a2.length() >= a1.length() )
return firstIsPrefixOfSecond(a1, a2);
else

View File

@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
@Deprecated // ID is no longer stored in the attributes map
private final static String ID_KEY = "ID";
private final Byte REFERENCE_BASE_FOR_INDEL;
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
/** The location of this VariantContext */
@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
// ---------------------------------------------------------------------------------------------------------
public enum Validation {
REF_PADDING,
ALLELES,
GENOTYPES
}
@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
other.getFiltersMaybeNull(),
other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL,
other.getAttributes(),
other.fullyDecoded, NO_VALIDATION);
}
@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @param log10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param validationToPerform set of validation steps to take
*/
protected VariantContext(final String source,
@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
final double log10PError,
final Set<String> filters,
final Map<String, Object> attributes,
final Byte referenceBaseForIndel,
final boolean fullyDecoded,
final EnumSet<Validation> validationToPerform ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
// todo -- remove me when this check is no longer necessary
if ( this.commonInfo.hasAttribute(ID_KEY) )
@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
* in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples
*
* @param sampleNames
* @return
* @param sampleNames the sample names
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
* @return new VariantContext subsetting to just the given samples
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public boolean isSimpleInsertion() {
// can't just call !isSimpleDeletion() because of complex indels
return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
}
/**
@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
*/
public boolean isSimpleDeletion() {
// can't just call !isSimpleInsertion() because of complex indels
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
}
/**
@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
return ID;
}
public boolean hasReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL != null;
}
// the indel base that gets stripped off for indels
public Byte getReferenceBaseForIndel() {
return REFERENCE_BASE_FOR_INDEL;
}
public String getAlleleStringWithRefPadding(final Allele allele) {
if ( VCFAlleleClipper.needsPadding(this) )
return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
else
return allele.getDisplayString();
}
// ---------------------------------------------------------------------------------------------------------
//
@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
* for consistency with the multi-get function.
*
* @param sampleName
* @return
* @param sampleName the sample name
* @return mapping from sample name to genotype
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public GenotypesContext getGenotypes(String sampleName) {
@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* For testing convenience only
*
* @param sampleNames a unique list of sample names
* @return
* @return subsetting genotypes context
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio
/**
* Run all extra-strict validation tests on a Variant Context object
*
* @param reference the true reference allele
* @param paddedRefBase the reference base used for padding indels
* @param rsIDs the true dbSNP IDs
* @param reportedReference the reported reference allele
* @param observedReference the actual reference allele
* @param rsIDs the true dbSNP IDs
*/
public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) {
public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
// validate the reference
validateReferenceBases(reference, paddedRefBase);
validateReferenceBases(reportedReference, observedReference);
// validate the RS IDs
validateRSIDs(rsIDs);
@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
//checkReferenceTrack();
}
public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
if ( reference == null )
return;
// don't validate if we're a complex event
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
}
// we also need to validate the padding base for simple indels
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) {
if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString()));
}
}
@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
for (final Validation val : validationToPerform ) {
switch (val) {
case ALLELES: validateAlleles(); break;
case REF_PADDING: validateReferencePadding(); break;
case GENOTYPES: validateGenotypes(); break;
default: throw new IllegalArgumentException("Unexpected validation mode " + val);
}
@ -1151,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( hasAttribute(VCFConstants.END_KEY) ) {
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
assert end != -1;
if ( end != getEnd() && end != getEnd() + 1 ) {
// the end is allowed to 1 bigger because of the padding
if ( end != getEnd() ) {
final String message = "Badly formed variant context at location " + getChr() + ":"
+ getStart() + "; getEnd() was " + getEnd()
+ " but this VariantContext contains an END key with value " + end;
@ -1161,23 +1129,19 @@ public class VariantContext implements Feature { // to enable tribble integratio
else
throw new ReviewedStingException(message);
}
} else {
final long length = (stop - start) + 1;
if ( ! hasSymbolicAlleles() && length != getReference().length() ) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}
}
private void validateReferencePadding() {
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
return;
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
if ( needsPadding && !hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
}
private void validateAlleles() {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false;
for ( Allele allele : alleles ) {
boolean alreadySeenRef = false;
for ( final Allele allele : alleles ) {
// make sure there's only one reference allele
if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
@ -1187,26 +1151,11 @@ public class VariantContext implements Feature { // to enable tribble integratio
if ( allele.isNoCall() ) {
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
}
// make sure there's only one null allele
if ( allele.isNull() ) {
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
alreadySeenNull = true;
}
}
// make sure there's one reference allele
if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext");
// if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1;
if ( ! hasSymbolicAlleles()
&& ((getReference().isNull() && length != 1 )
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}
private void validateGenotypes() {

View File

@ -25,9 +25,6 @@
package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.*;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -74,7 +71,6 @@ public class VariantContextBuilder {
private Set<String> filters = null;
private Map<String, Object> attributes = null;
private boolean attributesCanBeModified = false;
private Byte referenceBaseForIndel = null;
/** enum of what must be validated */
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
@ -117,7 +113,6 @@ public class VariantContextBuilder {
this.genotypes = parent.genotypes;
this.ID = parent.getID();
this.log10PError = parent.getLog10PError();
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
this.source = parent.getSource();
this.start = parent.getStart();
this.stop = parent.getEnd();
@ -132,7 +127,6 @@ public class VariantContextBuilder {
this.genotypes = parent.genotypes;
this.ID = parent.ID;
this.log10PError = parent.log10PError;
this.referenceBaseForIndel = parent.referenceBaseForIndel;
this.source = parent.source;
this.start = parent.start;
this.stop = parent.stop;
@ -362,21 +356,6 @@ public class VariantContextBuilder {
return this;
}
/**
* Tells us that the resulting VariantContext should use this byte for the reference base
* Null means no refBase is available
* @param referenceBaseForIndel
*/
public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) {
this.referenceBaseForIndel = referenceBaseForIndel;
toValidate.add(VariantContext.Validation.REF_PADDING);
return this;
}
public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
}
/**
* Tells us that the resulting VariantContext should have source field set to source
* @param source
@ -401,7 +380,6 @@ public class VariantContextBuilder {
this.start = start;
this.stop = stop;
toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this;
}
@ -416,7 +394,6 @@ public class VariantContextBuilder {
this.start = loc.getStart();
this.stop = loc.getStop();
toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this;
}
@ -440,7 +417,6 @@ public class VariantContextBuilder {
public VariantContextBuilder start(final long start) {
this.start = start;
toValidate.add(VariantContext.Validation.ALLELES);
toValidate.add(VariantContext.Validation.REF_PADDING);
return this;
}
@ -517,6 +493,6 @@ public class VariantContextBuilder {
public VariantContext make() {
return new VariantContext(source, ID, contig, start, stop, alleles,
genotypes, log10PError, filters, attributes,
referenceBaseForIndel, fullyDecoded, toValidate);
fullyDecoded, toValidate);
}
}

View File

@ -64,9 +64,9 @@ public class VariantContextUtils {
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
* the resulting VC with default diploid ./. genotypes
*
* @param vc
* @param allSamples
* @return
* @param vc the VariantContext
* @param allSamples all of the samples needed
* @return a new VariantContext with missing samples added
*/
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
// TODO -- what's the fastest way to do this calculation?
@ -376,9 +376,9 @@ public class VariantContextUtils {
/**
* @deprecated use variant context builder version instead
* @param vc
* @param keysToPreserve
* @return
* @param vc the variant context
* @param keysToPreserve the keys to preserve
* @return a pruned version of the original variant context
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
@ -486,14 +486,13 @@ public class VariantContextUtils {
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
verifyUniqueSampleNames(unsortedVCs);
final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// Make sure all variant contexts are padded with reference base in case of indels if necessary
final List<VariantContext> VCs = new ArrayList<VariantContext>();
for (final VariantContext vc : prepaddedVCs) {
// also a reasonable place to remove filtered calls, if needed
for (final VariantContext vc : preFilteredVCs) {
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc));
VCs.add(vc);
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;
@ -547,9 +546,6 @@ public class VariantContextUtils {
filters.addAll(vc.getFilters());
if ( referenceBaseForIndel == null )
referenceBaseForIndel = vc.getReferenceBaseForIndel();
//
// add attributes
//
@ -661,10 +657,9 @@ public class VariantContextUtils {
builder.genotypes(genotypes);
builder.log10PError(log10PError);
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
builder.referenceBaseForIndel(referenceBaseForIndel);
// Trim the padded bases of all alleles if necessary
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
final VariantContext merged = builder.make();
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}
@ -700,73 +695,6 @@ public class VariantContextUtils {
return true;
}
private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC;
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
Allele refAllele = inputVC.getReference();
if (!inputVC.isVariant())
trimVC = false;
else if (refAllele.isNull())
trimVC = false;
else {
trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
}
// nothing to do if we don't need to trim bases
if (trimVC) {
List<Allele> alleles = new ArrayList<Allele>();
GenotypesContext genotypes = GenotypesContext.create();
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
// example: mixed records such as {TA*,TGA,TG}
boolean hasNullAlleles = false;
for (final Allele a: originalToTrimmedAlleleMap.values()) {
if (a.isNull())
hasNullAlleles = true;
}
if (!hasNullAlleles)
return inputVC;
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
List<Allele> originalAlleles = genotype.getAlleles();
List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
}
return inputVC;
}
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
@ -819,7 +747,7 @@ public class VariantContextUtils {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
}
return mappedVCs;
@ -881,10 +809,10 @@ public class VariantContextUtils {
//
// refAllele: ACGTGA
// myRef: ACGT
// myAlt: -
// myAlt: A
//
// We need to remap all of the alleles in vc to include the extra GA so that
// myRef => refAllele and myAlt => GA
// myRef => refAllele and myAlt => AGA
//
Allele myRef = vc.getReference();
@ -979,7 +907,7 @@ public class VariantContextUtils {
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
for ( Allele originalAllele : vc.getAlleles() ) {
Allele newAllele;
if ( originalAllele.isNoCall() || originalAllele.isNull() )
if ( originalAllele.isNoCall() )
newAllele = originalAllele;
else
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
@ -1235,13 +1163,14 @@ public class VariantContextUtils {
if ( ! vc.isIndel() ) // only indels are tandem repeats
return null;
final Allele ref = vc.getReference();
final Allele refAllele = vc.getReference();
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
byte[] repeatUnit = null;
final ArrayList<Integer> lengths = new ArrayList<Integer>();
for ( final Allele allele : vc.getAlternateAlleles() ) {
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
final int[] repetitionCount = result.first;
// repetition count = 0 means allele is not a tandem expansion of context
@ -1256,7 +1185,7 @@ public class VariantContextUtils {
repeatUnit = result.second;
if (VERBOSE) {
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
System.out.println("RU:"+new String(repeatUnit));
}
@ -1405,4 +1334,113 @@ public class VariantContextUtils {
return start + Math.max(ref.length() - 1, 0);
}
}
public static boolean requiresPaddingBase(final List<String> alleles) {
// see whether one of the alleles would be null if trimmed through
for ( final String allele : alleles ) {
if ( allele.isEmpty() )
return true;
}
int clipping = 0;
Character currentBase = null;
while ( true ) {
for ( final String allele : alleles ) {
if ( allele.length() - clipping == 0 )
return true;
char myBase = allele.charAt(clipping);
if ( currentBase == null )
currentBase = myBase;
else if ( currentBase != myBase )
return false;
}
clipping++;
currentBase = null;
}
}
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
// see whether we need to trim common reference base from all alleles
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
final byte[] ref,
final int forwardClipping,
final boolean allowFullClip) {
int clipping = 0;
boolean stillClipping = true;
while ( stillClipping ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
if ( a.length() - clipping == 0 )
return clipping - (allowFullClip ? 0 : 1);
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
stillClipping = false;
}
else if ( ref.length == clipping ) {
if ( allowFullClip )
stillClipping = false;
else
return -1;
}
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
stillClipping = false;
}
}
if ( stillClipping )
clipping++;
}
return clipping;
}
}

View File

@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
private void buildAlleles( VariantContext vc ) throws IOException {
final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
for ( Allele allele : vc.getAlleles() ) {
if ( needsPadding )
allele = VCFAlleleClipper.padAllele(vc, allele);
final byte[] s = allele.getDisplayBases();
if ( s == null )
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);

View File

@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter {
vc = new VariantContextBuilder(vc).noGenotypes().make();
try {
vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
super.add(vc);
Map<Allele, String> alleleMap = buildAlleleMap(vc);

View File

@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1,
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec(

View File

@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.util.*;
@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider {
boolean addBaseErrors, int phredScaledBaseErrorRate) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
Allele refAllele, altAllele;
if (eventLength == 0) {// SNP case
refAllele =Allele.create(referenceContext.getBase(),true);
altAllele = Allele.create(altBases.substring(0,1), false);
String refAllele, altAllele;
if (eventLength == 0) {
// SNP case
refAllele = new String(new byte[]{referenceContext.getBase()});
altAllele = new String(altBases.substring(0,1));
} else if (eventLength>0){
// insertion
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(altBases.substring(0,eventLength), false);
refAllele = "";
altAllele = altBases.substring(0,eventLength);
}
else {
// deletion
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
altAllele = "";
}
int stop = loc.getStart();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(referenceContext.getBase());
builder.noGenotypes();
final VariantContext vc = builder.make();
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
for (String sample: sampleNames) {
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
contexts.put(sample,context);
}
@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider {
rg.setSample(name);
return rg;
}
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readStart = contigStart;
int offset = (contigStop-contigStart+1)/2;
int refAlleleLength = 0;
int readCounter = 0;
int alleleCounter = 0;
for (Allele allele: vc.getAlleles()) {
if (allele.isReference())
refAlleleLength = allele.getBases().length;
int alleleLength = allele.getBases().length;
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
if (addErrors)
addBaseErrors(readBases, phredScaledErrorRate);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (allele.isReference())
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
alleleCounter++;
}
int refAlleleLength = refAllele.length();
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
return new ReadBackedPileupImpl(loc,pileupElements);
}
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
int alleleLength = allele.length();
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readCounter = 0;
for ( int d = 0; d < numReadsPerAllele; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
if (addErrors)
addBaseErrors(readBases, phredScaledErrorRate);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (isReference)
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
return pileupElements;
}
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
// create haplotype based on a particular allele
String prefix = refBases.substring(offset);
String alleleBases = new String(allele.getBases());
String prefix = refBases.substring(0, offset);
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
return (prefix+alleleBases+postfix).getBytes();
return (prefix+allele+postfix).getBytes();
}
private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {

View File

@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
eventLength = 3;
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
// same with min Reads = 11
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
altBases = "CCTCNTGAGA";
eventLength = 5;

View File

@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("27f9450afa132888a8994167f0035fd7"));
Arrays.asList("240d99b58f73985fb114abe9044c0271"));
executeTest("Test probes", spec);
}
@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
Arrays.asList("6e7789445e29d91979a21e78d3d53295"));
executeTest("Test probes", spec);
}
@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
Arrays.asList("18d7236208db603e143b40db06ef2aca"));
executeTest("Test probes", spec);
}

View File

@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }

View File

@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
executeTest("test bad ref allele in deletion", spec);
}
@Test
public void testComplexEvents() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("complexEvents.vcf", "ALL"),
0,
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
);
executeTest("test validating complex events", spec);
}
}

View File

@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
}
@Test
@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
}
@Test
@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
}
@Test
@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
}
@Test

View File

@ -1,226 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
public class VCFAlleleClipperUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
//
// Test allele clipping
//
// --------------------------------------------------------------------------------
private class ClipAllelesTest extends TestDataProvider {
final int position;
final int stop;
final String ref;
List<Allele> inputs;
List<Allele> expected;
@Requires("arg.length % 2 == 0")
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
super(ClipAllelesTest.class);
this.position = position;
this.stop = stop;
this.ref = arg[0];
int n = arg.length / 2;
inputs = new ArrayList<Allele>(n);
expected = new ArrayList<Allele>(n);
for ( int i = 0; i < n; i++ ) {
final boolean ref = i % n == 0;
inputs.add(Allele.create(arg[i], ref));
}
for ( int i = n; i < arg.length; i++ ) {
final boolean ref = i % n == 0;
expected.add(Allele.create(arg[i], ref));
}
}
public boolean isClipped() {
for ( int i = 0; i < inputs.size(); i++ ) {
if ( inputs.get(i).length() != expected.get(i).length() )
return true;
}
return false;
}
public String toString() {
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
}
}
@DataProvider(name = "ClipAllelesTest")
public Object[][] makeClipAllelesTest() {
// do no harm
new ClipAllelesTest(10, 10, "A", "A");
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
// insertions
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
// deletions
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
// multi-allelic insertion and deletions
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
// cannot be clipped
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
// symbolic
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
// symbolic with alleles that should be clipped
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
// clipping from both ends
//
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
//
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
// complex substitutions
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
return ClipAllelesTest.getTests(ClipAllelesTest.class);
}
@Test(dataProvider = "ClipAllelesTest")
public void testClipAllelesTest(ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
}
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
if ( cfg.isClipped() ) {
// TODO
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
// TODO actually the original stop, as the original stop is +1 its true size.
// TODO
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
}
}
// --------------------------------------------------------------------------------
//
// basic allele clipping test
//
// --------------------------------------------------------------------------------
private class ReverseClippingPositionTestProvider extends TestDataProvider {
final String ref;
final List<Allele> alleles = new ArrayList<Allele>();
final int expectedClip;
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
super(ReverseClippingPositionTestProvider.class);
this.ref = ref;
for ( final String allele : alleles )
this.alleles.add(Allele.create(allele));
this.expectedClip = expectedClip;
}
@Override
public String toString() {
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
}
}
@DataProvider(name = "ReverseClippingPositionTestProvider")
public Object[][] makeReverseClippingPositionTestProvider() {
// pair clipping
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
// triplets
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}

View File

@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
executeTest("Test reading and writing breakpoint VCF", spec1);
}
@Test(enabled = true)
public void testReadingLowerCaseBases() {
String testVCF = privateTestDir + "lowercaseBases.vcf";
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d"));
executeTest("Test reading VCF with lower-case bases", spec1);
}
@Test(enabled = true)
public void testReadingAndWriting1000GSVs() {
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
@ -57,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284"));
executeTest("Test reading and writing samtools vcf", spec1);
}
@ -66,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
executeTest("Test writing samtools WEx BCF example", spec1);
}

View File

@ -37,8 +37,6 @@ import org.testng.annotations.Test;
// public Allele(byte[] bases, boolean isRef) {
// public Allele(boolean isRef) {
// public Allele(String bases, boolean isRef) {
// public boolean isNullAllele() { return length() == 0; }
// public boolean isNonNullAllele() { return ! isNullAllele(); }
// public boolean isReference() { return isRef; }
// public boolean isNonReference() { return ! isReference(); }
// public byte[] getBases() { return bases; }
@ -49,13 +47,10 @@ import org.testng.annotations.Test;
* Basic unit test for RecalData
*/
public class AlleleUnitTest {
Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall;
Allele ARef, A, T, ATIns, ATCIns, NoCall;
@BeforeSuite
public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
A = Allele.create("A");
ARef = Allele.create("A", true);
T = Allele.create("T");
@ -72,8 +67,6 @@ public class AlleleUnitTest {
Assert.assertFalse(A.isReference());
Assert.assertTrue(A.basesMatch("A"));
Assert.assertEquals(A.length(), 1);
Assert.assertTrue(A.isNonNull());
Assert.assertFalse(A.isNull());
Assert.assertTrue(ARef.isReference());
Assert.assertFalse(ARef.isNonReference());
@ -92,8 +85,8 @@ public class AlleleUnitTest {
Assert.assertFalse(NoCall.isReference());
Assert.assertFalse(NoCall.basesMatch("."));
Assert.assertEquals(NoCall.length(), 0);
Assert.assertTrue(NoCall.isNonNull());
Assert.assertFalse(NoCall.isNull());
Assert.assertTrue(NoCall.isNoCall());
Assert.assertFalse(NoCall.isCalled());
}
@ -103,16 +96,6 @@ public class AlleleUnitTest {
Assert.assertEquals(ATCIns.length(), 3);
Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
Assert.assertTrue(del.isNonReference());
Assert.assertFalse(delRef.isNonReference());
Assert.assertFalse(del.isReference());
Assert.assertTrue(delRef.isReference());
Assert.assertFalse(del.basesMatch("-"));
Assert.assertTrue(del.basesMatch(""));
Assert.assertEquals(del.length(), 0);
Assert.assertFalse(del.isNonNull());
Assert.assertTrue(del.isNull());
}
@ -128,18 +111,6 @@ public class AlleleUnitTest {
Assert.assertFalse(a1.equals(a4));
}
@Test
public void testDelConstructors() {
Allele a1 = Allele.create("-");
Allele a2 = Allele.create("-".getBytes());
Allele a3 = Allele.create("");
Allele a4 = Allele.create("", true);
Assert.assertTrue(a1.equals(a2));
Assert.assertTrue(a1.equals(a3));
Assert.assertFalse(a1.equals(a4));
}
@Test
public void testInsConstructors() {
Allele a1 = Allele.create("AC");
@ -156,7 +127,6 @@ public class AlleleUnitTest {
public void testEquals() {
Assert.assertTrue(ARef.basesMatch(A));
Assert.assertFalse(ARef.equals(A));
Assert.assertFalse(ARef.equals(del));
Assert.assertFalse(ARef.equals(ATIns));
Assert.assertFalse(ARef.equals(ATCIns));
@ -164,11 +134,6 @@ public class AlleleUnitTest {
Assert.assertFalse(T.basesMatch(A));
Assert.assertFalse(T.equals(A));
Assert.assertTrue(del.basesMatch(del));
Assert.assertTrue(del.basesMatch(delRef));
Assert.assertTrue(del.equals(del));
Assert.assertFalse(del.equals(delRef));
Assert.assertTrue(ATIns.equals(ATIns));
Assert.assertFalse(ATIns.equals(ATCIns));
Assert.assertTrue(ATIns.basesMatch("AT"));
@ -209,7 +174,6 @@ public class AlleleUnitTest {
public void testExtend() {
Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString());
Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());

View File

@ -225,10 +225,10 @@ public class VariantContextTestProvider {
add(builder());
add(builder().alleles("A"));
add(builder().alleles("A", "C", "T"));
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
add(builder().alleles("A", "AC"));
add(builder().alleles("A", "ACAGT"));
add(builder().loc("1", 10, 11).alleles("AC", "A"));
add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
// make sure filters work
add(builder().unfiltered());
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
sites.add(builder().alleles("A").make());
sites.add(builder().alleles("A", "C", "T").make());
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
sites.add(builder().alleles("A", "AC").make());
sites.add(builder().alleles("A", "ACAGT").make());
for ( VariantContext site : sites ) {
addGenotypes(site);

View File

@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
int snpLocStart = 10;
int snpLocStop = 10;
// - / ATC [ref] from 20-23
// - / ATC [ref] from 20-22
String delLoc = "chr1";
int delLocStart = 20;
int delLocStop = 23;
int delLocStop = 22;
// - [ref] / ATC from 20-20
String insLoc = "chr1";
int insLocStart = 20;
int insLocStop = 20;
// - / A / T / ATC [ref] from 20-23
String mixedLoc = "chr1";
int mixedLocStart = 20;
int mixedLocStop = 23;
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
@BeforeSuite
public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
del = Allele.create("A");
delRef = Allele.create("A", true);
A = Allele.create("A");
C = Allele.create("C");
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
@BeforeMethod
public void beforeTest() {
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
}
@Test
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testCreatingDeletionVariantContext() {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
Assert.assertEquals(vc.getChr(), delLoc);
Assert.assertEquals(vc.getStart(), delLocStart);
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testMatchingAlleles() {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
@ -386,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testAccessingCompleteGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
List<Allele> alleles = Arrays.asList(Aref, T, ATC);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(10, vc.getCalledChrCount());
Assert.assertEquals(3, vc.getCalledChrCount(Aref));
Assert.assertEquals(4, vc.getCalledChrCount(T));
Assert.assertEquals(3, vc.getCalledChrCount(del));
Assert.assertEquals(3, vc.getCalledChrCount(ATC));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
}
@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest {
public void testAccessingRefGenotypes() {
List<Allele> alleles1 = Arrays.asList(Aref, T);
List<Allele> alleles2 = Arrays.asList(Aref);
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
List<Allele> alleles3 = Arrays.asList(Aref, T);
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testFilters() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testRepeatAllele() {
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
Allele atc = Allele.create("ATC", false);
Allele atcatc = Allele.create("ATCATC", false);
Allele ccccR = Allele.create("CCCC", true);
Allele cc = Allele.create("CC", false);
Allele cccccc = Allele.create("CCCCCC", false);
Allele gagaR = Allele.create("GAGA", true);
Allele gagagaga = Allele.create("GAGAGAGA", false);
Allele nullR = Allele.create("A", true);
Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("AATCATC", false);
Allele ccccR = Allele.create("ACCCC", true);
Allele cc = Allele.create("ACC", false);
Allele cccccc = Allele.create("ACCCCCC", false);
Allele gagaR = Allele.create("AGAGA", true);
Allele gagagaga = Allele.create("AGAGAGAGA", false);
Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes();
@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
// -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,3);
// ATC*,-,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make();
// ATC*,A,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],2);
@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest {
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],7);
Assert.assertEquals(result.getFirst().toArray()[1],5);
@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest {
// GAGA*,-,GAGAGAGA
refBytes = "TGAGAGAGAGATTT".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],5);
Assert.assertEquals(result.getFirst().toArray()[1],3);
@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testVCFfromGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make();
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
Assert.assertTrue(vc12.isPolymorphicInSamples());
Assert.assertTrue(vc23.isPolymorphicInSamples());
Assert.assertTrue(vc1.isMonomorphicInSamples());
Assert.assertTrue(vc4.isMonomorphicInSamples());
Assert.assertTrue(vc14.isMonomorphicInSamples());
Assert.assertTrue(vc5.isPolymorphicInSamples());
Assert.assertTrue(vc12.isSNP());
Assert.assertTrue(vc12.isVariant());
@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertFalse(vc14.isVariant());
Assert.assertFalse(vc14.isBiallelic());
Assert.assertTrue(vc5.isIndel());
Assert.assertTrue(vc5.isSimpleDeletion());
Assert.assertTrue(vc5.isVariant());
Assert.assertTrue(vc5.isBiallelic());
Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
}
public void testGetGenotypeMethods() {
@ -664,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest {
@DataProvider(name = "getAlleles")
public Object[][] mergeAllelesData() {
new GetAllelesTest("A*", Aref);
new GetAllelesTest("-*", delRef);
new GetAllelesTest("A*/C", Aref, C);
new GetAllelesTest("A*/C/T", Aref, C, T);
new GetAllelesTest("A*/T/C", Aref, T, C);
new GetAllelesTest("A*/C/T/-", Aref, C, T, del);
new GetAllelesTest("A*/T/C/-", Aref, T, C, del);
new GetAllelesTest("A*/-/T/C", Aref, del, T, C);
new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC);
new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC);
new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C);
return GetAllelesTest.getTests(GetAllelesTest.class);
}
@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) {
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
Assert.assertEquals(sub.getFilters(), vc.getFilters());
Assert.assertEquals(sub.getID(), vc.getID());
Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel());
Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
Set<Genotype> expectedGenotypes = new HashSet<Genotype>();

View File

@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
Allele Aref, T, C, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@BeforeSuite
@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest {
// alleles
Aref = Allele.create("A", true);
Cref = Allele.create("C", true);
delRef = Allele.create("-", true);
T = Allele.create("T");
C = Allele.create("C");
ATC = Allele.create("ATC");
@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
}
// --------------------------------------------------------------------------------
@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Arrays.asList(Aref, C),
Arrays.asList(Aref, T, C)); // in order of appearence
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(Cref));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref, ATC),
Arrays.asList(Aref, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref, ATC, ATCATC),
Arrays.asList(Aref, ATC, ATCATC));
// alleles in the order we see them
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATCATC, ATC));
new MergeAllelesTest(Arrays.asList(Aref, ATCATC),
Arrays.asList(Aref, ATC, ATCATC),
Arrays.asList(Aref, ATCATC, ATC));
// same
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(Aref, ATC),
Arrays.asList(Aref, ATCATC),
Arrays.asList(Aref, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class);
}
@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest {
// test alleles are equal
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
}
// --------------------------------------------------------------------------------
//
// basic allele clipping test
//
// --------------------------------------------------------------------------------
private class ReverseClippingPositionTestProvider extends TestDataProvider {
final String ref;
final List<Allele> alleles = new ArrayList<Allele>();
final int expectedClip;
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
super(ReverseClippingPositionTestProvider.class);
this.ref = ref;
for ( final String allele : alleles )
this.alleles.add(Allele.create(allele));
this.expectedClip = expectedClip;
}
@Override
public String toString() {
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
}
}
@DataProvider(name = "ReverseClippingPositionTestProvider")
public Object[][] makeReverseClippingPositionTestProvider() {
// pair clipping
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
// triplets
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}

View File

@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
Allele A, Aref, T, Tref;
Allele del, delRef, ATC, ATCref;
Allele ATC, ATCref;
// A [ref] / T at 10
GenomeLoc snpLoc;
@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest {
@BeforeMethod
public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
A = Allele.create("A");
Aref = Allele.create("A", true);
T = Allele.create("T");

View File

@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
Map<String, Object> attributes = new HashMap<String,Object>();
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
alleles.add(Allele.create("-",true));
alleles.add(Allele.create("CC",false));
alleles.add(Allele.create("A",true));
alleles.add(Allele.create("ACC",false));
attributes.put("DP","50");
for (String name : header.getGenotypeSamples()) {
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
genotypes.add(gt);
}
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
.genotypes(genotypes).attributes(attributes).make();
}