Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
13591b169f
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||
|
|
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -39,6 +41,7 @@ import java.util.Map;
|
|||
* @since 10/30/11
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
|
||||
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
|
||||
|
|
|
|||
|
|
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
|
||||
referenceSampleVC.getAlleles())
|
||||
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
|
||||
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
|
||||
.make();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
|
||||
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded = false;
|
||||
/*
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
|
|
@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
final List<Allele> allAllelesToUse){
|
||||
|
||||
|
||||
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
List<Allele> alleles = pair.first;
|
||||
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
|
||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||
allelesArePadded = pair.second;
|
||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
||||
haplotypeMap.clear();
|
||||
|
|
@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
protected int getEndLocation(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final List<Allele> allelesToUse) {
|
||||
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
|
||||
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -184,8 +183,13 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
|
||||
final ArrayList<Haplotype> haplotypes,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||
|
||||
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
|
||||
|
||||
|
|
@ -423,24 +427,21 @@ public class GenotypingEngine {
|
|||
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int thisStart = thisVC.getStart();
|
||||
final int nextStart = nextVC.getStart();
|
||||
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
||||
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
|
||||
byte[] refBases = new byte[]{};
|
||||
byte[] altBases = new byte[]{};
|
||||
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
||||
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
||||
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
int locus;
|
||||
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
final byte refByte = ref[locus - refLoc.getStart()];
|
||||
refBases = ArrayUtils.add(refBases, refByte);
|
||||
altBases = ArrayUtils.add(altBases, refByte);
|
||||
}
|
||||
if( nextVC.hasReferenceBaseForIndel() ) {
|
||||
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
|
||||
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
|
||||
}
|
||||
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
|
||||
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
|
||||
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
||||
|
||||
int iii = 0;
|
||||
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
||||
if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
||||
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
||||
}
|
||||
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
|
||||
|
|
@ -533,10 +534,10 @@ public class GenotypingEngine {
|
|||
final int elementLength = ce.getLength();
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
|
||||
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
|
||||
boolean allN = true;
|
||||
for( final byte b : insertionBases ) {
|
||||
if( b != (byte) 'N' ) {
|
||||
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
|
||||
if( insertionBases[i] != (byte) 'N' ) {
|
||||
allN = false;
|
||||
break;
|
||||
}
|
||||
|
|
@ -544,14 +545,13 @@ public class GenotypingEngine {
|
|||
if( !allN ) {
|
||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
} else {
|
||||
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -561,7 +561,7 @@ public class GenotypingEngine {
|
|||
alignmentPos += elementLength;
|
||||
break;
|
||||
case D:
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
|
||||
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
|
||||
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
|
||||
final int deletionStart = refLoc.getStart() + refPos - 1;
|
||||
// BUGBUG: how often does this symbolic deletion allele case happen?
|
||||
|
|
@ -572,8 +572,8 @@ public class GenotypingEngine {
|
|||
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
|
||||
//} else {
|
||||
deletionAlleles.add( Allele.create(deletionBases, true) );
|
||||
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
|
||||
deletionAlleles.add( Allele.create(ref[refPos-1], false) );
|
||||
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
|
||||
//}
|
||||
refPos += elementLength;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
|
||||
public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -290,20 +289,22 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
// TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
|
||||
// TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
|
||||
@Test (enabled = false)
|
||||
public void testIndelErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final String altBases = "TCA";
|
||||
final String altBases = (char)refByte + "TCA";
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
||||
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
|
||||
trueAlleles.add(Allele.create("TC", false));
|
||||
trueAlleles.add(Allele.create(refByte, true));
|
||||
trueAlleles.add(Allele.create((char)refByte + "TC", false));
|
||||
|
||||
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
|
||||
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
|
||||
|
||||
|
||||
final int[] matchArray = {95, 995, 9995, 10000};
|
||||
|
|
@ -333,12 +334,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// create deletion VC
|
||||
final int delLength = 4;
|
||||
final List<Allele> delAlleles = new ArrayList<Allele>();
|
||||
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
|
||||
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
|
||||
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
|
||||
delAlleles.add(Allele.create(refByte, false));
|
||||
|
||||
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
|
||||
|
||||
for (int matches: matchArray) {
|
||||
for (int mismatches: mismatchArray) {
|
||||
|
|
@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
final byte refByte = readPileupTestProvider.getRefByte();
|
||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||
|
||||
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
|
||||
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
|
||||
|
||||
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
|
||||
final Set<String> laneIDs = new TreeSet<String>();
|
||||
laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
|
||||
|
|
@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
for (String laneID : laneIDs)
|
||||
noisyErrorModels.put(laneID, Q30ErrorModel);
|
||||
|
||||
final int refIdx = 0;
|
||||
int altIdx = 2;
|
||||
|
||||
// ref allele must be first
|
||||
allAlleles.add(Allele.create(refByte, true));
|
||||
for (byte b: BaseUtils.BASES) {
|
||||
if (refByte == b)
|
||||
allAlleles.add(Allele.create(b,true));
|
||||
else
|
||||
if (refByte != b) {
|
||||
if (b == altByte)
|
||||
altIdx = allAlleles.size();
|
||||
allAlleles.add(Allele.create(b, false));
|
||||
}
|
||||
}
|
||||
|
||||
PrintStream out = null;
|
||||
|
|
|
|||
|
|
@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + ref + SNP = MNP with ref base gap
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
|
|
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + SNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
|
|
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + insertion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + SNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
|
|
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// SNP + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + deletion = MNP
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// insertion + insertion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + deletion
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
|
||||
// deletion + insertion (abutting)
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
|
||||
// complex + complex
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
|
||||
|
|
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
|
||||
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6");
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -43,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
|||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -46,6 +48,7 @@ import java.util.Iterator;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
|
|
|
|||
|
|
@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
|||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -50,6 +52,7 @@ import java.io.File;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@WalkerName("Align")
|
||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
||||
|
|
|
|||
|
|
@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
|||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -48,6 +50,7 @@ import java.util.TreeMap;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
|
|
|
|||
|
|
@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
// can't close tribble index when writing
|
||||
if ( message.indexOf("Unable to close index for") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
|
||||
// disk is full
|
||||
if ( message.indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
|
|||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -17,8 +19,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Computes the coverage per sample.
|
||||
* Computes the coverage per sample for every position (use with -L argument!).
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
|
|||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -46,6 +48,7 @@ import java.io.PrintStream;
|
|||
*
|
||||
* @author aaron
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
||||
// the possible diploid genotype strings
|
||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
|
|
|||
|
|
@ -163,43 +163,58 @@ public class VariantContextAdaptors {
|
|||
@Override
|
||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
|
||||
// add all of the alt alleles
|
||||
boolean sawNullAllele = refAllele.isNull();
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
Allele altAllele = Allele.create(alt, false);
|
||||
alleles.add(altAllele);
|
||||
if ( altAllele.isNull() )
|
||||
sawNullAllele = true;
|
||||
}
|
||||
final byte refBaseForIndel = ref.getBases()[index];
|
||||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.source(name).id(dbsnp.getRsID());
|
||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
|
||||
builder.alleles(alleles);
|
||||
builder.referenceBaseForIndel(refBaseForIndel);
|
||||
return builder.make();
|
||||
} else
|
||||
boolean addPaddingBase;
|
||||
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||
addPaddingBase = false;
|
||||
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
else
|
||||
return null; // can't handle anything else
|
||||
|
||||
Allele refAllele;
|
||||
if ( dbsnp.getNCBIRefBase().equals("-") )
|
||||
refAllele = Allele.create(refBaseForIndel, true);
|
||||
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
else
|
||||
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( Allele.wouldBeNullAllele(alt.getBytes()))
|
||||
alt = "";
|
||||
else if ( ! Allele.acceptableAlleleBases(alt) )
|
||||
return null;
|
||||
|
||||
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.source(name).id(dbsnp.getRsID());
|
||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
|
||||
builder.alleles(alleles);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
private static List<String> stripNullDashes(final List<String> alleles) {
|
||||
final List<String> newAlleles = new ArrayList<String>(alleles.size());
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.equals("-") )
|
||||
newAlleles.add("");
|
||||
else
|
||||
newAlleles.add(allele);
|
||||
}
|
||||
return newAlleles;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -351,7 +366,7 @@ public class VariantContextAdaptors {
|
|||
long end = hapmap.getEnd();
|
||||
if ( deletionLength > 0 )
|
||||
end += deletionLength;
|
||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,10 +42,6 @@ import java.util.List;
|
|||
*/
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
private static final String REF_ALLELE = "REF";
|
||||
|
||||
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return;
|
||||
|
|
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
if ( vc.isSNP() )
|
||||
annotateSNP(stratifiedContext, vc, gb);
|
||||
else if ( vc.isIndel() )
|
||||
annotateIndel(stratifiedContext, vc, gb);
|
||||
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
|
||||
}
|
||||
|
||||
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
|
|
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
if ( pileup == null )
|
||||
return;
|
||||
|
||||
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
|
||||
alleleCounts.put(REF_ALLELE, 0);
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||
final Allele refAllele = vc.getReference();
|
||||
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
|
||||
if ( allele.isNoCall() ) {
|
||||
continue; // this does not look so good, should we die???
|
||||
}
|
||||
|
||||
alleleCounts.put(getAlleleRepresentation(allele), 0);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
alleleCounts.put(allele, 0);
|
||||
}
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.isBeforeInsertion() ) {
|
||||
|
||||
final String b = p.getEventBases();
|
||||
if ( alleleCounts.containsKey(b) ) {
|
||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
||||
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
|
||||
if ( alleleCounts.containsKey(insertion) ) {
|
||||
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
|
||||
}
|
||||
|
||||
} else if ( p.isBeforeDeletionStart() ) {
|
||||
if ( p.getEventLength() == refAllele.length() ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final String b = DEL;
|
||||
if ( alleleCounts.containsKey(b) ) {
|
||||
alleleCounts.put(b, alleleCounts.get(b)+1);
|
||||
}
|
||||
if ( p.getEventLength() == refAllele.length() - 1 ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final Allele deletion = Allele.create(refBase);
|
||||
if ( alleleCounts.containsKey(deletion) ) {
|
||||
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
|
||||
}
|
||||
}
|
||||
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
|
||||
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
|
||||
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
|
||||
}
|
||||
}
|
||||
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
||||
final int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(refAllele);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
|
||||
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private String getAlleleRepresentation(Allele allele) {
|
||||
if ( allele.isNull() ) { // deletion wrt the ref
|
||||
return DEL;
|
||||
} else { // insertion, pass actual bases
|
||||
return allele.getBaseString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
|
|
|
|||
|
|
@ -250,8 +250,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
|
||||
String refString = vc_input.getReference().getDisplayString();
|
||||
if (refString.length() == 0) // ref was null
|
||||
refString = Allele.NULL_ALLELE_STRING;
|
||||
|
||||
Allele bglAlleleA, bglAlleleB;
|
||||
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
|
|||
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
|
||||
for ( Allele allele : preferredVC.getAlleles() ) {
|
||||
String bglPrintString;
|
||||
if (allele.isNoCall() || allele.isNull())
|
||||
if (allele.isNoCall())
|
||||
bglPrintString = "-";
|
||||
else
|
||||
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
|
|||
|
||||
// write out the alleles at this site
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
|
||||
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
|
||||
}
|
||||
|
||||
// write out sample level genotypes
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
|||
* @author Mauricio Carneiro, Roger Zurawicki
|
||||
* @since 5/8/12
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@By(value = DataSource.READS)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -33,9 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||
|
|
|
|||
|
|
@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference {
|
|||
continue;
|
||||
|
||||
if ( vc.isSimpleDeletion()) {
|
||||
deletionBasesRemaining = vc.getReference().length();
|
||||
deletionBasesRemaining = vc.getReference().length() - 1;
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
} else if ( vc.isSimpleInsertion()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
} else if (vc.isSNP()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,17 +26,20 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Calculates basic statistics about the reference sequence itself
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
|
||||
@Output PrintStream out;
|
||||
|
||||
|
|
|
|||
|
|
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
|
|||
// get ref bases of accurate deletion
|
||||
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
|
||||
stop = loc.getStart() + dLen;
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases, false)) {
|
||||
refAllele = Allele.create(refBases, true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
altAllele = Allele.create(ref.getBase(), false);
|
||||
}
|
||||
else continue; // don't go on with this allele if refBases are non-standard
|
||||
} else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(s, false);
|
||||
final String insertionBases = (char)ref.getBase() + s; // add reference padding
|
||||
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
altAllele = Allele.create(insertionBases, false);
|
||||
stop = loc.getStart();
|
||||
}
|
||||
else continue; // go on to next allele if consensus insertion has any non-standard base.
|
||||
|
|
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
|
|||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(Arrays.asList(refAllele, altAllele));
|
||||
builder.referenceBaseForIndel(ref.getBase());
|
||||
builder.noGenotypes();
|
||||
if (doMultiAllelicCalls) {
|
||||
vcs.add(builder.make());
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private boolean DEBUG = false;
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded;
|
||||
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
|
|
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
alleleList = pair.first;
|
||||
allelesArePadded = pair.second;
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
||||
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
||||
return null;
|
||||
|
||||
// start making the VariantContext
|
||||
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
|
||||
|
||||
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
|
||||
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
|
||||
final int eventLength = getEventLength(alleleList);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
|
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
|
||||
Allele refAllele = alleles.get(0);
|
||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
||||
if (allelesArePadded)
|
||||
endLoc++;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc loc,
|
||||
|
|
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
}
|
||||
|
||||
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC,
|
||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
boolean allelesArePadded = true;
|
||||
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
VariantContext vc = null;
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||
|
|
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
// ignore places where we don't have a variant
|
||||
if (vc == null)
|
||||
return new Pair<List<Allele>,Boolean>(alleles,false);
|
||||
return alleles;
|
||||
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
|
|
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
} else {
|
||||
alleles.addAll(vc.getAlleles());
|
||||
}
|
||||
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
||||
allelesArePadded = false;
|
||||
|
||||
|
||||
|
||||
} else {
|
||||
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
}
|
||||
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
|
||||
return alleles;
|
||||
}
|
||||
|
||||
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine {
|
|||
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||
if ( vcInput == null )
|
||||
return null;
|
||||
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make();
|
||||
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make();
|
||||
} else {
|
||||
// deal with bad/non-standard reference bases
|
||||
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
||||
|
|
@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine {
|
|||
builder.log10PError(phredScaledConfidence/-10.0);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||
builder.filters(filter);
|
||||
if ( limitedContext ) {
|
||||
builder.referenceBaseForIndel(vc.getReferenceBaseForIndel());
|
||||
} else {
|
||||
builder.referenceBaseForIndel(refContext.getBase());
|
||||
}
|
||||
|
||||
// create the genotypes
|
||||
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
|
||||
|
|
@ -493,8 +487,8 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
||||
vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall);
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext ) {
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
|
|
|
|||
|
|
@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
for ( VariantContext knownIndel : knownIndelsToTry ) {
|
||||
if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() )
|
||||
continue;
|
||||
byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
final byte[] indelStr;
|
||||
if ( knownIndel.isSimpleInsertion() ) {
|
||||
final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases();
|
||||
indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding
|
||||
} else {
|
||||
indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1);
|
||||
}
|
||||
int start = knownIndel.getStart() - leftmostIndex + 1;
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel);
|
||||
if ( c != null )
|
||||
|
|
|
|||
|
|
@ -1131,12 +1131,13 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
List<Allele> alleles = new ArrayList<Allele>(2); // actual observed (distinct!) alleles at the site
|
||||
List<Allele> homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype
|
||||
|
||||
final byte referencePaddingBase = refBases[(int)start-1];
|
||||
|
||||
if ( call.getVariant() == null ) {
|
||||
// we will need to cteate genotype with two (hom) ref alleles (below).
|
||||
// we will need to create genotype with two (hom) ref alleles (below).
|
||||
// we can not use 'alleles' list here, since that list is supposed to contain
|
||||
// only *distinct* alleles observed at the site or VCFContext will frown upon us...
|
||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
||||
alleles.add( Allele.create(referencePaddingBase,true) );
|
||||
homref_alleles = new ArrayList<Allele>(2);
|
||||
homref_alleles.add( alleles.get(0));
|
||||
homref_alleles.add( alleles.get(0));
|
||||
|
|
@ -1145,7 +1146,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
// (Genotype will tell us whether it is an actual call or not!)
|
||||
int event_length = call.getVariant().lengthOnRef();
|
||||
if ( event_length < 0 ) event_length = 0;
|
||||
fillAlleleList(alleles,call);
|
||||
fillAlleleList(alleles,call,referencePaddingBase);
|
||||
stop += event_length;
|
||||
}
|
||||
|
||||
|
|
@ -1165,7 +1166,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
filters.add("NoCall");
|
||||
}
|
||||
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
||||
.genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make();
|
||||
.genotypes(genotypes).filters(filters).make();
|
||||
vcf.add(vc);
|
||||
}
|
||||
|
||||
|
|
@ -1175,16 +1176,16 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
* @param l
|
||||
* @param call
|
||||
*/
|
||||
private void fillAlleleList(List<Allele> l, IndelPrecall call) {
|
||||
private void fillAlleleList(List<Allele> l, IndelPrecall call, byte referencePaddingBase) {
|
||||
int event_length = call.getVariant().lengthOnRef();
|
||||
if ( event_length == 0 ) { // insertion
|
||||
|
||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
l.add( Allele.create(call.getVariant().getBases(), false ));
|
||||
l.add( Allele.create(referencePaddingBase,true) );
|
||||
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
|
||||
|
||||
} else { //deletion:
|
||||
l.add( Allele.create(call.getVariant().getBases(), true ));
|
||||
l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
|
||||
l.add( Allele.create(referencePaddingBase,false) );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1218,19 +1219,20 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
// }
|
||||
boolean homRefT = ( tCall.getVariant() == null );
|
||||
boolean homRefN = ( nCall.getVariant() == null );
|
||||
final byte referencePaddingBase = refBases[(int)start-1];
|
||||
if ( tCall.getVariant() == null && nCall.getVariant() == null) {
|
||||
// no indel at all ; create base-representation ref/ref alleles for genotype construction
|
||||
alleles.add( Allele.create(refBases[(int)start-1],true) );
|
||||
alleles.add( Allele.create(referencePaddingBase,true) );
|
||||
} else {
|
||||
// we got indel(s)
|
||||
int event_length = 0;
|
||||
if ( tCall.getVariant() != null ) {
|
||||
// indel in tumor
|
||||
event_length = tCall.getVariant().lengthOnRef();
|
||||
fillAlleleList(alleles, tCall);
|
||||
fillAlleleList(alleles, tCall, referencePaddingBase);
|
||||
} else {
|
||||
event_length = nCall.getVariant().lengthOnRef();
|
||||
fillAlleleList(alleles, nCall);
|
||||
fillAlleleList(alleles, nCall, referencePaddingBase);
|
||||
}
|
||||
if ( event_length > 0 ) stop += event_length;
|
||||
}
|
||||
|
|
@ -1262,7 +1264,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
||||
.genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make();
|
||||
.genotypes(genotypes).filters(filters).attributes(attrs).make();
|
||||
vcf.add(vc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -70,6 +72,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
sequenceInvalid = true;
|
||||
invReason.add("SITE_IS_FILTERED");
|
||||
}
|
||||
|
||||
String refString = validate.getReference().getDisplayString();
|
||||
String altString = validate.getAlternateAllele(0).getDisplayString();
|
||||
|
||||
if ( validate.isIndel() ) {
|
||||
sequence.append(Character.toUpperCase((char)ref.getBase()));
|
||||
rawSequence.append(Character.toUpperCase((char)ref.getBase()));
|
||||
final byte[] refAllele = validate.getReference().getBases();
|
||||
refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length));
|
||||
if ( refString.isEmpty() )
|
||||
refString = "-";
|
||||
final byte[] altAllele = validate.getAlternateAllele(0).getBases();
|
||||
altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length));
|
||||
if ( altString.isEmpty() )
|
||||
altString = "-";
|
||||
}
|
||||
|
||||
sequence.append('[');
|
||||
sequence.append(validate.getAlternateAllele(0).toString());
|
||||
sequence.append(altString);
|
||||
sequence.append('/');
|
||||
sequence.append(validate.getReference().toString());
|
||||
sequence.append(refString);
|
||||
sequence.append(']');
|
||||
// do this to the raw sequence to -- the indeces will line up that way
|
||||
rawSequence.append('[');
|
||||
rawSequence.append(validate.getAlternateAllele(0).getBaseString());
|
||||
rawSequence.append(altString);
|
||||
rawSequence.append('/');
|
||||
rawSequence.append(validate.getReference().getBaseString());
|
||||
rawSequence.append(refString);
|
||||
rawSequence.append(']');
|
||||
allelePos = ref.getLocus();
|
||||
if ( indelCounter > 0 ) {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable {
|
|||
final protected GenomeLoc loc;
|
||||
/** A set of the alleles segregating in this context */
|
||||
final protected List<Allele> alleles;
|
||||
final protected Byte refBase;
|
||||
// final protected HashMap<String, Object> attributes;
|
||||
|
||||
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes,
|
||||
byte base) {
|
||||
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes) {
|
||||
this.loc = parser.createGenomeLoc(contig, start, stop);
|
||||
this.alleles = alleles;
|
||||
this.refBase = base;
|
||||
// this.attributes = attributes;
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable {
|
|||
|
||||
public VariantContext createVariantContextFromEvent() {
|
||||
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
.log10PError(0.0).referenceBaseForIndel(refBase).make();
|
||||
.log10PError(0.0).make();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
|
|||
|
||||
// create bare-bones event and log in corresponding bin
|
||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
|
||||
|
||||
binnedEventArray[binIndex].add(event);
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
|
|||
}
|
||||
// create bare-bones event and log in corresponding bin
|
||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes);
|
||||
binnedEventArray.add(event);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
//increment stats for pairwise mismatches
|
||||
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
if (allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
|
|
|
|||
|
|
@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
final byte[] refSeq = ref.getBases();
|
||||
|
||||
// get the indel length
|
||||
int indelLength;
|
||||
final int indelLength;
|
||||
if ( vc.isSimpleDeletion() )
|
||||
indelLength = vc.getReference().length();
|
||||
indelLength = vc.getReference().length() - 1;
|
||||
else
|
||||
indelLength = vc.getAlternateAllele(0).length();
|
||||
indelLength = vc.getAlternateAllele(0).length() - 1;
|
||||
|
||||
if ( indelLength > 200 ) {
|
||||
writer.add(vc);
|
||||
|
|
@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// create an indel haplotype
|
||||
int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||
final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||
final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength);
|
||||
|
||||
// create a CIGAR string to represent the event
|
||||
|
|
@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
|
||||
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
|
||||
|
||||
int indelIndex = originalIndex-difference;
|
||||
byte[] newBases = new byte[indelLength];
|
||||
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength);
|
||||
Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
|
||||
newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]);
|
||||
final int indelIndex = originalIndex-difference;
|
||||
final byte[] newBases = new byte[indelLength + 1];
|
||||
newBases[0] = refSeq[indelIndex-1];
|
||||
System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength);
|
||||
final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion());
|
||||
newVC = updateAllele(newVC, newAllele);
|
||||
|
||||
writer.add(newVC);
|
||||
return 1;
|
||||
|
|
@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
if ( vc.isSimpleDeletion() ) {
|
||||
indexOfRef += indelLength;
|
||||
} else {
|
||||
System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength);
|
||||
System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength);
|
||||
currentPos += indelLength;
|
||||
}
|
||||
|
||||
|
|
@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
return hap;
|
||||
}
|
||||
|
||||
public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) {
|
||||
public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) {
|
||||
// create a mapping from original allele to new allele
|
||||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||
if ( newAllele.isReference() ) {
|
||||
alleleMap.put(vc.getReference(), newAllele);
|
||||
alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0));
|
||||
alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false));
|
||||
} else {
|
||||
alleleMap.put(vc.getReference(), vc.getReference());
|
||||
alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true));
|
||||
alleleMap.put(vc.getAlternateAllele(0), newAllele);
|
||||
}
|
||||
|
||||
|
|
@ -229,6 +230,6 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,7 +119,6 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( toInterval != null ) {
|
||||
// check whether the strand flips, and if so reverse complement everything
|
||||
// TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
|
||||
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
|
||||
vc = VariantContextUtils.reverseComplement(vc);
|
||||
}
|
||||
|
|
@ -132,11 +131,10 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||
}
|
||||
|
||||
VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
|
||||
originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
|
||||
originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));
|
||||
}
|
||||
|
||||
writer.add(vc);
|
||||
|
|
|
|||
|
|
@ -130,35 +130,16 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
return;
|
||||
|
||||
// get the true reference allele
|
||||
Allele reportedRefAllele = vc.getReference();
|
||||
Allele observedRefAllele = null;
|
||||
// insertions
|
||||
if ( vc.isSimpleInsertion() ) {
|
||||
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
|
||||
final Allele reportedRefAllele = vc.getReference();
|
||||
final int refLength = reportedRefAllele.length();
|
||||
if ( refLength > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart()));
|
||||
return;
|
||||
}
|
||||
// deletions
|
||||
else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
|
||||
// we can't validate arbitrarily long deletions
|
||||
if ( reportedRefAllele.length() > 100 ) {
|
||||
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
|
||||
return;
|
||||
}
|
||||
|
||||
// deletions are associated with the (position of) the last (preceding) non-deleted base;
|
||||
// hence to get actually deleted bases we need offset = 1
|
||||
int offset = vc.isMNP() ? 0 : 1;
|
||||
byte[] refBytes = ref.getBases();
|
||||
byte[] trueRef = new byte[reportedRefAllele.length()];
|
||||
for (int i = 0; i < reportedRefAllele.length(); i++)
|
||||
trueRef[i] = refBytes[i+offset];
|
||||
observedRefAllele = Allele.create(trueRef, true);
|
||||
}
|
||||
// SNPs, etc. but not mixed types because they are too difficult
|
||||
else if ( !vc.isMixed() ) {
|
||||
byte[] refByte = new byte[1];
|
||||
refByte[0] = ref.getBase();
|
||||
observedRefAllele = Allele.create(refByte, true);
|
||||
}
|
||||
final byte[] observedRefBases = new byte[refLength];
|
||||
System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
|
||||
final Allele observedRefAllele = Allele.create(observedRefBases);
|
||||
|
||||
// get the RS IDs
|
||||
Set<String> rsIDs = null;
|
||||
|
|
@ -171,10 +152,10 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
try {
|
||||
switch( type ) {
|
||||
case ALL:
|
||||
vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs);
|
||||
vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs);
|
||||
break;
|
||||
case REF:
|
||||
vc.validateReferenceBases(observedRefAllele, ref.getBase());
|
||||
vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
|
||||
break;
|
||||
case IDS:
|
||||
vc.validateRSIDs(rsIDs);
|
||||
|
|
|
|||
|
|
@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
getters.put("REF", new Getter() {
|
||||
public String get(VariantContext vc) {
|
||||
StringBuilder x = new StringBuilder();
|
||||
x.append(vc.getAlleleStringWithRefPadding(vc.getReference()));
|
||||
x.append(vc.getReference().getDisplayString());
|
||||
return x.toString();
|
||||
}
|
||||
});
|
||||
|
|
@ -393,7 +393,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
if ( i != 0 ) x.append(",");
|
||||
x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i)));
|
||||
x.append(vc.getAlternateAllele(i));
|
||||
}
|
||||
return x.toString();
|
||||
}
|
||||
|
|
@ -435,11 +435,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
private static Object splitAltAlleles(VariantContext vc) {
|
||||
final int numAltAlleles = vc.getAlternateAlleles().size();
|
||||
if ( numAltAlleles == 1 )
|
||||
return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0));
|
||||
return vc.getAlternateAllele(0);
|
||||
|
||||
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
||||
for ( Allele allele : vc.getAlternateAlleles() )
|
||||
alleles.add(vc.getAlleleStringWithRefPadding(allele));
|
||||
return alleles;
|
||||
return vc.getAlternateAlleles();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false)
|
||||
protected String sampleName = null;
|
||||
|
||||
/**
|
||||
* This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF).
|
||||
*/
|
||||
@Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false)
|
||||
protected boolean fixReferenceBase = false;
|
||||
|
||||
private Set<String> allowedGenotypeFormatStrings = new HashSet<String>();
|
||||
private boolean wroteHeader = false;
|
||||
private Set<String> samples;
|
||||
|
|
@ -140,10 +134,6 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
builder.genotypes(g);
|
||||
}
|
||||
|
||||
if ( fixReferenceBase ) {
|
||||
builder.referenceBaseForIndel(ref.getBase());
|
||||
}
|
||||
|
||||
writeRecord(builder.make(), tracker, ref.getLocus());
|
||||
}
|
||||
|
||||
|
|
@ -169,8 +159,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
continue;
|
||||
|
||||
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||
hapmap.setActualAlleles(alleleMap);
|
||||
|
||||
// also, use the correct positioning for insertions
|
||||
|
|
|
|||
|
|
@ -431,6 +431,37 @@ public class BaseUtils {
|
|||
return new String(simpleComplement(bases.getBytes()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the uppercased version of the bases
|
||||
*
|
||||
* @param bases the bases
|
||||
* @return the upper cased version
|
||||
*/
|
||||
static public byte[] convertToUpperCase(final byte[] bases) {
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
if ( (char)bases[i] >= 'a' )
|
||||
bases[i] = toUpperCaseBase(bases[i]);
|
||||
}
|
||||
return bases;
|
||||
}
|
||||
|
||||
static public byte toUpperCaseBase(final byte base) {
|
||||
switch (base) {
|
||||
case 'a':
|
||||
return 'A';
|
||||
case 'c':
|
||||
return 'C';
|
||||
case 'g':
|
||||
return 'G';
|
||||
case 't':
|
||||
return 'T';
|
||||
case 'n':
|
||||
return 'N';
|
||||
default:
|
||||
return base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the most common base in the basecounts array. To be used with
|
||||
* pileup.getBaseCounts.
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ public class Haplotype {
|
|||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||
final int altAlleleLength = altAllele.length();
|
||||
final int altAlleleLength = altAllele.length() - 1;
|
||||
newHaplotype = new byte[bases.length + altAlleleLength];
|
||||
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
|
|
@ -185,15 +185,16 @@ public class Haplotype {
|
|||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||
}
|
||||
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
|
||||
}
|
||||
} else { // deletion
|
||||
final int shift = refAllele.length() - altAllele.length();
|
||||
final int altAlleleLength = altAllele.length() - 1;
|
||||
newHaplotype = new byte[bases.length - shift];
|
||||
for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) {
|
||||
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
}
|
||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) {
|
||||
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii+shift];
|
||||
}
|
||||
}
|
||||
|
|
@ -204,8 +205,11 @@ public class Haplotype {
|
|||
return new Haplotype(newHaplotype);
|
||||
}
|
||||
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
||||
final int haplotypeSize, final int numPrefBases) {
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
|
||||
final int startPos,
|
||||
final ReferenceContext ref,
|
||||
final int haplotypeSize,
|
||||
final int numPrefBases) {
|
||||
|
||||
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||
|
||||
|
|
@ -216,7 +220,6 @@ public class Haplotype {
|
|||
refAllele = a;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (refAllele == null)
|
||||
|
|
@ -224,19 +227,12 @@ public class Haplotype {
|
|||
|
||||
byte[] refBases = ref.getBases();
|
||||
|
||||
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
|
||||
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
|
||||
|
||||
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
|
||||
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
|
||||
|
||||
|
||||
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
|
||||
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
|
||||
// protect against long events that overrun available reference context
|
||||
if (startAfter > refBases.length)
|
||||
startAfter = refBases.length;
|
||||
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
|
||||
startAfter, refBases.length);
|
||||
|
||||
final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
|
||||
final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
|
||||
|
||||
// Create location for all haplotypes
|
||||
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
|
||||
|
|
@ -244,16 +240,14 @@ public class Haplotype {
|
|||
|
||||
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
|
||||
|
||||
|
||||
for (final Allele a : alleleList) {
|
||||
|
||||
byte[] alleleBases = a.getBases();
|
||||
final byte[] alleleBases = a.getBases();
|
||||
// use string concatenation
|
||||
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
||||
String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
|
||||
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
||||
|
||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||
|
||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||
}
|
||||
|
||||
return haplotypeMap;
|
||||
|
|
|
|||
|
|
@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
builder.id(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
|
||||
* GATK encoding.
|
||||
*
|
||||
* @param position
|
||||
* @param ref
|
||||
* @param unclippedAlleles
|
||||
* @return
|
||||
*/
|
||||
@Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
|
||||
@Ensures("result.size() == unclippedAlleles.size()")
|
||||
protected List<Allele> clipAllelesIfNecessary(final int position,
|
||||
final String ref,
|
||||
final List<Allele> unclippedAlleles) {
|
||||
// the last argument of 1 allows us to safely ignore the end, because we are
|
||||
// ultimately going to use the end in the record itself
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
|
||||
if ( clipped.getError() != null ) error(clipped.getError());
|
||||
return clipped.getClippedAlleles();
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the alleles from this BCF2 file and put the results in builder
|
||||
* @param builder
|
||||
|
|
@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
}
|
||||
assert ref != null;
|
||||
|
||||
alleles = clipAllelesIfNecessary(pos, ref, alleles);
|
||||
builder.alleles(alleles);
|
||||
|
||||
assert ref.length() > 0;
|
||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -256,9 +256,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
final Map<String, Object> attrs = parseInfo(parts[7]);
|
||||
builder.attributes(attrs);
|
||||
|
||||
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
|
||||
// update stop with the end key if provided
|
||||
try {
|
||||
builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()));
|
||||
} catch (Exception e) {
|
||||
generateException("the END value in the INFO field is not valid");
|
||||
}
|
||||
} else {
|
||||
builder.stop(pos + ref.length() - 1);
|
||||
}
|
||||
|
||||
// get our alleles, filters, and setup an attribute map
|
||||
final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo);
|
||||
final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs);
|
||||
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
||||
builder.alleles(alleles);
|
||||
|
||||
// do we have genotyping data
|
||||
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
|
||||
|
|
@ -275,7 +286,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
VariantContext vc = null;
|
||||
try {
|
||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
||||
vc = builder.make();
|
||||
} catch (Exception e) {
|
||||
generateException(e.getMessage());
|
||||
|
|
@ -284,31 +294,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
return vc;
|
||||
}
|
||||
|
||||
private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
|
||||
final String ref,
|
||||
final int pos,
|
||||
final List<Allele> rawAlleles,
|
||||
final Map<String, Object> attrs) {
|
||||
int endForSymbolicAlleles = pos; // by default we use the pos
|
||||
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
|
||||
// update stop with the end key if provided
|
||||
try {
|
||||
endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
|
||||
} catch (Exception e) {
|
||||
generateException("the END value in the INFO field is not valid");
|
||||
}
|
||||
}
|
||||
|
||||
// find out our current location, and clip the alleles down to their minimum length
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
|
||||
if ( clipped.getError() != null )
|
||||
generateException(clipped.getError(), lineNo);
|
||||
|
||||
builder.stop(clipped.getStop());
|
||||
builder.alleles(clipped.getClippedAlleles());
|
||||
return clipped.getClippedAlleles();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name of this codec
|
||||
* @return our set name
|
||||
|
|
|
|||
|
|
@ -1,434 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* All of the gross allele clipping and padding routines in one place
|
||||
*
|
||||
* Having attempted to understand / fix / document this code myself
|
||||
* I can only conclude that this entire approach needs to be rethought. This
|
||||
* code just doesn't work robustly with symbolic alleles, with multiple alleles,
|
||||
* requires a special "reference base for indels" stored in the VariantContext
|
||||
* whose correctness isn't enforced, and overall has strange special cases
|
||||
* all over the place.
|
||||
*
|
||||
* The reason this code is so complex is due to symbolics and multi-alleleic
|
||||
* variation, which frequently occur when combining variants from multiple
|
||||
* VCF files.
|
||||
*
|
||||
* TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
|
||||
* TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
public final class VCFAlleleClipper {
|
||||
private VCFAlleleClipper() { }
|
||||
|
||||
/**
|
||||
* Determine whether we should clip off the first base of all unclippped alleles or not
|
||||
*
|
||||
* Returns true if all of the alleles in unclippedAlleles share a common first base with
|
||||
* ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may
|
||||
* contain the reference allele itself, or just the alternate alleles, it doesn't matter.
|
||||
*
|
||||
* The algorithm returns true if the first base should be clipped off, or false otherwise
|
||||
*
|
||||
* This algorithm works even in the presence of symbolic alleles, logically ignoring these
|
||||
* values. It
|
||||
*
|
||||
* @param unclippedAlleles list of unclipped alleles to assay
|
||||
* @param ref0 the first base of the reference allele
|
||||
* @return true if we should clip the first base of unclippedAlleles
|
||||
*/
|
||||
@Requires("unclippedAlleles != null")
|
||||
public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
|
||||
final byte ref0) {
|
||||
boolean allSymbolicAlt = true;
|
||||
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// already know we aren't symbolic, so we only need to decide if we have only seen a ref
|
||||
if ( ! a.isReference() )
|
||||
allSymbolicAlt = false;
|
||||
|
||||
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// to reach here all alleles are consistent with clipping the first base matching ref0
|
||||
// but we don't clip if all ALT alleles are symbolic
|
||||
return ! allSymbolicAlt;
|
||||
}
|
||||
|
||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
||||
final byte[] ref,
|
||||
final int forwardClipping,
|
||||
final boolean allowFullClip) {
|
||||
int clipping = 0;
|
||||
boolean stillClipping = true;
|
||||
|
||||
while ( stillClipping ) {
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
continue;
|
||||
|
||||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||
if ( a.length() - clipping == 0 )
|
||||
return clipping - (allowFullClip ? 0 : 1);
|
||||
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
else if ( ref.length == clipping ) {
|
||||
if ( allowFullClip )
|
||||
stillClipping = false;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
}
|
||||
if ( stillClipping )
|
||||
clipping++;
|
||||
}
|
||||
|
||||
return clipping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are the alleles describing a polymorphism substitution one base for another?
|
||||
*
|
||||
* @param alleles a list of alleles, must not be empty
|
||||
* @return Return true if the length of any allele in alleles isn't 1
|
||||
*/
|
||||
@Requires("!alleles.isEmpty()")
|
||||
private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
|
||||
for ( final Allele a : alleles ) {
|
||||
if ( a.length() != 1 )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
|
||||
*
|
||||
* The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
|
||||
* position, after clipping. It also contains the list of alleles, in the same order as the provided
|
||||
* unclipped ones, that are the fully clipped version of the input alleles. If an error occurs
|
||||
* during this option the getError() function returns a string describing the problem (for use in parsers).
|
||||
*
|
||||
* The basic operation are:
|
||||
*
|
||||
* single allele
|
||||
* => stop == start and clipped == unclipped
|
||||
* any number of single nucleotide events
|
||||
* => stop == start and clipped == unclipped
|
||||
* two alleles, second being symbolic
|
||||
* => stop == start and clipped == unclipped
|
||||
* Note in this case that the STOP should be computed by other means (from END in VCF, for example)
|
||||
* Note that if there's more than two alleles and the second is a symbolic the code produces an error
|
||||
* Any other case:
|
||||
* The alleles are trimmed of any sequence shared at the end of the alleles. If N bases
|
||||
* are common then the alleles will all be at least N bases shorter.
|
||||
* The stop position returned is the start position + the length of the
|
||||
* reverse trimmed only reference allele - 1.
|
||||
* If the alleles all share a single common starting sequence (just one base is considered)
|
||||
* then the alleles have this leading common base removed as well.
|
||||
*
|
||||
* TODO This code is gross and brittle and needs to be rethought from scratch
|
||||
*
|
||||
* @param start the unadjusted start position (pre-clipping)
|
||||
* @param ref the reference string
|
||||
* @param unclippedAlleles the list of unclipped alleles, including the reference allele
|
||||
* @return the new reference end position of this event
|
||||
*/
|
||||
@Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
|
||||
@Ensures("result != null")
|
||||
public static ClippedAlleles clipAlleles(final int start,
|
||||
final String ref,
|
||||
final List<Allele> unclippedAlleles,
|
||||
final int endForSymbolicAllele ) {
|
||||
// no variation or single nucleotide events are by definition fully clipped
|
||||
if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
|
||||
return new ClippedAlleles(start, unclippedAlleles, null);
|
||||
|
||||
// we've got to sort out the clipping by looking at the alleles themselves
|
||||
final byte firstRefBase = (byte) ref.charAt(0);
|
||||
final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
|
||||
final int forwardClipping = firstBaseIsClipped ? 1 : 0;
|
||||
final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
|
||||
final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
|
||||
|
||||
if ( reverseClipping == -1 )
|
||||
return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
|
||||
|
||||
boolean sawSymbolic = false;
|
||||
List<Allele> clippedAlleles;
|
||||
if ( ! needsClipping ) {
|
||||
// there's nothing to clip, so clippedAlleles are the original alleles
|
||||
clippedAlleles = unclippedAlleles;
|
||||
} else {
|
||||
clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() ) {
|
||||
sawSymbolic = true;
|
||||
clippedAlleles.add(a);
|
||||
} else {
|
||||
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
|
||||
if ( !Allele.acceptableAlleleBases(allele) )
|
||||
return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
|
||||
clippedAlleles.add(Allele.create(allele, a.isReference()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
|
||||
|
||||
// TODO
|
||||
// TODO
|
||||
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
||||
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
||||
// TODO
|
||||
// TODO
|
||||
if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
|
||||
// TODO
|
||||
// TODO
|
||||
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
||||
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
||||
// TODO
|
||||
// TODO
|
||||
|
||||
final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
|
||||
return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the alleles in inputVC should have reference bases added for padding
|
||||
*
|
||||
* We need to pad a VC with a common base if the length of the reference allele is
|
||||
* less than the length of the VariantContext. This happens because the position of
|
||||
* e.g. an indel is always one before the actual event (as per VCF convention).
|
||||
*
|
||||
* @param inputVC the VC to evaluate, cannot be null
|
||||
* @return true if
|
||||
*/
|
||||
public static boolean needsPadding(final VariantContext inputVC) {
|
||||
// biallelic sites with only symbolic never need padding
|
||||
if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
|
||||
return false;
|
||||
|
||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
||||
final int referenceLength = inputVC.getReference().length();
|
||||
|
||||
if ( referenceLength == recordLength )
|
||||
return false;
|
||||
else if ( referenceLength == recordLength - 1 )
|
||||
return true;
|
||||
else if ( !inputVC.hasSymbolicAlleles() )
|
||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||
else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
|
||||
throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
|
||||
return false;
|
||||
}
|
||||
|
||||
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
||||
assert needsPadding(vc);
|
||||
|
||||
if ( allele.isSymbolic() )
|
||||
return allele;
|
||||
else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
final String newBases = sb.toString();
|
||||
return Allele.create(newBases, allele.isReference());
|
||||
}
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
||||
final boolean padVC = needsPadding(inputVC);
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
if ( padVC ) {
|
||||
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
||||
final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
|
||||
|
||||
boolean paddedAtLeastOne = false;
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
final Allele padded = padAllele(inputVC, a);
|
||||
paddedAtLeastOne = paddedAtLeastOne || padded != a;
|
||||
alleles.add(padded);
|
||||
if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
|
||||
}
|
||||
|
||||
if ( ! paddedAtLeastOne )
|
||||
throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
|
||||
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
|
||||
vcb.alleles(alleles);
|
||||
|
||||
// the position of the inputVC is one further, if it doesn't contain symbolic alleles
|
||||
vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
|
||||
|
||||
if ( inputVC.hasGenotypes() ) {
|
||||
assert unpaddedToPadded != null;
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
||||
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (final Allele a : g.getAlleles()) {
|
||||
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||
}
|
||||
vcb.genotypes(genotypes);
|
||||
}
|
||||
|
||||
return vcb.make();
|
||||
}
|
||||
else
|
||||
return inputVC;
|
||||
|
||||
}
|
||||
|
||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
|
||||
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
|
||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
final List<Allele> originalAlleles = genotype.getAlleles();
|
||||
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( final Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
@Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
|
||||
public static class ClippedAlleles {
|
||||
private final int stop;
|
||||
private final List<Allele> clippedAlleles;
|
||||
private final Byte refBaseForIndel;
|
||||
private final String error;
|
||||
|
||||
@Requires({"stop > 0", "clippedAlleles != null"})
|
||||
private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
|
||||
this.stop = stop;
|
||||
this.clippedAlleles = clippedAlleles;
|
||||
this.error = null;
|
||||
this.refBaseForIndel = refBaseForIndel;
|
||||
}
|
||||
|
||||
@Requires("error != null")
|
||||
private ClippedAlleles(final String error) {
|
||||
this.stop = -1;
|
||||
this.clippedAlleles = null;
|
||||
this.refBaseForIndel = null;
|
||||
this.error = error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an error if it occurred
|
||||
* @return the error message, or null if no error occurred
|
||||
*/
|
||||
public String getError() {
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the stop position to use after the clipping as been applied, given the
|
||||
* provided position to clipAlleles
|
||||
* @return
|
||||
*/
|
||||
public int getStop() {
|
||||
return stop;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the clipped alleles themselves
|
||||
* @return the clipped alleles in the order of the input unclipped alleles
|
||||
*/
|
||||
public List<Allele> getClippedAlleles() {
|
||||
return clippedAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reference base we should use for indels, or null if none is appropriate
|
||||
* @return
|
||||
*/
|
||||
public Byte getRefBaseForIndel() {
|
||||
return refBaseForIndel;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Immutable representation of an allele
|
||||
|
|
@ -77,32 +77,36 @@ public class Allele implements Comparable<Allele> {
|
|||
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
|
||||
|
||||
private boolean isRef = false;
|
||||
private boolean isNull = false;
|
||||
private boolean isNoCall = false;
|
||||
private boolean isSymbolic = false;
|
||||
|
||||
private byte[] bases = null;
|
||||
|
||||
public final static String NULL_ALLELE_STRING = "-";
|
||||
public final static String NO_CALL_STRING = ".";
|
||||
/** A generic static NO_CALL allele for use */
|
||||
|
||||
// no public way to create an allele
|
||||
private Allele(byte[] bases, boolean isRef) {
|
||||
// standardize our representation of null allele and bases
|
||||
// null alleles are no longer allowed
|
||||
if ( wouldBeNullAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
isNull = true;
|
||||
} else if ( wouldBeNoCallAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
throw new IllegalArgumentException("Null alleles are not supported");
|
||||
}
|
||||
|
||||
// no-calls are represented as no bases
|
||||
if ( wouldBeNoCallAllele(bases) ) {
|
||||
this.bases = EMPTY_ALLELE_BASES;
|
||||
isNoCall = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
} else if ( wouldBeSymbolicAllele(bases) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( wouldBeSymbolicAllele(bases) ) {
|
||||
isSymbolic = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
|
||||
}
|
||||
// else
|
||||
// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
||||
else {
|
||||
bases = BaseUtils.convertToUpperCase(bases);
|
||||
}
|
||||
|
||||
this.isRef = isRef;
|
||||
this.bases = bases;
|
||||
|
|
@ -126,8 +130,6 @@ public class Allele implements Comparable<Allele> {
|
|||
private final static Allele ALT_T = new Allele("T", false);
|
||||
private final static Allele REF_N = new Allele("N", true);
|
||||
private final static Allele ALT_N = new Allele("N", false);
|
||||
private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true);
|
||||
private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false);
|
||||
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -154,7 +156,6 @@ public class Allele implements Comparable<Allele> {
|
|||
case '.':
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
return NO_CALL;
|
||||
case '-': return isRef ? REF_NULL : ALT_NULL;
|
||||
case 'A': case 'a' : return isRef ? REF_A : ALT_A;
|
||||
case 'C': case 'c' : return isRef ? REF_C : ALT_C;
|
||||
case 'G': case 'g' : return isRef ? REF_G : ALT_G;
|
||||
|
|
@ -179,14 +180,9 @@ public class Allele implements Comparable<Allele> {
|
|||
public static Allele extend(Allele left, byte[] right) {
|
||||
if (left.isSymbolic())
|
||||
throw new IllegalArgumentException("Cannot extend a symbolic allele");
|
||||
byte[] bases = null;
|
||||
if ( left.length() == 0 )
|
||||
bases = right;
|
||||
else {
|
||||
bases = new byte[left.length() + right.length];
|
||||
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
|
||||
System.arraycopy(right, 0, bases, left.length(), right.length);
|
||||
}
|
||||
byte[] bases = new byte[left.length() + right.length];
|
||||
System.arraycopy(left.getBases(), 0, bases, 0, left.length());
|
||||
System.arraycopy(right, 0, bases, left.length(), right.length);
|
||||
|
||||
return create(bases, left.isReference());
|
||||
}
|
||||
|
|
@ -242,7 +238,10 @@ public class Allele implements Comparable<Allele> {
|
|||
}
|
||||
|
||||
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
|
||||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
|
||||
if ( wouldBeNullAllele(bases) )
|
||||
return false;
|
||||
|
||||
if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
|
||||
return true;
|
||||
|
||||
for (byte base : bases ) {
|
||||
|
|
@ -299,11 +298,6 @@ public class Allele implements Comparable<Allele> {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
//Returns true if this is the null allele
|
||||
public boolean isNull() { return isNull; }
|
||||
// Returns true if this is not the null allele
|
||||
public boolean isNonNull() { return ! isNull(); }
|
||||
|
||||
// Returns true if this is the NO_CALL allele
|
||||
public boolean isNoCall() { return isNoCall; }
|
||||
// Returns true if this is not the NO_CALL allele
|
||||
|
|
@ -319,7 +313,7 @@ public class Allele implements Comparable<Allele> {
|
|||
|
||||
// Returns a nice string representation of this object
|
||||
public String toString() {
|
||||
return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : "");
|
||||
return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : "");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -384,27 +378,27 @@ public class Allele implements Comparable<Allele> {
|
|||
* @return true if this and other are equal
|
||||
*/
|
||||
public boolean equals(Allele other, boolean ignoreRefState) {
|
||||
return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
|
||||
return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param test bases to test against
|
||||
*
|
||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
*/
|
||||
public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); }
|
||||
|
||||
/**
|
||||
* @param test bases to test against
|
||||
*
|
||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
*/
|
||||
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
|
||||
|
||||
/**
|
||||
* @param test allele to test against
|
||||
*
|
||||
* @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
* @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
|
||||
*/
|
||||
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
|
||||
|
||||
|
|
@ -421,10 +415,6 @@ public class Allele implements Comparable<Allele> {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
|
||||
return getMatchingAllele(allAlleles, alleleBases.getBytes());
|
||||
}
|
||||
|
||||
public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
|
||||
for ( Allele a : allAlleles ) {
|
||||
if ( a.basesMatch(alleleBases) ) {
|
||||
|
|
@ -438,26 +428,6 @@ public class Allele implements Comparable<Allele> {
|
|||
return null; // couldn't find anything
|
||||
}
|
||||
|
||||
public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
|
||||
List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());
|
||||
|
||||
for ( String alleleString : alleleStrings ) {
|
||||
Allele allele = getMatchingAllele(possibleAlleles, alleleString);
|
||||
|
||||
if ( allele == null ) {
|
||||
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
|
||||
allele = create(alleleString);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
|
||||
}
|
||||
}
|
||||
|
||||
myAlleles.add(allele);
|
||||
}
|
||||
|
||||
return myAlleles;
|
||||
}
|
||||
|
||||
public int compareTo(Allele other) {
|
||||
if ( isReference() && other.isNonReference() )
|
||||
return -1;
|
||||
|
|
@ -468,9 +438,6 @@ public class Allele implements Comparable<Allele> {
|
|||
}
|
||||
|
||||
public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) {
|
||||
if ( a1.isNull() || a2.isNull() )
|
||||
return true;
|
||||
|
||||
if ( a2.length() >= a1.length() )
|
||||
return firstIsPrefixOfSecond(a1, a2);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
@Deprecated // ID is no longer stored in the attributes map
|
||||
private final static String ID_KEY = "ID";
|
||||
|
||||
private final Byte REFERENCE_BASE_FOR_INDEL;
|
||||
|
||||
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
|
||||
|
||||
/** The location of this VariantContext */
|
||||
|
|
@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public enum Validation {
|
||||
REF_PADDING,
|
||||
ALLELES,
|
||||
GENOTYPES
|
||||
}
|
||||
|
|
@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
|
||||
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
|
||||
other.getFiltersMaybeNull(),
|
||||
other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL,
|
||||
other.getAttributes(),
|
||||
other.fullyDecoded, NO_VALIDATION);
|
||||
}
|
||||
|
||||
|
|
@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* @param log10PError qual
|
||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||
* @param attributes attributes
|
||||
* @param referenceBaseForIndel padded reference base
|
||||
* @param validationToPerform set of validation steps to take
|
||||
*/
|
||||
protected VariantContext(final String source,
|
||||
|
|
@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
final double log10PError,
|
||||
final Set<String> filters,
|
||||
final Map<String, Object> attributes,
|
||||
final Byte referenceBaseForIndel,
|
||||
final boolean fullyDecoded,
|
||||
final EnumSet<Validation> validationToPerform ) {
|
||||
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
||||
|
|
@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
|
||||
|
||||
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
||||
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
|
||||
|
||||
// todo -- remove me when this check is no longer necessary
|
||||
if ( this.commonInfo.hasAttribute(ID_KEY) )
|
||||
|
|
@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
* @param sampleNames the sample names
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
|
||||
* @return new VariantContext subsetting to just the given samples
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
|
|
@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public boolean isSimpleInsertion() {
|
||||
// can't just call !isSimpleDeletion() because of complex indels
|
||||
return getType() == Type.INDEL && getReference().isNull() && isBiallelic();
|
||||
return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public boolean isSimpleDeletion() {
|
||||
// can't just call !isSimpleInsertion() because of complex indels
|
||||
return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic();
|
||||
return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
return ID;
|
||||
}
|
||||
|
||||
public boolean hasReferenceBaseForIndel() {
|
||||
return REFERENCE_BASE_FOR_INDEL != null;
|
||||
}
|
||||
|
||||
// the indel base that gets stripped off for indels
|
||||
public Byte getReferenceBaseForIndel() {
|
||||
return REFERENCE_BASE_FOR_INDEL;
|
||||
}
|
||||
|
||||
public String getAlleleStringWithRefPadding(final Allele allele) {
|
||||
if ( VCFAlleleClipper.needsPadding(this) )
|
||||
return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
|
||||
else
|
||||
return allele.getDisplayString();
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
||||
* for consistency with the multi-get function.
|
||||
*
|
||||
* @param sampleName
|
||||
* @return
|
||||
* @param sampleName the sample name
|
||||
* @return mapping from sample name to genotype
|
||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||
*/
|
||||
public GenotypesContext getGenotypes(String sampleName) {
|
||||
|
|
@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* For testing convenience only
|
||||
*
|
||||
* @param sampleNames a unique list of sample names
|
||||
* @return
|
||||
* @return subsetting genotypes context
|
||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||
*/
|
||||
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
|
||||
|
|
@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
/**
|
||||
* Run all extra-strict validation tests on a Variant Context object
|
||||
*
|
||||
* @param reference the true reference allele
|
||||
* @param paddedRefBase the reference base used for padding indels
|
||||
* @param rsIDs the true dbSNP IDs
|
||||
* @param reportedReference the reported reference allele
|
||||
* @param observedReference the actual reference allele
|
||||
* @param rsIDs the true dbSNP IDs
|
||||
*/
|
||||
public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set<String> rsIDs) {
|
||||
public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
|
||||
// validate the reference
|
||||
validateReferenceBases(reference, paddedRefBase);
|
||||
validateReferenceBases(reportedReference, observedReference);
|
||||
|
||||
// validate the RS IDs
|
||||
validateRSIDs(rsIDs);
|
||||
|
|
@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
//checkReferenceTrack();
|
||||
}
|
||||
|
||||
public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
|
||||
if ( reference == null )
|
||||
return;
|
||||
|
||||
// don't validate if we're a complex event
|
||||
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
|
||||
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
|
||||
}
|
||||
|
||||
// we also need to validate the padding base for simple indels
|
||||
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
|
||||
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
|
||||
public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) {
|
||||
if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) {
|
||||
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
for (final Validation val : validationToPerform ) {
|
||||
switch (val) {
|
||||
case ALLELES: validateAlleles(); break;
|
||||
case REF_PADDING: validateReferencePadding(); break;
|
||||
case GENOTYPES: validateGenotypes(); break;
|
||||
default: throw new IllegalArgumentException("Unexpected validation mode " + val);
|
||||
}
|
||||
|
|
@ -1151,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
||||
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
||||
assert end != -1;
|
||||
if ( end != getEnd() && end != getEnd() + 1 ) {
|
||||
// the end is allowed to 1 bigger because of the padding
|
||||
if ( end != getEnd() ) {
|
||||
final String message = "Badly formed variant context at location " + getChr() + ":"
|
||||
+ getStart() + "; getEnd() was " + getEnd()
|
||||
+ " but this VariantContext contains an END key with value " + end;
|
||||
|
|
@ -1161,23 +1129,19 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
else
|
||||
throw new ReviewedStingException(message);
|
||||
}
|
||||
} else {
|
||||
final long length = (stop - start) + 1;
|
||||
if ( ! hasSymbolicAlleles() && length != getReference().length() ) {
|
||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void validateReferencePadding() {
|
||||
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
|
||||
return;
|
||||
|
||||
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
|
||||
|
||||
if ( needsPadding && !hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided.");
|
||||
}
|
||||
|
||||
private void validateAlleles() {
|
||||
// check alleles
|
||||
boolean alreadySeenRef = false, alreadySeenNull = false;
|
||||
for ( Allele allele : alleles ) {
|
||||
|
||||
boolean alreadySeenRef = false;
|
||||
|
||||
for ( final Allele allele : alleles ) {
|
||||
// make sure there's only one reference allele
|
||||
if ( allele.isReference() ) {
|
||||
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
|
||||
|
|
@ -1187,26 +1151,11 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
if ( allele.isNoCall() ) {
|
||||
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
|
||||
}
|
||||
|
||||
// make sure there's only one null allele
|
||||
if ( allele.isNull() ) {
|
||||
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
|
||||
alreadySeenNull = true;
|
||||
}
|
||||
}
|
||||
|
||||
// make sure there's one reference allele
|
||||
if ( ! alreadySeenRef )
|
||||
throw new IllegalArgumentException("No reference allele found in VariantContext");
|
||||
|
||||
// if ( getType() == Type.INDEL ) {
|
||||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
||||
long length = (stop - start) + 1;
|
||||
if ( ! hasSymbolicAlleles()
|
||||
&& ((getReference().isNull() && length != 1 )
|
||||
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||
}
|
||||
}
|
||||
|
||||
private void validateGenotypes() {
|
||||
|
|
|
|||
|
|
@ -25,9 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import com.google.java.contract.*;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -74,7 +71,6 @@ public class VariantContextBuilder {
|
|||
private Set<String> filters = null;
|
||||
private Map<String, Object> attributes = null;
|
||||
private boolean attributesCanBeModified = false;
|
||||
private Byte referenceBaseForIndel = null;
|
||||
|
||||
/** enum of what must be validated */
|
||||
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
|
||||
|
|
@ -117,7 +113,6 @@ public class VariantContextBuilder {
|
|||
this.genotypes = parent.genotypes;
|
||||
this.ID = parent.getID();
|
||||
this.log10PError = parent.getLog10PError();
|
||||
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
|
||||
this.source = parent.getSource();
|
||||
this.start = parent.getStart();
|
||||
this.stop = parent.getEnd();
|
||||
|
|
@ -132,7 +127,6 @@ public class VariantContextBuilder {
|
|||
this.genotypes = parent.genotypes;
|
||||
this.ID = parent.ID;
|
||||
this.log10PError = parent.log10PError;
|
||||
this.referenceBaseForIndel = parent.referenceBaseForIndel;
|
||||
this.source = parent.source;
|
||||
this.start = parent.start;
|
||||
this.stop = parent.stop;
|
||||
|
|
@ -362,21 +356,6 @@ public class VariantContextBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells us that the resulting VariantContext should use this byte for the reference base
|
||||
* Null means no refBase is available
|
||||
* @param referenceBaseForIndel
|
||||
*/
|
||||
public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) {
|
||||
this.referenceBaseForIndel = referenceBaseForIndel;
|
||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
||||
return this;
|
||||
}
|
||||
|
||||
public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
|
||||
return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells us that the resulting VariantContext should have source field set to source
|
||||
* @param source
|
||||
|
|
@ -401,7 +380,6 @@ public class VariantContextBuilder {
|
|||
this.start = start;
|
||||
this.stop = stop;
|
||||
toValidate.add(VariantContext.Validation.ALLELES);
|
||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
@ -416,7 +394,6 @@ public class VariantContextBuilder {
|
|||
this.start = loc.getStart();
|
||||
this.stop = loc.getStop();
|
||||
toValidate.add(VariantContext.Validation.ALLELES);
|
||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
@ -440,7 +417,6 @@ public class VariantContextBuilder {
|
|||
public VariantContextBuilder start(final long start) {
|
||||
this.start = start;
|
||||
toValidate.add(VariantContext.Validation.ALLELES);
|
||||
toValidate.add(VariantContext.Validation.REF_PADDING);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
@ -517,6 +493,6 @@ public class VariantContextBuilder {
|
|||
public VariantContext make() {
|
||||
return new VariantContext(source, ID, contig, start, stop, alleles,
|
||||
genotypes, log10PError, filters, attributes,
|
||||
referenceBaseForIndel, fullyDecoded, toValidate);
|
||||
fullyDecoded, toValidate);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,9 +64,9 @@ public class VariantContextUtils {
|
|||
* Ensures that VC contains all of the samples in allSamples by adding missing samples to
|
||||
* the resulting VC with default diploid ./. genotypes
|
||||
*
|
||||
* @param vc
|
||||
* @param allSamples
|
||||
* @return
|
||||
* @param vc the VariantContext
|
||||
* @param allSamples all of the samples needed
|
||||
* @return a new VariantContext with missing samples added
|
||||
*/
|
||||
public static VariantContext addMissingSamples(final VariantContext vc, final Set<String> allSamples) {
|
||||
// TODO -- what's the fastest way to do this calculation?
|
||||
|
|
@ -376,9 +376,9 @@ public class VariantContextUtils {
|
|||
|
||||
/**
|
||||
* @deprecated use variant context builder version instead
|
||||
* @param vc
|
||||
* @param keysToPreserve
|
||||
* @return
|
||||
* @param vc the variant context
|
||||
* @param keysToPreserve the keys to preserve
|
||||
* @return a pruned version of the original variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
||||
|
|
@ -486,14 +486,13 @@ public class VariantContextUtils {
|
|||
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
|
||||
verifyUniqueSampleNames(unsortedVCs);
|
||||
|
||||
final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||
final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
||||
final List<VariantContext> VCs = new ArrayList<VariantContext>();
|
||||
|
||||
for (final VariantContext vc : prepaddedVCs) {
|
||||
// also a reasonable place to remove filtered calls, if needed
|
||||
for (final VariantContext vc : preFilteredVCs) {
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc));
|
||||
VCs.add(vc);
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
|
@ -547,9 +546,6 @@ public class VariantContextUtils {
|
|||
|
||||
filters.addAll(vc.getFilters());
|
||||
|
||||
if ( referenceBaseForIndel == null )
|
||||
referenceBaseForIndel = vc.getReferenceBaseForIndel();
|
||||
|
||||
//
|
||||
// add attributes
|
||||
//
|
||||
|
|
@ -661,10 +657,9 @@ public class VariantContextUtils {
|
|||
builder.genotypes(genotypes);
|
||||
builder.log10PError(log10PError);
|
||||
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
||||
builder.referenceBaseForIndel(referenceBaseForIndel);
|
||||
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
|
||||
final VariantContext merged = builder.make();
|
||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||
return merged;
|
||||
}
|
||||
|
|
@ -700,73 +695,6 @@ public class VariantContextUtils {
|
|||
return true;
|
||||
}
|
||||
|
||||
private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
boolean trimVC;
|
||||
|
||||
// We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles
|
||||
Allele refAllele = inputVC.getReference();
|
||||
if (!inputVC.isVariant())
|
||||
trimVC = false;
|
||||
else if (refAllele.isNull())
|
||||
trimVC = false;
|
||||
else {
|
||||
trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
|
||||
}
|
||||
|
||||
// nothing to do if we don't need to trim bases
|
||||
if (trimVC) {
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
|
||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation
|
||||
// example: mixed records such as {TA*,TGA,TG}
|
||||
boolean hasNullAlleles = false;
|
||||
|
||||
for (final Allele a: originalToTrimmedAlleleMap.values()) {
|
||||
if (a.isNull())
|
||||
hasNullAlleles = true;
|
||||
}
|
||||
|
||||
if (!hasNullAlleles)
|
||||
return inputVC;
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
|
||||
List<Allele> originalAlleles = genotype.getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( final Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
|
||||
return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
|
||||
}
|
||||
|
||||
return inputVC;
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
|
|
@ -819,7 +747,7 @@ public class VariantContextUtils {
|
|||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mappedVCs;
|
||||
|
|
@ -881,10 +809,10 @@ public class VariantContextUtils {
|
|||
//
|
||||
// refAllele: ACGTGA
|
||||
// myRef: ACGT
|
||||
// myAlt: -
|
||||
// myAlt: A
|
||||
//
|
||||
// We need to remap all of the alleles in vc to include the extra GA so that
|
||||
// myRef => refAllele and myAlt => GA
|
||||
// myRef => refAllele and myAlt => AGA
|
||||
//
|
||||
|
||||
Allele myRef = vc.getReference();
|
||||
|
|
@ -979,7 +907,7 @@ public class VariantContextUtils {
|
|||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||
for ( Allele originalAllele : vc.getAlleles() ) {
|
||||
Allele newAllele;
|
||||
if ( originalAllele.isNoCall() || originalAllele.isNull() )
|
||||
if ( originalAllele.isNoCall() )
|
||||
newAllele = originalAllele;
|
||||
else
|
||||
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
|
||||
|
|
@ -1235,13 +1163,14 @@ public class VariantContextUtils {
|
|||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return null;
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
final Allele refAllele = vc.getReference();
|
||||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
||||
final int[] repetitionCount = result.first;
|
||||
// repetition count = 0 means allele is not a tandem expansion of context
|
||||
|
|
@ -1256,7 +1185,7 @@ public class VariantContextUtils {
|
|||
repeatUnit = result.second;
|
||||
if (VERBOSE) {
|
||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||
System.out.println("RU:"+new String(repeatUnit));
|
||||
}
|
||||
|
|
@ -1405,4 +1334,113 @@ public class VariantContextUtils {
|
|||
return start + Math.max(ref.length() - 1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean requiresPaddingBase(final List<String> alleles) {
|
||||
|
||||
// see whether one of the alleles would be null if trimmed through
|
||||
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.isEmpty() )
|
||||
return true;
|
||||
}
|
||||
|
||||
int clipping = 0;
|
||||
Character currentBase = null;
|
||||
|
||||
while ( true ) {
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.length() - clipping == 0 )
|
||||
return true;
|
||||
|
||||
char myBase = allele.charAt(clipping);
|
||||
if ( currentBase == null )
|
||||
currentBase = myBase;
|
||||
else if ( currentBase != myBase )
|
||||
return false;
|
||||
}
|
||||
|
||||
clipping++;
|
||||
currentBase = null;
|
||||
}
|
||||
}
|
||||
|
||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||
|
||||
// TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
||||
|
||||
// see whether we need to trim common reference base from all alleles
|
||||
|
||||
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
|
||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
final List<Allele> originalAlleles = genotype.getAlleles();
|
||||
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( final Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
||||
final byte[] ref,
|
||||
final int forwardClipping,
|
||||
final boolean allowFullClip) {
|
||||
int clipping = 0;
|
||||
boolean stillClipping = true;
|
||||
|
||||
while ( stillClipping ) {
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
continue;
|
||||
|
||||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||
if ( a.length() - clipping == 0 )
|
||||
return clipping - (allowFullClip ? 0 : 1);
|
||||
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
else if ( ref.length == clipping ) {
|
||||
if ( allowFullClip )
|
||||
stillClipping = false;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
}
|
||||
if ( stillClipping )
|
||||
clipping++;
|
||||
}
|
||||
|
||||
return clipping;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
if ( needsPadding )
|
||||
allele = VCFAlleleClipper.padAllele(vc, allele);
|
||||
final byte[] s = allele.getDisplayBases();
|
||||
if ( s == null )
|
||||
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);
|
||||
|
|
|
|||
|
|
@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||
|
||||
try {
|
||||
vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||
super.add(vc);
|
||||
|
||||
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||
1,
|
||||
Arrays.asList("0567b32ebdc26604ddf2a390de4579ac"));
|
||||
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
|
||||
executeTest("testFastaAlternateReferenceIndels", spec2);
|
||||
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider {
|
|||
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
||||
Allele refAllele, altAllele;
|
||||
if (eventLength == 0) {// SNP case
|
||||
refAllele =Allele.create(referenceContext.getBase(),true);
|
||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||
String refAllele, altAllele;
|
||||
if (eventLength == 0) {
|
||||
// SNP case
|
||||
refAllele = new String(new byte[]{referenceContext.getBase()});
|
||||
altAllele = new String(altBases.substring(0,1));
|
||||
|
||||
} else if (eventLength>0){
|
||||
// insertion
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
refAllele = "";
|
||||
altAllele = altBases.substring(0,eventLength);
|
||||
}
|
||||
else {
|
||||
// deletion
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
|
||||
altAllele = "";
|
||||
}
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
||||
for (String sample: sampleNames) {
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate));
|
||||
contexts.put(sample,context);
|
||||
|
||||
}
|
||||
|
|
@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider {
|
|||
rg.setSample(name);
|
||||
return rg;
|
||||
}
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
|
||||
int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int readStart = contigStart;
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = 0;
|
||||
int readCounter = 0;
|
||||
int alleleCounter = 0;
|
||||
for (Allele allele: vc.getAlleles()) {
|
||||
if (allele.isReference())
|
||||
refAlleleLength = allele.getBases().length;
|
||||
|
||||
int alleleLength = allele.getBases().length;
|
||||
|
||||
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
if (addErrors)
|
||||
addBaseErrors(readBases, phredScaledErrorRate);
|
||||
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (allele.isReference())
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
if (isBeforeDeletion || isBeforeInsertion)
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
else // SNP case
|
||||
read.setCigarString(readBases.length+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
alleleCounter++;
|
||||
}
|
||||
int refAlleleLength = refAllele.length();
|
||||
|
||||
pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
|
||||
pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||
private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
|
||||
|
||||
int alleleLength = allele.length();
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
|
||||
int readCounter = 0;
|
||||
for ( int d = 0; d < numReadsPerAllele; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
if (addErrors)
|
||||
addBaseErrors(readBases, phredScaledErrorRate);
|
||||
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals, (byte)phredScaledErrorRate);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (isReference)
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
if (isBeforeDeletion || isBeforeInsertion)
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
else // SNP case
|
||||
read.setCigarString(readBases.length+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
|
||||
return pileupElements;
|
||||
}
|
||||
|
||||
private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(offset);
|
||||
String alleleBases = new String(allele.getBases());
|
||||
String prefix = refBases.substring(0, offset);
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
|
||||
return (prefix+alleleBases+postfix).getBytes();
|
||||
|
||||
|
||||
|
||||
return (prefix+allele+postfix).getBytes();
|
||||
}
|
||||
|
||||
private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) {
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
|
||||
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||
|
||||
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
eventLength = 3;
|
||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||
Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||
|
||||
// same with min Reads = 11
|
||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||
|
|
@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
|
||||
Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
|
||||
|
||||
altBases = "CCTCNTGAGA";
|
||||
eventLength = 5;
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("27f9450afa132888a8994167f0035fd7"));
|
||||
Arrays.asList("240d99b58f73985fb114abe9044c0271"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1"));
|
||||
Arrays.asList("6e7789445e29d91979a21e78d3d53295"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("77b3f30e38fedad812125bdf6cf3255f"));
|
||||
Arrays.asList("18d7236208db603e143b40db06ef2aca"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
|
||||
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
|
||||
|
||||
|
|
|
|||
|
|
@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("test bad ref allele in deletion", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexEvents() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("complexEvents.vcf", "ALL"),
|
||||
0,
|
||||
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
|
||||
);
|
||||
|
||||
executeTest("test validating complex events", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
||||
basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
||||
basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -1,226 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class VCFAlleleClipperUnitTest extends BaseTest {
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test allele clipping
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ClipAllelesTest extends TestDataProvider {
|
||||
final int position;
|
||||
final int stop;
|
||||
final String ref;
|
||||
List<Allele> inputs;
|
||||
List<Allele> expected;
|
||||
|
||||
@Requires("arg.length % 2 == 0")
|
||||
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
|
||||
super(ClipAllelesTest.class);
|
||||
this.position = position;
|
||||
this.stop = stop;
|
||||
this.ref = arg[0];
|
||||
|
||||
int n = arg.length / 2;
|
||||
inputs = new ArrayList<Allele>(n);
|
||||
expected = new ArrayList<Allele>(n);
|
||||
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
final boolean ref = i % n == 0;
|
||||
inputs.add(Allele.create(arg[i], ref));
|
||||
}
|
||||
for ( int i = n; i < arg.length; i++ ) {
|
||||
final boolean ref = i % n == 0;
|
||||
expected.add(Allele.create(arg[i], ref));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isClipped() {
|
||||
for ( int i = 0; i < inputs.size(); i++ ) {
|
||||
if ( inputs.get(i).length() != expected.get(i).length() )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
|
||||
}
|
||||
}
|
||||
@DataProvider(name = "ClipAllelesTest")
|
||||
public Object[][] makeClipAllelesTest() {
|
||||
// do no harm
|
||||
new ClipAllelesTest(10, 10, "A", "A");
|
||||
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
|
||||
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
|
||||
|
||||
// insertions
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
|
||||
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
|
||||
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
|
||||
|
||||
// deletions
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
|
||||
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
|
||||
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
|
||||
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
|
||||
|
||||
// multi-allelic insertion and deletions
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
|
||||
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
|
||||
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
|
||||
|
||||
// cannot be clipped
|
||||
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
|
||||
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
|
||||
|
||||
// symbolic
|
||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
|
||||
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
|
||||
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
|
||||
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
|
||||
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
|
||||
|
||||
// symbolic with alleles that should be clipped
|
||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
|
||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
|
||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
|
||||
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
|
||||
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
|
||||
|
||||
// clipping from both ends
|
||||
//
|
||||
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
|
||||
//
|
||||
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
|
||||
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
|
||||
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
|
||||
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
|
||||
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
|
||||
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
|
||||
|
||||
// complex substitutions
|
||||
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
|
||||
|
||||
return ClipAllelesTest.getTests(ClipAllelesTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClipAllelesTest")
|
||||
public void testClipAllelesTest(ClipAllelesTest cfg) {
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
|
||||
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
|
||||
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
|
||||
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
|
||||
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
|
||||
|
||||
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
|
||||
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
|
||||
|
||||
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
|
||||
|
||||
if ( cfg.isClipped() ) {
|
||||
// TODO
|
||||
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
|
||||
// TODO actually the original stop, as the original stop is +1 its true size.
|
||||
// TODO
|
||||
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
|
||||
|
||||
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
|
||||
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
|
||||
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
|
||||
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// basic allele clipping test
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
||||
final String ref;
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final int expectedClip;
|
||||
|
||||
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
||||
super(ReverseClippingPositionTestProvider.class);
|
||||
this.ref = ref;
|
||||
for ( final String allele : alleles )
|
||||
this.alleles.add(Allele.create(allele));
|
||||
this.expectedClip = expectedClip;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
||||
public Object[][] makeReverseClippingPositionTestProvider() {
|
||||
// pair clipping
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||
|
||||
// triplets
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||
|
||||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
|
|
@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testReadingLowerCaseBases() {
|
||||
String testVCF = privateTestDir + "lowercaseBases.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d"));
|
||||
executeTest("Test reading VCF with lower-case bases", spec1);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testReadingAndWriting1000GSVs() {
|
||||
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
|
||||
|
|
@ -57,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284"));
|
||||
executeTest("Test reading and writing samtools vcf", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String testVCF = privateTestDir + "ex2.vcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
|
||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,8 +37,6 @@ import org.testng.annotations.Test;
|
|||
// public Allele(byte[] bases, boolean isRef) {
|
||||
// public Allele(boolean isRef) {
|
||||
// public Allele(String bases, boolean isRef) {
|
||||
// public boolean isNullAllele() { return length() == 0; }
|
||||
// public boolean isNonNullAllele() { return ! isNullAllele(); }
|
||||
// public boolean isReference() { return isRef; }
|
||||
// public boolean isNonReference() { return ! isReference(); }
|
||||
// public byte[] getBases() { return bases; }
|
||||
|
|
@ -49,13 +47,10 @@ import org.testng.annotations.Test;
|
|||
* Basic unit test for RecalData
|
||||
*/
|
||||
public class AlleleUnitTest {
|
||||
Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall;
|
||||
Allele ARef, A, T, ATIns, ATCIns, NoCall;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
|
||||
A = Allele.create("A");
|
||||
ARef = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
|
|
@ -72,8 +67,6 @@ public class AlleleUnitTest {
|
|||
Assert.assertFalse(A.isReference());
|
||||
Assert.assertTrue(A.basesMatch("A"));
|
||||
Assert.assertEquals(A.length(), 1);
|
||||
Assert.assertTrue(A.isNonNull());
|
||||
Assert.assertFalse(A.isNull());
|
||||
|
||||
Assert.assertTrue(ARef.isReference());
|
||||
Assert.assertFalse(ARef.isNonReference());
|
||||
|
|
@ -92,8 +85,8 @@ public class AlleleUnitTest {
|
|||
Assert.assertFalse(NoCall.isReference());
|
||||
Assert.assertFalse(NoCall.basesMatch("."));
|
||||
Assert.assertEquals(NoCall.length(), 0);
|
||||
Assert.assertTrue(NoCall.isNonNull());
|
||||
Assert.assertFalse(NoCall.isNull());
|
||||
Assert.assertTrue(NoCall.isNoCall());
|
||||
Assert.assertFalse(NoCall.isCalled());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -103,16 +96,6 @@ public class AlleleUnitTest {
|
|||
Assert.assertEquals(ATCIns.length(), 3);
|
||||
Assert.assertEquals(ATIns.getBases(), "AT".getBytes());
|
||||
Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes());
|
||||
|
||||
Assert.assertTrue(del.isNonReference());
|
||||
Assert.assertFalse(delRef.isNonReference());
|
||||
Assert.assertFalse(del.isReference());
|
||||
Assert.assertTrue(delRef.isReference());
|
||||
Assert.assertFalse(del.basesMatch("-"));
|
||||
Assert.assertTrue(del.basesMatch(""));
|
||||
Assert.assertEquals(del.length(), 0);
|
||||
Assert.assertFalse(del.isNonNull());
|
||||
Assert.assertTrue(del.isNull());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -128,18 +111,6 @@ public class AlleleUnitTest {
|
|||
Assert.assertFalse(a1.equals(a4));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDelConstructors() {
|
||||
Allele a1 = Allele.create("-");
|
||||
Allele a2 = Allele.create("-".getBytes());
|
||||
Allele a3 = Allele.create("");
|
||||
Allele a4 = Allele.create("", true);
|
||||
|
||||
Assert.assertTrue(a1.equals(a2));
|
||||
Assert.assertTrue(a1.equals(a3));
|
||||
Assert.assertFalse(a1.equals(a4));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsConstructors() {
|
||||
Allele a1 = Allele.create("AC");
|
||||
|
|
@ -156,7 +127,6 @@ public class AlleleUnitTest {
|
|||
public void testEquals() {
|
||||
Assert.assertTrue(ARef.basesMatch(A));
|
||||
Assert.assertFalse(ARef.equals(A));
|
||||
Assert.assertFalse(ARef.equals(del));
|
||||
Assert.assertFalse(ARef.equals(ATIns));
|
||||
Assert.assertFalse(ARef.equals(ATCIns));
|
||||
|
||||
|
|
@ -164,11 +134,6 @@ public class AlleleUnitTest {
|
|||
Assert.assertFalse(T.basesMatch(A));
|
||||
Assert.assertFalse(T.equals(A));
|
||||
|
||||
Assert.assertTrue(del.basesMatch(del));
|
||||
Assert.assertTrue(del.basesMatch(delRef));
|
||||
Assert.assertTrue(del.equals(del));
|
||||
Assert.assertFalse(del.equals(delRef));
|
||||
|
||||
Assert.assertTrue(ATIns.equals(ATIns));
|
||||
Assert.assertFalse(ATIns.equals(ATCIns));
|
||||
Assert.assertTrue(ATIns.basesMatch("AT"));
|
||||
|
|
@ -209,7 +174,6 @@ public class AlleleUnitTest {
|
|||
public void testExtend() {
|
||||
Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString());
|
||||
Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString());
|
||||
Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString());
|
||||
Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString());
|
||||
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString());
|
||||
Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString());
|
||||
|
|
|
|||
|
|
@ -225,10 +225,10 @@ public class VariantContextTestProvider {
|
|||
add(builder());
|
||||
add(builder().alleles("A"));
|
||||
add(builder().alleles("A", "C", "T"));
|
||||
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
|
||||
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
|
||||
add(builder().alleles("A", "AC"));
|
||||
add(builder().alleles("A", "ACAGT"));
|
||||
add(builder().loc("1", 10, 11).alleles("AC", "A"));
|
||||
add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
|
||||
|
||||
// make sure filters work
|
||||
add(builder().unfiltered());
|
||||
|
|
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
|
|||
|
||||
sites.add(builder().alleles("A").make());
|
||||
sites.add(builder().alleles("A", "C", "T").make());
|
||||
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
|
||||
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
|
||||
sites.add(builder().alleles("A", "AC").make());
|
||||
sites.add(builder().alleles("A", "ACAGT").make());
|
||||
|
||||
for ( VariantContext site : sites ) {
|
||||
addGenotypes(site);
|
||||
|
|
|
|||
|
|
@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
int snpLocStart = 10;
|
||||
int snpLocStop = 10;
|
||||
|
||||
// - / ATC [ref] from 20-23
|
||||
// - / ATC [ref] from 20-22
|
||||
String delLoc = "chr1";
|
||||
int delLocStart = 20;
|
||||
int delLocStop = 23;
|
||||
int delLocStop = 22;
|
||||
|
||||
// - [ref] / ATC from 20-20
|
||||
String insLoc = "chr1";
|
||||
int insLocStart = 20;
|
||||
int insLocStop = 20;
|
||||
|
||||
// - / A / T / ATC [ref] from 20-23
|
||||
String mixedLoc = "chr1";
|
||||
int mixedLocStart = 20;
|
||||
int mixedLocStop = 23;
|
||||
|
||||
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
del = Allele.create("A");
|
||||
delRef = Allele.create("A", true);
|
||||
|
||||
A = Allele.create("A");
|
||||
C = Allele.create("C");
|
||||
|
|
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@BeforeMethod
|
||||
public void beforeTest() {
|
||||
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
||||
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
||||
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
|
||||
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testCreatingDeletionVariantContext() {
|
||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||
|
||||
Assert.assertEquals(vc.getChr(), delLoc);
|
||||
Assert.assertEquals(vc.getStart(), delLocStart);
|
||||
|
|
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testMatchingAlleles() {
|
||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
|
||||
|
||||
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
|
||||
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
|
||||
|
|
@ -386,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testAccessingCompleteGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, ATC);
|
||||
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del));
|
||||
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del));
|
||||
Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
|
||||
Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
|
||||
Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
|
||||
|
|
@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertEquals(10, vc.getCalledChrCount());
|
||||
Assert.assertEquals(3, vc.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(4, vc.getCalledChrCount(T));
|
||||
Assert.assertEquals(3, vc.getCalledChrCount(del));
|
||||
Assert.assertEquals(3, vc.getCalledChrCount(ATC));
|
||||
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
|
||||
}
|
||||
|
||||
|
|
@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
public void testAccessingRefGenotypes() {
|
||||
List<Allele> alleles1 = Arrays.asList(Aref, T);
|
||||
List<Allele> alleles2 = Arrays.asList(Aref);
|
||||
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
|
||||
List<Allele> alleles3 = Arrays.asList(Aref, T);
|
||||
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
|
||||
Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref));
|
||||
|
|
@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFilters() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
|
||||
|
|
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testRepeatAllele() {
|
||||
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
Allele atc = Allele.create("ATC", false);
|
||||
Allele atcatc = Allele.create("ATCATC", false);
|
||||
Allele ccccR = Allele.create("CCCC", true);
|
||||
Allele cc = Allele.create("CC", false);
|
||||
Allele cccccc = Allele.create("CCCCCC", false);
|
||||
Allele gagaR = Allele.create("GAGA", true);
|
||||
Allele gagagaga = Allele.create("GAGAGAGA", false);
|
||||
Allele nullR = Allele.create("A", true);
|
||||
Allele nullA = Allele.create("A", false);
|
||||
Allele atc = Allele.create("AATC", false);
|
||||
Allele atcatc = Allele.create("AATCATC", false);
|
||||
Allele ccccR = Allele.create("ACCCC", true);
|
||||
Allele cc = Allele.create("ACC", false);
|
||||
Allele cccccc = Allele.create("ACCCCCC", false);
|
||||
Allele gagaR = Allele.create("AGAGA", true);
|
||||
Allele gagagaga = Allele.create("AGAGAGAGA", false);
|
||||
|
||||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
|
@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||
|
||||
|
||||
// -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// ATC*,-,ATCATC
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make();
|
||||
// ATC*,A,ATCATC
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
||||
|
|
@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
||||
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
||||
|
|
@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
// GAGA*,-,GAGAGAGA
|
||||
refBytes = "TGAGAGAGAGATTT".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
||||
|
|
@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testVCFfromGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del));
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make();
|
||||
|
||||
VariantContext vc12 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true);
|
||||
VariantContext vc1 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName())), true);
|
||||
VariantContext vc23 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true);
|
||||
VariantContext vc4 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g4.getSampleName())), true);
|
||||
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true);
|
||||
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())), true);
|
||||
|
||||
Assert.assertTrue(vc12.isPolymorphicInSamples());
|
||||
Assert.assertTrue(vc23.isPolymorphicInSamples());
|
||||
Assert.assertTrue(vc1.isMonomorphicInSamples());
|
||||
Assert.assertTrue(vc4.isMonomorphicInSamples());
|
||||
Assert.assertTrue(vc14.isMonomorphicInSamples());
|
||||
Assert.assertTrue(vc5.isPolymorphicInSamples());
|
||||
|
||||
Assert.assertTrue(vc12.isSNP());
|
||||
Assert.assertTrue(vc12.isVariant());
|
||||
|
|
@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertFalse(vc14.isVariant());
|
||||
Assert.assertFalse(vc14.isBiallelic());
|
||||
|
||||
Assert.assertTrue(vc5.isIndel());
|
||||
Assert.assertTrue(vc5.isSimpleDeletion());
|
||||
Assert.assertTrue(vc5.isVariant());
|
||||
Assert.assertTrue(vc5.isBiallelic());
|
||||
|
||||
Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
|
||||
Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
|
||||
}
|
||||
|
||||
public void testGetGenotypeMethods() {
|
||||
|
|
@ -664,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@DataProvider(name = "getAlleles")
|
||||
public Object[][] mergeAllelesData() {
|
||||
new GetAllelesTest("A*", Aref);
|
||||
new GetAllelesTest("-*", delRef);
|
||||
new GetAllelesTest("A*/C", Aref, C);
|
||||
new GetAllelesTest("A*/C/T", Aref, C, T);
|
||||
new GetAllelesTest("A*/T/C", Aref, T, C);
|
||||
new GetAllelesTest("A*/C/T/-", Aref, C, T, del);
|
||||
new GetAllelesTest("A*/T/C/-", Aref, T, C, del);
|
||||
new GetAllelesTest("A*/-/T/C", Aref, del, T, C);
|
||||
new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC);
|
||||
new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC);
|
||||
new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C);
|
||||
|
||||
return GetAllelesTest.getTests(GetAllelesTest.class);
|
||||
}
|
||||
|
|
@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "getAlleles")
|
||||
public void testMergeAlleles(GetAllelesTest cfg) {
|
||||
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
|
||||
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
|
||||
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
|
||||
|
||||
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
|
||||
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
|
||||
|
|
@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
|
||||
Assert.assertEquals(sub.getFilters(), vc.getFilters());
|
||||
Assert.assertEquals(sub.getID(), vc.getID());
|
||||
Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel());
|
||||
Assert.assertEquals(sub.getAttributes(), vc.getAttributes());
|
||||
|
||||
Set<Genotype> expectedGenotypes = new HashSet<Genotype>();
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
|||
import java.util.*;
|
||||
|
||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||
Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
|
||||
Allele Aref, T, C, Cref, ATC, ATCATC;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeSuite
|
||||
|
|
@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
// alleles
|
||||
Aref = Allele.create("A", true);
|
||||
Cref = Allele.create("C", true);
|
||||
delRef = Allele.create("-", true);
|
||||
T = Allele.create("T");
|
||||
C = Allele.create("C");
|
||||
ATC = Allele.create("ATC");
|
||||
|
|
@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||
int start = 10;
|
||||
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
|
||||
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
Arrays.asList(Aref, C),
|
||||
Arrays.asList(Aref, T, C)); // in order of appearence
|
||||
|
||||
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
|
||||
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
|
||||
new MergeAllelesTest(Arrays.asList(delRef),
|
||||
Arrays.asList(Cref));
|
||||
new MergeAllelesTest(Arrays.asList(Aref),
|
||||
Arrays.asList(Aref, ATC),
|
||||
Arrays.asList(Aref, ATC));
|
||||
|
||||
new MergeAllelesTest(Arrays.asList(delRef),
|
||||
Arrays.asList(delRef, ATC),
|
||||
Arrays.asList(delRef, ATC));
|
||||
|
||||
new MergeAllelesTest(Arrays.asList(delRef),
|
||||
Arrays.asList(delRef, ATC, ATCATC),
|
||||
Arrays.asList(delRef, ATC, ATCATC));
|
||||
new MergeAllelesTest(Arrays.asList(Aref),
|
||||
Arrays.asList(Aref, ATC, ATCATC),
|
||||
Arrays.asList(Aref, ATC, ATCATC));
|
||||
|
||||
// alleles in the order we see them
|
||||
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
|
||||
Arrays.asList(delRef, ATC, ATCATC),
|
||||
Arrays.asList(delRef, ATCATC, ATC));
|
||||
new MergeAllelesTest(Arrays.asList(Aref, ATCATC),
|
||||
Arrays.asList(Aref, ATC, ATCATC),
|
||||
Arrays.asList(Aref, ATCATC, ATC));
|
||||
|
||||
// same
|
||||
new MergeAllelesTest(Arrays.asList(delRef, ATC),
|
||||
Arrays.asList(delRef, ATCATC),
|
||||
Arrays.asList(delRef, ATC, ATCATC));
|
||||
new MergeAllelesTest(Arrays.asList(Aref, ATC),
|
||||
Arrays.asList(Aref, ATCATC),
|
||||
Arrays.asList(Aref, ATC, ATCATC));
|
||||
|
||||
return MergeAllelesTest.getTests(MergeAllelesTest.class);
|
||||
}
|
||||
|
|
@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
// test alleles are equal
|
||||
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// basic allele clipping test
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
||||
final String ref;
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final int expectedClip;
|
||||
|
||||
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
||||
super(ReverseClippingPositionTestProvider.class);
|
||||
this.ref = ref;
|
||||
for ( final String allele : alleles )
|
||||
this.alleles.add(Allele.create(allele));
|
||||
this.expectedClip = expectedClip;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
||||
public Object[][] makeReverseClippingPositionTestProvider() {
|
||||
// pair clipping
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||
|
||||
// triplets
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||
|
||||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
|
||||
Allele A, Aref, T, Tref;
|
||||
|
||||
Allele del, delRef, ATC, ATCref;
|
||||
Allele ATC, ATCref;
|
||||
// A [ref] / T at 10
|
||||
|
||||
GenomeLoc snpLoc;
|
||||
|
|
@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
|
||||
@BeforeMethod
|
||||
public void before() {
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
|
||||
A = Allele.create("A");
|
||||
Aref = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
|
|
|
|||
|
|
@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
Map<String, Object> attributes = new HashMap<String,Object>();
|
||||
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
|
||||
|
||||
alleles.add(Allele.create("-",true));
|
||||
alleles.add(Allele.create("CC",false));
|
||||
alleles.add(Allele.create("A",true));
|
||||
alleles.add(Allele.create("ACC",false));
|
||||
|
||||
attributes.put("DP","50");
|
||||
for (String name : header.getGenotypeSamples()) {
|
||||
|
|
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
genotypes.add(gt);
|
||||
}
|
||||
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
|
||||
.genotypes(genotypes).attributes(attributes).make();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue