From 29319bf2224aa25f4ff8fe8993dc7666608c9f31 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 19 Feb 2013 17:20:26 -0500 Subject: [PATCH 2/3] Improved allele trimming code in GATKVariantContextUtils -- Now supports trimming the alleles from both the reverse and forward direction. -- Added lots of unit tests for forward allele trimming, as well as creating VC from forward and reverse trimming. -- Added docs and tests for the code, to bring it up to GATK spec --- .../variant/GATKVariantContextUtils.java | 161 ++++++++++++++++-- .../GATKVariantContextUtilsUnitTest.java | 101 ++++++++++- 2 files changed, 242 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index b0f3cc5fe..288ee4ca3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.variant; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; @@ -509,6 +510,25 @@ public class GATKVariantContextUtils { * @return a list of bi-allelic (or monomorphic) variant context */ public static List splitVariantContextToBiallelics(final VariantContext vc) { + return splitVariantContextToBiallelics(vc, false); + } + + /** + * Split variant context into its biallelic components if there are more than 2 alleles + + * For a VC with A/B/C alleles, returns A/B and A/C contexts. 
+ * Genotypes are all no-calls now (it's not possible to fix them easily) + * Alleles are right trimmed to satisfy VCF conventions + * + * If vc is biallelic or non-variant it is just returned + * + * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome + * @return a list of bi-allelic (or monomorphic) variant context + */ + public static List splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft) { if ( ! vc.isVariant() || vc.isBiallelic() ) // non variant or biallelics already satisfy the contract return Collections.singletonList(vc); @@ -521,7 +541,8 @@ public class GATKVariantContextUtils { builder.alleles(alleles); builder.genotypes(subsetDiploidAlleles(vc, alleles, false)); VariantContextUtils.calculateChromosomeCounts(builder, true); - biallelics.add(reverseTrimAlleles(builder.make())); + final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); + biallelics.add(trimmed); } return biallelics; @@ -558,7 +579,7 @@ public class GATKVariantContextUtils { final boolean filteredAreUncalled, final boolean mergeInfoWithMaxAC ) { int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size(); - return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC); + return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC); } /** @@ -902,14 +923,66 @@ public class GATKVariantContextUtils { return uniqify ? sampleName + "." 
+ trackName : sampleName; } + /** + * Trim the alleles in inputVC from the reverse direction + * + * @param inputVC a non-null input VC whose alleles might need a haircut + * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up + */ public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + return trimAlleles(inputVC, false, true); + } - // see whether we need to trim common reference base from all alleles - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + /** + * Trim the alleles in inputVC from the forward direction + * + * @param inputVC a non-null input VC whose alleles might need a haircut + * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up + */ + public static VariantContext forwardTrimAlleles( final VariantContext inputVC ) { + return trimAlleles(inputVC, true, false); + } + + /** + * Trim the alleles in inputVC forward and reverse, as requested + * + * @param inputVC a non-null input VC whose alleles might need a haircut + * @param trimForward should we trim up the alleles from the forward direction? + * @param trimReverse should we trim up the alleles from the reverse direction? + * @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles + */ + @Ensures("result != null") + public static VariantContext trimAlleles(final VariantContext inputVC, final boolean trimForward, final boolean trimReverse) { + if ( inputVC == null ) throw new IllegalArgumentException("inputVC cannot be null"); + + if ( inputVC.getNAlleles() <= 1 ) return inputVC; - final List alleles = new ArrayList(); + // see whether we need to trim common reference base from all alleles + final int revTrim = trimReverse ? 
computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes()) : 0; + final VariantContext revTrimVC = trimAlleles(inputVC, -1, revTrim); + final int fwdTrim = trimForward ? computeForwardClipping(revTrimVC.getAlleles()) : -1; + return trimAlleles(revTrimVC, fwdTrim, 0); + } + + /** + * Trim up alleles in inputVC, cutting out all bases up to fwdTrimEnd inclusive and + * the last revTrim bases from the end + * + * @param inputVC a non-null input VC + * @param fwdTrimEnd bases up to this index (can be -1) will be removed from the start of all alleles + * @param revTrim the last revTrim bases of each allele will be clipped off as well + * @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles + */ + @Requires({"inputVC != null"}) + @Ensures("result != null") + protected static VariantContext trimAlleles(final VariantContext inputVC, + final int fwdTrimEnd, + final int revTrim) { + if( fwdTrimEnd == -1 && revTrim == 0 ) // nothing to do, so just return inputVC unmodified + return inputVC; + + final List alleles = new LinkedList(); final GenotypesContext genotypes = GenotypesContext.create(); final Map originalToTrimmedAlleleMap = new HashMap(); @@ -919,7 +992,7 @@ public class GATKVariantContextUtils { originalToTrimmedAlleleMap.put(a, a); } else { // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final byte[] newBases = Arrays.copyOfRange(a.getBases(), fwdTrimEnd+1, a.length()-revTrim); final Allele trimmedAllele = Allele.create(newBases, a.isReference()); alleles.add(trimmedAllele); originalToTrimmedAlleleMap.put(a, trimmedAllele); @@ -939,13 +1012,16 @@ public class GATKVariantContextUtils { genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); } - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 
1).alleles(alleles).genotypes(genotypes).make(); + final int start = inputVC.getStart() + (fwdTrimEnd + 1); + final VariantContextBuilder builder = new VariantContextBuilder(inputVC); + builder.start(start); + builder.stop(start + alleles.get(0).length() - 1); + builder.alleles(alleles); + builder.genotypes(genotypes); + return builder.make(); } - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { + public static int computeReverseClipping(final List unclippedAlleles, final byte[] ref) { int clipping = 0; boolean stillClipping = true; @@ -957,16 +1033,13 @@ public class GATKVariantContextUtils { // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); + return clipping - 1; - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + if ( a.length() - clipping <= 0 || a.length() == 0 ) { stillClipping = false; } else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; + return -1; } else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { stillClipping = false; @@ -979,6 +1052,58 @@ public class GATKVariantContextUtils { return clipping; } + /** + * Clip out any unnecessary bases off the front of the alleles + * + * The VCF spec represents alleles as block substitutions, replacing AC with A for a + * 1 bp deletion of the C. However, it's possible that we'd end up with alleles that + * contain extra bases on the left, such as GAC/GA to represent the same 1 bp deletion. 
+ * This routine finds an offset among all alleles that can be safely trimmed + * off the left of each allele and still represent the same block substitution. + * + * A/C => A/C + * AC/A => AC/A + * ACC/AC => CC/C + * AGT/CAT => AGT/CAT + * /C => /C + * + * @param unclippedAlleles a non-null list of alleles that we want to clip + * @return the offset into the alleles where we can safely clip, inclusive, or + * -1 if no clipping is tolerated. So, if the result is 0, then we can remove + * the first base of every allele. If the result is 1, we can remove the + * second base. + */ + public static int computeForwardClipping(final List unclippedAlleles) { + // cannot clip unless there's at least 1 alt allele + if ( unclippedAlleles.size() <= 1 ) + return -1; + + // we cannot forward clip any set of alleles containing a symbolic allele + int minAlleleLength = Integer.MAX_VALUE; + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + return -1; + minAlleleLength = Math.min(minAlleleLength, a.length()); + } + + final byte[] firstAlleleBases = unclippedAlleles.get(0).getBases(); + int indexOflastSharedBase = -1; + + // the -1 to the stop is that we can never clip off the right most base + for ( int i = 0; i < minAlleleLength - 1; i++) { + final byte base = firstAlleleBases[i]; + + for ( final Allele allele : unclippedAlleles ) { + if ( allele.getBases()[i] != base ) + return indexOflastSharedBase; + } + + indexOflastSharedBase = i; + } + + return indexOflastSharedBase; + } + public static double computeHardyWeinbergPvalue(VariantContext vc) { if ( vc.getCalledChrCount() == 0 ) return 0.0; diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index 6eb9afc8c..433f4056c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -618,7 +618,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes()); Assert.assertEquals(result, cfg.expectedClip); } @@ -888,4 +888,101 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(result.getSecond().length,2); } -} + + // -------------------------------------------------------------------------------- + // + // test forward clipping + // + // -------------------------------------------------------------------------------- + + @DataProvider(name = "ForwardClippingData") + public Object[][] makeForwardClippingData() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + tests.add(new Object[]{Arrays.asList("A"), -1}); + tests.add(new Object[]{Arrays.asList(""), -1}); + tests.add(new Object[]{Arrays.asList("A", "C"), -1}); + tests.add(new Object[]{Arrays.asList("AC", "C"), -1}); + tests.add(new Object[]{Arrays.asList("A", "G"), -1}); + tests.add(new Object[]{Arrays.asList("A", "T"), -1}); + tests.add(new Object[]{Arrays.asList("GT", "CA"), -1}); + tests.add(new Object[]{Arrays.asList("GT", "CT"), -1}); + tests.add(new Object[]{Arrays.asList("ACC", "AC"), 0}); + tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), 1}); + tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), 1}); + tests.add(new Object[]{Arrays.asList("ACGC", "ACGA"), 2}); + tests.add(new Object[]{Arrays.asList("ACGC", "AGC"), 0}); + tests.add(new Object[]{Arrays.asList("A", ""), -1}); + for ( int len = 0; len < 50; 
len++ ) + tests.add(new Object[]{Arrays.asList("A" + new String(Utils.dupBytes((byte)'C', len)), "C"), -1}); + + tests.add(new Object[]{Arrays.asList("A", "T", "C"), -1}); + tests.add(new Object[]{Arrays.asList("AT", "AC", "AG"), 0}); + tests.add(new Object[]{Arrays.asList("AT", "AC", "A"), -1}); + tests.add(new Object[]{Arrays.asList("AT", "AC", "ACG"), 0}); + tests.add(new Object[]{Arrays.asList("AC", "AC", "ACG"), 0}); + tests.add(new Object[]{Arrays.asList("AC", "ACT", "ACG"), 0}); + tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGTA"), 1}); + tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGCA"), 1}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ForwardClippingData") + public void testForwardClipping(final List alleleStrings, final int expectedClip) { + final List alleles = new LinkedList(); + for ( final String alleleString : alleleStrings ) + alleles.add(Allele.create(alleleString)); + + for ( final List myAlleles : Utils.makePermutations(alleles, alleles.size(), false)) { + final int actual = GATKVariantContextUtils.computeForwardClipping(myAlleles); + Assert.assertEquals(actual, expectedClip); + } + } + + @DataProvider(name = "ClipAlleleTest") + public Object[][] makeClipAlleleTest() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + tests.add(new Object[]{Arrays.asList("ACC", "AC"), Arrays.asList("AC", "A"), 0}); + tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), Arrays.asList("GC", "G"), 2}); + tests.add(new Object[]{Arrays.asList("ACGC", "ACGA"), Arrays.asList("C", "A"), 3}); + tests.add(new Object[]{Arrays.asList("ACGC", "AGC"), Arrays.asList("AC", "A"), 0}); + tests.add(new Object[]{Arrays.asList("AT", "AC", "AG"), Arrays.asList("T", "C", "G"), 1}); + tests.add(new Object[]{Arrays.asList("AT", "AC", "ACG"), Arrays.asList("T", "C", "CG"), 1}); + tests.add(new Object[]{Arrays.asList("AC", "ACT", "ACG"), 
Arrays.asList("C", "CT", "CG"), 1}); + tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGTA"), Arrays.asList("G", "GT", "GTA"), 2}); + tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGCA"), Arrays.asList("G", "GT", "GCA"), 2}); + + // trims from left and right + tests.add(new Object[]{Arrays.asList("ACGTT", "ACCTT"), Arrays.asList("G", "C"), 2}); + tests.add(new Object[]{Arrays.asList("ACGTT", "ACCCTT"), Arrays.asList("G", "CC"), 2}); + tests.add(new Object[]{Arrays.asList("ACGTT", "ACGCTT"), Arrays.asList("G", "GC"), 2}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ClipAlleleTest") + public void testClipAlleles(final List alleleStrings, final List expected, final int numLeftClipped) { + final List alleles = new LinkedList(); + final int length = alleleStrings.get(0).length(); + boolean first = true; + for ( final String alleleString : alleleStrings ) { + alleles.add(Allele.create(alleleString, first)); + first = false; + } + + final int start = 10; + final VariantContextBuilder builder = new VariantContextBuilder("test", "20", start, start+length-1, alleles); + final VariantContext unclipped = builder.make(); + final VariantContext clipped = GATKVariantContextUtils.trimAlleles(unclipped, true, true); + + Assert.assertEquals(clipped.getStart(), unclipped.getStart() + numLeftClipped); + for ( int i = 0; i < alleles.size(); i++ ) { + final Allele trimmed = clipped.getAlleles().get(i); + Assert.assertEquals(trimmed.getBaseString(), expected.get(i)); + } + } +} \ No newline at end of file From 8ac6d3521f80566de31480979457d8658534f561 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 19 Feb 2013 20:19:49 -0500 Subject: [PATCH 3/3] Vast improvements to AssessNA12878 code and functionality -- AssessNA12878 now breaks out multi-allelics into bi-allelic components. This means that we can properly assess multi-allelic calls against the bi-allelic KB -- Refactor AssessNA12878, moving into assess package in KB. 
Split out previously private classes in the walker itself into separate classes. Added real docs for all of the classes. -- Vastly expand (from 0) unit tests for NA12878 assessments -- Allow sites only VCs to be evaluated by Assessor -- Move utility for creating simple VCs from a list of string alleles from GATKVariantContextUtilsUnitTest to GATKVariantContextUtils -- Assessor bugfix for discordant records at a site. Previous version didn't handle properly the case where one had a non-matching call in the callset w.r.t. the KB, so that the KB element was eaten during the analysis. Fixed. UnitTested -- See GSA-781 -- Handle multi-allelic variants in KB for more information -- Bugfix for missing site counting in AssessNA12878. Previous version would count N misses for every missed value at a site. Not that this has much impact but it's worth fixing -- UnitTests for BadSitesWriter -- UnitTests for filtered and filtering sites in the Assessor -- Cleanup end report generation code (simplify the code). Note that instead of "indel" the new code will print out "INDELS" -- Assessor DoC calculations now use LIBS and RBPs for the depth calculation. The previous version was broken for reduced reads. Added unit test that reads a complex reduced read example and matches the DoC of this BAM with the output of the GATK DoC tool here. -- Added convenience constructor for LIBS using just SAMFileReader and an iterator. It's now easy to create a LIBS from a BAM at a locus. Added advanceToLocus function that moves the LIBS to a specific position. 
UnitTested via the assessor (which isn't ideal, but is a proper test) --- .../locusiterator/LocusIteratorByState.java | 52 ++++++++++++++++++- .../variant/GATKVariantContextUtils.java | 27 ++++++++++ .../GATKVariantContextUtilsUnitTest.java | 19 ++----- 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index 435f9901a..eed29feca 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -28,13 +28,18 @@ package org.broadinstitute.sting.utils.locusiterator; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecordIterator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; +import org.broadinstitute.sting.gatk.iterators.GATKSAMIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.*; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -136,6 +141,25 @@ public final class LocusIteratorByState extends LocusIterator { readInformation.keepUniqueReadListInLIBS()); } + /** + * Create a new LocusIteratorByState based on a SAMFileReader using reads in an iterator it + * + * Simple constructor that uses the samples in 
the reader, doesn't do any downsampling, + * and makes a new GenomeLocParser using the reader. This constructor will be slow(ish) + * if you continually invoke this constructor, but it's easy to make. + * + * @param reader a non-null reader + * @param it an iterator from reader that has the reads we want to use to create ReadBackPileups + */ + public LocusIteratorByState(final SAMFileReader reader, final SAMRecordIterator it) { + this(new GATKSAMIterator(it), + new LIBSDownsamplingInfo(false, 0), + true, + new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()), + SampleUtils.getSAMFileSamples(reader.getFileHeader()), + false); + } + /** * Create a new LocusIteratorByState * @@ -149,7 +173,8 @@ public final class LocusIteratorByState extends LocusIterator { * be mapped to this null sample * @param maintainUniqueReadsList if true, we will keep the unique reads from off the samIterator and make them * available via the transferReadsFromAllPreviousPileups interface - */ protected LocusIteratorByState(final Iterator samIterator, + */ + protected LocusIteratorByState(final Iterator samIterator, final LIBSDownsamplingInfo downsamplingInfo, final boolean includeReadsWithDeletionAtLoci, final GenomeLocParser genomeLocParser, @@ -221,6 +246,29 @@ public final class LocusIteratorByState extends LocusIterator { return currentAlignmentContext; } + /** + * Move this LIBS until we are over position + * + * Will return null if cannot reach position (because we run out of data in the locus) + * + * @param position the start position of the AlignmentContext we want back + * @return a AlignmentContext at position, or null if this isn't possible + */ + public AlignmentContext advanceToLocus(final int position) { + while ( hasNext() ) { + final AlignmentContext context = next(); + + if ( context == null ) + // we ran out of data + return null; + + if ( context.getPosition() == position) + return context; + } + + return null; + } + /** * Creates the next alignment context 
from the given state. Note that this is implemented as a * lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 288ee4ca3..3a5ddb7a0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -589,6 +589,8 @@ public class GATKVariantContextUtils { * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use * SampleUtils.verifyUniqueSamplesNames to check that before using sempleMerge. * + * For more information on this method see: http://www.thedistractionnetwork.com/programmer-problem/ + * * @param unsortedVCs collection of unsorted VCs * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records @@ -1292,4 +1294,29 @@ public class GATKVariantContextUtils { return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2)); } } + + /** + * For testing purposes only. Create a site-only VariantContext at contig:start containing alleles + * + * @param name the name of the VC + * @param contig the contig for the VC + * @param start the start of the VC + * @param alleleStrings a non-null, non-empty list of strings for the alleles. The first will be the ref allele, and others the + * alt. 
Will compute the stop of the VC from the length of the reference allele + * @return a non-null VariantContext + */ + public static VariantContext makeFromAlleles(final String name, final String contig, final int start, final List alleleStrings) { + if ( alleleStrings == null || alleleStrings.isEmpty() ) + throw new IllegalArgumentException("alleleStrings must be non-empty, non-null list"); + + final List alleles = new LinkedList(); + final int length = alleleStrings.get(0).length(); + + boolean first = true; + for ( final String alleleString : alleleStrings ) { + alleles.add(Allele.create(alleleString, first)); + first = false; + } + return new VariantContextBuilder(name, contig, start, start+length-1, alleles).make(); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index 433f4056c..2a15d709a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2012 The Broad Institute -* +* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -9,10 +9,10 @@ * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: -* +* * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
-* +* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -966,21 +966,12 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "ClipAlleleTest") public void testClipAlleles(final List alleleStrings, final List expected, final int numLeftClipped) { - final List alleles = new LinkedList(); - final int length = alleleStrings.get(0).length(); - boolean first = true; - for ( final String alleleString : alleleStrings ) { - alleles.add(Allele.create(alleleString, first)); - first = false; - } - final int start = 10; - final VariantContextBuilder builder = new VariantContextBuilder("test", "20", start, start+length-1, alleles); - final VariantContext unclipped = builder.make(); + final VariantContext unclipped = GATKVariantContextUtils.makeFromAlleles("test", "20", start, alleleStrings); final VariantContext clipped = GATKVariantContextUtils.trimAlleles(unclipped, true, true); Assert.assertEquals(clipped.getStart(), unclipped.getStart() + numLeftClipped); - for ( int i = 0; i < alleles.size(); i++ ) { + for ( int i = 0; i < unclipped.getAlleles().size(); i++ ) { final Allele trimmed = clipped.getAlleles().get(i); Assert.assertEquals(trimmed.getBaseString(), expected.get(i)); }