Merge pull request #54 from broadinstitute/md_improve_kb
Many improvements to NA12878 KB
This commit is contained in:
commit
48c699eec6
|
|
@ -28,13 +28,18 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecordIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -136,6 +141,25 @@ public final class LocusIteratorByState extends LocusIterator {
|
|||
readInformation.keepUniqueReadListInLIBS());
|
||||
}
|
||||
|
||||
/**
 * Create a new LocusIteratorByState over the reads supplied by an iterator drawn from a SAMFileReader
 *
 * Simple constructor that uses the samples in the reader, doesn't do any downsampling,
 * and makes a new GenomeLocParser using the reader's sequence dictionary. This constructor
 * will be slow(ish) if you continually invoke it, but it's easy to make.
 *
 * @param reader a non-null reader
 * @param it an iterator from reader that has the reads we want to use to create ReadBackPileups
 */
public LocusIteratorByState(final SAMFileReader reader, final SAMRecordIterator it) {
    this(new GATKSAMIterator(it),
            new LIBSDownsamplingInfo(false, 0), // no downsampling (see javadoc above)
            true,                               // includeReadsWithDeletionAtLoci
            new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()),
            SampleUtils.getSAMFileSamples(reader.getFileHeader()),
            false);                             // presumably maintainUniqueReadsList — confirm against the protected ctor
}
|
||||
|
||||
/**
|
||||
* Create a new LocusIteratorByState
|
||||
*
|
||||
|
|
@ -149,7 +173,8 @@ public final class LocusIteratorByState extends LocusIterator {
|
|||
* be mapped to this null sample
|
||||
* @param maintainUniqueReadsList if true, we will keep the unique reads from off the samIterator and make them
|
||||
* available via the transferReadsFromAllPreviousPileups interface
|
||||
*/ protected LocusIteratorByState(final Iterator<GATKSAMRecord> samIterator,
|
||||
*/
|
||||
protected LocusIteratorByState(final Iterator<GATKSAMRecord> samIterator,
|
||||
final LIBSDownsamplingInfo downsamplingInfo,
|
||||
final boolean includeReadsWithDeletionAtLoci,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
|
|
@ -221,6 +246,29 @@ public final class LocusIteratorByState extends LocusIterator {
|
|||
return currentAlignmentContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move this LIBS until we are over position
|
||||
*
|
||||
* Will return null if cannot reach position (because we run out of data in the locus)
|
||||
*
|
||||
* @param position the start position of the AlignmentContext we want back
|
||||
* @return a AlignmentContext at position, or null if this isn't possible
|
||||
*/
|
||||
public AlignmentContext advanceToLocus(final int position) {
|
||||
while ( hasNext() ) {
|
||||
final AlignmentContext context = next();
|
||||
|
||||
if ( context == null )
|
||||
// we ran out of data
|
||||
return null;
|
||||
|
||||
if ( context.getPosition() == position)
|
||||
return context;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the next alignment context from the given state. Note that this is implemented as a
|
||||
* lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -509,6 +510,25 @@ public class GATKVariantContextUtils {
|
|||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
    // convenience overload: never left-trims alleles (see the two-arg overload for details)
    return splitVariantContextToBiallelics(vc, false);
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
|
|
@ -521,7 +541,8 @@ public class GATKVariantContextUtils {
|
|||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
|
||||
biallelics.add(trimmed);
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
|
|
@ -558,7 +579,7 @@ public class GATKVariantContextUtils {
|
|||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
|
||||
return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
|
||||
return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -568,6 +589,8 @@ public class GATKVariantContextUtils {
|
|||
* simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
|
||||
* SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
|
||||
*
|
||||
* For more information on this method see: http://www.thedistractionnetwork.com/programmer-problem/
|
||||
*
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
|
|
@ -902,14 +925,66 @@ public class GATKVariantContextUtils {
|
|||
return uniqify ? sampleName + "." + trackName : sampleName;
|
||||
}
|
||||
|
||||
/**
 * Trim the alleles in inputVC from the reverse direction
 *
 * @param inputVC a non-null input VC whose alleles might need a haircut
 * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
    // reverse-only trim: trimForward = false, trimReverse = true
    return trimAlleles(inputVC, false, true);
}
|
||||
|
||||
// see whether we need to trim common reference base from all alleles
|
||||
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
|
||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||
/**
 * Trim the alleles in inputVC from the forward direction
 *
 * @param inputVC a non-null input VC whose alleles might need a haircut
 * @return a non-null VariantContext (may be == to inputVC) with alleles trimmed up
 */
public static VariantContext forwardTrimAlleles( final VariantContext inputVC ) {
    // forward-only trim: trimForward = true, trimReverse = false
    return trimAlleles(inputVC, true, false);
}
|
||||
|
||||
/**
|
||||
* Trim the alleles in inputVC forward and reverse, as requested
|
||||
*
|
||||
* @param inputVC a non-null input VC whose alleles might need a haircut
|
||||
* @param trimForward should we trim up the alleles from the foward direction?
|
||||
* @param trimReverse shold we trim up the alleles from the reverse direction?
|
||||
* @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public static VariantContext trimAlleles(final VariantContext inputVC, final boolean trimForward, final boolean trimReverse) {
|
||||
if ( inputVC == null ) throw new IllegalArgumentException("inputVC cannot be null");
|
||||
|
||||
if ( inputVC.getNAlleles() <= 1 )
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
// see whether we need to trim common reference base from all alleles
|
||||
final int revTrim = trimReverse ? computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes()) : 0;
|
||||
final VariantContext revTrimVC = trimAlleles(inputVC, -1, revTrim);
|
||||
final int fwdTrim = trimForward ? computeForwardClipping(revTrimVC.getAlleles()) : -1;
|
||||
return trimAlleles(revTrimVC, fwdTrim, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim up alleles in inputVC, cutting out all bases up to fwdTrimEnd inclusive and
|
||||
* the last revTrim bases from the end
|
||||
*
|
||||
* @param inputVC a non-null input VC
|
||||
* @param fwdTrimEnd bases up to this index (can be -1) will be removed from the start of all alleles
|
||||
* @param revTrim the last revTrim bases of each allele will be clipped off as well
|
||||
* @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles
|
||||
*/
|
||||
@Requires({"inputVC != null"})
|
||||
@Ensures("result != null")
|
||||
protected static VariantContext trimAlleles(final VariantContext inputVC,
|
||||
final int fwdTrimEnd,
|
||||
final int revTrim) {
|
||||
if( fwdTrimEnd == -1 && revTrim == 0 ) // nothing to do, so just return inputVC unmodified
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new LinkedList<Allele>();
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
|
|
@ -919,7 +994,7 @@ public class GATKVariantContextUtils {
|
|||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), fwdTrimEnd+1, a.length()-revTrim);
|
||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
|
|
@ -939,13 +1014,16 @@ public class GATKVariantContextUtils {
|
|||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
|
||||
final int start = inputVC.getStart() + (fwdTrimEnd + 1);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
|
||||
builder.start(start);
|
||||
builder.stop(start + alleles.get(0).length() - 1);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(genotypes);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
||||
final byte[] ref,
|
||||
final int forwardClipping,
|
||||
final boolean allowFullClip) {
|
||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles, final byte[] ref) {
|
||||
int clipping = 0;
|
||||
boolean stillClipping = true;
|
||||
|
||||
|
|
@ -957,16 +1035,13 @@ public class GATKVariantContextUtils {
|
|||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||
if ( a.length() - clipping == 0 )
|
||||
return clipping - (allowFullClip ? 0 : 1);
|
||||
return clipping - 1;
|
||||
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||
if ( a.length() - clipping <= 0 || a.length() == 0 ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
else if ( ref.length == clipping ) {
|
||||
if ( allowFullClip )
|
||||
stillClipping = false;
|
||||
else
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||
stillClipping = false;
|
||||
|
|
@ -979,6 +1054,58 @@ public class GATKVariantContextUtils {
|
|||
return clipping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clip out any unnecessary bases off the front of the alleles
|
||||
*
|
||||
* The VCF spec represents alleles as block substitutions, replacing AC with A for a
|
||||
* 1 bp deletion of the C. However, it's possible that we'd end up with alleles that
|
||||
* contain extra bases on the left, such as GAC/GA to represent the same 1 bp deletion.
|
||||
* This routine finds an offset among all alleles that can be safely trimmed
|
||||
* off the left of each allele and still represent the same block substitution.
|
||||
*
|
||||
* A/C => A/C
|
||||
* AC/A => AC/A
|
||||
* ACC/AC => CC/C
|
||||
* AGT/CAT => AGT/CAT
|
||||
* <DEL>/C => <DEL>/C
|
||||
*
|
||||
* @param unclippedAlleles a non-null list of alleles that we want to clip
|
||||
* @return the offset into the alleles where we can safely clip, inclusive, or
|
||||
* -1 if no clipping is tolerated. So, if the result is 0, then we can remove
|
||||
* the first base of every allele. If the result is 1, we can remove the
|
||||
* second base.
|
||||
*/
|
||||
public static int computeForwardClipping(final List<Allele> unclippedAlleles) {
|
||||
// cannot clip unless there's at least 1 alt allele
|
||||
if ( unclippedAlleles.size() <= 1 )
|
||||
return -1;
|
||||
|
||||
// we cannot forward clip any set of alleles containing a symbolic allele
|
||||
int minAlleleLength = Integer.MAX_VALUE;
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
return -1;
|
||||
minAlleleLength = Math.min(minAlleleLength, a.length());
|
||||
}
|
||||
|
||||
final byte[] firstAlleleBases = unclippedAlleles.get(0).getBases();
|
||||
int indexOflastSharedBase = -1;
|
||||
|
||||
// the -1 to the stop is that we can never clip off the right most base
|
||||
for ( int i = 0; i < minAlleleLength - 1; i++) {
|
||||
final byte base = firstAlleleBases[i];
|
||||
|
||||
for ( final Allele allele : unclippedAlleles ) {
|
||||
if ( allele.getBases()[i] != base )
|
||||
return indexOflastSharedBase;
|
||||
}
|
||||
|
||||
indexOflastSharedBase = i;
|
||||
}
|
||||
|
||||
return indexOflastSharedBase;
|
||||
}
|
||||
|
||||
public static double computeHardyWeinbergPvalue(VariantContext vc) {
|
||||
if ( vc.getCalledChrCount() == 0 )
|
||||
return 0.0;
|
||||
|
|
@ -1167,4 +1294,29 @@ public class GATKVariantContextUtils {
|
|||
return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing purposes only. Create a site-only VariantContext at contig:start containing alleles
|
||||
*
|
||||
* @param name the name of the VC
|
||||
* @param contig the contig for the VC
|
||||
* @param start the start of the VC
|
||||
* @param alleleStrings a non-null, non-empty list of strings for the alleles. The first will be the ref allele, and others the
|
||||
* alt. Will compute the stop of the VC from the length of the reference allele
|
||||
* @return a non-null VariantContext
|
||||
*/
|
||||
public static VariantContext makeFromAlleles(final String name, final String contig, final int start, final List<String> alleleStrings) {
|
||||
if ( alleleStrings == null || alleleStrings.isEmpty() )
|
||||
throw new IllegalArgumentException("alleleStrings must be non-empty, non-null list");
|
||||
|
||||
final List<Allele> alleles = new LinkedList<Allele>();
|
||||
final int length = alleleStrings.get(0).length();
|
||||
|
||||
boolean first = true;
|
||||
for ( final String alleleString : alleleStrings ) {
|
||||
alleles.add(Allele.create(alleleString, first));
|
||||
first = false;
|
||||
}
|
||||
return new VariantContextBuilder(name, contig, start, start+length-1, alleles).make();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
|
|
@ -9,10 +9,10 @@
|
|||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
|
|
@ -618,7 +618,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes());
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
|
||||
|
|
@ -888,4 +888,92 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getSecond().length,2);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// test forward clipping
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ForwardClippingData")
|
||||
public Object[][] makeForwardClippingData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{Arrays.asList("A"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("<DEL>"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("A", "C"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("AC", "C"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("A", "G"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("A", "T"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("GT", "CA"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("GT", "CT"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("ACC", "AC"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), 1});
|
||||
tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), 1});
|
||||
tests.add(new Object[]{Arrays.asList("ACGC", "ACGA"), 2});
|
||||
tests.add(new Object[]{Arrays.asList("ACGC", "AGC"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("A", "<DEL>"), -1});
|
||||
for ( int len = 0; len < 50; len++ )
|
||||
tests.add(new Object[]{Arrays.asList("A" + new String(Utils.dupBytes((byte)'C', len)), "C"), -1});
|
||||
|
||||
tests.add(new Object[]{Arrays.asList("A", "T", "C"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("AT", "AC", "AG"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("AT", "AC", "A"), -1});
|
||||
tests.add(new Object[]{Arrays.asList("AT", "AC", "ACG"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("AC", "AC", "ACG"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("AC", "ACT", "ACG"), 0});
|
||||
tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGTA"), 1});
|
||||
tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGCA"), 1});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ForwardClippingData")
|
||||
public void testForwardClipping(final List<String> alleleStrings, final int expectedClip) {
|
||||
final List<Allele> alleles = new LinkedList<Allele>();
|
||||
for ( final String alleleString : alleleStrings )
|
||||
alleles.add(Allele.create(alleleString));
|
||||
|
||||
for ( final List<Allele> myAlleles : Utils.makePermutations(alleles, alleles.size(), false)) {
|
||||
final int actual = GATKVariantContextUtils.computeForwardClipping(myAlleles);
|
||||
Assert.assertEquals(actual, expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Data provider for testClipAlleles: each case is the unclipped allele strings, the expected
 * trimmed allele strings (in the same order), and the number of bases expected to be clipped
 * off the left (i.e. how far the start coordinate should move forward).
 */
@DataProvider(name = "ClipAlleleTest")
public Object[][] makeClipAlleleTest() {
    List<Object[]> tests = new ArrayList<Object[]>();

    // left-trim-only cases
    tests.add(new Object[]{Arrays.asList("ACC", "AC"), Arrays.asList("AC", "A"), 0});
    tests.add(new Object[]{Arrays.asList("ACGC", "ACG"), Arrays.asList("GC", "G"), 2});
    tests.add(new Object[]{Arrays.asList("ACGC", "ACGA"), Arrays.asList("C", "A"), 3});
    tests.add(new Object[]{Arrays.asList("ACGC", "AGC"), Arrays.asList("AC", "A"), 0});
    tests.add(new Object[]{Arrays.asList("AT", "AC", "AG"), Arrays.asList("T", "C", "G"), 1});
    tests.add(new Object[]{Arrays.asList("AT", "AC", "ACG"), Arrays.asList("T", "C", "CG"), 1});
    tests.add(new Object[]{Arrays.asList("AC", "ACT", "ACG"), Arrays.asList("C", "CT", "CG"), 1});
    tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGTA"), Arrays.asList("G", "GT", "GTA"), 2});
    tests.add(new Object[]{Arrays.asList("ACG", "ACGT", "ACGCA"), Arrays.asList("G", "GT", "GCA"), 2});

    // trims from left and right
    tests.add(new Object[]{Arrays.asList("ACGTT", "ACCTT"), Arrays.asList("G", "C"), 2});
    tests.add(new Object[]{Arrays.asList("ACGTT", "ACCCTT"), Arrays.asList("G", "CC"), 2});
    tests.add(new Object[]{Arrays.asList("ACGTT", "ACGCTT"), Arrays.asList("G", "GC"), 2});

    return tests.toArray(new Object[][]{});
}
|
||||
|
||||
@Test(dataProvider = "ClipAlleleTest")
|
||||
public void testClipAlleles(final List<String> alleleStrings, final List<String> expected, final int numLeftClipped) {
|
||||
final int start = 10;
|
||||
final VariantContext unclipped = GATKVariantContextUtils.makeFromAlleles("test", "20", start, alleleStrings);
|
||||
final VariantContext clipped = GATKVariantContextUtils.trimAlleles(unclipped, true, true);
|
||||
|
||||
Assert.assertEquals(clipped.getStart(), unclipped.getStart() + numLeftClipped);
|
||||
for ( int i = 0; i < unclipped.getAlleles().size(); i++ ) {
|
||||
final Allele trimmed = clipped.getAlleles().get(i);
|
||||
Assert.assertEquals(trimmed.getBaseString(), expected.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue