Merge pull request #812 from broadinstitute/ldg_combineData_submit

New walker to combine WGS and WES data
This commit is contained in:
Valentin Ruano Rubio 2015-03-02 15:12:31 -05:00
commit f8f2680142
6 changed files with 127 additions and 27 deletions

View File

@ -92,6 +92,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
private Set<String> founderIds; private Set<String> founderIds;
private int sampleCount; private int sampleCount;
private boolean pedigreeCheckWarningLogged = false; private boolean pedigreeCheckWarningLogged = false;
private boolean didUniquifiedSampleNameCheck = false;
@Override @Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
@ -104,6 +105,11 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
if(founderIds == null && walker != null) { if(founderIds == null && walker != null) {
founderIds = ((Walker) walker).getSampleDB().getFounderIds(); founderIds = ((Walker) walker).getSampleDB().getFounderIds();
} }
//if none of the "founders" are in the vc samples, assume we uniquified the samples upstream and they are all founders
if (!didUniquifiedSampleNameCheck) {
checkSampleNames(vc);
didUniquifiedSampleNameCheck = true;
}
if ( founderIds == null || founderIds.isEmpty() ) { if ( founderIds == null || founderIds.isEmpty() ) {
if ( !pedigreeCheckWarningLogged ) { if ( !pedigreeCheckWarningLogged ) {
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line."); logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
@ -181,6 +187,22 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.4f", F)); return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.4f", F));
} }
//this method is intended to reconcile uniquified sample names
// it comes into play when calling this annotation from GenotypeGVCFs with --uniquifySamples because founderIds
// is derived from the sampleDB, which comes from the input sample names, but vc will have uniquified (i.e. different)
// sample names. Without this check, the founderIds won't be found in the vc and the annotation won't be calculated.
protected void checkSampleNames(final VariantContext vc) {
Set<String> vcSamples = new HashSet<>();
vcSamples.addAll(vc.getSampleNames());
if (!vcSamples.isEmpty()) {
if (founderIds!=null) {
vcSamples.removeAll(founderIds);
if (vcSamples.equals(vc.getSampleNames()))
founderIds = vc.getSampleNames();
}
}
}
@Override @Override
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY); } public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY); }

View File

@ -293,7 +293,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
// we need the specialized merge if the site contains anything other than ref blocks // we need the specialized merge if the site contains anything other than ref blocks
final VariantContext mergedVC; final VariantContext mergedVC;
if ( containsTrueAltAllele(stoppedVCs) ) if ( containsTrueAltAllele(stoppedVCs) )
mergedVC = ReferenceConfidenceVariantContextMerger.merge(stoppedVCs, gLoc, refBase, false); mergedVC = ReferenceConfidenceVariantContextMerger.merge(stoppedVCs, gLoc, refBase, false, false);
else else
mergedVC = referenceBlockMerge(stoppedVCs, state, pos.getStart()); mergedVC = referenceBlockMerge(stoppedVCs, state, pos.getStart());

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList; import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.utils.genotyper.SampleList; import org.broadinstitute.gatk.utils.genotyper.SampleList;
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils; import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
@ -137,6 +138,14 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
@Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false) @Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false)
public boolean INCLUDE_NON_VARIANTS = false; public boolean INCLUDE_NON_VARIANTS = false;
/**
* Uniquify all sample names (intended for use with multiple inputs for the same sample)
*/
@Hidden
@Advanced
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
public boolean uniquifySamples = false;
@ArgumentCollection @ArgumentCollection
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
@ -166,13 +175,33 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
public void initialize() { public void initialize() {
boolean inputsAreTagged = false;
// collect the actual rod bindings into a list for use later // collect the actual rod bindings into a list for use later
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections ) for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections ) {
variants.addAll(variantCollection.getRodBindings()); variants.addAll(variantCollection.getRodBindings());
if (uniquifySamples) {
for (final RodBinding<VariantContext> rb : variantCollection.getRodBindings()) {
//are inputs passed in with -V:fileTag ?
if (!rb.getTags().isEmpty()) inputsAreTagged = true;
}
}
}
//RodBinding tags are used in sample uniquification
if (inputsAreTagged)
logger.warn("Output uniquified VCF may not be suitable for input to CombineSampleData because input VCF(s) contain tags.");
final GenomeAnalysisEngine toolkit = getToolkit(); final GenomeAnalysisEngine toolkit = getToolkit();
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants); final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
final GATKVariantContextUtils.GenotypeMergeType mergeType;
if(uniquifySamples) {
mergeType = GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY;
}
else
mergeType = GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE;
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, mergeType));
// create the genotyping engine // create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger), genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
toolkit.getArguments().BAQMode); toolkit.getArguments().BAQMode);
@ -201,7 +230,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
return null; return null;
final GenomeLoc loc = ref.getLocus(); final GenomeLoc loc = ref.getLocus();
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true); final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true, uniquifySamples);
if ( combinedVC == null ) if ( combinedVC == null )
return null; return null;
return regenotypeVC(tracker, ref, combinedVC); return regenotypeVC(tracker, ref, combinedVC);
@ -336,6 +365,10 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
return recoveredGs; return recoveredGs;
} }
private void checkRODtags() {
}
/** /**
* Creates a UnifiedArgumentCollection with appropriate values filled in from the arguments in this walker * Creates a UnifiedArgumentCollection with appropriate values filled in from the arguments in this walker
* @return a complete UnifiedArgumentCollection * @return a complete UnifiedArgumentCollection

View File

@ -83,9 +83,11 @@ public class ReferenceConfidenceVariantContextMerger {
* @param loc the current location * @param loc the current location
* @param refBase the reference allele to use if all contexts in the VC are spanning (i.e. don't start at the location in loc); if null, we'll return null in this case * @param refBase the reference allele to use if all contexts in the VC are spanning (i.e. don't start at the location in loc); if null, we'll return null in this case
* @param removeNonRefSymbolicAllele if true, remove the <NON_REF> allele from the merged VC * @param removeNonRefSymbolicAllele if true, remove the <NON_REF> allele from the merged VC
* @param samplesAreUniquified if true, sample names have been uniquified
* @return new VariantContext representing the merge of all VCs or null if it not relevant * @return new VariantContext representing the merge of all VCs or null if it not relevant
*/ */
public static VariantContext merge(final List<VariantContext> VCs, final GenomeLoc loc, final Byte refBase, final boolean removeNonRefSymbolicAllele) { public static VariantContext merge(final List<VariantContext> VCs, final GenomeLoc loc, final Byte refBase, final boolean removeNonRefSymbolicAllele,
final boolean samplesAreUniquified) {
// this can happen if e.g. you are using a dbSNP file that spans a region with no gVCFs // this can happen if e.g. you are using a dbSNP file that spans a region with no gVCFs
if ( VCs == null || VCs.size() == 0 ) if ( VCs == null || VCs.size() == 0 )
return null; return null;
@ -133,7 +135,7 @@ public class ReferenceConfidenceVariantContextMerger {
final VariantContext vc = pair.getFirst(); final VariantContext vc = pair.getFirst();
final List<Allele> remappedAlleles = pair.getSecond(); final List<Allele> remappedAlleles = pair.getSecond();
mergeRefConfidenceGenotypes(genotypes, vc, remappedAlleles, allelesList); mergeRefConfidenceGenotypes(genotypes, vc, remappedAlleles, allelesList, samplesAreUniquified);
// special case DP (add it up) for all events // special case DP (add it up) for all events
if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) { if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) {
@ -307,11 +309,13 @@ public class ReferenceConfidenceVariantContextMerger {
* @param VC the Variant Context for the sample * @param VC the Variant Context for the sample
* @param remappedAlleles the list of remapped alleles for the sample * @param remappedAlleles the list of remapped alleles for the sample
* @param targetAlleles the list of target alleles * @param targetAlleles the list of target alleles
* @param samplesAreUniquified true if sample names have been uniquified
*/ */
private static void mergeRefConfidenceGenotypes(final GenotypesContext mergedGenotypes, private static void mergeRefConfidenceGenotypes(final GenotypesContext mergedGenotypes,
final VariantContext VC, final VariantContext VC,
final List<Allele> remappedAlleles, final List<Allele> remappedAlleles,
final List<Allele> targetAlleles) { final List<Allele> targetAlleles,
final boolean samplesAreUniquified) {
final int maximumPloidy = VC.getMaxPloidy(GATKVariantContextUtils.DEFAULT_PLOIDY); final int maximumPloidy = VC.getMaxPloidy(GATKVariantContextUtils.DEFAULT_PLOIDY);
// the map is different depending on the ploidy, so in order to keep this method flexible (mixed ploidies) // the map is different depending on the ploidy, so in order to keep this method flexible (mixed ploidies)
// we need to get a map done (lazily inside the loop) for each ploidy, up to the maximum possible. // we need to get a map done (lazily inside the loop) for each ploidy, up to the maximum possible.
@ -320,11 +324,14 @@ public class ReferenceConfidenceVariantContextMerger {
int[] perSampleIndexesOfRelevantAlleles; int[] perSampleIndexesOfRelevantAlleles;
for ( final Genotype g : VC.getGenotypes() ) { for ( final Genotype g : VC.getGenotypes() ) {
final String name = g.getSampleName(); final String name;
if ( mergedGenotypes.containsSample(name) ) if (samplesAreUniquified)
continue; name = g.getSampleName() + "." + VC.getSource();
else
name = g.getSampleName();
final int ploidy = g.getPloidy(); final int ploidy = g.getPloidy();
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
genotypeBuilder.name(name);
if (g.hasPL()) { if (g.hasPL()) {
// lazy initialization of the genotype index map by ploidy. // lazy initialization of the genotype index map by ploidy.
perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart(), g); perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart(), g);

View File

@ -261,6 +261,23 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
Assert.assertTrue(gVCFList.containsAll(outputVCFList)); Assert.assertTrue(gVCFList.containsAll(outputVCFList));
} }
@Test
public void testUniquifiedSamples() throws IOException {
//two copies of 5 samples; will also test InbreedingCoeff calculation for uniquified samples
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
" -V:sample1B " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
" -V:sample2B " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
" -V:combined1 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
" --uniquifySamples", b37KGReference),
1,
Arrays.asList("ef6a96d57434bb63935fa7d9f012da9f"));
executeTest("testUniquifiedSamples", spec);
}
/** /**
* Returns a list of attribute values from a VCF file * Returns a list of attribute values from a VCF file
* *

View File

@ -99,8 +99,9 @@ public class VariantContextMergerUnitTest extends BaseTest {
} }
@Test(dataProvider = "referenceConfidenceMergeData") @Test(dataProvider = "referenceConfidenceMergeData")
public void testReferenceConfidenceMerge(final String testID, final List<VariantContext> toMerge, final GenomeLoc loc, final boolean returnSiteEvenIfMonomorphic, final VariantContext expectedResult) { public void testReferenceConfidenceMerge(final String testID, final List<VariantContext> toMerge, final GenomeLoc loc,
final VariantContext result = ReferenceConfidenceVariantContextMerger.merge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte) 'A' : null, true); final boolean returnSiteEvenIfMonomorphic, final boolean uniquifySamples, final VariantContext expectedResult) {
final VariantContext result = ReferenceConfidenceVariantContextMerger.merge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte) 'A' : null, true, uniquifySamples);
if ( result == null ) { if ( result == null ) {
Assert.assertTrue(expectedResult == null); Assert.assertTrue(expectedResult == null);
return; return;
@ -171,6 +172,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
final int start = 10; final int start = 10;
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, start, start); final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, start, start);
final VariantContext VCbase = new VariantContextBuilder("test", "20", start, start, Arrays.asList(Aref)).make(); final VariantContext VCbase = new VariantContextBuilder("test", "20", start, start, Arrays.asList(Aref)).make();
final VariantContext VCbase2 = new VariantContextBuilder("test2", "20", start, start, Arrays.asList(Aref)).make();
final VariantContext VCprevBase = new VariantContextBuilder("test", "20", start-1, start-1, Arrays.asList(Aref)).make(); final VariantContext VCprevBase = new VariantContextBuilder("test", "20", start-1, start-1, Arrays.asList(Aref)).make();
final int[] standardPLs = new int[]{30, 20, 10, 71, 72, 73}; final int[] standardPLs = new int[]{30, 20, 10, 71, 72, 73};
@ -183,26 +185,34 @@ public class VariantContextMergerUnitTest extends BaseTest {
final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[]{0, 100, 1000}).alleles(noCalls).make(); final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[]{0, 100, 1000}).alleles(noCalls).make();
final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make(); final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make();
final Allele AAref = Allele.create("AA", true); final Allele AAref = Allele.create("AA", true);
final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[]{0, 80, 800}).alleles(noCalls).make(); final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[]{0, 80, 800}).alleles(noCalls).make();
final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make(); final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make();
final List<Allele> A_C = Arrays.asList(Aref, C); final List<Allele> A_C = Arrays.asList(Aref, C);
final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10}).alleles(noCalls).make(); final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10}).alleles(noCalls).make();
final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make(); final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make();
final VariantContext vcA_C = new VariantContextBuilder(VCbase2).alleles(A_C_ALT).genotypes(gA_C).make();
final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make(); final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make();
final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make(); final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make();
final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make(); final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make();
final List<Allele> A_C_G = Arrays.asList(Aref, C, G); final List<Allele> A_C_G = Arrays.asList(Aref, C, G);
final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make(); final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make();
final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30, 71, 72, 73, 74}).alleles(noCalls).make(); final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30, 71, 72, 73, 74}).alleles(noCalls).make();
final VariantContext vcA_C_G = new VariantContextBuilder(VCbase2).alleles(A_C_G_ALT).genotypes(gA_C_G).make();
final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make(); final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make();
final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make(); final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make();
final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make(); final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make();
final Allele A = Allele.create("A", false); final Allele A = Allele.create("A", false);
final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE); final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make(); final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make();
@ -210,40 +220,40 @@ public class VariantContextMergerUnitTest extends BaseTest {
// first test the case of a single record // first test the case of a single record
tests.add(new Object[]{"test00",Arrays.asList(vcA_C_ALT), tests.add(new Object[]{"test00",Arrays.asList(vcA_C_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C).make()}); new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C).make()});
// now, test pairs: // now, test pairs:
// a SNP with another SNP // a SNP with another SNP
tests.add(new Object[]{"test01",Arrays.asList(vcA_C_ALT, vcA_G_ALT), tests.add(new Object[]{"test01",Arrays.asList(vcA_C_ALT, vcA_G_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, new GenotypeBuilder("A_G").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()}); new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, new GenotypeBuilder("A_G").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
// a SNP with an indel // a SNP with an indel
tests.add(new Object[]{"test02",Arrays.asList(vcA_C_ALT, vcA_ATC_ALT), tests.add(new Object[]{"test02",Arrays.asList(vcA_C_ALT, vcA_ATC_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, ATC)).genotypes(gA_C_ALT, new GenotypeBuilder("A_ATC").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()}); new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, ATC)).genotypes(gA_C_ALT, new GenotypeBuilder("A_ATC").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
// a SNP with 2 SNPs // a SNP with 2 SNPs
tests.add(new Object[]{"test03",Arrays.asList(vcA_C_ALT, vcA_C_G_ALT), tests.add(new Object[]{"test03",Arrays.asList(vcA_C_ALT, vcA_C_G_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, gA_C_G).make()}); new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, gA_C_G).make()});
// a SNP with a ref record // a SNP with a ref record
tests.add(new Object[]{"test04",Arrays.asList(vcA_C_ALT, vcA_ALT), tests.add(new Object[]{"test04",Arrays.asList(vcA_C_ALT, vcA_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gA_ALT).make()}); new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gA_ALT).make()});
// spanning records: // spanning records:
// a SNP with a spanning ref record // a SNP with a spanning ref record
tests.add(new Object[]{"test05",Arrays.asList(vcA_C_ALT, vcAA_ALT), tests.add(new Object[]{"test05",Arrays.asList(vcA_C_ALT, vcAA_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gAA_ALT).make()}); new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gAA_ALT).make()});
// a SNP with a spanning deletion // a SNP with a spanning deletion
tests.add(new Object[]{"test06",Arrays.asList(vcA_C_ALT, vcAA_A_ALT), tests.add(new Object[]{"test06",Arrays.asList(vcA_C_ALT, vcAA_A_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, new GenotypeBuilder("AA_A").PL(new int[]{30, 71, 73}).alleles(noCalls).make()).make()}); new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, new GenotypeBuilder("AA_A").PL(new int[]{30, 71, 73}).alleles(noCalls).make()).make()});
// combination of all // combination of all
tests.add(new Object[]{"test07",Arrays.asList(vcA_C_ALT, vcA_G_ALT, vcA_ATC_ALT, vcA_C_G_ALT, vcA_ALT, vcAA_ALT, vcAA_A_ALT), tests.add(new Object[]{"test07",Arrays.asList(vcA_C_ALT, vcA_G_ALT, vcA_ATC_ALT, vcA_C_G_ALT, vcA_ALT, vcAA_ALT, vcAA_A_ALT),
loc, false, loc, false, false,
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, G, ATC)).genotypes(new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10, 71, 72, 73, 71, 72, 73, 73}).alleles(noCalls).make(), new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, G, ATC)).genotypes(new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10, 71, 72, 73, 71, 72, 73, 73}).alleles(noCalls).make(),
new GenotypeBuilder("A_G").PL(new int[]{30, 71, 73, 20, 72, 10, 71, 73, 72, 73}).alleles(noCalls).make(), new GenotypeBuilder("A_G").PL(new int[]{30, 71, 73, 20, 72, 10, 71, 73, 72, 73}).alleles(noCalls).make(),
new GenotypeBuilder("A_ATC").PL(new int[]{30, 71, 73, 71, 73, 73, 20, 72, 72, 10}).alleles(noCalls).make(), new GenotypeBuilder("A_ATC").PL(new int[]{30, 71, 73, 71, 73, 73, 20, 72, 72, 10}).alleles(noCalls).make(),
@ -255,12 +265,23 @@ public class VariantContextMergerUnitTest extends BaseTest {
// just spanning ref contexts, trying both instances where we want/do not want ref-only contexts // just spanning ref contexts, trying both instances where we want/do not want ref-only contexts
tests.add(new Object[]{"test08",Arrays.asList(vcAA_ALT), tests.add(new Object[]{"test08",Arrays.asList(vcAA_ALT),
loc, false, loc, false, false,
null}); null});
tests.add(new Object[]{"test09", Arrays.asList(vcAA_ALT), tests.add(new Object[]{"test09", Arrays.asList(vcAA_ALT),
loc, true, loc, true, false,
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Allele.create("A", true))).genotypes(new GenotypeBuilder("AA").PL(new int[]{0}).alleles(noCalls).make()).make()}); new VariantContextBuilder(VCbase).alleles(Arrays.asList(Allele.create("A", true))).genotypes(new GenotypeBuilder("AA").PL(new int[]{0}).alleles(noCalls).make()).make()});
// test uniquification of sample names
tests.add(new Object[]{"test10",Arrays.asList(vcA_C, vcA_C_ALT), loc, false, true,
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(
new GenotypeBuilder("A_C.test2").PL(new int[]{30, 20, 10}).alleles(noCalls).make(),
new GenotypeBuilder("A_C.test").PL(new int[]{30, 20, 10}).alleles(noCalls).make()).make()});
tests.add(new Object[]{"test11",Arrays.asList(vcA_C_G, vcA_C_G_ALT), loc, false, true,
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(
new GenotypeBuilder("A_C_G.test2").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make(),
new GenotypeBuilder("A_C_G.test").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make()).make()});
final Object[][] result = tests.toArray(new Object[][]{}); final Object[][] result = tests.toArray(new Object[][]{});
return result; return result;
} }