Merge pull request #812 from broadinstitute/ldg_combineData_submit
New walker to combine WGS and WES data
This commit is contained in:
commit
f8f2680142
|
|
@ -92,6 +92,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
private Set<String> founderIds;
|
private Set<String> founderIds;
|
||||||
private int sampleCount;
|
private int sampleCount;
|
||||||
private boolean pedigreeCheckWarningLogged = false;
|
private boolean pedigreeCheckWarningLogged = false;
|
||||||
|
private boolean didUniquifiedSampleNameCheck = false;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
|
@ -104,6 +105,11 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
if(founderIds == null && walker != null) {
|
if(founderIds == null && walker != null) {
|
||||||
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
|
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
|
||||||
}
|
}
|
||||||
|
//if none of the "founders" are in the vc samples, assume we uniquified the samples upstream and they are all founders
|
||||||
|
if (!didUniquifiedSampleNameCheck) {
|
||||||
|
checkSampleNames(vc);
|
||||||
|
didUniquifiedSampleNameCheck = true;
|
||||||
|
}
|
||||||
if ( founderIds == null || founderIds.isEmpty() ) {
|
if ( founderIds == null || founderIds.isEmpty() ) {
|
||||||
if ( !pedigreeCheckWarningLogged ) {
|
if ( !pedigreeCheckWarningLogged ) {
|
||||||
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
|
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
|
||||||
|
|
@ -181,6 +187,22 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.4f", F));
|
return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.4f", F));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//this method is intended to reconcile uniquified sample names
|
||||||
|
// it comes into play when calling this annotation from GenotypeGVCFs with --uniquifySamples because founderIds
|
||||||
|
// is derived from the sampleDB, which comes from the input sample names, but vc will have uniquified (i.e. different)
|
||||||
|
// sample names. Without this check, the founderIds won't be found in the vc and the annotation won't be calculated.
|
||||||
|
protected void checkSampleNames(final VariantContext vc) {
|
||||||
|
Set<String> vcSamples = new HashSet<>();
|
||||||
|
vcSamples.addAll(vc.getSampleNames());
|
||||||
|
if (!vcSamples.isEmpty()) {
|
||||||
|
if (founderIds!=null) {
|
||||||
|
vcSamples.removeAll(founderIds);
|
||||||
|
if (vcSamples.equals(vc.getSampleNames()))
|
||||||
|
founderIds = vc.getSampleNames();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY); }
|
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -293,7 +293,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
||||||
// we need the specialized merge if the site contains anything other than ref blocks
|
// we need the specialized merge if the site contains anything other than ref blocks
|
||||||
final VariantContext mergedVC;
|
final VariantContext mergedVC;
|
||||||
if ( containsTrueAltAllele(stoppedVCs) )
|
if ( containsTrueAltAllele(stoppedVCs) )
|
||||||
mergedVC = ReferenceConfidenceVariantContextMerger.merge(stoppedVCs, gLoc, refBase, false);
|
mergedVC = ReferenceConfidenceVariantContextMerger.merge(stoppedVCs, gLoc, refBase, false, false);
|
||||||
else
|
else
|
||||||
mergedVC = referenceBlockMerge(stoppedVCs, state, pos.getStart());
|
mergedVC = referenceBlockMerge(stoppedVCs, state, pos.getStart());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,7 @@ import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
||||||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
|
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
|
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
|
||||||
|
|
@ -137,6 +138,14 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
@Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false)
|
@Argument(fullName="includeNonVariantSites", shortName="allSites", doc="Include loci found to be non-variant after genotyping", required=false)
|
||||||
public boolean INCLUDE_NON_VARIANTS = false;
|
public boolean INCLUDE_NON_VARIANTS = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uniquify all sample names (intended for use with multiple inputs for the same sample)
|
||||||
|
*/
|
||||||
|
@Hidden
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="uniquifySamples", shortName="uniquifySamples", doc="Assume duplicate samples are present and uniquify all names with '.variant' and file number index")
|
||||||
|
public boolean uniquifySamples = false;
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
||||||
|
|
||||||
|
|
@ -166,13 +175,33 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
|
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
boolean inputsAreTagged = false;
|
||||||
|
|
||||||
// collect the actual rod bindings into a list for use later
|
// collect the actual rod bindings into a list for use later
|
||||||
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
|
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections ) {
|
||||||
variants.addAll(variantCollection.getRodBindings());
|
variants.addAll(variantCollection.getRodBindings());
|
||||||
|
if (uniquifySamples) {
|
||||||
|
for (final RodBinding<VariantContext> rb : variantCollection.getRodBindings()) {
|
||||||
|
//are inputs passed in with -V:fileTag ?
|
||||||
|
if (!rb.getTags().isEmpty()) inputsAreTagged = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//RodBinding tags are used in sample uniquification
|
||||||
|
if (inputsAreTagged)
|
||||||
|
logger.warn("Output uniquified VCF may not be suitable for input to CombineSampleData because input VCF(s) contain tags.");
|
||||||
|
|
||||||
final GenomeAnalysisEngine toolkit = getToolkit();
|
final GenomeAnalysisEngine toolkit = getToolkit();
|
||||||
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
|
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
|
||||||
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
|
||||||
|
final GATKVariantContextUtils.GenotypeMergeType mergeType;
|
||||||
|
if(uniquifySamples) {
|
||||||
|
mergeType = GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
mergeType = GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE;
|
||||||
|
|
||||||
|
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, mergeType));
|
||||||
// create the genotyping engine
|
// create the genotyping engine
|
||||||
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
|
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
|
||||||
toolkit.getArguments().BAQMode);
|
toolkit.getArguments().BAQMode);
|
||||||
|
|
@ -201,7 +230,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final GenomeLoc loc = ref.getLocus();
|
final GenomeLoc loc = ref.getLocus();
|
||||||
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true);
|
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true, uniquifySamples);
|
||||||
if ( combinedVC == null )
|
if ( combinedVC == null )
|
||||||
return null;
|
return null;
|
||||||
return regenotypeVC(tracker, ref, combinedVC);
|
return regenotypeVC(tracker, ref, combinedVC);
|
||||||
|
|
@ -336,6 +365,10 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
||||||
return recoveredGs;
|
return recoveredGs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkRODtags() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a UnifiedArgumentCollection with appropriate values filled in from the arguments in this walker
|
* Creates a UnifiedArgumentCollection with appropriate values filled in from the arguments in this walker
|
||||||
* @return a complete UnifiedArgumentCollection
|
* @return a complete UnifiedArgumentCollection
|
||||||
|
|
|
||||||
|
|
@ -83,9 +83,11 @@ public class ReferenceConfidenceVariantContextMerger {
|
||||||
* @param loc the current location
|
* @param loc the current location
|
||||||
* @param refBase the reference allele to use if all contexts in the VC are spanning (i.e. don't start at the location in loc); if null, we'll return null in this case
|
* @param refBase the reference allele to use if all contexts in the VC are spanning (i.e. don't start at the location in loc); if null, we'll return null in this case
|
||||||
* @param removeNonRefSymbolicAllele if true, remove the <NON_REF> allele from the merged VC
|
* @param removeNonRefSymbolicAllele if true, remove the <NON_REF> allele from the merged VC
|
||||||
|
* @param samplesAreUniquified if true, sample names have been uniquified
|
||||||
* @return new VariantContext representing the merge of all VCs or null if it not relevant
|
* @return new VariantContext representing the merge of all VCs or null if it not relevant
|
||||||
*/
|
*/
|
||||||
public static VariantContext merge(final List<VariantContext> VCs, final GenomeLoc loc, final Byte refBase, final boolean removeNonRefSymbolicAllele) {
|
public static VariantContext merge(final List<VariantContext> VCs, final GenomeLoc loc, final Byte refBase, final boolean removeNonRefSymbolicAllele,
|
||||||
|
final boolean samplesAreUniquified) {
|
||||||
// this can happen if e.g. you are using a dbSNP file that spans a region with no gVCFs
|
// this can happen if e.g. you are using a dbSNP file that spans a region with no gVCFs
|
||||||
if ( VCs == null || VCs.size() == 0 )
|
if ( VCs == null || VCs.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -133,7 +135,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
||||||
final VariantContext vc = pair.getFirst();
|
final VariantContext vc = pair.getFirst();
|
||||||
final List<Allele> remappedAlleles = pair.getSecond();
|
final List<Allele> remappedAlleles = pair.getSecond();
|
||||||
|
|
||||||
mergeRefConfidenceGenotypes(genotypes, vc, remappedAlleles, allelesList);
|
mergeRefConfidenceGenotypes(genotypes, vc, remappedAlleles, allelesList, samplesAreUniquified);
|
||||||
|
|
||||||
// special case DP (add it up) for all events
|
// special case DP (add it up) for all events
|
||||||
if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) {
|
if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) {
|
||||||
|
|
@ -307,11 +309,13 @@ public class ReferenceConfidenceVariantContextMerger {
|
||||||
* @param VC the Variant Context for the sample
|
* @param VC the Variant Context for the sample
|
||||||
* @param remappedAlleles the list of remapped alleles for the sample
|
* @param remappedAlleles the list of remapped alleles for the sample
|
||||||
* @param targetAlleles the list of target alleles
|
* @param targetAlleles the list of target alleles
|
||||||
|
* @param samplesAreUniquified true if sample names have been uniquified
|
||||||
*/
|
*/
|
||||||
private static void mergeRefConfidenceGenotypes(final GenotypesContext mergedGenotypes,
|
private static void mergeRefConfidenceGenotypes(final GenotypesContext mergedGenotypes,
|
||||||
final VariantContext VC,
|
final VariantContext VC,
|
||||||
final List<Allele> remappedAlleles,
|
final List<Allele> remappedAlleles,
|
||||||
final List<Allele> targetAlleles) {
|
final List<Allele> targetAlleles,
|
||||||
|
final boolean samplesAreUniquified) {
|
||||||
final int maximumPloidy = VC.getMaxPloidy(GATKVariantContextUtils.DEFAULT_PLOIDY);
|
final int maximumPloidy = VC.getMaxPloidy(GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||||
// the map is different depending on the ploidy, so in order to keep this method flexible (mixed ploidies)
|
// the map is different depending on the ploidy, so in order to keep this method flexible (mixed ploidies)
|
||||||
// we need to get a map done (lazily inside the loop) for each ploidy, up to the maximum possible.
|
// we need to get a map done (lazily inside the loop) for each ploidy, up to the maximum possible.
|
||||||
|
|
@ -320,11 +324,14 @@ public class ReferenceConfidenceVariantContextMerger {
|
||||||
int[] perSampleIndexesOfRelevantAlleles;
|
int[] perSampleIndexesOfRelevantAlleles;
|
||||||
|
|
||||||
for ( final Genotype g : VC.getGenotypes() ) {
|
for ( final Genotype g : VC.getGenotypes() ) {
|
||||||
final String name = g.getSampleName();
|
final String name;
|
||||||
if ( mergedGenotypes.containsSample(name) )
|
if (samplesAreUniquified)
|
||||||
continue;
|
name = g.getSampleName() + "." + VC.getSource();
|
||||||
|
else
|
||||||
|
name = g.getSampleName();
|
||||||
final int ploidy = g.getPloidy();
|
final int ploidy = g.getPloidy();
|
||||||
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
|
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
|
||||||
|
genotypeBuilder.name(name);
|
||||||
if (g.hasPL()) {
|
if (g.hasPL()) {
|
||||||
// lazy initialization of the genotype index map by ploidy.
|
// lazy initialization of the genotype index map by ploidy.
|
||||||
perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart(), g);
|
perSampleIndexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart(), g);
|
||||||
|
|
|
||||||
|
|
@ -261,6 +261,23 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
Assert.assertTrue(gVCFList.containsAll(outputVCFList));
|
Assert.assertTrue(gVCFList.containsAll(outputVCFList));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUniquifiedSamples() throws IOException {
|
||||||
|
//two copies of 5 samples; will also test InbreedingCoeff calculation for uniquified samples
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||||
|
" -V:sample1B " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||||
|
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
||||||
|
" -V:sample2B " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
||||||
|
" -V:combined1 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
|
||||||
|
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
|
||||||
|
" --uniquifySamples", b37KGReference),
|
||||||
|
1,
|
||||||
|
Arrays.asList("ef6a96d57434bb63935fa7d9f012da9f"));
|
||||||
|
executeTest("testUniquifiedSamples", spec);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a list of attribute values from a VCF file
|
* Returns a list of attribute values from a VCF file
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -99,8 +99,9 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "referenceConfidenceMergeData")
|
@Test(dataProvider = "referenceConfidenceMergeData")
|
||||||
public void testReferenceConfidenceMerge(final String testID, final List<VariantContext> toMerge, final GenomeLoc loc, final boolean returnSiteEvenIfMonomorphic, final VariantContext expectedResult) {
|
public void testReferenceConfidenceMerge(final String testID, final List<VariantContext> toMerge, final GenomeLoc loc,
|
||||||
final VariantContext result = ReferenceConfidenceVariantContextMerger.merge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte) 'A' : null, true);
|
final boolean returnSiteEvenIfMonomorphic, final boolean uniquifySamples, final VariantContext expectedResult) {
|
||||||
|
final VariantContext result = ReferenceConfidenceVariantContextMerger.merge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte) 'A' : null, true, uniquifySamples);
|
||||||
if ( result == null ) {
|
if ( result == null ) {
|
||||||
Assert.assertTrue(expectedResult == null);
|
Assert.assertTrue(expectedResult == null);
|
||||||
return;
|
return;
|
||||||
|
|
@ -171,6 +172,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
||||||
final int start = 10;
|
final int start = 10;
|
||||||
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, start, start);
|
final GenomeLoc loc = new UnvalidatingGenomeLoc("20", 0, start, start);
|
||||||
final VariantContext VCbase = new VariantContextBuilder("test", "20", start, start, Arrays.asList(Aref)).make();
|
final VariantContext VCbase = new VariantContextBuilder("test", "20", start, start, Arrays.asList(Aref)).make();
|
||||||
|
final VariantContext VCbase2 = new VariantContextBuilder("test2", "20", start, start, Arrays.asList(Aref)).make();
|
||||||
final VariantContext VCprevBase = new VariantContextBuilder("test", "20", start-1, start-1, Arrays.asList(Aref)).make();
|
final VariantContext VCprevBase = new VariantContextBuilder("test", "20", start-1, start-1, Arrays.asList(Aref)).make();
|
||||||
|
|
||||||
final int[] standardPLs = new int[]{30, 20, 10, 71, 72, 73};
|
final int[] standardPLs = new int[]{30, 20, 10, 71, 72, 73};
|
||||||
|
|
@ -183,26 +185,34 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
||||||
final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[]{0, 100, 1000}).alleles(noCalls).make();
|
final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[]{0, 100, 1000}).alleles(noCalls).make();
|
||||||
final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make();
|
final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make();
|
||||||
|
|
||||||
final Allele AAref = Allele.create("AA", true);
|
final Allele AAref = Allele.create("AA", true);
|
||||||
final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[]{0, 80, 800}).alleles(noCalls).make();
|
final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[]{0, 80, 800}).alleles(noCalls).make();
|
||||||
final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make();
|
final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make();
|
||||||
|
|
||||||
final List<Allele> A_C = Arrays.asList(Aref, C);
|
final List<Allele> A_C = Arrays.asList(Aref, C);
|
||||||
final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10}).alleles(noCalls).make();
|
final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10}).alleles(noCalls).make();
|
||||||
final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make();
|
final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make();
|
||||||
|
final VariantContext vcA_C = new VariantContextBuilder(VCbase2).alleles(A_C_ALT).genotypes(gA_C).make();
|
||||||
final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make();
|
final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make();
|
||||||
|
|
||||||
final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make();
|
final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make();
|
||||||
final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make();
|
final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make();
|
||||||
|
|
||||||
final List<Allele> A_C_G = Arrays.asList(Aref, C, G);
|
final List<Allele> A_C_G = Arrays.asList(Aref, C, G);
|
||||||
final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make();
|
final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make();
|
||||||
final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30, 71, 72, 73, 74}).alleles(noCalls).make();
|
final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30, 71, 72, 73, 74}).alleles(noCalls).make();
|
||||||
|
final VariantContext vcA_C_G = new VariantContextBuilder(VCbase2).alleles(A_C_G_ALT).genotypes(gA_C_G).make();
|
||||||
final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make();
|
final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make();
|
||||||
|
|
||||||
final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make();
|
final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make();
|
||||||
final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make();
|
final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make();
|
||||||
|
|
||||||
final Allele A = Allele.create("A", false);
|
final Allele A = Allele.create("A", false);
|
||||||
final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make();
|
final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make();
|
||||||
|
|
@ -210,40 +220,40 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
||||||
|
|
||||||
// first test the case of a single record
|
// first test the case of a single record
|
||||||
tests.add(new Object[]{"test00",Arrays.asList(vcA_C_ALT),
|
tests.add(new Object[]{"test00",Arrays.asList(vcA_C_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C).make()});
|
||||||
|
|
||||||
// now, test pairs:
|
// now, test pairs:
|
||||||
// a SNP with another SNP
|
// a SNP with another SNP
|
||||||
tests.add(new Object[]{"test01",Arrays.asList(vcA_C_ALT, vcA_G_ALT),
|
tests.add(new Object[]{"test01",Arrays.asList(vcA_C_ALT, vcA_G_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, new GenotypeBuilder("A_G").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, new GenotypeBuilder("A_G").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
|
||||||
// a SNP with an indel
|
// a SNP with an indel
|
||||||
tests.add(new Object[]{"test02",Arrays.asList(vcA_C_ALT, vcA_ATC_ALT),
|
tests.add(new Object[]{"test02",Arrays.asList(vcA_C_ALT, vcA_ATC_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, ATC)).genotypes(gA_C_ALT, new GenotypeBuilder("A_ATC").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
|
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, ATC)).genotypes(gA_C_ALT, new GenotypeBuilder("A_ATC").PL(reorderedSecondAllelePLs).alleles(noCalls).make()).make()});
|
||||||
// a SNP with 2 SNPs
|
// a SNP with 2 SNPs
|
||||||
tests.add(new Object[]{"test03",Arrays.asList(vcA_C_ALT, vcA_C_G_ALT),
|
tests.add(new Object[]{"test03",Arrays.asList(vcA_C_ALT, vcA_C_G_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, gA_C_G).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(gA_C_ALT, gA_C_G).make()});
|
||||||
// a SNP with a ref record
|
// a SNP with a ref record
|
||||||
tests.add(new Object[]{"test04",Arrays.asList(vcA_C_ALT, vcA_ALT),
|
tests.add(new Object[]{"test04",Arrays.asList(vcA_C_ALT, vcA_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gA_ALT).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gA_ALT).make()});
|
||||||
|
|
||||||
// spanning records:
|
// spanning records:
|
||||||
// a SNP with a spanning ref record
|
// a SNP with a spanning ref record
|
||||||
tests.add(new Object[]{"test05",Arrays.asList(vcA_C_ALT, vcAA_ALT),
|
tests.add(new Object[]{"test05",Arrays.asList(vcA_C_ALT, vcAA_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gAA_ALT).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, gAA_ALT).make()});
|
||||||
// a SNP with a spanning deletion
|
// a SNP with a spanning deletion
|
||||||
tests.add(new Object[]{"test06",Arrays.asList(vcA_C_ALT, vcAA_A_ALT),
|
tests.add(new Object[]{"test06",Arrays.asList(vcA_C_ALT, vcAA_A_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, new GenotypeBuilder("AA_A").PL(new int[]{30, 71, 73}).alleles(noCalls).make()).make()});
|
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(gA_C, new GenotypeBuilder("AA_A").PL(new int[]{30, 71, 73}).alleles(noCalls).make()).make()});
|
||||||
|
|
||||||
// combination of all
|
// combination of all
|
||||||
tests.add(new Object[]{"test07",Arrays.asList(vcA_C_ALT, vcA_G_ALT, vcA_ATC_ALT, vcA_C_G_ALT, vcA_ALT, vcAA_ALT, vcAA_A_ALT),
|
tests.add(new Object[]{"test07",Arrays.asList(vcA_C_ALT, vcA_G_ALT, vcA_ATC_ALT, vcA_C_G_ALT, vcA_ALT, vcAA_ALT, vcAA_A_ALT),
|
||||||
loc, false,
|
loc, false, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, G, ATC)).genotypes(new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10, 71, 72, 73, 71, 72, 73, 73}).alleles(noCalls).make(),
|
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Aref, C, G, ATC)).genotypes(new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10, 71, 72, 73, 71, 72, 73, 73}).alleles(noCalls).make(),
|
||||||
new GenotypeBuilder("A_G").PL(new int[]{30, 71, 73, 20, 72, 10, 71, 73, 72, 73}).alleles(noCalls).make(),
|
new GenotypeBuilder("A_G").PL(new int[]{30, 71, 73, 20, 72, 10, 71, 73, 72, 73}).alleles(noCalls).make(),
|
||||||
new GenotypeBuilder("A_ATC").PL(new int[]{30, 71, 73, 71, 73, 73, 20, 72, 72, 10}).alleles(noCalls).make(),
|
new GenotypeBuilder("A_ATC").PL(new int[]{30, 71, 73, 71, 73, 73, 20, 72, 72, 10}).alleles(noCalls).make(),
|
||||||
|
|
@ -255,12 +265,23 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
||||||
// just spanning ref contexts, trying both instances where we want/do not want ref-only contexts
|
// just spanning ref contexts, trying both instances where we want/do not want ref-only contexts
|
||||||
tests.add(new Object[]{"test08",Arrays.asList(vcAA_ALT),
|
tests.add(new Object[]{"test08",Arrays.asList(vcAA_ALT),
|
||||||
|
|
||||||
loc, false,
|
loc, false, false,
|
||||||
null});
|
null});
|
||||||
tests.add(new Object[]{"test09", Arrays.asList(vcAA_ALT),
|
tests.add(new Object[]{"test09", Arrays.asList(vcAA_ALT),
|
||||||
loc, true,
|
loc, true, false,
|
||||||
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Allele.create("A", true))).genotypes(new GenotypeBuilder("AA").PL(new int[]{0}).alleles(noCalls).make()).make()});
|
new VariantContextBuilder(VCbase).alleles(Arrays.asList(Allele.create("A", true))).genotypes(new GenotypeBuilder("AA").PL(new int[]{0}).alleles(noCalls).make()).make()});
|
||||||
|
|
||||||
|
// test uniquification of sample names
|
||||||
|
tests.add(new Object[]{"test10",Arrays.asList(vcA_C, vcA_C_ALT), loc, false, true,
|
||||||
|
new VariantContextBuilder(VCbase).alleles(A_C).genotypes(
|
||||||
|
new GenotypeBuilder("A_C.test2").PL(new int[]{30, 20, 10}).alleles(noCalls).make(),
|
||||||
|
new GenotypeBuilder("A_C.test").PL(new int[]{30, 20, 10}).alleles(noCalls).make()).make()});
|
||||||
|
|
||||||
|
tests.add(new Object[]{"test11",Arrays.asList(vcA_C_G, vcA_C_G_ALT), loc, false, true,
|
||||||
|
new VariantContextBuilder(VCbase).alleles(A_C_G).genotypes(
|
||||||
|
new GenotypeBuilder("A_C_G.test2").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make(),
|
||||||
|
new GenotypeBuilder("A_C_G.test").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make()).make()});
|
||||||
|
|
||||||
final Object[][] result = tests.toArray(new Object[][]{});
|
final Object[][] result = tests.toArray(new Object[][]{});
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue