a) Bug fix in VCFHeader parsing - Info fields were not being parsed properly, with the result that the Count field was not being properly displayed in records (e.g. if Count=0 for a particular field, the INFO tag was still being displayed as ...;Field=x;... instead of ...;Field;...

b) Bug fixes and update to how we represent indels and other complex events in a VariantContext object. Convention is now that all events are left aligned, with the first variant context location marking the common base before an event occurs. However, alleles in a VC don't have the common base in all VC's. Two new functions are now part of VariantContextUtils: CreateVariantContextWithPaddedAlleles and CreateVariantContextWithTrimmedAlleles. Both take a VC as an input and create a VC as an output.
Main flow is that a VCF reader would create a VC with trimmed alleles, all walkers would ideally work with these trimmed alleles, and then the VCF writer would pad back the alleles before writing. However, there are special cases where we need to pad alleles like for example when merging/combining VC's.

Pending issues:
- PED and DBSNP RODs have to be updated to create VC's for indels following the convention above. Changes will go in after Tribble location is moved and things are tested.
- Need to verify Indel genotyper and other modules that create VC's with indels.- Wiki page describing convention above and how walkers should interpret indel VC's still needs updating/detailing.
 


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3850 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-07-22 02:36:45 +00:00
parent b696c3ea98
commit 473ec91633
15 changed files with 246 additions and 106 deletions

View File

@ -85,10 +85,14 @@ public class VCFHeader {
*/
private void loadMetaDataMaps() {
for ( VCFHeaderLine line : mMetaData ) {
if ( line instanceof VCFInfoHeaderLine )
mInfoMetaData.put(line.getKey(), (VCFInfoHeaderLine)line);
else if ( line instanceof VCFFormatHeaderLine )
mFormatMetaData.put(line.getKey(), (VCFFormatHeaderLine)line);
if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
mInfoMetaData.put(infoLine.getName(), infoLine);
}
else if ( line instanceof VCFFormatHeaderLine ) {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
mFormatMetaData.put(formatLine.getName(), formatLine);
}
}
}

View File

@ -898,7 +898,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
if ( (getReference().isNull() && getLocation().size() != 1 ) ||
(getReference().isNonNull() && getReference().length() != getLocation().size()) ) {
(getReference().isNonNull() && (getLocation().size() - getReference().length() > 1))) {
throw new IllegalStateException("BUG: GenomeLoc " + getLocation() + " has a size == " + getLocation().size() + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
import java.io.Serializable;
import java.util.*;
import org.apache.commons.jexl2.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
@ -170,8 +171,8 @@ public class VariantContextUtils {
UNION, INTERSECT
}
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs) {
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false);
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, byte[] refBases) {
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBases);
}
@ -188,7 +189,7 @@ public class VariantContextUtils {
*/
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages ) {
boolean annotateOrigin, boolean printMessages, byte[] inputRefBases ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
@ -198,7 +199,16 @@ public class VariantContextUtils {
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
verifyUniqueSampleNames(unsortedVCs);
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// Make sure all variant contexts are padded with reference base in case of indels if necessary
List<VariantContext> VCs = new ArrayList<VariantContext>();
for (VariantContext vc : prepaddedVCs) {
VCs.add(createVariantContextWithPaddedAlleles(vc,inputRefBases));
}
// establish the baseline info from the first VC
VariantContext first = VCs.get(0);
@ -378,6 +388,133 @@ public class VariantContextUtils {
}
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC = true;
// We need to trim common reference base from all alleles if a ref base is common to all alleles
Allele refAllele = inputVC.getReference();
if (!inputVC.isVariant())
trimVC = false;
else if (refAllele.isNull())
trimVC = false;
else {
for (Allele a : inputVC.getAlternateAlleles()) {
if (a.length() < 1 || (a.getBases()[0] != refAllele.getBases()[0]))
trimVC = false;
}
}
// nothing to do if we don't need to trim bases
if (trimVC) {
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
Map<String, Genotype> inputGenotypes = inputVC.getGenotypes();
// set the reference base for indels in the attributes
Map<String,Object> attributes = new TreeMap<String,Object>();
for ( Map.Entry<String, Object> p : inputVC.getAttributes().entrySet() ) {
attributes.put(p.getKey(), p.getValue());
}
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0]));
for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
alleles.add(Allele.create(newBases,a.isReference()));
}
// now we can recreate new genotypes with trimmed alleles
for (String sample : inputVC.getSampleNames()) {
Genotype g = inputGenotypes.get(sample);
List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>();
for (Allele a : inAlleles) {
byte[] newBases = Arrays.copyOfRange(a.getBases(),1,a.length());
newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
}
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(),
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
}
return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(),
inputVC.getFilters(), attributes);
}
else
return inputVC;
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte[] inputRefBase) {
Allele refAllele = inputVC.getReference();
// see if we need to pad common reference base from all alleles
boolean padVC;
// We need to pad a VC with a common base if the reference allele length is less than the vc location span.
long locLength = inputVC.getLocation().size();
if (refAllele.length() == locLength)
padVC = false;
else if (refAllele.length() == locLength-1)
padVC = true;
else throw new StingException("Badly formed variant context, reference length must be at most one base shorter than location size");
// nothing to do if we don't need to pad bases
if (padVC) {
Byte refByte;
Map<String,Object> attributes = inputVC.getAttributes();
if (BaseUtils.isRegularBase(inputRefBase[0]))
refByte = inputRefBase[0];
else if (attributes.containsKey(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY))
refByte = (Byte)attributes.get(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY);
else
throw new StingException("Error when trying to pad Variant Context: either input reference base must be a regular base, or input VC must contain reference base key");
List<Allele> alleles = new ArrayList<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
Map<String, Genotype> inputGenotypes = inputVC.getGenotypes();
for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases
String newBases = new String(new byte[]{refByte}) + new String(a.getBases());
alleles.add(Allele.create(newBases,a.isReference()));
}
// now we can recreate new genotypes with trimmed alleles
for (String sample : inputVC.getSampleNames()) {
Genotype g = inputGenotypes.get(sample);
List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>();
for (Allele a : inAlleles) {
String newBases = new String(new byte[]{refByte}) + new String(a.getBases());
newGenotypeAlleles.add(Allele.create(newBases,a.isReference()));
}
genotypes.put(sample, new Genotype(sample, newGenotypeAlleles, g.getNegLog10PError(),
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
}
return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(),
inputVC.getFilters(), attributes);
}
else
return inputVC;
}
static class CompareByPriority implements Comparator<VariantContext>, Serializable {
List<String> priorityListOfVCs;
public CompareByPriority(List<String> priorityListOfVCs) {

View File

@ -9,6 +9,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -393,46 +394,46 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
*/
private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) {
// try {
// increment the line count
lineNo++;
// parse out the required fields
String contig = parts[0];
long pos = Long.valueOf(parts[1]);
String id = parts[2];
String ref = parts[3].toUpperCase();
String alts = parts[4].toUpperCase();
Double qual = parseQual(parts[5]);
String filter = parts[6];
String info = parts[7];
// increment the line count
lineNo++;
// get our alleles, filters, and setup an attribute map
List<Allele> alleles = parseAlleles(ref, alts);
Set<String> filters = parseFilters(filter);
Map<String, Object> attributes = parseInfo(info, id);
// parse out the required fields
String contig = parts[0];
long pos = Long.valueOf(parts[1]);
String id = parts[2];
String ref = parts[3].toUpperCase();
String alts = parts[4].toUpperCase();
Double qual = parseQual(parts[5]);
String filter = parts[6];
String info = parts[7];
// set the reference base for indels in the attributes
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte((byte)ref.charAt(0)));
// get our alleles, filters, and setup an attribute map
List<Allele> alleles = parseAlleles(ref, alts);
Set<String> filters = parseFilters(filter);
Map<String, Object> attributes = parseInfo(info, id);
// find out our current location, and clip the alleles down to their minimum length
Pair<GenomeLoc, List<Allele>> locAndAlleles;
if ( !isSingleNucleotideEvent(alleles) ) {
if (this.version != VCFHeaderVersion.VCF4_0)
throw new VCFParserException("Saw Indel/non SNP event on a VCF 3.3 or earlier file. Please convert file to VCF4.0 with VCFTools before using the GATK on it");
locAndAlleles = clipAlleles(contig, pos, ref, alleles);
} else {
locAndAlleles = new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
}
// find out our current location, and clip the alleles down to their minimum length
Pair<GenomeLoc, List<Allele>> locAndAlleles;
if ( !isSingleNucleotideEvent(alleles) ) {
if (this.version != VCFHeaderVersion.VCF4_0)
throw new VCFParserException("Saw Indel/non SNP event on a VCF 3.3 or earlier file. Please convert file to VCF4.0 with VCFTools before using the GATK on it");
locAndAlleles = clipAlleles(contig, pos, ref, alleles);
} else {
locAndAlleles = new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
}
// a map to store our genotypes
Map<String, Genotype> genotypes = null;
// a map to store our genotypes
Map<String, Genotype> genotypes = null;
// do we have genotyping data
if (parts.length > 8 && parseGenotypes) {
genotypes = createGenotypeMap(parts, locAndAlleles, 8);
}
// do we have genotyping data
if (parts.length > 8 && parseGenotypes) {
genotypes = createGenotypeMap(parts, locAndAlleles, 8);
}
return new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
VariantContext vc = new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
// Trim bases of all alleles if necessary
return VariantContextUtils.createVariantContextWithTrimmedAlleles(vc);
}
private boolean isSingleNucleotideEvent(List<Allele> alleles) {
@ -523,11 +524,11 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
// add it to the list
genotypes.put(sampleName, new Genotype(sampleName,
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], locAndAlleles.second, alleleMap),
GTQual,
genotypeFilters,
gtAttributes,
phased));
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], locAndAlleles.second, alleleMap),
GTQual,
genotypeFilters,
gtAttributes,
phased));
}
return genotypes;
@ -545,22 +546,16 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
static Pair<GenomeLoc,List<Allele>> clipAlleles(String contig, long position, String ref, List<Allele> unclippedAlleles) {
List<Allele> newAlleleList = new ArrayList<Allele>();
/* //+
boolean clipping = true;
int forwardClipping = 0;
while(clipping) {
for (Allele a : unclippedAlleles) {
if (a.length()-forwardClipping == 0)
clipping = false;
else if (a.getBases()[forwardClipping] != ref.getBytes()[forwardClipping])
clipping = false;
if (clipping) forwardClipping++;
}
// Forward clipping (i.e. of first reference base) is not done here, but rather once a properly formed VC is obtained first.
// System.out.format("%s:%d ",contig, position);
//for (Allele a : unclippedAlleles) {
// System.out.print(a.toString());
//}
// System.out.println();
//
//
}
//-
*/ // find the preceeding string common to all alleles and the reference
// find the preceeding string common to all alleles and the reference
boolean clipping = true;
for (Allele a : unclippedAlleles)
if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) {
@ -582,11 +577,12 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
for (Allele a : unclippedAlleles)
newAlleleList.add(Allele.create(Arrays.copyOfRange(a.getBases(),forwardClipping,a.getBases().length-reverseClipped),a.isReference()));
// the new reference length
int refLength = ref.length() - forwardClipping - reverseClipped;
return new Pair<GenomeLoc,List<Allele>>(GenomeLocParser.createGenomeLoc(contig,position+forwardClipping,(position+forwardClipping+Math.max(refLength - 1,0))),
newAlleleList);
// the new reference length
int refLength = ref.length() - reverseClipped;
return new Pair<GenomeLoc,List<Allele>>(GenomeLocParser.createGenomeLoc(contig,position,(position+Math.max(refLength - 1,0))),
newAlleleList);
}
@ -598,7 +594,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
public Class getFeatureType() {
return VariantContext.class;
}
/**
* get the name of this codec
* @return our set name
@ -619,7 +615,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
this.transformer = transformer;
}
/**
* set the name of this codec
* @param name

View File

@ -148,7 +148,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
}
// merge the variant contexts
return VariantContextUtils.simpleMerge(calls);
return VariantContextUtils.simpleMerge(calls, ref.getBases());
}
private AlignmentContext filterForSamples(ReadBackedPileup pileup, Set<String> samples) {

View File

@ -101,7 +101,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// get all of the vcf rods at this locus
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, genotypeMergeOption, true, printComplexMerges);
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, genotypeMergeOption, true, printComplexMerges, ref.getBases());
if ( mergedVC != null ) // only operate at the start of events
vcfWriter.add(mergedVC, ref.getBases());

View File

@ -58,7 +58,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
private String OUTPUT_FILE = null;
public static final String INPUT_ROD_NAME = "vcf";
public static final String INPUT_ROD_NAME = "variant";
protected static String line = null;
final TreeSet<String> samples = new TreeSet<String>();
@ -98,12 +98,12 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
final Set<String> vcfSamples = header.getGenotypeSamples();
samples.addAll(vcfSamples);
vcfWriter.writeHeader(header);
}
}
vcfWriter.writeHeader(header);
}
@ -119,7 +119,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
return 0;
GenomeLoc loc = context.getLocation();
VariantContext vc = tracker.getVariantContext(ref,"vcf", null, loc, true);
VariantContext vc = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, true);
if (vc == null)

View File

@ -5,6 +5,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
@ -139,6 +140,9 @@ public class VCFWriter {
throw new IllegalArgumentException("The reference base must be provided to write VCF records");
try {
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBases);
GenomeLoc loc = vc.getLocation();
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
@ -148,8 +152,7 @@ public class VCFWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// POS
// TODO -- Remove this off-by-one issue when positions are settled in the input
mWriter.write(String.valueOf(loc.getStart() - (vc.isIndel() ? 1 : 0)));
mWriter.write(String.valueOf(loc.getStart()));
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// ID
@ -159,7 +162,7 @@ public class VCFWriter {
// REF
alleleMap.put(vc.getReference(), "0");
String refString = makeAlleleString(vc.getReference(), vc.isIndel(), refBases[0]);
String refString = makeAlleleString(vc.getReference());
mWriter.write(refString);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -167,13 +170,13 @@ public class VCFWriter {
if ( vc.isVariant() ) {
Allele altAllele = vc.getAlternateAllele(0);
alleleMap.put(altAllele, "1");
String alt = makeAlleleString(altAllele, vc.isIndel(), refBases[0]);
String alt = makeAlleleString(altAllele);
mWriter.write(alt);
for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
altAllele = vc.getAlternateAllele(i);
alleleMap.put(altAllele, String.valueOf(i+1));
alt = makeAlleleString(altAllele, vc.isIndel(), refBases[0]);
alt = makeAlleleString(altAllele);
mWriter.write(",");
mWriter.write(alt);
}
@ -242,11 +245,10 @@ public class VCFWriter {
return s;
}
private String makeAlleleString(Allele allele, boolean isIndel, byte ref) {
private String makeAlleleString(Allele allele) {
String s = new String(allele.getBases());
if ( isIndel || s.length() == 0 ) // in case the context is monomorphic at an indel site
s = (char)ref + s;
return s;
return new String(allele.getBases());
}
/**

View File

@ -18,11 +18,11 @@ public class VariantContextIntegrationTest extends WalkerTest {
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10000 --printPerLocus", "cdd4cb53670a6c0f26e21bc220579654");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "7e1d9e181cc489aff847528664cf35bf");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "2226dc7ec9a21d8f0e86dc1b934b1d3e");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "272a9dff802d1d9f391ad53bc8c23da8");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "7444609b931d10cfc1fdccca9b4a2ab5");
expectations.put("-L 1:1-10000 --printPerLocus", "63fd69e4ab430b79fb213dd27b58ae1c");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "276ed96efaaffc2fc1c3b3deb4e04d1d");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "a37f7bc34c1824688d3e475945c19d5a");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "1715a6e0daf873f2e2cd10cb56085174");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "bf33ab1ed65da7f56c02ca7956d9c31e");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "629ffd6b3b9ea1bce29cb715576f5c8a");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "d4b812b2fec231f8f5b61d6f26cf86a5");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "546e8e546f2cdfba31f91ed083137c42");

View File

@ -18,7 +18,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B indels,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
1,
Arrays.asList("a7bfc673eaa202a668c1424a933671ad"));
Arrays.asList("3a48986c3832a768b478c3e95f994b0f"));
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec4 = new WalkerTestSpec(

View File

@ -29,7 +29,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealignerKnownsOnly() {
String[] md5s = {"83bc0c9a7d8872b552c6cbd994672c3b", "92bf331b672a03d63c26e4d9d3615f5b"};
String[] md5s = {"7084d4e543bc756730ab306768028530", "1091436c40d5ba557d85662999cc0c1d"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 1.0 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B knownIndels,VCF," + validationDataLocation + "NA12878.indels.vcf4 -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly",
2,

View File

@ -11,7 +11,7 @@ public class PickSequenomProbesIntegrationTest extends WalkerTest {
String testVCF = validationDataLocation + "complexExample.vcf4";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("46d2acea95d36725db63af61ee963ce6"));
Arrays.asList("71e717e1813791575231f884b51c0aa3"));
executeTest("Test probes", spec);
}
@ -21,9 +21,9 @@ public class PickSequenomProbesIntegrationTest extends WalkerTest {
String testArgs = "-snp_mask " + GATKDataLocation + "/dbsnp_130_b36.rod -R "
+ oneKGLocation + "reference/human_b36_both.fasta -omitWindow -nameConvention "
+ "-project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation +
"pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s";
"pickSeqIntegrationTest.interval_list -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("8b5b715b9918a0b70f4868614f197b72"));
Arrays.asList("0ab37fe4db3fef345815c56e57e75cec"));
executeTest("Test probes", spec);
}
@ -35,9 +35,9 @@ public class PickSequenomProbesIntegrationTest extends WalkerTest {
String testArgs = "-snp_mask " + GATKDataLocation + "/dbsnp_130_b36.rod -R "
+ oneKGLocation + "reference/human_b36_both.fasta -omitWindow -nameConvention "
+ "-nmw 1 -project_id 1kgp3_s4_lf -T PickSequenomProbes -L " + validationDataLocation +
"pickSeqIntegrationTest.interval_list -B input,VCF4,"+testVCF+" -o %s";
"pickSeqIntegrationTest.interval_list -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("03c8cef968ae2d0ef5f51ac82b24f891"));
Arrays.asList("8f0bc8954069c659c203cbb53d4dbad2"));
executeTest("Test probes", spec);
}
}

View File

@ -15,7 +15,8 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
executeTest("Test SNPs", spec);
}
@Test
// @Test
// TODO- need to be reenabled when PED reader tracks gets updated to read indels correctly
public void testIndels() {
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";

View File

@ -56,15 +56,15 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", ""); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", ""); } // official project VCF files in tabix format
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "d0fab4a3ff0454385d5eee14e926e5ba"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "251e9b1d8b0e9f6801b71c0373c3b663"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", ""); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "1b66ed3e5911943cc7dc003f36646a4b"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "9b204a6aaa1baa385664a1b058b7fbb8"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "02f292cde282ab8b0c69459335abb74f"); } // official project VCF files in tabix format
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", ""); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "9b06704822df411abd9f7ca16df5f2da"); } // official project VCF files in tabix format
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "d28282213298878fd315a24d533db122"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "2689638bda75006430f4209e2d114b72"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", ""); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "9025fbc8e5568426b18a2229a0d2d1c9"); }
}

View File

@ -37,7 +37,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" +
" -B eval2,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4"
, 1,
Arrays.asList("216b5de6f58be6cf3286ed5261772904"));
Arrays.asList("8d97cbc29f73a0027267858f961e6fe5"));
executeTest("testComplexVCFPileup", spec);
}