Redoing the conversion to VariantContext: instead of walkers passing in a ref allele, they pass in the ref context and the adaptors create the allele. This is the right way of doing it.

Also, adding some more useful integration tests.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3194 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-04-19 05:47:17 +00:00
parent 4eba9bffc1
commit d73c63a99a
13 changed files with 184 additions and 116 deletions

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -113,7 +113,7 @@ public class RefMetaDataTracker {
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
* at the current site, they all will be added to the list as separate elements.
*
* @return
* @return collection of all rods
*/
public Collection<GATKFeature> getAllRods() {
List<GATKFeature> l = new ArrayList<GATKFeature>();
@ -129,7 +129,7 @@ public class RefMetaDataTracker {
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
*
* @return
* @return collection of all tracks
*/
public Collection<RODRecordList> getBoundRodTracks() {
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
@ -176,9 +176,13 @@ public class RefMetaDataTracker {
/**
* Converts all possible ROD tracks to VariantContext objects, of all types, allowing any start and any number
* of entries per ROD.
* The name of each VariantContext corresponds to the ROD name.
*
* @param ref reference context
* @return variant context
*/
public Collection<VariantContext> getAllVariantContexts() {
return getAllVariantContexts(null, null, false, false);
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
return getAllVariantContexts(ref, null, null, false, false);
}
/**
@ -192,17 +196,18 @@ public class RefMetaDataTracker {
*
* The name of each VariantContext corresponds to the ROD name.
*
* @param curLocation location
* @param ref reference context
* @param allowedTypes allowed types
* @param curLocation location
* @param requireStartHere do we require the rod to start at this location?
* @param takeFirstOnly do we take the first rod only?
* @return variant context
*/
public Collection<VariantContext> getAllVariantContexts(EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
List<VariantContext> contexts = new ArrayList<VariantContext>();
for ( RODRecordList rodList : getBoundRodTracks() ) {
addVariantContexts(contexts, rodList, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
return contexts;
@ -221,25 +226,25 @@ public class RefMetaDataTracker {
* @return variant context
*/
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly);
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(names, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
for ( String name : names ) {
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
if ( rodList != null )
addVariantContexts(contexts, rodList, allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly );
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
}
return contexts;
@ -267,15 +272,11 @@ public class RefMetaDataTracker {
}
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
for ( GATKFeature rec : rodList ) {
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
// ok, we might actually be able to turn this record into a variant context
VariantContext vc;
if ( ref == null )
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject());
else
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
continue;
@ -304,8 +305,9 @@ public class RefMetaDataTracker {
* defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise:
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location).
* @param name
* @return
* @param name track name
* @param requireExactMatch do we require an exact match of the rod name?
* @return track data for the given rod
*/
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
//logger.debug(String.format("Lookup %s%n", name));
@ -335,7 +337,7 @@ public class RefMetaDataTracker {
/**
* Returns the canonical name of the rod name (lowercases it)
* @param name the name of the rod
* @return
* @return canonical name of the rod
*/
private final String canonicalName(final String name) {
return name.toLowerCase();

View File

@ -4,9 +4,11 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
@ -56,7 +58,7 @@ public class VariantContextAdaptors {
/** generic superclass */
private static abstract class VCAdaptor {
abstract VariantContext convert(String name, Object input);
abstract VariantContext convert(String name, Object input, Allele refAllele);
abstract VariantContext convert(String name, Object input, ReferenceContext ref);
}
public static VariantContext toVariantContext(String name, Object variantContainingObject) {
@ -67,11 +69,11 @@ public class VariantContextAdaptors {
}
}
public static VariantContext toVariantContext(String name, Object variantContainingObject, Allele refAllele) {
public static VariantContext toVariantContext(String name, Object variantContainingObject, ReferenceContext ref) {
if ( ! adaptors.containsKey(variantContainingObject.getClass()) )
return null;
else {
return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, refAllele);
return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, ref);
}
}
@ -91,14 +93,15 @@ public class VariantContextAdaptors {
private static class RodDBSnpAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((rodDbSNP)input).getReference()) )
return null;
Allele refAllele = new Allele(((rodDbSNP)input).getReference(), true);
return convert(name, input, refAllele);
return convert(name, input, null);
}
VariantContext convert(String name, Object input, Allele refAllele) {
VariantContext convert(String name, Object input, ReferenceContext ref) {
rodDbSNP dbsnp = (rodDbSNP)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) )
return null;
Allele refAllele = new Allele(dbsnp.getReference(), true);
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
@ -132,28 +135,25 @@ public class VariantContextAdaptors {
private static class RodVCFAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) {
Allele refAllele = new Allele(((RodVCF)input).getReference(), true);
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele);
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null);
}
VariantContext convert(String name, Object input, Allele refAllele) {
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele);
VariantContext convert(String name, Object input, ReferenceContext ref) {
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref);
}
}
private static class VCFRecordAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) {
Allele refAllele = new Allele(((VCFRecord)input).getReference(), true);
return vcfToVariantContext(name, (VCFRecord)input, refAllele);
return vcfToVariantContext(name, (VCFRecord)input, null);
}
VariantContext convert(String name, Object input, Allele refAllele) {
return vcfToVariantContext(name, (VCFRecord)input, refAllele);
VariantContext convert(String name, Object input, ReferenceContext ref) {
return vcfToVariantContext(name, (VCFRecord)input, ref);
}
}
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, Allele refAllele) {
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
// add the reference allele
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) {
@ -167,7 +167,9 @@ public class VariantContextAdaptors {
// add all of the alt alleles
List<Allele> alleles = new ArrayList<Allele>();
Allele refAllele = determineRefAllele(vcf, ref);
alleles.add(refAllele);
for ( String alt : vcf.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding vcf record %s%n", vcf);
@ -184,10 +186,14 @@ public class VariantContextAdaptors {
List<Allele> genotypeAlleles = new ArrayList<Allele>();
for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) {
Allele a = Allele.getMatchingAllele(alleles, s.getBases());
if ( a == null )
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
genotypeAlleles.add(a);
if ( a == null ) {
if ( vcf.getType() == Variation.VARIANT_TYPE.INSERTION || vcf.getType() == Variation.VARIANT_TYPE.DELETION )
genotypeAlleles.add(refAllele);
else
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
} else {
genotypeAlleles.add(a);
}
}
Map<String, String> fields = new HashMap<String, String>();
@ -207,13 +213,45 @@ public class VariantContextAdaptors {
}
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
VariantContext vc = new VariantContext(name, vcf.getLocation(), alleles, genotypes, qual, filters, attributes);
GenomeLoc loc = vcf.getLocation();
if ( vcf.isDeletion() )
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes);
vc.validate();
return vc;
} else
return null; // can't handle anything else
}
/**
 * Builds the reference Allele for a VCF record from the surrounding reference context.
 *
 * Falls back to the record's own reference string when no context is supplied;
 * otherwise derives the allele from the reference bases according to the variant type.
 *
 * @param vcf the VCF record being converted
 * @param ref the reference context at the record's locus (may be null)
 * @return the reference allele for the record
 * @throws IllegalArgumentException if a deletion extends past the reference window
 * @throws UnsupportedOperationException if the record's type cannot be handled
 */
private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) {
    // No context available: trust the reference string stored in the record itself.
    if ( ref == null )
        return new Allele(vcf.getReference(), true);

    // Point events (non-indels): the single base at the current locus is the reference.
    if ( !vcf.isIndel() )
        return new Allele(Character.toString(ref.getBase()), true);

    // Insertions add bases relative to the reference, so the ref allele is null/empty.
    if ( vcf.isInsertion() )
        return new Allele(Allele.NULL_ALLELE_STRING, true);

    if ( vcf.isDeletion() ) {
        // Index of the base just past the locus, relative to the start of the window.
        int start = (int)(ref.getLocus().getStart() - ref.getWindow().getStart() + 1);

        // The deleted span is as long as the longest alternate allele encoding.
        int delLength = 0;
        for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() )
            delLength = Math.max(delLength, enc.getLength());

        // The window must contain the entire deleted sequence downstream of the locus.
        if ( delLength > ref.getWindow().getStop() - ref.getLocus().getStop() )
            throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus());

        // Copy the deleted bases out of the reference window to form the allele.
        char[] deletion = new char[delLength];
        System.arraycopy(ref.getBases(), start, deletion, 0, delLength);
        return new Allele(String.valueOf(deletion), true);
    }

    throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported.");
}
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
HashSet<String> names = new LinkedHashSet<String>();
@ -364,11 +402,15 @@ public class VariantContextAdaptors {
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
Set<String> keys = new HashSet<String>();
boolean sawGoodQual = false;
for ( Genotype g : vc.getGenotypes().values() ) {
keys.addAll(g.getAttributes().keySet());
if ( g.hasNegLog10PError() )
sawGoodQual = true;
}
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
if ( sawGoodQual )
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
return Utils.sorted(new ArrayList<String>(keys));
}
@ -380,13 +422,17 @@ public class VariantContextAdaptors {
private static class PlinkRodAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) {
throw new UnsupportedOperationException("Conversion from Plink to VC requires a reference allele");
return convert(name, input, null);
}
VariantContext convert(String name, Object input, Allele refAllele) {
VariantContext convert(String name, Object input, ReferenceContext ref) {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from Plink to VariantContext requires a reference context");
PlinkRod plink = (PlinkRod)input;
HashSet<Allele> VCAlleles = new HashSet<Allele>();
Allele refAllele = determineRefAllele(plink, ref);
VCAlleles.add(refAllele);
// mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference)
@ -440,6 +486,23 @@ public class VariantContextAdaptors {
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
}
}
/**
 * Builds the reference Allele for a Plink record from the reference context.
 *
 * @param plink the Plink record being converted
 * @param ref   the reference context at the record's locus
 * @return the reference allele for the record
 * @throws UnsupportedOperationException if a deletion is longer than the reference window allows
 */
private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) {
    // Point events: the reference allele is just the base at the current locus.
    if ( !plink.isIndel() )
        return new Allele(Character.toString(ref.getBase()), true);

    // Insertions contribute no reference bases, so use the null/empty allele.
    if ( plink.isInsertion() )
        return new Allele(Allele.NULL_ALLELE_STRING, true);

    // Deletions: pull the deleted bases out of the reference window.
    long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop();
    if ( plink.getLength() > maxLength )
        throw new UnsupportedOperationException("Plink conversion currently can only handle indels up to length " + maxLength);
    char[] deletion = new char[plink.getLength()];
    // Offset 1 skips the base at the locus itself; the deletion starts one past it.
    System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
    return new Allele(String.valueOf(deletion), true);
}
}
// --------------------------------------------------------------------------------------------------------------
@ -455,22 +518,23 @@ public class VariantContextAdaptors {
* @return a VariantContext object
*/
VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((RodGLF)input).getReference()) )
return null;
Allele refAllele = new Allele(((RodGLF)input).getReference(), true);
return convert(name, input, refAllele);
return convert(name, input, null);
}
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGLF
* @param refAllele the reference base as an Allele object
* @param ref the reference context
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, Allele refAllele) {
VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGLF glf = (RodGLF)input;
if ( ! Allele.acceptableAlleleBases(glf.getReference()) )
return null;
Allele refAllele = new Allele(glf.getReference(), true);
// make sure we can convert it
if ( glf.isSNP() || glf.isIndel()) {
// add the reference allele
@ -528,21 +592,21 @@ public class VariantContextAdaptors {
* @return a VariantContext object
*/
VariantContext convert(String name, Object input) {
if ( ! Allele.acceptableAlleleBases(((RodGeliText)input).getReference()) )
return null;
Allele refAllele = new Allele(((RodGeliText)input).getReference(), true);
return convert(name, input, refAllele);
return convert(name, input, null);
}
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @param refAllele the reference base as an Allele object
* @param ref the reference context
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, Allele refAllele) {
VariantContext convert(String name, Object input, ReferenceContext ref) {
RodGeliText geli = (RodGeliText)input;
if ( ! Allele.acceptableAlleleBases(geli.getReference()) )
return null;
Allele refAllele = new Allele(geli.getReference(), true);
// make sure we can convert it
if ( geli.isSNP() || geli.isIndel()) {
@ -593,21 +657,25 @@ public class VariantContextAdaptors {
* @return a VariantContext object
*/
VariantContext convert(String name, Object input) {
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires knowledge of the reference allele");
return convert(name, input, null);
}
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param name the name of the ROD
* @param input the Rod object, in this case a HapMapROD
* @param refAllele the reference base as an Allele object
* @param ref the reference context
* @return a VariantContext object
*/
VariantContext convert(String name, Object input, Allele refAllele) {
VariantContext convert(String name, Object input, ReferenceContext ref) {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
HapMapROD hapmap = (HapMapROD)input;
// add the reference allele
HashSet<Allele> alleles = new HashSet<Allele>();
Allele refAllele = new Allele(Character.toString(ref.getBase()), true);
alleles.add(refAllele);
// make a mapping from sample to genotype

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -15,6 +14,7 @@ import java.util.*;
* Converts variants from other file formats to VCF format.
*/
@Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME,type= ReferenceOrderedDatum.class))
@Reference(window=@Window(start=0,stop=40))
public class VariantsToVCF extends RodWalker<Integer, Integer> {
public static final String INPUT_ROD_NAME = "variant";
@ -27,7 +27,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// Don't allow mixed types for now
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
private String[] ALLOWED_FORMAT_FIELDS = {"GT"};
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
@ -35,8 +35,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
Allele refAllele = new Allele(Character.toString(ref.getBase()), true);
Collection<VariantContext> contexts = tracker.getVariantContexts(INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), refAllele, true, false);
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
for ( VariantContext vc : contexts ) {
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);

View File

@ -73,7 +73,7 @@ public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.E
// look at the rods for indels or SNPs
if ( tracker != null ) {
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) {
switch ( vc.getType() ) {
case INDEL:
hasIndel = true;

View File

@ -61,7 +61,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
String refBase = String.valueOf(ref.getBase());
Collection<VariantContext> VCs = tracker.getAllVariantContexts();
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
if ( VCs.size() == 0 )
return "";
@ -111,7 +111,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
else if ( vc.isInsertion() )
assay_sequence = leading_bases + refBase + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
else if ( vc.isDeletion() )
assay_sequence = leading_bases + refBase + "[" + vc.getReference().toString() + "/-]" + trailing_bases.substring(vc.getReference().length());
assay_sequence = leading_bases + refBase + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length());
else
return "";

View File

@ -33,9 +33,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
// "used for smart Hardy-Weinberg annotation",required = false)
//private File popFile = null;
// max allowable indel size (based on ref window)
private static final int MAX_INDEL_SIZE = 40;
// sample names
private TreeSet<String> sampleNames = null;
@ -74,10 +71,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
Object rod = rods.get(0);
// determine the reference allele
Allele refAllele = determineRefAllele(rod, ref);
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, refAllele);
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref);
if ( sampleNames == null )
sampleNames = new TreeSet<String>(vc.getSampleNames());
@ -85,32 +79,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
return addVariantInformationToCall(ref, vc, rod);
}
private Allele determineRefAllele(Object rod, ReferenceContext ref) {
Allele refAllele;
// ugly hack to get around the fact that the Plink rod needs
// a very specific determination of the reference allele
if ( rod instanceof PlinkRod ) {
PlinkRod plink = (PlinkRod)rod;
if ( !plink.isIndel() ) {
refAllele = new Allele(Character.toString(ref.getBase()), true);
} else if ( plink.isInsertion() ) {
refAllele = new Allele(PlinkRod.SEQUENOM_NO_BASE, true);
} else {
if ( plink.getLength() > MAX_INDEL_SIZE )
throw new UnsupportedOperationException("PlinkToVCF currently can only handle indels up to length " + MAX_INDEL_SIZE);
char[] deletion = new char[plink.getLength()];
System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
refAllele = new Allele(new String(deletion), true);
}
} else {
refAllele = new Allele(Character.toString(ref.getBase()), true);
}
return refAllele;
}
public Integer reduce(VCFRecord call, Integer numVariants) {
if ( call != null ) {
numVariants++;

View File

@ -45,7 +45,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
EnumSet<VariantContext.Type> allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType);
int n = 0;
for (VariantContext vc : tracker.getAllVariantContexts(allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
for (VariantContext vc : tracker.getAllVariantContexts(ref, allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
if ( writer != null && n == 0 ) {
if ( ! wroteHeader ) {
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));

View File

@ -6,9 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
@ -18,7 +16,6 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Collection;
@ -49,12 +46,12 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
return (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
context.getBasePileup().size() != 0 &&
tracker != null &&
tracker.getAllVariantContexts() != null
tracker.getAllVariantContexts(ref) != null
);
}
public List<String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
Collection<VariantContext> contexts = tracker.getAllVariantContexts();
Collection<VariantContext> contexts = tracker.getAllVariantContexts(ref);
for (VariantContext vc : contexts) {
if (vc.isVariant() && !vc.isFiltered()) {

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.HashMap;
import java.util.Collection;
/*
* Copyright (c) 2010 The Broad Institute
@ -100,11 +101,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if( tracker != null ) {
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
// First find out if this variant is in the truth sets
boolean isInTruthSet = false;
boolean isTrueVariant = false;
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
for ( VariantContext vc : VCs ) {
if( vc.getName().toUpperCase().startsWith("TRUTH") ) {
isInTruthSet = true;
if (vc.isVariant())
@ -113,7 +115,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
}
// Add each annotation in this VCF Record to the dataManager
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
for ( VariantContext vc : VCs ) {
if( !vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isVariant() ) {
dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant );

View File

@ -126,7 +126,7 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
boolean isInTruthSet = false;
boolean isTrueVariant = false;
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isSNP() && !vc.isFiltered() ) {
if( multiSampleCalls ) {

View File

@ -116,7 +116,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
double annotationValues[] = new double[numAnnotations];
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
if( vc != null && vc.isSNP() ) {
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {

View File

@ -66,4 +66,19 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
md5);
executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst();
}
@Test
// Integration test: runs VariantsToVCF over a VCF input containing complex
// (indel/mixed) records and pins the output against a known MD5.
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
// expected MD5 of the walker's single output file
md5.add("92d661a3789e55078197666eb9ee7020");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -B variant,VCF," + validationDataLocation + "complexExample.vcf" +
" -T VariantsToVCF" +
" -o %s",
1, // just one output file
md5);
// NOTE(review): label says "Variants" while the method name says "Genotypes" — confirm which was intended
executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst();
}
}

View File

@ -0,0 +1,17 @@
package org.broadinstitute.sting.gatk.walkers.sequenom;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
/**
 * Integration test for the PickSequenomProbes walker: runs it over a VCF of
 * complex variants in a 1Mb interval and pins the output against a known MD5.
 */
public class PickSequenomProbesIntegrationTest extends WalkerTest {
@Test
public void testProbes() {
String testVCF = validationDataLocation + "complexExample.vcf";
// command line: b36 reference, 1Mb interval on chr1, VCF input bound as "input"
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("6b5409cc78960f1be855536ed89ea9dd")); // expected MD5 of the single output file
executeTest("Test probes", spec);
}
}