Redoing the conversion to VariantContext: instead of walkers passing in a ref allele, they pass in the ref context and the adaptors create the allele. This is the right way of doing it.
Also, adding some more useful integration tests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3194 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4eba9bffc1
commit
d73c63a99a
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -113,7 +113,7 @@ public class RefMetaDataTracker {
|
|||
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
|
||||
* at the current site, they all will be added to the list as separate elements.
|
||||
*
|
||||
* @return
|
||||
* @return collection of all rods
|
||||
*/
|
||||
public Collection<GATKFeature> getAllRods() {
|
||||
List<GATKFeature> l = new ArrayList<GATKFeature>();
|
||||
|
|
@ -129,7 +129,7 @@ public class RefMetaDataTracker {
|
|||
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
|
||||
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
|
||||
*
|
||||
* @return
|
||||
* @return collection of all tracks
|
||||
*/
|
||||
public Collection<RODRecordList> getBoundRodTracks() {
|
||||
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
|
||||
|
|
@ -176,9 +176,13 @@ public class RefMetaDataTracker {
|
|||
/**
|
||||
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
|
||||
* of entries per ROD.
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param ref reference context
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts() {
|
||||
return getAllVariantContexts(null, null, false, false);
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
|
||||
return getAllVariantContexts(ref, null, null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -192,17 +196,18 @@ public class RefMetaDataTracker {
|
|||
*
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param curLocation location
|
||||
* @param ref reference context
|
||||
* @param allowedTypes allowed types
|
||||
* @param curLocation location
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
List<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( RODRecordList rodList : getBoundRodTracks() ) {
|
||||
addVariantContexts(contexts, rodList, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
return contexts;
|
||||
|
|
@ -221,25 +226,25 @@ public class RefMetaDataTracker {
|
|||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
|
||||
return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly);
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(names, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly);
|
||||
return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly );
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return contexts;
|
||||
|
|
@ -267,15 +272,11 @@ public class RefMetaDataTracker {
|
|||
}
|
||||
|
||||
|
||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
for ( GATKFeature rec : rodList ) {
|
||||
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
|
||||
// ok, we might actually be able to turn this record in a variant context
|
||||
VariantContext vc;
|
||||
if ( ref == null )
|
||||
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject());
|
||||
else
|
||||
vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
|
||||
|
||||
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
|
||||
continue;
|
||||
|
|
@ -304,8 +305,9 @@ public class RefMetaDataTracker {
|
|||
* defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise:
|
||||
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
|
||||
* regardless of the presence of "extended" RODs overlapping with that location).
|
||||
* @param name
|
||||
* @return
|
||||
* @param name track name
|
||||
* @param requireExactMatch do we require an exact match of the rod name?
|
||||
* @return track data for the given rod
|
||||
*/
|
||||
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
|
||||
//logger.debug(String.format("Lookup %s%n", name));
|
||||
|
|
@ -335,7 +337,7 @@ public class RefMetaDataTracker {
|
|||
/**
|
||||
* Returns the canonical name of the rod name (lowercases it)
|
||||
* @param name the name of the rod
|
||||
* @return
|
||||
* @return canonical name of the rod
|
||||
*/
|
||||
private final String canonicalName(final String name) {
|
||||
return name.toLowerCase();
|
||||
|
|
|
|||
|
|
@ -4,9 +4,11 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
|
|
@ -56,7 +58,7 @@ public class VariantContextAdaptors {
|
|||
/** generic superclass */
|
||||
private static abstract class VCAdaptor {
|
||||
abstract VariantContext convert(String name, Object input);
|
||||
abstract VariantContext convert(String name, Object input, Allele refAllele);
|
||||
abstract VariantContext convert(String name, Object input, ReferenceContext ref);
|
||||
}
|
||||
|
||||
public static VariantContext toVariantContext(String name, Object variantContainingObject) {
|
||||
|
|
@ -67,11 +69,11 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
public static VariantContext toVariantContext(String name, Object variantContainingObject, Allele refAllele) {
|
||||
public static VariantContext toVariantContext(String name, Object variantContainingObject, ReferenceContext ref) {
|
||||
if ( ! adaptors.containsKey(variantContainingObject.getClass()) )
|
||||
return null;
|
||||
else {
|
||||
return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, refAllele);
|
||||
return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, ref);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -91,14 +93,15 @@ public class VariantContextAdaptors {
|
|||
|
||||
private static class RodDBSnpAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input) {
|
||||
if ( ! Allele.acceptableAlleleBases(((rodDbSNP)input).getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(((rodDbSNP)input).getReference(), true);
|
||||
return convert(name, input, refAllele);
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
rodDbSNP dbsnp = (rodDbSNP)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(dbsnp.getReference(), true);
|
||||
|
||||
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
|
|
@ -132,28 +135,25 @@ public class VariantContextAdaptors {
|
|||
|
||||
private static class RodVCFAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input) {
|
||||
Allele refAllele = new Allele(((RodVCF)input).getReference(), true);
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele);
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele);
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref);
|
||||
}
|
||||
}
|
||||
|
||||
private static class VCFRecordAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input) {
|
||||
Allele refAllele = new Allele(((VCFRecord)input).getReference(), true);
|
||||
return vcfToVariantContext(name, (VCFRecord)input, refAllele);
|
||||
return vcfToVariantContext(name, (VCFRecord)input, null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
return vcfToVariantContext(name, (VCFRecord)input, refAllele);
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
return vcfToVariantContext(name, (VCFRecord)input, ref);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, Allele refAllele) {
|
||||
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
|
||||
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
|
||||
// add the reference allele
|
||||
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) {
|
||||
|
|
@ -167,7 +167,9 @@ public class VariantContextAdaptors {
|
|||
|
||||
// add all of the alt alleles
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Allele refAllele = determineRefAllele(vcf, ref);
|
||||
alleles.add(refAllele);
|
||||
|
||||
for ( String alt : vcf.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
|
|
@ -184,10 +186,14 @@ public class VariantContextAdaptors {
|
|||
List<Allele> genotypeAlleles = new ArrayList<Allele>();
|
||||
for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) {
|
||||
Allele a = Allele.getMatchingAllele(alleles, s.getBases());
|
||||
if ( a == null )
|
||||
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
|
||||
|
||||
genotypeAlleles.add(a);
|
||||
if ( a == null ) {
|
||||
if ( vcf.getType() == Variation.VARIANT_TYPE.INSERTION || vcf.getType() == Variation.VARIANT_TYPE.DELETION )
|
||||
genotypeAlleles.add(refAllele);
|
||||
else
|
||||
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
|
||||
} else {
|
||||
genotypeAlleles.add(a);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> fields = new HashMap<String, String>();
|
||||
|
|
@ -207,13 +213,45 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
|
||||
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
|
||||
VariantContext vc = new VariantContext(name, vcf.getLocation(), alleles, genotypes, qual, filters, attributes);
|
||||
|
||||
GenomeLoc loc = vcf.getLocation();
|
||||
if ( vcf.isDeletion() )
|
||||
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
|
||||
|
||||
VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
}
|
||||
|
||||
private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) {
|
||||
Allele refAllele;
|
||||
if ( ref == null ) {
|
||||
refAllele = new Allele(vcf.getReference(), true);
|
||||
} else if ( !vcf.isIndel() ) {
|
||||
refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
} else if ( vcf.isInsertion() ) {
|
||||
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
|
||||
} else if ( vcf.isDeletion() ) {
|
||||
int start = (int)(ref.getLocus().getStart() - ref.getWindow().getStart() + 1);
|
||||
int delLength = 0;
|
||||
for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() ) {
|
||||
if ( enc.getLength() > delLength )
|
||||
delLength = enc.getLength();
|
||||
}
|
||||
if ( delLength > ref.getWindow().getStop() - ref.getLocus().getStop() )
|
||||
throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus());
|
||||
|
||||
char[] deletion = new char[delLength];
|
||||
System.arraycopy(ref.getBases(), start, deletion, 0, delLength);
|
||||
refAllele = new Allele(String.valueOf(deletion), true);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported.");
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
}
|
||||
|
||||
public static VCFHeader createVCFHeader(Set<VCFHeaderLine> hInfo, VariantContext vc) {
|
||||
HashSet<String> names = new LinkedHashSet<String>();
|
||||
|
|
@ -364,11 +402,15 @@ public class VariantContextAdaptors {
|
|||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||
Set<String> keys = new HashSet<String>();
|
||||
|
||||
boolean sawGoodQual = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.hasNegLog10PError() )
|
||||
sawGoodQual = true;
|
||||
}
|
||||
|
||||
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
|
||||
if ( sawGoodQual )
|
||||
keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
|
||||
return Utils.sorted(new ArrayList<String>(keys));
|
||||
}
|
||||
|
||||
|
|
@ -380,13 +422,17 @@ public class VariantContextAdaptors {
|
|||
|
||||
private static class PlinkRodAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input) {
|
||||
throw new UnsupportedOperationException("Conversion from Plink to VC requires a reference allele");
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
if ( ref == null )
|
||||
throw new UnsupportedOperationException("Conversion from Plink to VariantContext requires a reference context");
|
||||
|
||||
PlinkRod plink = (PlinkRod)input;
|
||||
|
||||
HashSet<Allele> VCAlleles = new HashSet<Allele>();
|
||||
Allele refAllele = determineRefAllele(plink, ref);
|
||||
VCAlleles.add(refAllele);
|
||||
|
||||
// mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference)
|
||||
|
|
@ -440,6 +486,23 @@ public class VariantContextAdaptors {
|
|||
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
|
||||
}
|
||||
}
|
||||
|
||||
private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) {
|
||||
Allele refAllele;
|
||||
if ( !plink.isIndel() ) {
|
||||
refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
} else if ( plink.isInsertion() ) {
|
||||
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
|
||||
} else {
|
||||
long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop();
|
||||
if ( plink.getLength() > maxLength )
|
||||
throw new UnsupportedOperationException("Plink conversion currently can only handle indels up to length " + maxLength);
|
||||
char[] deletion = new char[plink.getLength()];
|
||||
System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
|
||||
refAllele = new Allele(String.valueOf(deletion), true);
|
||||
}
|
||||
return refAllele;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -455,22 +518,23 @@ public class VariantContextAdaptors {
|
|||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input) {
|
||||
if ( ! Allele.acceptableAlleleBases(((RodGLF)input).getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(((RodGLF)input).getReference(), true);
|
||||
return convert(name, input, refAllele);
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGLF
|
||||
* @param refAllele the reference base as an Allele object
|
||||
* @param ref the reference context
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
RodGLF glf = (RodGLF)input;
|
||||
|
||||
if ( ! Allele.acceptableAlleleBases(glf.getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(glf.getReference(), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( glf.isSNP() || glf.isIndel()) {
|
||||
// add the reference allele
|
||||
|
|
@ -528,21 +592,21 @@ public class VariantContextAdaptors {
|
|||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input) {
|
||||
if ( ! Allele.acceptableAlleleBases(((RodGeliText)input).getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(((RodGeliText)input).getReference(), true);
|
||||
return convert(name, input, refAllele);
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @param refAllele the reference base as an Allele object
|
||||
* @param ref the reference context
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
RodGeliText geli = (RodGeliText)input;
|
||||
if ( ! Allele.acceptableAlleleBases(geli.getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(geli.getReference(), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( geli.isSNP() || geli.isIndel()) {
|
||||
|
|
@ -593,21 +657,25 @@ public class VariantContextAdaptors {
|
|||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input) {
|
||||
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires knowledge of the reference allele");
|
||||
return convert(name, input, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @param refAllele the reference base as an Allele object
|
||||
* @param ref the reference context
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
VariantContext convert(String name, Object input, Allele refAllele) {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
if ( ref == null )
|
||||
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
|
||||
|
||||
HapMapROD hapmap = (HapMapROD)input;
|
||||
|
||||
// add the reference allele
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
Allele refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
// make a mapping from sample to genotype
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
|
@ -15,6 +14,7 @@ import java.util.*;
|
|||
* Converts variants from other file formats to VCF format.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME,type= ReferenceOrderedDatum.class))
|
||||
@Reference(window=@Window(start=0,stop=40))
|
||||
public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||
|
||||
public static final String INPUT_ROD_NAME = "variant";
|
||||
|
|
@ -27,7 +27,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
// Don't allow mixed types for now
|
||||
private EnumSet<VariantContext.Type> ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL);
|
||||
|
||||
private String[] ALLOWED_FORMAT_FIELDS = {"GT"};
|
||||
private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY };
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
|
|
@ -35,8 +35,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME));
|
||||
|
||||
Allele refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
Collection<VariantContext> contexts = tracker.getVariantContexts(INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), refAllele, true, false);
|
||||
Collection<VariantContext> contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false);
|
||||
|
||||
for ( VariantContext vc : contexts ) {
|
||||
VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false);
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.E
|
|||
|
||||
// look at the rods for indels or SNPs
|
||||
if ( tracker != null ) {
|
||||
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
|
||||
for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) {
|
||||
switch ( vc.getType() ) {
|
||||
case INDEL:
|
||||
hasIndel = true;
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
|
||||
String refBase = String.valueOf(ref.getBase());
|
||||
|
||||
Collection<VariantContext> VCs = tracker.getAllVariantContexts();
|
||||
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
|
||||
if ( VCs.size() == 0 )
|
||||
return "";
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
else if ( vc.isInsertion() )
|
||||
assay_sequence = leading_bases + refBase + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases;
|
||||
else if ( vc.isDeletion() )
|
||||
assay_sequence = leading_bases + refBase + "[" + vc.getReference().toString() + "/-]" + trailing_bases.substring(vc.getReference().length());
|
||||
assay_sequence = leading_bases + refBase + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length());
|
||||
else
|
||||
return "";
|
||||
|
||||
|
|
|
|||
|
|
@ -33,9 +33,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
// "used for smart Hardy-Weinberg annotation",required = false)
|
||||
//private File popFile = null;
|
||||
|
||||
// max allowable indel size (based on ref window)
|
||||
private static final int MAX_INDEL_SIZE = 40;
|
||||
|
||||
// sample names
|
||||
private TreeSet<String> sampleNames = null;
|
||||
|
||||
|
|
@ -74,10 +71,7 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
|
||||
Object rod = rods.get(0);
|
||||
|
||||
// determine the reference allele
|
||||
Allele refAllele = determineRefAllele(rod, ref);
|
||||
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, refAllele);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref);
|
||||
|
||||
if ( sampleNames == null )
|
||||
sampleNames = new TreeSet<String>(vc.getSampleNames());
|
||||
|
|
@ -85,32 +79,6 @@ public class SequenomValidationConverter extends RodWalker<VCFRecord,Integer> {
|
|||
return addVariantInformationToCall(ref, vc, rod);
|
||||
}
|
||||
|
||||
private Allele determineRefAllele(Object rod, ReferenceContext ref) {
|
||||
Allele refAllele;
|
||||
|
||||
// ugly hack to get around the fact that the Plink rod needs
|
||||
// a very specific determination of the reference allele
|
||||
if ( rod instanceof PlinkRod ) {
|
||||
PlinkRod plink = (PlinkRod)rod;
|
||||
if ( !plink.isIndel() ) {
|
||||
refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
} else if ( plink.isInsertion() ) {
|
||||
refAllele = new Allele(PlinkRod.SEQUENOM_NO_BASE, true);
|
||||
} else {
|
||||
if ( plink.getLength() > MAX_INDEL_SIZE )
|
||||
throw new UnsupportedOperationException("PlinkToVCF currently can only handle indels up to length " + MAX_INDEL_SIZE);
|
||||
char[] deletion = new char[plink.getLength()];
|
||||
System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength());
|
||||
refAllele = new Allele(new String(deletion), true);
|
||||
}
|
||||
} else {
|
||||
refAllele = new Allele(Character.toString(ref.getBase()), true);
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
}
|
||||
|
||||
|
||||
public Integer reduce(VCFRecord call, Integer numVariants) {
|
||||
if ( call != null ) {
|
||||
numVariants++;
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
EnumSet<VariantContext.Type> allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType);
|
||||
|
||||
int n = 0;
|
||||
for (VariantContext vc : tracker.getAllVariantContexts(allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
|
||||
for (VariantContext vc : tracker.getAllVariantContexts(ref, allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
|
||||
if ( writer != null && n == 0 ) {
|
||||
if ( ! wroteHeader ) {
|
||||
writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc));
|
||||
|
|
|
|||
|
|
@ -6,9 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
|
|
@ -18,7 +16,6 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Collection;
|
||||
|
||||
|
||||
|
|
@ -49,12 +46,12 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
|
|||
return (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
|
||||
context.getBasePileup().size() != 0 &&
|
||||
tracker != null &&
|
||||
tracker.getAllVariantContexts() != null
|
||||
tracker.getAllVariantContexts(ref) != null
|
||||
);
|
||||
}
|
||||
|
||||
public List<String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
Collection<VariantContext> contexts = tracker.getAllVariantContexts();
|
||||
Collection<VariantContext> contexts = tracker.getAllVariantContexts(ref);
|
||||
|
||||
for (VariantContext vc : contexts) {
|
||||
if (vc.isVariant() && !vc.isFiltered()) {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Collection;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
|
|
@ -100,11 +101,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
if( tracker != null ) {
|
||||
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref);
|
||||
|
||||
// First find out if this variant is in the truth sets
|
||||
boolean isInTruthSet = false;
|
||||
boolean isTrueVariant = false;
|
||||
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if( vc.getName().toUpperCase().startsWith("TRUTH") ) {
|
||||
isInTruthSet = true;
|
||||
if (vc.isVariant())
|
||||
|
|
@ -113,7 +115,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// Add each annotation in this VCF Record to the dataManager
|
||||
for ( VariantContext vc : tracker.getAllVariantContexts() ) {
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if( !vc.getName().toUpperCase().startsWith("TRUTH") ) {
|
||||
if( vc.isVariant() ) {
|
||||
dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant );
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
|
|||
boolean isInTruthSet = false;
|
||||
boolean isTrueVariant = false;
|
||||
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
if( vc != null && vc.getName().toUpperCase().startsWith("TRUTH") ) {
|
||||
if( vc.isSNP() && !vc.isFiltered() ) {
|
||||
if( multiSampleCalls ) {
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
|
||||
double annotationValues[] = new double[numAnnotations];
|
||||
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
|
||||
if( vc != null && vc.isSNP() ) {
|
||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
|
|
|
|||
|
|
@ -66,4 +66,19 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
md5);
|
||||
executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypesToVCFUsingVCFInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("92d661a3789e55078197666eb9ee7020");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
" -B variant,VCF," + validationDataLocation + "complexExample.vcf" +
|
||||
" -T VariantsToVCF" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
md5);
|
||||
executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class PickSequenomProbesIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testProbes() {
|
||||
String testVCF = validationDataLocation + "complexExample.vcf";
|
||||
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
|
||||
Arrays.asList("6b5409cc78960f1be855536ed89ea9dd"));
|
||||
executeTest("Test probes", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue