2010-03-27 02:55:12 +08:00
package org.broadinstitute.sting.gatk.walkers ;
2009-09-08 14:12:18 +08:00
import org.broadinstitute.sting.gatk.contexts.AlignmentContext ;
2009-09-08 21:13:55 +08:00
import org.broadinstitute.sting.gatk.contexts.ReferenceContext ;
2010-03-27 02:34:59 +08:00
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele ;
2010-04-01 06:39:56 +08:00
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext ;
2010-03-25 12:53:31 +08:00
import org.broadinstitute.sting.gatk.refdata.* ;
import org.broadinstitute.sting.utils.cmdLine.Argument ;
2010-04-01 06:39:56 +08:00
import org.broadinstitute.sting.utils.genotype.vcf.* ;
2010-04-11 10:59:11 +08:00
import org.broadinstitute.sting.utils.BaseUtils ;
2009-09-08 14:12:18 +08:00
2009-09-08 21:13:55 +08:00
import java.util.* ;
2009-09-08 14:12:18 +08:00
2009-12-13 05:41:07 +08:00
/ * *
2010-03-25 12:53:31 +08:00
* Converts variants from other file formats to VCF format .
2009-12-13 05:41:07 +08:00
* /
2010-03-27 02:34:59 +08:00
@Requires ( value = { } , referenceMetaData = @RMD ( name = VariantsToVCF . INPUT_ROD_NAME , type = ReferenceOrderedDatum . class ) )
2010-03-25 12:53:31 +08:00
public class VariantsToVCF extends RodWalker < Integer , Integer > {
2009-09-08 14:12:18 +08:00
2010-03-27 02:34:59 +08:00
public static final String INPUT_ROD_NAME = "variant" ;
2010-03-25 12:53:31 +08:00
@Argument ( fullName = "sample" , shortName = "sample" , doc = "The sample name represented by the variant rod (for data like GELI with genotypes)" , required = false )
protected String sampleName = null ;
2009-09-08 14:12:18 +08:00
2010-03-25 12:53:31 +08:00
private VCFWriter vcfwriter = null ;
2009-09-08 14:12:18 +08:00
2010-03-25 12:53:31 +08:00
// Don't allow mixed types for now
private EnumSet < VariantContext . Type > ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet . of ( VariantContext . Type . SNP , VariantContext . Type . NO_VARIATION , VariantContext . Type . INDEL ) ;
2009-09-08 14:12:18 +08:00
2010-03-25 12:53:31 +08:00
private String [ ] ALLOWED_FORMAT_FIELDS = { "GT" } ;
2009-09-08 14:12:18 +08:00
public Integer map ( RefMetaDataTracker tracker , ReferenceContext ref , AlignmentContext context ) {
2010-04-11 10:59:11 +08:00
if ( tracker = = null | | ! BaseUtils . isRegularBase ( ref . getBase ( ) ) )
2010-03-25 12:53:31 +08:00
return 0 ;
2010-04-01 06:39:56 +08:00
rodDbSNP dbsnp = rodDbSNP . getFirstRealSNP ( tracker . getReferenceMetaData ( "dbsnp" ) ) ;
2010-03-25 12:53:31 +08:00
2010-03-27 02:34:59 +08:00
Allele refAllele = new Allele ( Character . toString ( ref . getBase ( ) ) , true ) ;
Collection < VariantContext > contexts = tracker . getVariantContexts ( INPUT_ROD_NAME , ALLOWED_VARIANT_CONTEXT_TYPES , context . getLocation ( ) , refAllele , true , false ) ;
2010-03-25 12:53:31 +08:00
for ( VariantContext vc : contexts ) {
VCFRecord vcf = VariantContextAdaptors . toVCF ( vc , ref . getBase ( ) , Arrays . asList ( ALLOWED_FORMAT_FIELDS ) , false , false ) ;
if ( dbsnp ! = null )
vcf . setID ( dbsnp . getRS_ID ( ) ) ;
2010-03-27 02:34:59 +08:00
// set the appropriate sample name if necessary
if ( sampleName ! = null & & vcf . hasGenotypeData ( ) & & vcf . getGenotype ( INPUT_ROD_NAME ) ! = null )
vcf . getGenotype ( INPUT_ROD_NAME ) . setSampleName ( sampleName ) ;
writeRecord ( vcf , tracker ) ;
2009-09-08 14:12:18 +08:00
}
2010-03-25 12:53:31 +08:00
return 1 ;
}
2009-09-08 14:12:18 +08:00
2010-03-27 02:34:59 +08:00
private void writeRecord ( VCFRecord rec , RefMetaDataTracker tracker ) {
2010-03-25 12:53:31 +08:00
if ( vcfwriter = = null ) {
// setup the header fields
Set < VCFHeaderLine > hInfo = new HashSet < VCFHeaderLine > ( ) ;
hInfo . addAll ( VCFUtils . getHeaderFields ( getToolkit ( ) ) ) ;
2010-03-27 02:34:59 +08:00
hInfo . add ( new VCFHeaderLine ( "source" , "VariantsToVCF" ) ) ;
2010-03-25 12:53:31 +08:00
hInfo . add ( new VCFHeaderLine ( "reference" , getToolkit ( ) . getArguments ( ) . referenceFile . getName ( ) ) ) ;
2009-09-08 14:12:18 +08:00
2010-03-25 12:53:31 +08:00
TreeSet < String > samples = new TreeSet < String > ( ) ;
2010-03-27 02:34:59 +08:00
if ( sampleName ! = null ) {
2010-03-25 12:53:31 +08:00
samples . add ( sampleName ) ;
2010-03-27 02:34:59 +08:00
} else {
2009-09-08 14:12:18 +08:00
2010-04-01 06:39:56 +08:00
List < Object > rods = tracker . getReferenceMetaData ( INPUT_ROD_NAME ) ;
2010-03-27 02:34:59 +08:00
if ( rods . size ( ) = = 0 )
throw new IllegalStateException ( "VCF record was created, but no rod data is present" ) ;
2010-04-01 06:39:56 +08:00
Object rod = rods . get ( 0 ) ;
2010-03-27 02:34:59 +08:00
if ( rod instanceof RodVCF )
samples . addAll ( Arrays . asList ( ( ( RodVCF ) rod ) . getSampleNames ( ) ) ) ;
2010-04-11 09:43:07 +08:00
else if ( rod instanceof HapMapROD )
samples . addAll ( Arrays . asList ( ( ( HapMapROD ) rod ) . getSampleIDs ( ) ) ) ;
2010-03-27 02:34:59 +08:00
else
samples . addAll ( Arrays . asList ( rec . getSampleNames ( ) ) ) ;
}
2009-09-10 04:04:32 +08:00
2010-03-27 02:34:59 +08:00
vcfwriter = new VCFWriter ( out ) ;
vcfwriter . writeHeader ( new VCFHeader ( hInfo , samples ) ) ;
2009-09-08 14:12:18 +08:00
}
2010-03-25 12:53:31 +08:00
vcfwriter . addRecord ( rec ) ;
2009-09-08 14:12:18 +08:00
}
public Integer reduceInit ( ) {
return 0 ;
}
public Integer reduce ( Integer value , Integer sum ) {
return value + sum ;
}
public void onTraversalDone ( Integer sum ) {
2010-03-27 02:34:59 +08:00
if ( vcfwriter ! = null )
vcfwriter . close ( ) ;
2009-09-08 14:12:18 +08:00
}
}