Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
fa1db3913b
|
|
@ -373,16 +373,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
if ( featureDescriptor != null ) {
|
if ( featureDescriptor != null ) {
|
||||||
tribbleType = featureDescriptor.getName();
|
tribbleType = featureDescriptor.getName();
|
||||||
logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
|
logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||||
|
} else {
|
||||||
|
throw new UserException.CommandLineException(
|
||||||
|
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
|
||||||
|
"Please add an explicit type tag :TYPE listing the correct type from among the supported types: %s",
|
||||||
|
manager.userFriendlyListOfAvailableFeatures()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tribbleType == null ) // error handling
|
|
||||||
throw new UserException.CommandLineException(
|
|
||||||
String.format("Could not parse argument %s with value %s",
|
|
||||||
defaultDefinition.fullName, value));
|
|
||||||
|
|
||||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||||
Class parameterType = getParameterizedTypeClass(type);
|
Class parameterType = getParameterizedTypeClass(type);
|
||||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||||
|
|
@ -395,8 +395,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
value, source.field.getName()));
|
value, source.field.getName()));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new UserException.CommandLineException(
|
throw new UserException.CommandLineException(
|
||||||
String.format("Failed to parse value %s for argument %s.",
|
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||||
value, source.field.getName()));
|
value, source.field.getName(), e.getMessage()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,110 +1,110 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
||||||
|
|
||||||
import org.apache.commons.io.filefilter.FalseFileFilter;
|
import org.apache.commons.io.filefilter.FalseFileFilter;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the ref seq codec
|
* the ref seq codec
|
||||||
*/
|
*/
|
||||||
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
|
public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The parser to use when resolving genome-wide locations.
|
* The parser to use when resolving genome-wide locations.
|
||||||
*/
|
*/
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private boolean zero_coding_length_user_warned = false;
|
private boolean zero_coding_length_user_warned = false;
|
||||||
/**
|
/**
|
||||||
* Set the parser to use when resolving genetic data.
|
* Set the parser to use when resolving genetic data.
|
||||||
* @param genomeLocParser The supplied parser.
|
* @param genomeLocParser The supplied parser.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||||
this.genomeLocParser = genomeLocParser;
|
this.genomeLocParser = genomeLocParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
if (line.startsWith("#")) return null;
|
if (line.startsWith("#")) return null;
|
||||||
String fields[] = line.split("\t");
|
String fields[] = line.split("\t");
|
||||||
if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length);
|
if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length);
|
||||||
String contig_name = fields[2];
|
String contig_name = fields[2];
|
||||||
try {
|
try {
|
||||||
return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
} catch ( UserException.MalformedGenomeLoc e ) {
|
} catch ( UserException.MalformedGenomeLoc e ) {
|
||||||
Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")");
|
Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Fills this object from a text line in RefSeq (UCSC) text dump file */
|
/** Fills this object from a text line in RefSeq (UCSC) text dump file */
|
||||||
@Override
|
@Override
|
||||||
public RefSeqFeature decode(String line) {
|
public RefSeqFeature decode(String line) {
|
||||||
if (line.startsWith("#")) return null;
|
if (line.startsWith("#")) return null;
|
||||||
String fields[] = line.split("\t");
|
String fields[] = line.split("\t");
|
||||||
|
|
||||||
// we reference position 15 in the split array below, make sure we have at least that many columns
|
// we reference position 15 in the split array below, make sure we have at least that many columns
|
||||||
if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length);
|
if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length);
|
||||||
String contig_name = fields[2];
|
String contig_name = fields[2];
|
||||||
RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
|
|
||||||
feature.setTranscript_id(fields[1]);
|
feature.setTranscript_id(fields[1]);
|
||||||
if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
|
if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
|
||||||
else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1);
|
else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1);
|
||||||
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
||||||
|
|
||||||
int coding_start = Integer.parseInt(fields[6])+1;
|
int coding_start = Integer.parseInt(fields[6])+1;
|
||||||
int coding_stop = Integer.parseInt(fields[7]);
|
int coding_stop = Integer.parseInt(fields[7]);
|
||||||
|
|
||||||
if ( coding_start > coding_stop ) {
|
if ( coding_start > coding_stop ) {
|
||||||
if ( ! zero_coding_length_user_warned ) {
|
if ( ! zero_coding_length_user_warned ) {
|
||||||
Utils.warnUser("RefSeq file contains transcripts with zero coding length. "+
|
Utils.warnUser("RefSeq file contains transcripts with zero coding length. "+
|
||||||
"Such transcripts will be ignored (this warning is printed only once)");
|
"Such transcripts will be ignored (this warning is printed only once)");
|
||||||
zero_coding_length_user_warned = true;
|
zero_coding_length_user_warned = true;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, coding_start, coding_stop));
|
feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, coding_start, coding_stop));
|
||||||
feature.setGene_name(fields[12]);
|
feature.setGene_name(fields[12]);
|
||||||
String[] exon_starts = fields[9].split(",");
|
String[] exon_starts = fields[9].split(",");
|
||||||
String[] exon_stops = fields[10].split(",");
|
String[] exon_stops = fields[10].split(",");
|
||||||
String[] eframes = fields[15].split(",");
|
String[] eframes = fields[15].split(",");
|
||||||
|
|
||||||
if ( exon_starts.length != exon_stops.length )
|
if ( exon_starts.length != exon_stops.length )
|
||||||
throw new UserException.MalformedFile("Data format error: numbers of exon start and stop positions differ for line=" + line);
|
throw new UserException.MalformedFile("Data format error: numbers of exon start and stop positions differ for line=" + line);
|
||||||
if ( exon_starts.length != eframes.length )
|
if ( exon_starts.length != eframes.length )
|
||||||
throw new UserException.MalformedFile("Data format error: numbers of exons and exon frameshifts differ for line=" + line);
|
throw new UserException.MalformedFile("Data format error: numbers of exons and exon frameshifts differ for line=" + line);
|
||||||
|
|
||||||
ArrayList<GenomeLoc> exons = new ArrayList<GenomeLoc>(exon_starts.length);
|
ArrayList<GenomeLoc> exons = new ArrayList<GenomeLoc>(exon_starts.length);
|
||||||
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
|
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
|
||||||
|
|
||||||
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
||||||
exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||||
exon_frames.add(Integer.decode(eframes[i]));
|
exon_frames.add(Integer.decode(eframes[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
feature.setExons(exons);
|
feature.setExons(exons);
|
||||||
feature.setExon_frames(exon_frames);
|
feature.setExon_frames(exon_frames);
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object readHeader(LineReader reader) {
|
public Object readHeader(LineReader reader) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Class getFeatureType() {
|
public Class getFeatureType() {
|
||||||
return RefSeqCodec.class;
|
return RefSeqFeature.class;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -161,19 +161,19 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
return Arrays.asList(
|
return Arrays.asList(
|
||||||
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"),
|
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"),
|
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"),
|
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"),
|
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"),
|
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"),
|
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
|
||||||
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"),
|
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||||
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
||||||
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"),
|
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"),
|
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"),
|
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"),
|
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size")
|
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,31 @@ import static java.lang.Math.log10;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes files produced by Beagle imputation engine and creates a vcf with modified annotations.
|
* Takes files produced by Beagle imputation engine and creates a vcf with modified annotations.
|
||||||
|
*
|
||||||
|
* <p>This walker is intended to be run after Beagle has successfully executed. The full calling sequence for using Beagle along with the GATK is: </p>
|
||||||
|
*
|
||||||
|
* <p>1. Run ProduceBeagleInputWalker. </p>
|
||||||
|
* <p>2. Run Beagle</p>
|
||||||
|
* <p>3. Uncompress output files</p>
|
||||||
|
* <p>4. Run BeagleOutputToVCFWalker.</p>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Note that this walker requires all input files produced by Beagle.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <h2>Example</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx4000m -jar dist/GenomeAnalysisTK.jar \
|
||||||
|
* -R reffile.fasta -T BeagleOutputToVCF \
|
||||||
|
* -V input_vcf.vcf \
|
||||||
|
* -beagleR2:BEAGLE /myrun.beagle_output.r2 \
|
||||||
|
* -beaglePhased:BEAGLE /myrun.beagle_output.phased \
|
||||||
|
* -beagleProbs:BEAGLE /myrun.beagle_output.gprobs \
|
||||||
|
* -o output_vcf.vcf
|
||||||
|
* </pre>
|
||||||
|
|
||||||
|
* <p> Note that Beagle produces some of these files compressed as .gz, so gunzip must be run on them to decompress them before the walker is run. </p>
|
||||||
|
|
||||||
*/
|
*/
|
||||||
public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
@ -57,22 +82,18 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
|
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
|
||||||
public RodBinding<VariantContext> comp;
|
public RodBinding<VariantContext> comp;
|
||||||
|
|
||||||
@Input(fullName="beagleR2", shortName = "beagleR2", doc="VCF file", required=true)
|
@Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true)
|
||||||
public RodBinding<BeagleFeature> beagleR2;
|
public RodBinding<BeagleFeature> beagleR2;
|
||||||
|
|
||||||
@Input(fullName="beagleProbs", shortName = "beagleProbs", doc="VCF file", required=true)
|
@Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true)
|
||||||
public RodBinding<BeagleFeature> beagleProbs;
|
public RodBinding<BeagleFeature> beagleProbs;
|
||||||
|
|
||||||
@Input(fullName="beaglePhased", shortName = "beaglePhased", doc="VCF file", required=true)
|
@Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true)
|
||||||
public RodBinding<BeagleFeature> beaglePhased;
|
public RodBinding<BeagleFeature> beaglePhased;
|
||||||
|
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="VCF File to which variants should be written",required=true)
|
||||||
protected VCFWriter vcfWriter = null;
|
protected VCFWriter vcfWriter = null;
|
||||||
|
|
||||||
@Argument(fullName="output_file", shortName="output", doc="Please use --out instead" ,required=false)
|
|
||||||
@Deprecated
|
|
||||||
protected String oldOutputArg;
|
|
||||||
|
|
||||||
@Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false)
|
@Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false)
|
||||||
public boolean DONT_FILTER_MONOMORPHIC_SITES = false;
|
public boolean DONT_FILTER_MONOMORPHIC_SITES = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,19 +48,45 @@ import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file
|
* Converts the input VCF into a format accepted by the Beagle imputation/analysis program.
|
||||||
|
* <p>
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* A VCF with variants to convert to Beagle format
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Outputs</h2>
|
||||||
|
* <p>
|
||||||
|
* A single text file which can be fed to Beagle
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Optional: A file with a list of markers
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar dist/GenomeAnalysisTK.jar -L 20 \
|
||||||
|
* -R reffile.fasta -T ProduceBeagleInput \
|
||||||
|
* -V path_to_input_vcf/inputvcf.vcf -o path_to_beagle_output/beagle_output
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
@Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false)
|
@Hidden
|
||||||
|
@Input(fullName="validation", shortName = "validation", doc="Validation VCF file", required=false)
|
||||||
public RodBinding<VariantContext> validation;
|
public RodBinding<VariantContext> validation;
|
||||||
|
|
||||||
|
|
||||||
@Output(doc="File to which BEAGLE input should be written",required=true)
|
@Output(doc="File to which BEAGLE input should be written",required=true)
|
||||||
protected PrintStream beagleWriter = null;
|
protected PrintStream beagleWriter = null;
|
||||||
|
|
||||||
@Output(doc="File to which BEAGLE markers should be written", shortName="markers", fullName = "markers", required = false)
|
@Hidden
|
||||||
|
@Output(doc="File to which BEAGLE markers should be written", shortName="markers", fullName = "markers", required = false)
|
||||||
protected PrintStream markers = null;
|
protected PrintStream markers = null;
|
||||||
int markerCounter = 1;
|
int markerCounter = 1;
|
||||||
|
|
||||||
|
|
@ -73,14 +99,19 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(doc="VQSqual key", shortName = "vqskey", required=false)
|
@Argument(doc="VQSqual key", shortName = "vqskey", required=false)
|
||||||
protected String VQSLOD_KEY = "VQSqual";
|
protected String VQSLOD_KEY = "VQSqual";
|
||||||
|
|
||||||
@Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false)
|
@Hidden
|
||||||
|
@Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false)
|
||||||
public double insertedNoCallRate = 0;
|
public double insertedNoCallRate = 0;
|
||||||
@Argument(fullName = "validation_genotype_ptrue", shortName = "valp", doc = "Flat probability to assign to validation genotypes. Will override GL field.", required = false)
|
@Hidden
|
||||||
|
@Argument(fullName = "validation_genotype_ptrue", shortName = "valp", doc = "Flat probability to assign to validation genotypes. Will override GL field.", required = false)
|
||||||
public double validationPrior = -1.0;
|
public double validationPrior = -1.0;
|
||||||
@Argument(fullName = "validation_bootstrap", shortName = "bs", doc = "Proportion of records to be used in bootstrap set", required = false)
|
@Hidden
|
||||||
|
@Argument(fullName = "validation_bootstrap", shortName = "bs", doc = "Proportion of records to be used in bootstrap set", required = false)
|
||||||
public double bootstrap = 0.0;
|
public double bootstrap = 0.0;
|
||||||
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
|
@Hidden
|
||||||
|
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
|
||||||
VCFWriter bootstrapVCFOutput = null;
|
VCFWriter bootstrapVCFOutput = null;
|
||||||
|
|
||||||
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
|
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
|
||||||
public boolean CHECK_IS_MALE_ON_CHR_X = false;
|
public boolean CHECK_IS_MALE_ON_CHR_X = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -160,8 +160,8 @@ public class ListFileUtils {
|
||||||
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures()));
|
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures()));
|
||||||
if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) )
|
if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) )
|
||||||
throw new UserException.BadArgumentValue(rodBinding.getName(),
|
throw new UserException.BadArgumentValue(rodBinding.getName(),
|
||||||
String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.",
|
String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s producing %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.",
|
||||||
rodBinding.getName(), rodBinding.getType(), descriptor.getName()));
|
rodBinding.getName(), rodBinding.getType(), descriptor.getName(), descriptor.getFeatureClass()));
|
||||||
|
|
||||||
|
|
||||||
rodBindings.add(triplet);
|
rodBindings.add(triplet);
|
||||||
|
|
|
||||||
|
|
@ -133,7 +133,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
|
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
|
||||||
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
|
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("c08648a078368c80530bff004b3157f1")
|
Arrays.asList("03eae1dab19a9358250890594bf53607")
|
||||||
);
|
);
|
||||||
executeTest("Testing SnpEff annotations", spec);
|
executeTest("Testing SnpEff annotations", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue