Support for selecting only variants with specific IDs from a file in SelectVariants
-- Cleaned up unused variables as well
This commit is contained in:
parent
d6d2f49c88
commit
b6067be952
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -140,7 +142,7 @@ import java.util.*;
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T SelectVariants \
|
* -T SelectVariants \
|
||||||
* --variant input.vcf \
|
* --variant input.vcf \
|
||||||
* -family NA12891+NA12892=NA12878 \
|
* -bed family.ped \
|
||||||
* -mvq 50 \
|
* -mvq 50 \
|
||||||
* -o violations.vcf
|
* -o violations.vcf
|
||||||
*
|
*
|
||||||
|
|
@ -250,16 +252,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
||||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
|
|
||||||
private File FAMILY_STRUCTURE_FILE = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* String formatted as dad+mom=child where these parameters determine which sample names are examined.
|
|
||||||
*/
|
|
||||||
@Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
|
||||||
private String FAMILY_STRUCTURE = "";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
|
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
|
||||||
*/
|
*/
|
||||||
|
|
@ -293,6 +285,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
||||||
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If provided, we will only include variants whose ID field is present in this list of ids. The matching
|
||||||
|
* is exact string matching. The file format is just one ID per line
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
|
||||||
|
private File rsIDFile = null;
|
||||||
|
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
||||||
|
|
@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
private int positionToAdd = 0;
|
private int positionToAdd = 0;
|
||||||
private RandomVariantStructure [] variantArray;
|
private RandomVariantStructure [] variantArray;
|
||||||
|
|
||||||
|
|
||||||
/* Variables used for random selection with AF boosting */
|
|
||||||
private ArrayList<Double> afBreakpoints = null;
|
|
||||||
private ArrayList<Double> afBoosts = null;
|
|
||||||
double bkDelta = 0.0;
|
|
||||||
|
|
||||||
private PrintStream outMVFileStream = null;
|
private PrintStream outMVFileStream = null;
|
||||||
|
|
||||||
//Random number generator for the genotypes to remove
|
//Random number generator for the genotypes to remove
|
||||||
private Random randomGenotypes = new Random();
|
private Random randomGenotypes = new Random();
|
||||||
|
|
||||||
|
private Set<String> IDsToKeep = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
*/
|
*/
|
||||||
|
|
@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track");
|
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track");
|
||||||
|
|
||||||
|
|
||||||
|
/** load in the IDs file to a hashset for matching */
|
||||||
|
if ( rsIDFile != null ) {
|
||||||
|
IDsToKeep = new HashSet<String>();
|
||||||
|
try {
|
||||||
|
for ( final String line : new XReadLines(rsIDFile).readLines() ) {
|
||||||
|
IDsToKeep.add(line.trim());
|
||||||
|
}
|
||||||
|
logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile);
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -460,6 +467,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
}
|
}
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
|
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||||
|
continue;
|
||||||
|
|
||||||
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
@ -513,10 +523,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||||
vcfWriter.add(sub);
|
vcfWriter.add(sub);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue