Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-11-06 10:26:19 -05:00
commit 5c565d28b9
12 changed files with 44 additions and 58 deletions

View File

@ -131,7 +131,7 @@ public class AlignmentContextUtils {
} }
} }
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) { public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup) {
return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup)); return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup));
} }

View File

@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false; protected Boolean LIST = false;
@Hidden
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
protected String ASSUME_SINGLE_SAMPLE = null;
@Hidden @Hidden
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false; protected boolean indelsOnly = false;
@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
List<String> rodName = Arrays.asList(variantCollection.variants.getName()); List<String> rodName = Arrays.asList(variantCollection.variants.getName());
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
// if there are no valid samples, warn the user
if ( samples.size() == 0 ) {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
}
if ( USE_ALL_ANNOTATIONS ) if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
else else
@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
Map<String, AlignmentContext> stratifiedContexts; Map<String, AlignmentContext> stratifiedContexts;
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
if ( ! context.hasExtendedEventPileup() ) { if ( ! context.hasExtendedEventPileup() ) {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
} else { } else {
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup());
} }
if ( stratifiedContexts != null ) { if ( stratifiedContexts != null ) {
annotatedVCs = new ArrayList<VariantContext>(VCs.size()); annotatedVCs = new ArrayList<VariantContext>(VCs.size());

View File

@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.TreeSet;
/** /**
@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
public void initialize() { public void initialize() {
// get all of the unique sample names // get all of the unique sample names
// if we're supposed to assume a single sample, do so Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
Set<String> samples = new TreeSet<String>();
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);

View File

@ -96,11 +96,6 @@ public class UnifiedArgumentCollection {
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false) @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles; public RodBinding<VariantContext> alleles;
// control the error modes
@Hidden
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
public String ASSUME_SINGLE_SAMPLE = null;
/** /**
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value. * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -170,7 +165,6 @@ public class UnifiedArgumentCollection {
uac.GenotypingMode = GenotypingMode; uac.GenotypingMode = GenotypingMode;
uac.OutputMode = OutputMode; uac.OutputMode = OutputMode;
uac.COMPUTE_SLOD = COMPUTE_SLOD; uac.COMPUTE_SLOD = COMPUTE_SLOD;
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE; uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;

View File

@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
**/ **/
public void initialize() { public void initialize() {
// get all of the unique sample names // get all of the unique sample names
// if we're supposed to assume a single sample, do so Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
Set<String> samples = new TreeSet<String>();
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
// initialize the verbose writer // initialize the verbose writer
if ( verboseWriter != null ) if ( verboseWriter != null )

View File

@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine {
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"}) @Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
// get the number of samples
// if we're supposed to assume a single sample, do so
UAC.ASSUME_SINGLE_SAMPLE != null ?
new TreeSet<String>(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) :
SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
} }
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine {
pileup = rawContext.getExtendedEventPileup(); pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup()) else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup(); pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
} }
@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine {
pileup = rawContext.getExtendedEventPileup(); pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup()) else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup(); pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
} }
@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine {
return null; return null;
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} else { } else {
@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine {
return null; return null;
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} }
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) { if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
int numDeletions = 0; int numDeletions = 0;

View File

@ -817,7 +817,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010]. // For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
// TODO -- we need a better solution here // TODO -- we need a better solution here
GATKSAMRecord read = aRead.getRead(); GATKSAMRecord read = aRead.getRead();
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence // before we fix the attribute tags we first need to make sure we have enough of the reference sequence
int neededBasesToLeft = leftmostIndex - read.getAlignmentStart(); int neededBasesToLeft = leftmostIndex - read.getAlignmentStart();

View File

@ -58,15 +58,12 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false) @Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
protected double fraction = 0.5; protected double fraction = 0.5;
protected int iFraction;
/** /**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/ */
public void initialize() { public void initialize() {
if ( fraction < 0.0 || fraction > 1.0 ) if ( fraction < 0.0 || fraction > 1.0 )
throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1"); throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1");
iFraction = (int)(fraction * 1000.0);
// setup the header info // setup the header info
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName()); final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
@ -93,8 +90,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation()); Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
for ( VariantContext vc : vcs ) { for ( VariantContext vc : vcs ) {
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
if ( random < iFraction ) if ( random < fraction )
vcfWriter1.add(vc); vcfWriter1.add(vc);
else else
vcfWriter2.add(vc); vcfWriter2.add(vc);
@ -107,5 +104,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return value + sum; } public Integer reduce(Integer value, Integer sum) { return value + sum; }
public void onTraversalDone(Integer result) { logger.info(result + " records processed."); } public void onTraversalDone(Integer result) {
logger.info(result + " records processed.");
vcfWriter2.close();
}
} }

View File

@ -162,19 +162,27 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @return a feature, (not guaranteed complete) that has the correct start and stop * @return a feature, (not guaranteed complete) that has the correct start and stop
*/ */
public Feature decodeLoc(String line) { public Feature decodeLoc(String line) {
String[] locParts = new String[6]; lineNo++;
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
// our header cannot be null, we need the genotype sample names and counts
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
final String[] locParts = new String[6];
int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true); int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
if ( nParts != 6 ) if ( nParts != 6 )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo); throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo);
// get our alleles (because the end position depends on them) // get our alleles (because the end position depends on them)
String ref = getCachedString(locParts[3].toUpperCase()); final String ref = getCachedString(locParts[3].toUpperCase());
String alts = getCachedString(locParts[4].toUpperCase()); final String alts = getCachedString(locParts[4].toUpperCase());
List<Allele> alleles = parseAlleles(ref, alts, lineNo); final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
// find out our location // find out our location
int start = Integer.valueOf(locParts[1]); final int start = Integer.valueOf(locParts[1]);
int stop = start; int stop = start;
// ref alleles don't need to be single bases for monomorphic sites // ref alleles don't need to be single bases for monomorphic sites

View File

@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("using expression", spec); executeTest("using expression", spec);
} }
/**
 * Integration test for annotating with an expression that refers to the ID of a bound resource.
 *
 * The command line is baseTestString() (defined outside this view) extended with:
 * a resource bound under the name "foo" (targetAnnotations.vcf), the "Standard" annotation
 * group (-G), vcfexample3empty.vcf as the --variant:VCF3 input, the expression "foo.ID" (-E),
 * and the same vcfexample3empty.vcf passed again as the -L argument.
 *
 * The result is checked against a single expected MD5 string.
 * NOTE(review): presumably "foo.ID" pulls the ID column of matching records from the "foo"
 * resource into the output -- confirm against the VariantAnnotator expression handling.
 */
@Test
public void testUsingExpressionWithID() {
// NOTE(review): the literal 1 passed after the command string is presumably the number of
// expected output files, matching the single MD5 below -- confirm against WalkerTestSpec.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
// run the spec under the descriptive test name used in reports
executeTest("using expression with ID", spec);
}
@Test @Test
public void testTabixAnnotations() { public void testTabixAnnotations() {
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";

View File

@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval"; private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
private static String cmdRoot = "-T VariantEval" + private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference; " -R " + b36KGReference;
@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test @Test
public void testPerSampleAndSubsettedSampleHaveSameResults() { public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "b0565ac61b2860248e4abd478a177b5e"; String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine( buildCommandLine(
@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"--eval " + fundamentalTestSNPsVCF, "--eval " + fundamentalTestSNPsVCF,
"-noEV", "-noEV",
"-EV CompOverlap", "-EV CompOverlap",
"-sn HG00625", "-sn NA12045",
"-noST", "-noST",
"-L " + fundamentalTestSNPsVCF, "-L " + fundamentalTestSNPsVCF,
"-o %s" "-o %s"

View File

@ -56,7 +56,7 @@ trait GATKScatterFunction extends ScatterFunction {
override def init() { override def init() {
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK] this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
this.referenceSequence = this.originalGATK.reference_sequence this.referenceSequence = this.originalGATK.reference_sequence
if (this.originalGATK.intervals.isEmpty && this.originalGATK.intervalsString.isEmpty) { if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs
} else { } else {
this.intervals ++= this.originalGATK.intervals.map(_.toString) this.intervals ++= this.originalGATK.intervals.map(_.toString)