Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
5c565d28b9
|
|
@ -131,7 +131,7 @@ public class AlignmentContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) {
|
||||
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup) {
|
||||
return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
|
||||
protected Boolean LIST = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
|
||||
protected String ASSUME_SINGLE_SAMPLE = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
|
@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
|
||||
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
|
||||
|
||||
// if there are no valid samples, warn the user
|
||||
if ( samples.size() == 0 ) {
|
||||
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
|
||||
}
|
||||
|
||||
if ( USE_ALL_ANNOTATIONS )
|
||||
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
|
||||
else
|
||||
|
|
@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
Map<String, AlignmentContext> stratifiedContexts;
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
if ( ! context.hasExtendedEventPileup() ) {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
|
||||
} else {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup());
|
||||
}
|
||||
if ( stratifiedContexts != null ) {
|
||||
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
// if we're supposed to assume a single sample, do so
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
|
||||
|
||||
|
|
|
|||
|
|
@ -96,11 +96,6 @@ public class UnifiedArgumentCollection {
|
|||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
// control the error modes
|
||||
@Hidden
|
||||
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
|
||||
public String ASSUME_SINGLE_SAMPLE = null;
|
||||
|
||||
/**
|
||||
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
||||
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
|
||||
|
|
@ -170,7 +165,6 @@ public class UnifiedArgumentCollection {
|
|||
uac.GenotypingMode = GenotypingMode;
|
||||
uac.OutputMode = OutputMode;
|
||||
uac.COMPUTE_SLOD = COMPUTE_SLOD;
|
||||
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
|
||||
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
|
||||
|
|
|
|||
|
|
@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
**/
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
// if we're supposed to assume a single sample, do so
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
// initialize the verbose writer
|
||||
if ( verboseWriter != null )
|
||||
|
|
|
|||
|
|
@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null,
|
||||
// get the number of samples
|
||||
// if we're supposed to assume a single sample, do so
|
||||
UAC.ASSUME_SINGLE_SAMPLE != null ?
|
||||
new TreeSet<String>(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) :
|
||||
SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
||||
|
|
@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine {
|
|||
pileup = rawContext.getExtendedEventPileup();
|
||||
else if (rawContext.hasBasePileup())
|
||||
pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
||||
}
|
||||
|
|
@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine {
|
|||
pileup = rawContext.getExtendedEventPileup();
|
||||
else if (rawContext.hasBasePileup())
|
||||
pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
||||
}
|
||||
|
|
@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
} else {
|
||||
|
||||
|
|
@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
}
|
||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
||||
|
||||
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
|
||||
int numDeletions = 0;
|
||||
|
|
|
|||
|
|
@ -817,7 +817,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
|
||||
// TODO -- we need a better solution here
|
||||
GATKSAMRecord read = aRead.getRead();
|
||||
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
|
||||
if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it
|
||||
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
|
||||
|
||||
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence
|
||||
int neededBasesToLeft = leftmostIndex - read.getAlignmentStart();
|
||||
|
|
|
|||
|
|
@ -58,15 +58,12 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
|
||||
protected double fraction = 0.5;
|
||||
|
||||
protected int iFraction;
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
public void initialize() {
|
||||
if ( fraction < 0.0 || fraction > 1.0 )
|
||||
throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1");
|
||||
iFraction = (int)(fraction * 1000.0);
|
||||
|
||||
// setup the header info
|
||||
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
|
||||
|
|
@ -93,8 +90,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
for ( VariantContext vc : vcs ) {
|
||||
int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000);
|
||||
if ( random < iFraction )
|
||||
double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||
if ( random < fraction )
|
||||
vcfWriter1.add(vc);
|
||||
else
|
||||
vcfWriter2.add(vc);
|
||||
|
|
@ -107,5 +104,8 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
|
||||
public void onTraversalDone(Integer result) { logger.info(result + " records processed."); }
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info(result + " records processed.");
|
||||
vcfWriter2.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -162,19 +162,27 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
||||
*/
|
||||
public Feature decodeLoc(String line) {
|
||||
String[] locParts = new String[6];
|
||||
lineNo++;
|
||||
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||
|
||||
// our header cannot be null, we need the genotype sample names and counts
|
||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||
|
||||
final String[] locParts = new String[6];
|
||||
int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||
|
||||
if ( nParts != 6 )
|
||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo);
|
||||
|
||||
// get our alleles (because the end position depends on them)
|
||||
String ref = getCachedString(locParts[3].toUpperCase());
|
||||
String alts = getCachedString(locParts[4].toUpperCase());
|
||||
List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
||||
final String ref = getCachedString(locParts[3].toUpperCase());
|
||||
final String alts = getCachedString(locParts[4].toUpperCase());
|
||||
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
||||
|
||||
// find out our location
|
||||
int start = Integer.valueOf(locParts[1]);
|
||||
final int start = Integer.valueOf(locParts[1]);
|
||||
int stop = start;
|
||||
|
||||
// ref alleles don't need to be single bases for monomorphic sites
|
||||
|
|
|
|||
|
|
@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTabixAnnotations() {
|
||||
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
||||
|
||||
private static String cmdRoot = "-T VariantEval" +
|
||||
" -R " + b36KGReference;
|
||||
|
|
@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testPerSampleAndSubsettedSampleHaveSameResults() {
|
||||
String md5 = "b0565ac61b2860248e4abd478a177b5e";
|
||||
String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
|
|
@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"--eval " + fundamentalTestSNPsVCF,
|
||||
"-noEV",
|
||||
"-EV CompOverlap",
|
||||
"-sn HG00625",
|
||||
"-sn NA12045",
|
||||
"-noST",
|
||||
"-L " + fundamentalTestSNPsVCF,
|
||||
"-o %s"
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
override def init() {
|
||||
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
this.referenceSequence = this.originalGATK.reference_sequence
|
||||
if (this.originalGATK.intervals.isEmpty && this.originalGATK.intervalsString.isEmpty) {
|
||||
if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
|
||||
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs
|
||||
} else {
|
||||
this.intervals ++= this.originalGATK.intervals.map(_.toString)
|
||||
|
|
|
|||
Loading…
Reference in New Issue