Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c384677917
|
|
@ -475,16 +475,10 @@ public class UnifiedGenotyperEngine {
|
|||
if ( alleleCountsofMLE.size() > 0 ) {
|
||||
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
|
||||
final int AN = builder.make().getCalledChrCount();
|
||||
|
||||
// let's sanity check that we don't have an invalid MLE value in there
|
||||
for ( int MLEAC : alleleCountsofMLE ) {
|
||||
if ( MLEAC > AN )
|
||||
throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart()));
|
||||
}
|
||||
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
for ( int AC : alleleCountsofMLE )
|
||||
MLEfrequencies.add((double)AC / (double)AN);
|
||||
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
|
||||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,7 +46,8 @@ public class AlleleCount extends VariantStratifier {
|
|||
int AC = 0; // by default, the site is considered monomorphic
|
||||
|
||||
if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
|
||||
AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0);
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
AC = Math.min(eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom);
|
||||
} else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
|
||||
AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
} else if ( eval.isVariant() ) {
|
||||
|
|
@ -56,7 +57,7 @@ public class AlleleCount extends VariantStratifier {
|
|||
|
||||
// make sure that the AC isn't invalid
|
||||
if ( AC > nchrom )
|
||||
throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " +
|
||||
throw new UserException.MalformedVCF(String.format("The AC value (%d) at position %s:%d " +
|
||||
"is larger than the number of chromosomes over all samples (%d)", AC,
|
||||
eval.getChr(), eval.getStart(), nchrom));
|
||||
|
||||
|
|
|
|||
|
|
@ -73,6 +73,13 @@ import java.util.*;
|
|||
* efficiency. However, since this merge runs in only one thread, you can quickly reach diminishing
|
||||
* returns with the number of parallel threads. -nt 4 works well but -nt 8 may be too much.
|
||||
*
|
||||
* Some fine details about the merging algorithm:
|
||||
* <ul>
|
||||
* <li> As of GATK 2.1, when merging multiple VCF records at a site, the combined VCF record has the QUAL of
|
||||
* the first VCF record with a non-MISSING QUAL value. The previous behavior was to take the
|
||||
* max QUAL, which resulted in sometime strange downstream confusion</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more variant sets to combine.
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
|
||||
|
||||
logger.info("Parsing data stream with BCF version " + bcfVersion);
|
||||
logger.debug("Parsing data stream with BCF version " + bcfVersion);
|
||||
|
||||
final int headerSizeInBytes = BCF2Type.INT32.read(inputStream);
|
||||
|
||||
|
|
|
|||
|
|
@ -514,7 +514,7 @@ public class VariantContextUtils {
|
|||
int depth = 0;
|
||||
int maxAC = -1;
|
||||
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
|
||||
double log10PError = 1;
|
||||
double log10PError = CommonInfo.NO_LOG10_PERROR;
|
||||
VariantContext vcWithMaxAC = null;
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
|
|
@ -542,7 +542,9 @@ public class VariantContextUtils {
|
|||
|
||||
mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
|
||||
|
||||
log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1);
|
||||
// We always take the QUAL of the first VC with a non-MISSING qual for the combined value
|
||||
if ( log10PError == CommonInfo.NO_LOG10_PERROR )
|
||||
log10PError = vc.getLog10PError();
|
||||
|
||||
filters.addAll(vc.getFilters());
|
||||
|
||||
|
|
|
|||
|
|
@ -113,13 +113,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "629656bfef7713c23f3a593523503b2f"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); }
|
||||
|
||||
@Test public void uniqueSNPs() {
|
||||
// parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s
|
||||
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true);
|
||||
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true);
|
||||
}
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }
|
||||
|
|
@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
|
||||
Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00"));
|
||||
cvExecuteTest("threeWayWithRefs", spec, true);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue