Merge pull request #389 from broadinstitute/rp_single_sample_calling_pipline_HC
Created a single sample calling pipeline which leverages the reference m...
This commit is contained in:
commit
08474a39fb
|
|
@ -112,18 +112,18 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
|
||||
private void annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
final HashMap<Byte, Integer> alleleCounts = new HashMap<>();
|
||||
for ( final Allele allele : vc.getAlleles() )
|
||||
alleleCounts.put(allele.getBases()[0], 0);
|
||||
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
final ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( alleleCounts.containsKey(p.getBase()) )
|
||||
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+p.getRepresentativeCount());
|
||||
}
|
||||
|
||||
// we need to add counts in the correct order
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
final int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
||||
|
|
@ -141,7 +141,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
|
||||
for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); }
|
||||
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
for ( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
||||
if (! a.isInformative() ) continue; // read is non-informative
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota
|
|||
import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -97,6 +99,13 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes() );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
return pValueForBestTable(tableFromPerSampleAnnotations, null);
|
||||
}
|
||||
}
|
||||
|
||||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
|
||||
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
|
||||
|
|
@ -117,6 +126,32 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the FisherStrand table by retrieving the per-sample strand bias annotation and adding them together
|
||||
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
|
||||
* @return the table used for the FisherStrand p-value calculation, will be null if none of the genotypes contain the per-sample SB annotation
|
||||
*/
|
||||
private int[][] getTableFromSamples( final GenotypesContext genotypes ) {
|
||||
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
|
||||
|
||||
final int[] sbArray = {0,0,0,0}; // forward-reverse -by- alternate-reference
|
||||
boolean foundData = false;
|
||||
|
||||
for( final Genotype g : genotypes ) {
|
||||
if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
|
||||
continue;
|
||||
|
||||
foundData = true;
|
||||
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
|
||||
final int[] data = encodeSBBS(sbbsString);
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
sbArray[index] += data[index];
|
||||
}
|
||||
}
|
||||
|
||||
return ( foundData ? decodeSBBS(sbArray) : null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
|
||||
*
|
||||
|
|
@ -148,12 +183,56 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(FS);
|
||||
return Collections.singletonList(FS);
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to turn the FisherStrand table into the SB annotation array
|
||||
* @param table the table used by the FisherStrand annotation
|
||||
* @return the array used by the per-sample Strand Bias annotation
|
||||
*/
|
||||
public static int[] getContingencyArray( final int[][] table ) {
|
||||
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
|
||||
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
|
||||
final int[] array = new int[4]; // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
|
||||
array[0] = table[0][0];
|
||||
array[1] = table[0][1];
|
||||
array[2] = table[1][0];
|
||||
array[3] = table[1][1];
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to parse the genotype annotation into the SB annotation array
|
||||
* @param string the string that is returned by genotype.getAnnotation("SB")
|
||||
* @return the array used by the per-sample Strand Bias annotation
|
||||
*/
|
||||
private static int[] encodeSBBS( final String string ) {
|
||||
final int[] array = new int[4];
|
||||
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
|
||||
for( int index = 0; index < 4; index++ ) {
|
||||
array[index] = Integer.parseInt(tokenizer.nextToken());
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to turn the SB annotation array into the FisherStrand table
|
||||
* @param array the array used by the per-sample Strand Bias annotation
|
||||
* @return the table used by the FisherStrand annotation
|
||||
*/
|
||||
private static int[][] decodeSBBS( final int[] array ) {
|
||||
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
|
||||
final int[][] table = new int[2][2];
|
||||
table[0][0] = array[0];
|
||||
table[0][1] = array[1];
|
||||
table[1][0] = array[2];
|
||||
table[1][1] = array[3];
|
||||
return table;
|
||||
}
|
||||
|
||||
private Double pValueForContingencyTable(int[][] originalTable) {
|
||||
|
|
@ -284,13 +363,16 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
* allele2 # #
|
||||
* @return a 2x2 contingency table
|
||||
*/
|
||||
private static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap, final VariantContext vc) {
|
||||
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap, final VariantContext vc) {
|
||||
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
|
||||
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
|
||||
int[][] table = new int[2][2];
|
||||
final int[][] table = new int[2][2];
|
||||
|
||||
for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
final int representativeCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
|
||||
|
|
|
|||
|
|
@ -76,8 +76,10 @@ import java.util.*;
|
|||
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
|
||||
private Set<String> founderIds;
|
||||
|
||||
@Override
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
|
|
@ -92,15 +94,15 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
private Map<String, Object> calculateIC(final VariantContext vc) {
|
||||
final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds);
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant())
|
||||
if (genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant())
|
||||
return null;
|
||||
|
||||
int idxAA = 0, idxAB = 1, idxBB = 2;
|
||||
|
||||
if (!vc.isBiallelic()) {
|
||||
// for non-bliallelic case, do test with most common alt allele.
|
||||
// Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB.
|
||||
int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
|
||||
// Get then corresponding indices in GL vectors to retrieve GL of AA,AB and BB.
|
||||
final int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
|
||||
idxAA = idxVector[0];
|
||||
idxAB = idxVector[1];
|
||||
idxBB = idxVector[2];
|
||||
|
|
@ -132,12 +134,12 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
final double q = 1.0 - p; // expected alternative allele frequency
|
||||
final double F = 1.0 - ( hetCount / ( 2.0 * p * q * (double)N ) ); // inbreeding coefficient
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", F));
|
||||
return map;
|
||||
return Collections.singletonMap(getKeyNames().get(0), (Object)String.format("%.4f", F));
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("InbreedingCoeff"); }
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Collections.singletonList(INBREEDING_COEFFICIENT_KEY_NAME); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("InbreedingCoeff", 1, VCFHeaderLineType.Float, "Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation")); }
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Collections.singletonList(new VCFInfoHeaderLine(INBREEDING_COEFFICIENT_KEY_NAME, 1, VCFHeaderLineType.Float, "Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation")); }
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias
|
||||
* User: rpoplin
|
||||
* Date: 8/28/13
|
||||
*/
|
||||
|
||||
public class StrandBiasBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB";
|
||||
|
||||
@Override
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final AlignmentContext stratifiedContext,
|
||||
final VariantContext vc,
|
||||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
|
||||
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
|
||||
return;
|
||||
|
||||
if (alleleLikelihoodMap == null )
|
||||
throw new IllegalStateException("StrandBiasBySample can only be used with likelihood based annotations in the HaplotypeCaller");
|
||||
|
||||
final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc);
|
||||
|
||||
gb.attribute(STRAND_BIAS_BY_SAMPLE_KEY_NAME, FisherStrand.getContingencyArray(table));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Collections.singletonList(STRAND_BIAS_BY_SAMPLE_KEY_NAME); }
|
||||
|
||||
@Override
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Collections.singletonList(new VCFFormatHeaderLine(getKeyNames().get(0), 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.")); }
|
||||
|
||||
}
|
||||
|
|
@ -76,16 +76,13 @@ public class ConsensusAlleleCounter {
|
|||
private final int minIndelCountForGenotyping;
|
||||
private final boolean doMultiAllelicCalls;
|
||||
private final double minFractionInOneSample;
|
||||
private final GenomeLocParser locParser;
|
||||
|
||||
public ConsensusAlleleCounter(final GenomeLocParser locParser,
|
||||
final boolean doMultiAllelicCalls,
|
||||
public ConsensusAlleleCounter(final boolean doMultiAllelicCalls,
|
||||
final int minIndelCountForGenotyping,
|
||||
final double minFractionInOneSample) {
|
||||
this.minIndelCountForGenotyping = minIndelCountForGenotyping;
|
||||
this.doMultiAllelicCalls = doMultiAllelicCalls;
|
||||
this.minFractionInOneSample = minFractionInOneSample;
|
||||
this.locParser = locParser;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -289,7 +286,7 @@ public class ConsensusAlleleCounter {
|
|||
if (vcs.isEmpty())
|
||||
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
|
||||
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false, false);
|
||||
return mergedVC.getAlleles();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
|||
final List<Allele> allAllelesToUse){
|
||||
|
||||
|
||||
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
|
||||
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, UAC,true);
|
||||
|
||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||
|
|
|
|||
|
|
@ -89,9 +89,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
protected static List<Allele> computeConsensusAlleles(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC) {
|
||||
ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
|
||||
ConsensusAlleleCounter counter = new ConsensusAlleleCounter(true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
|
||||
return counter.computeConsensusAlleles(ref, contexts, contextType);
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +112,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// starting a new site: clear allele list
|
||||
haplotypeMap.clear();
|
||||
perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
|
@ -212,7 +211,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC,
|
||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
|
|
@ -244,7 +242,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
|
||||
} else {
|
||||
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
alleles = computeConsensusAlleles(ref, contexts, contextType, UAC);
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,24 +101,26 @@ class ActiveRegionTrimmer {
|
|||
*
|
||||
* @param region our full active region
|
||||
* @param allVariantsWithinExtendedRegion all of the variants found in the entire region, sorted by their start position
|
||||
* @param emitReferenceConfidence are we going to estimate the reference confidence with this active region?
|
||||
* @return a new ActiveRegion trimmed down to just what's needed for genotyping, or null if we couldn't do this successfully
|
||||
*/
|
||||
public ActiveRegion trimRegion(final ActiveRegion region, final TreeSet<VariantContext> allVariantsWithinExtendedRegion) {
|
||||
public ActiveRegion trimRegion(final ActiveRegion region, final TreeSet<VariantContext> allVariantsWithinExtendedRegion, final boolean emitReferenceConfidence) {
|
||||
if ( allVariantsWithinExtendedRegion.isEmpty() ) // no variants, so just return the current region
|
||||
return null;
|
||||
|
||||
final List<VariantContext> withinActiveRegion = new LinkedList<VariantContext>();
|
||||
int pad = snpPadding;
|
||||
final List<VariantContext> withinActiveRegion = new LinkedList<>();
|
||||
boolean foundNonSnp = false;
|
||||
GenomeLoc trimLoc = null;
|
||||
for ( final VariantContext vc : allVariantsWithinExtendedRegion ) {
|
||||
final GenomeLoc vcLoc = parser.createGenomeLoc(vc);
|
||||
if ( region.getLocation().overlapsP(vcLoc) ) {
|
||||
if ( ! vc.isSNP() ) // if anything isn't a SNP use the bigger padding
|
||||
pad = nonSnpPadding;
|
||||
foundNonSnp = true;
|
||||
trimLoc = trimLoc == null ? vcLoc : trimLoc.endpointSpan(vcLoc);
|
||||
withinActiveRegion.add(vc);
|
||||
}
|
||||
}
|
||||
final int pad = ( emitReferenceConfidence || foundNonSnp ? nonSnpPadding : snpPadding );
|
||||
|
||||
// we don't actually have anything in the region after removing variants that don't overlap the region's full location
|
||||
if ( trimLoc == null ) return null;
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ public class GenotypingEngine {
|
|||
final List<String> priorityList = makePriorityList(eventsAtThisLoc);
|
||||
|
||||
// Merge the event to find a common reference representation
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false, false);
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
|
|
|
|||
|
|
@ -290,7 +290,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
* B <= X < C
|
||||
* X >= C
|
||||
*
|
||||
* The default bands give the following GQ blocks:
|
||||
* The default bands with (1, 10, 20, 30, 40, 50) give the following GQ blocks:
|
||||
*
|
||||
* [0, 0]
|
||||
* (0, 10]
|
||||
|
|
@ -304,7 +304,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="GVCFGQBands", shortName="GQB", doc="Emit experimental reference confidence scores", required = false)
|
||||
protected List<Integer> GVCFGQBands = Arrays.asList(1, 10, 20, 30, 40, 50);
|
||||
protected List<Integer> GVCFGQBands = Arrays.asList(5, 20, 60);
|
||||
|
||||
/**
|
||||
* This parameter determines the maximum size of an indel considered as potentially segregating in the
|
||||
|
|
@ -541,7 +541,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
// HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine // TODO -- why is this?
|
||||
// HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine
|
||||
UAC.OutputMode = SCAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
|
@ -553,11 +553,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
simpleUAC.CONTAMINATION_FRACTION_FILE=null;
|
||||
simpleUAC.CONTAMINATION_FRACTION_FILE = null;
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
if( UAC.CONTAMINATION_FRACTION_FILE !=null) {
|
||||
if( UAC.CONTAMINATION_FRACTION_FILE != null ) {
|
||||
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
|
||||
}
|
||||
|
||||
|
|
@ -867,17 +867,17 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// Create ReadErrorCorrector object if requested - will be used within assembly engine.
|
||||
ReadErrorCorrector readErrorCorrector = null;
|
||||
if (errorCorrectReads)
|
||||
readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, minObservationsForKmerToBeSolid, DEBUG,fullReferenceWithPadding);
|
||||
readErrorCorrector = new ReadErrorCorrector(kmerLengthForReadErrorCorrection, MIN_TAIL_QUALITY_WITH_ERROR_CORRECTION, minObservationsForKmerToBeSolid, DEBUG, fullReferenceWithPadding);
|
||||
|
||||
try {
|
||||
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype,readErrorCorrector );
|
||||
if ( ! emitReferenceConfidence() && ! dontTrimActiveRegions ) {
|
||||
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype, readErrorCorrector );
|
||||
if ( ! dontTrimActiveRegions ) {
|
||||
return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc);
|
||||
} else {
|
||||
// we don't want to trim active regions, so go ahead and use the old one
|
||||
return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
|
||||
}
|
||||
} catch ( Exception e ) {
|
||||
} catch ( final Exception e ) {
|
||||
// Capture any exception that might be thrown, and write out the assembly failure BAM if requested
|
||||
if ( captureAssemblyFailureBAM ) {
|
||||
final SAMFileWriter writer = ReadUtils.createSAMFileWriterWithCompression(getToolkit().getSAMFileHeader(), true, "assemblyFailure.bam", 5);
|
||||
|
|
@ -969,7 +969,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
EventMap.buildEventMapsForHaplotypes(haplotypes, fullReferenceWithPadding, paddedReferenceLoc, DEBUG);
|
||||
final TreeSet<VariantContext> allVariantsWithinFullActiveRegion = EventMap.getAllVariantContexts(haplotypes);
|
||||
allVariantsWithinFullActiveRegion.addAll(activeAllelesToGenotype);
|
||||
final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion);
|
||||
final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion, false); // TODO -- should pass emitReferenceConfidence()
|
||||
|
||||
if ( trimmedActiveRegion == null ) {
|
||||
// there were no variants found within the active region itself, so just return null
|
||||
|
|
@ -1001,7 +1001,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// trim down the reads and add them to the trimmed active region
|
||||
final List<GATKSAMRecord> trimmedReads = new ArrayList<>(originalActiveRegion.getReads().size());
|
||||
for( final GATKSAMRecord read : originalActiveRegion.getReads() ) {
|
||||
|
|
@ -1096,7 +1095,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
activeRegion.addAll(downsampledReads);
|
||||
}
|
||||
|
||||
private Set<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
private Set<GATKSAMRecord> filterNonPassingReads( final ActiveRegion activeRegion ) {
|
||||
final Set<GATKSAMRecord> readsToRemove = new LinkedHashSet<>();
|
||||
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||
if( rec.getReadLength() < MIN_READ_LENGTH || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
|
|
@ -1107,7 +1106,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
return readsToRemove;
|
||||
}
|
||||
|
||||
private GenomeLoc getPaddedLoc( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
private GenomeLoc getPaddedLoc( final ActiveRegion activeRegion ) {
|
||||
final int padLeft = Math.max(activeRegion.getExtendedLoc().getStart()-REFERENCE_PADDING, 1);
|
||||
final int padRight = Math.min(activeRegion.getExtendedLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getExtendedLoc().getContig()).getSequenceLength());
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getExtendedLoc().getContig(), padLeft, padRight);
|
||||
|
|
|
|||
|
|
@ -68,5 +68,13 @@ final class RefVsAnyResult {
|
|||
/**
|
||||
* @return Get the DP (sum of AD values)
|
||||
*/
|
||||
public int getDP() { return AD_Ref_Any[0] + AD_Ref_Any[1]; }
|
||||
protected int getDP() { return AD_Ref_Any[0] + AD_Ref_Any[1]; }
|
||||
|
||||
/**
|
||||
* Cap the het and hom var likelihood values by the hom ref likelihood.
|
||||
*/
|
||||
protected void capByHomRefLikelihood() {
|
||||
genotypeLikelihoods[1] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[1]);
|
||||
genotypeLikelihoods[2] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[2]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
|||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
|
|
@ -81,10 +82,9 @@ import java.util.*;
|
|||
* Time: 12:52 PM
|
||||
*/
|
||||
public class ReferenceConfidenceModel {
|
||||
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
|
||||
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
|
||||
|
||||
public final static String INDEL_INFORMATIVE_DEPTH = "CD";
|
||||
//public final static String INDEL_INFORMATIVE_DEPTH = "CD"; // temporarily taking this extra genotype level information out for now
|
||||
public final static String ALTERNATE_ALLELE_STRING = "ALT"; // arbitrary alternate allele
|
||||
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final Set<String> samples;
|
||||
|
|
@ -94,6 +94,8 @@ public class ReferenceConfidenceModel {
|
|||
private final static boolean WRITE_DEBUGGING_BAM = false;
|
||||
private final SAMFileWriter debuggingWriter;
|
||||
|
||||
private final static byte REF_MODEL_DELETION_QUAL = (byte) 30;
|
||||
|
||||
/**
|
||||
* Create a new ReferenceConfidenceModel
|
||||
*
|
||||
|
|
@ -124,6 +126,8 @@ public class ReferenceConfidenceModel {
|
|||
} else {
|
||||
debuggingWriter = null;
|
||||
}
|
||||
|
||||
initializeIndelPLCache();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -132,8 +136,9 @@ public class ReferenceConfidenceModel {
|
|||
*/
|
||||
public Set<VCFHeaderLine> getVCFHeaderLines() {
|
||||
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
|
||||
headerLines.add(new VCFSimpleHeaderLine("ALT", NON_REF_SYMBOLIC_ALLELE_NAME, "Represents any possible alternative allele at this location"));
|
||||
headerLines.add(new VCFFormatHeaderLine(INDEL_INFORMATIVE_DEPTH, 1, VCFHeaderLineType.Integer, "Number of reads at locus that are informative about an indel of size <= " + indelInformativeDepthIndelSize));
|
||||
// TODO - do we need a new kind of VCF Header subclass for specifying arbitrary alternate alleles?
|
||||
headerLines.add(new VCFSimpleHeaderLine(ALTERNATE_ALLELE_STRING, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME, "Represents any possible alternative allele at this location"));
|
||||
//headerLines.add(new VCFFormatHeaderLine(INDEL_INFORMATIVE_DEPTH, 1, VCFHeaderLineType.Integer, "Number of reads at locus that are informative about an indel of size <= " + indelInformativeDepthIndelSize));
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +166,7 @@ public class ReferenceConfidenceModel {
|
|||
* @param stratifiedReadMap a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
|
||||
* @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
|
||||
* correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
|
||||
* under any position is covers (for snps that 1 bp, but for deletion its the entire ref span)
|
||||
* under any position it covers (for snps and insertions that is 1 bp, but for deletions its the entire ref span)
|
||||
* @return an ordered list of variant contexts that spans activeRegion.getLoc() and includes both reference confidence
|
||||
* contexts as well as calls from variantCalls if any were provided
|
||||
*/
|
||||
|
|
@ -181,7 +186,7 @@ public class ReferenceConfidenceModel {
|
|||
if ( refHaplotype.length() != activeRegion.getExtendedLoc().size() ) throw new IllegalArgumentException("refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getLocation().size() + " are different");
|
||||
|
||||
final GenomeLoc refSpan = activeRegion.getLocation();
|
||||
final List<ReadBackedPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, refSpan, stratifiedReadMap);
|
||||
final List<ReadBackedPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, stratifiedReadMap);
|
||||
final byte[] ref = refHaplotype.getBases();
|
||||
final List<VariantContext> results = new ArrayList<>(refSpan.size());
|
||||
final String sampleName = stratifiedReadMap.keySet().iterator().next();
|
||||
|
|
@ -201,9 +206,10 @@ public class ReferenceConfidenceModel {
|
|||
final int refOffset = offset + globalRefOffset;
|
||||
final byte refBase = ref[refOffset];
|
||||
final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, (byte)6, null);
|
||||
homRefCalc.capByHomRefLikelihood();
|
||||
|
||||
final Allele refAllele = Allele.create(refBase, true);
|
||||
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder("HC", curPos.getContig(), curPos.getStart(), curPos.getStart(), refSiteAlleles);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Arrays.asList(refAllele, refAllele));
|
||||
gb.AD(homRefCalc.AD_Ref_Any);
|
||||
|
|
@ -224,7 +230,7 @@ public class ReferenceConfidenceModel {
|
|||
|
||||
gb.GQ((int) (-10 * leastConfidenceGLs.getLog10GQ(GenotypeType.HOM_REF)));
|
||||
gb.PL(leastConfidenceGLs.getAsPLs());
|
||||
gb.attribute(INDEL_INFORMATIVE_DEPTH, nIndelInformativeReads);
|
||||
//gb.attribute(INDEL_INFORMATIVE_DEPTH, nIndelInformativeReads);
|
||||
|
||||
vcb.genotypes(gb.make());
|
||||
results.add(vcb.make());
|
||||
|
|
@ -252,14 +258,21 @@ public class ReferenceConfidenceModel {
|
|||
* @return non-null GenotypeLikelihoods given N
|
||||
*/
|
||||
protected final GenotypeLikelihoods getIndelPLs(final int nInformativeReads) {
|
||||
// TODO -- optimization -- this could easily be optimized with some caching
|
||||
final double homRef = 0.0;
|
||||
final double het = - LOG10_2 * nInformativeReads;
|
||||
final double homVar = INDEL_ERROR_RATE * nInformativeReads;
|
||||
return GenotypeLikelihoods.fromLog10Likelihoods(new double[]{homRef, het, homVar});
|
||||
return indelPLCache[nInformativeReads > MAX_N_INDEL_INFORMATIVE_READS ? MAX_N_INDEL_INFORMATIVE_READS : nInformativeReads];
|
||||
}
|
||||
|
||||
protected static final int MAX_N_INDEL_INFORMATIVE_READS = 40; // more than this is overkill because GQs are capped at 99 anyway
|
||||
private static final GenotypeLikelihoods[] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
|
||||
private static final double INDEL_ERROR_RATE = -4.5; // 10^-4.5 indel errors per bp
|
||||
|
||||
private void initializeIndelPLCache() {
|
||||
for( int nInformativeReads = 0; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
|
||||
final double homRef = 0.0;
|
||||
final double het = MathUtils.LOG_ONE_HALF * nInformativeReads;
|
||||
final double homVar = INDEL_ERROR_RATE * nInformativeReads;
|
||||
indelPLCache[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(new double[]{homRef, het, homVar});
|
||||
}
|
||||
}
|
||||
private final static double LOG10_2 = Math.log10(2);
|
||||
private final static double INDEL_ERROR_RATE = -4.5; // 10^-4.5 indel errors per bp
|
||||
|
||||
/**
|
||||
* Calculate the genotype likelihoods for the sample in pileup for being hom-ref contrasted with being ref vs. alt
|
||||
|
|
@ -274,8 +287,8 @@ public class ReferenceConfidenceModel {
|
|||
final RefVsAnyResult result = new RefVsAnyResult();
|
||||
|
||||
for( final PileupElement p : pileup ) {
|
||||
final byte qual = p.getQual();
|
||||
if( p.isDeletion() || qual > minBaseQual) {
|
||||
final byte qual = (p.isDeletion() ? REF_MODEL_DELETION_QUAL : p.getQual());
|
||||
if( p.isDeletion() || qual > minBaseQual ) {
|
||||
int AA = 0; final int AB = 1; int BB = 2;
|
||||
if( p.getBase() != refBase || p.isDeletion() || p.isBeforeDeletionStart() || p.isAfterDeletionEnd() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
|
||||
AA = 2;
|
||||
|
|
@ -302,20 +315,37 @@ public class ReferenceConfidenceModel {
|
|||
private List<ReadBackedPileup> getPileupsOverReference(final Haplotype refHaplotype,
|
||||
final Collection<Haplotype> calledHaplotypes,
|
||||
final GenomeLoc paddedReferenceLoc,
|
||||
final ActiveRegion activeRegion,
|
||||
final GenomeLoc activeRegionSpan,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
|
||||
final ReadDestination.ToList realignedReadsDest = new ReadDestination.ToList(header, "FOO");
|
||||
final HaplotypeBAMWriter writer = HaplotypeBAMWriter.create(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, realignedReadsDest);
|
||||
writer.setWriteHaplotypesAsWell(false); // don't write out reads for the haplotypes, as we only want the realigned reads themselves
|
||||
writer.writeReadsAlignedToHaplotypes(calledHaplotypes.isEmpty() ? Collections.singleton(refHaplotype) : calledHaplotypes, paddedReferenceLoc, stratifiedReadMap);
|
||||
final List<GATKSAMRecord> realignedReads = ReadUtils.sortReadsByCoordinate(realignedReadsDest.getReads());
|
||||
|
||||
if ( refHaplotype == null ) throw new IllegalArgumentException("refHaplotype cannot be null");
|
||||
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
|
||||
if ( !calledHaplotypes.contains(refHaplotype)) throw new IllegalArgumentException("calledHaplotypes must contain the refHaplotype");
|
||||
if ( paddedReferenceLoc == null ) throw new IllegalArgumentException("paddedReferenceLoc cannot be null");
|
||||
if ( activeRegion == null ) throw new IllegalArgumentException("activeRegion cannot be null");
|
||||
if ( stratifiedReadMap == null ) throw new IllegalArgumentException("stratifiedReadMap cannot be null");
|
||||
if ( stratifiedReadMap.size() != 1 ) throw new IllegalArgumentException("stratifiedReadMap must contain exactly one sample but it contained " + stratifiedReadMap.size());
|
||||
|
||||
List<GATKSAMRecord> realignedReads;
|
||||
|
||||
if( calledHaplotypes.size() == 1 ) { // only contains ref haplotype so an optimization is to just trust the alignments to the reference haplotype as provided by the aligner
|
||||
realignedReads = activeRegion.getReads();
|
||||
} else {
|
||||
final ReadDestination.ToList realignedReadsDest = new ReadDestination.ToList(header, "FOO");
|
||||
final HaplotypeBAMWriter writer = HaplotypeBAMWriter.create(HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES, realignedReadsDest);
|
||||
writer.setWriteHaplotypesAsWell(false); // don't write out reads for the haplotypes, as we only want the realigned reads themselves
|
||||
writer.setOnlyRealignInformativeReads(true);
|
||||
writer.writeReadsAlignedToHaplotypes(calledHaplotypes, paddedReferenceLoc, stratifiedReadMap);
|
||||
realignedReads = ReadUtils.sortReadsByCoordinate(realignedReadsDest.getReads());
|
||||
}
|
||||
|
||||
if ( debuggingWriter != null )
|
||||
for ( final GATKSAMRecord read : realignedReads )
|
||||
debuggingWriter.addAlignment(read);
|
||||
|
||||
final LocusIteratorByState libs = new LocusIteratorByState(realignedReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
|
||||
false, genomeLocParser, samples, false);
|
||||
true, genomeLocParser, samples, false);
|
||||
|
||||
final List<ReadBackedPileup> pileups = new LinkedList<>();
|
||||
final int startPos = activeRegionSpan.getStart();
|
||||
|
|
@ -378,7 +408,7 @@ public class ReferenceConfidenceModel {
|
|||
final byte refBase = refBases[refStart + i];
|
||||
if ( readBase != refBase ) {
|
||||
sum += readQuals[readStart + i];
|
||||
if ( sum > maxSum )
|
||||
if ( sum > maxSum ) // abort early
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -403,7 +433,10 @@ public class ReferenceConfidenceModel {
|
|||
final byte[] refBases,
|
||||
final int refStart,
|
||||
final int maxIndelSize) {
|
||||
// todo -- fast exit when n bases left < maxIndelSize
|
||||
// fast exit when n bases left < maxIndelSize
|
||||
if( readBases.length - readStart < maxIndelSize || refBases.length - refStart < maxIndelSize ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final int baselineMMSum = sumMismatchingQualities(readBases, readQuals, readStart, refBases, refStart, Integer.MAX_VALUE);
|
||||
|
||||
|
|
@ -445,12 +478,16 @@ public class ReferenceConfidenceModel {
|
|||
final int offset = p.getOffset();
|
||||
|
||||
// doesn't count as evidence
|
||||
if ( p.isBeforeDeletionStart() || p.isBeforeInsertion() )
|
||||
if ( p.isBeforeDeletionStart() || p.isBeforeInsertion() || p.isDeletion() )
|
||||
continue;
|
||||
|
||||
// todo -- this code really should handle CIGARs directly instead of relying on the above tests
|
||||
if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize))
|
||||
if ( isReadInformativeAboutIndelsOfSize(read.getReadBases(), read.getBaseQualities(), offset, ref, pileupOffsetIntoRef, maxIndelSize) ) {
|
||||
nInformative++;
|
||||
if( nInformative > MAX_N_INDEL_INFORMATIVE_READS ) {
|
||||
return MAX_N_INDEL_INFORMATIVE_READS;
|
||||
}
|
||||
}
|
||||
}
|
||||
return nInformative;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -230,6 +230,7 @@ public class GVCFWriter implements VariantContextWriter {
|
|||
gb.DP(block.getMedianDP());
|
||||
gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP());
|
||||
gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
|
||||
gb.PL(block.getMinPLs());
|
||||
|
||||
return vcb.genotypes(gb.make()).make();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,10 +69,11 @@ import java.util.List;
|
|||
*/
|
||||
final class HomRefBlock {
|
||||
private final VariantContext startingVC;
|
||||
int stop;
|
||||
private int stop;
|
||||
private final int minGQ, maxGQ;
|
||||
private List<Integer> GQs = new ArrayList<>(100);
|
||||
private List<Integer> DPs = new ArrayList<>(100);
|
||||
private int[] minPLs = null;
|
||||
final private List<Integer> GQs = new ArrayList<>(100);
|
||||
final private List<Integer> DPs = new ArrayList<>(100);
|
||||
private final Allele ref;
|
||||
|
||||
/**
|
||||
|
|
@ -116,9 +117,23 @@ final class HomRefBlock {
|
|||
public void add(final int pos, final Genotype g) {
|
||||
if ( g == null ) throw new IllegalArgumentException("g cannot be null");
|
||||
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
|
||||
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
|
||||
if ( ! g.hasDP() ) throw new IllegalArgumentException("g must have DP field");
|
||||
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
|
||||
|
||||
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
|
||||
final int[] PL = g.getPL();
|
||||
if( PL.length == 3 ) {
|
||||
minPLs = PL.clone();
|
||||
}
|
||||
} else { // otherwise take the min with the provided genotype's PLs
|
||||
final int[] PL = g.getPL();
|
||||
if( PL.length == 3 ) {
|
||||
minPLs[0] = Math.min(minPLs[0], PL[0]);
|
||||
minPLs[1] = Math.min(minPLs[1], PL[1]);
|
||||
minPLs[2] = Math.min(minPLs[2], PL[2]);
|
||||
}
|
||||
}
|
||||
stop = pos;
|
||||
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
|
||||
DPs.add(g.getDP());
|
||||
|
|
@ -141,6 +156,8 @@ final class HomRefBlock {
|
|||
public int getMinDP() { return MathUtils.arrayMin(DPs); }
|
||||
/** Get the median DP observed within this band */
|
||||
public int getMedianDP() { return MathUtils.median(DPs); }
|
||||
/** Get the min PLs observed within this band, can be null if no PLs have yet been observed */
|
||||
public int[] getMinPLs() { return minPLs; }
|
||||
|
||||
protected int getGQUpperBound() { return maxGQ; }
|
||||
protected int getGQLowerBound() { return minGQ; }
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.haplotypeBAMWriter;
|
||||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele;
|
||||
|
|
|
|||
|
|
@ -77,8 +77,9 @@ public abstract class HaplotypeBAMWriter {
|
|||
protected final static String READ_GROUP_ID = "ArtificialHaplotype";
|
||||
protected final static String HAPLOTYPE_TAG = "HC";
|
||||
|
||||
final ReadDestination output;
|
||||
boolean writeHaplotypesAsWell = true;
|
||||
private final ReadDestination output;
|
||||
private boolean writeHaplotypesAsWell = true;
|
||||
private boolean onlyRealignInformativeReads = false;
|
||||
|
||||
/**
|
||||
* Possible modes for writing haplotypes to BAMs
|
||||
|
|
@ -181,9 +182,16 @@ public abstract class HaplotypeBAMWriter {
|
|||
final Haplotype haplotype,
|
||||
final int referenceStart,
|
||||
final boolean isInformative) {
|
||||
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
|
||||
if ( alignedToRef != null )
|
||||
output.add(alignedToRef);
|
||||
if( onlyRealignInformativeReads && !isInformative ) {
|
||||
if( originalRead != null ) {
|
||||
output.add(originalRead);
|
||||
}
|
||||
} else {
|
||||
final GATKSAMRecord alignedToRef = createReadAlignedToRef(originalRead, haplotype, referenceStart, isInformative);
|
||||
if ( alignedToRef != null ) {
|
||||
output.add(alignedToRef);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -305,7 +313,15 @@ public abstract class HaplotypeBAMWriter {
|
|||
return writeHaplotypesAsWell;
|
||||
}
|
||||
|
||||
public void setWriteHaplotypesAsWell(boolean writeHaplotypesAsWell) {
|
||||
public void setWriteHaplotypesAsWell(final boolean writeHaplotypesAsWell) {
|
||||
this.writeHaplotypesAsWell = writeHaplotypesAsWell;
|
||||
}
|
||||
|
||||
public boolean getOnlyRealignInformativeReads() {
|
||||
return onlyRealignInformativeReads;
|
||||
}
|
||||
|
||||
public void setOnlyRealignInformativeReads(final boolean onlyRealignInformativeReads) {
|
||||
this.onlyRealignInformativeReads = onlyRealignInformativeReads;
|
||||
}
|
||||
}
|
||||
|
|
@ -46,14 +46,25 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.readers.LineIterator;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||
|
||||
final static String REF = b37KGReference;
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
|
||||
public static String baseTestString() {
|
||||
return "-T VariantAnnotator -R " + b36KGReference + " --no_cmdline_in_header -o %s";
|
||||
}
|
||||
|
|
@ -290,4 +301,48 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStrandBiasBySample() throws IOException {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
|
||||
|
||||
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
|
||||
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
|
||||
specNoFS.disableShadowBCF();
|
||||
final File outputVCFNoFS = executeTest("testStrandBiasBySample component stand bias annotation", specNoFS).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoFS.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A FisherStrand";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList(""));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testStrandBiasBySample re-annotation of FisherStrand", specAnn).getFirst().get(0);
|
||||
|
||||
// confirm that the FisherStrand values are identical for the two pipelines
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
final FileInputStream s = new FileInputStream(outputVCF);
|
||||
final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s));
|
||||
codec.readHeader(lineIterator);
|
||||
|
||||
final VCFCodec codecAnn = new VCFCodec();
|
||||
final FileInputStream sAnn = new FileInputStream(outputVCFAnn);
|
||||
final LineIterator lineIteratorAnn = codecAnn.makeSourceFromStream(new PositionalBufferedStream(sAnn));
|
||||
codecAnn.readHeader(lineIteratorAnn);
|
||||
|
||||
while( lineIterator.hasNext() && lineIteratorAnn.hasNext() ) {
|
||||
final String line = lineIterator.next();
|
||||
Assert.assertFalse(line == null);
|
||||
final VariantContext vc = codec.decode(line);
|
||||
|
||||
final String lineAnn = lineIteratorAnn.next();
|
||||
Assert.assertFalse(lineAnn == null);
|
||||
final VariantContext vcAnn = codecAnn.decode(lineAnn);
|
||||
|
||||
Assert.assertTrue(vc.hasAttribute("FS"));
|
||||
Assert.assertTrue(vcAnn.hasAttribute("FS"));
|
||||
Assert.assertEquals(vc.getAttributeAsDouble("FS", 0.0), vcAnn.getAttributeAsDouble("FS", -1.0));
|
||||
}
|
||||
|
||||
Assert.assertFalse(lineIterator.hasNext());
|
||||
Assert.assertFalse(lineIteratorAnn.hasNext());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(true, minCnt, minFraction);
|
||||
return counter.computeConsensusAlleles(pileupProvider.referenceContext,
|
||||
pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
|
||||
AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||
|
|
|
|||
|
|
@ -57,18 +57,18 @@ import java.util.List;
|
|||
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||
@DataProvider(name = "MyDataProvider")
|
||||
public Object[][] makeMyDataProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
final String PCRFreeIntervals = "-L 20:10,000,000-10,010,000";
|
||||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "3ce9c42e7e97a45a82315523dbd77fcf"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "e32b7fc4de29ed141dcafc0d789d5ed6"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "ecac86e8ef4856e6dfa306c436e9b545"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "c5a55196e10680a02c833a8a44733306"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "9b9923ef41bfc7346c905fdecf918f92"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "7cb1e431119df00ec243a6a115fa74b8"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "7828256b82df377cc3a26a55dbf68f91"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "e41e0acf172a994e938a150390badd39"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "90e22230149e6c32d1115d0e2f03cab1"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "b39a4bc19a0acfbade22a011cd229262"});
|
||||
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -136,16 +136,16 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
private boolean containsDuplicateRecord( final File vcf, final GenomeLocParser parser ) {
|
||||
final List<Pair<GenomeLoc, GenotypingEngine.Event>> VCs = new ArrayList<Pair<GenomeLoc, GenotypingEngine.Event>>();
|
||||
final List<Pair<GenomeLoc, GenotypingEngine.Event>> VCs = new ArrayList<>();
|
||||
try {
|
||||
for( final VariantContext vc : GATKVCFUtils.readVCF(vcf).getSecond() ) {
|
||||
VCs.add(new Pair<GenomeLoc, GenotypingEngine.Event>(parser.createGenomeLoc(vc), new GenotypingEngine.Event(vc)));
|
||||
VCs.add(new Pair<>(parser.createGenomeLoc(vc), new GenotypingEngine.Event(vc)));
|
||||
}
|
||||
} catch( IOException e ) {
|
||||
throw new IllegalStateException("Somehow the temporary VCF from the integration test could not be read.");
|
||||
}
|
||||
|
||||
final Set<Pair<GenomeLoc, GenotypingEngine.Event>> VCsAsSet = new HashSet<Pair<GenomeLoc, GenotypingEngine.Event>>(VCs);
|
||||
final Set<Pair<GenomeLoc, GenotypingEngine.Event>> VCsAsSet = new HashSet<>(VCs);
|
||||
return VCsAsSet.size() != VCs.size(); // The set will remove duplicate Events.
|
||||
}
|
||||
|
||||
|
|
@ -233,7 +233,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("f3e636d64042e766cc6515987e85a968"));
|
||||
Arrays.asList("a43d6226a51eb525f0774f88e3778189"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -256,7 +256,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestAggressivePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("ab49f80783e5db5f9ab6b13ba2ad00cb"));
|
||||
Arrays.asList("19c2992541ede7407192660fdc1fadbf"));
|
||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -264,7 +264,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("16f7ffa063511c70bad795639a1c2638"));
|
||||
Arrays.asList("f4ab037915db3a40ba26e9ee30d40e16"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,10 +58,10 @@ import java.util.List;
|
|||
public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
||||
@DataProvider(name = "NCTDataProvider")
|
||||
public Object[][] makeNCTDataProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, "e4bf389676fa090c95980349310ba5ca"});
|
||||
tests.add(new Object[]{nct, "29cb04cca87f42b4762c34dfea5d15b7"});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "CalcNIndelInformativeReadsData")
|
||||
public Object[][] makeMyDataProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
{ // very basic testing
|
||||
final String ref = "ACGT";
|
||||
|
|
@ -187,7 +187,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(prev.getAsPLs(), new int[]{0, 0, 0});
|
||||
Assert.assertEquals(-10 * prev.getLog10GQ(GenotypeType.HOM_REF), 0.0);
|
||||
|
||||
for ( int i = 1; i < 10000; i++ ) {
|
||||
for ( int i = 1; i <= ReferenceConfidenceModel.MAX_N_INDEL_INFORMATIVE_READS; i++ ) {
|
||||
final GenotypeLikelihoods current = model.getIndelPLs(i);
|
||||
final double prevGQ = -10 * prev.getLog10GQ(GenotypeType.HOM_REF);
|
||||
final double currGQ = -10 * current.getLog10GQ(GenotypeType.HOM_REF);
|
||||
|
|
@ -379,7 +379,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(refModel.getEnd(), loc.getStart() + i);
|
||||
Assert.assertFalse(refModel.hasLog10PError());
|
||||
Assert.assertEquals(refModel.getAlternateAlleles().size(), 1);
|
||||
Assert.assertEquals(refModel.getAlternateAllele(0), ReferenceConfidenceModel.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(refModel.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertTrue(refModel.hasGenotype(sample));
|
||||
|
||||
final Genotype g = refModel.getGenotype(sample);
|
||||
|
|
@ -388,7 +388,6 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(g.getDP(), expectedDP);
|
||||
Assert.assertTrue(g.hasGQ());
|
||||
Assert.assertTrue(g.hasPL());
|
||||
Assert.assertTrue(g.hasExtendedAttribute(ReferenceConfidenceModel.INDEL_INFORMATIVE_DEPTH));
|
||||
}
|
||||
|
||||
final VariantContext vc = call == null ? refModel : call;
|
||||
|
|
|
|||
|
|
@ -67,7 +67,11 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
//
|
||||
private static String baseTestString(String args) {
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
return baseTestString(args, b36KGReference);
|
||||
}
|
||||
|
||||
private static String baseTestString(String args, String ref) {
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + ref + args;
|
||||
//return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
|
||||
}
|
||||
|
||||
|
|
@ -181,6 +185,19 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); }
|
||||
|
||||
@Test public void combineSingleSamplePipelineGVCF() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
|
||||
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
|
||||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -multipleAllelesMergeType MIX_TYPES" +
|
||||
" --excludeNonVariants -combineAnnotations -setKey null" +
|
||||
" -L 20:10,000,000-10,001,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("2e15db35359144683f1e58e147362679"));
|
||||
cvExecuteTest("combineSingleSamplePipelineGVCF", spec, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void combineDBSNPDuplicateSites() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ package org.broadinstitute.sting.utils.gvcf;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.ReferenceConfidenceModel;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -91,7 +90,7 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
private List<Integer> standardPartition = Arrays.asList(1, 10, 20);
|
||||
private Allele REF = Allele.create("N", true);
|
||||
private Allele ALT = Allele.create("A");
|
||||
private List<Allele> ALLELES = Arrays.asList(REF, ReferenceConfidenceModel.NON_REF_SYMBOLIC_ALLELE);
|
||||
private List<Allele> ALLELES = Arrays.asList(REF, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
private final String SAMPLE_NAME = "XXYYZZ";
|
||||
|
||||
@BeforeMethod
|
||||
|
|
@ -223,10 +222,10 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
Assert.assertEquals(vc.getStart(), start);
|
||||
Assert.assertEquals(vc.getEnd(), stop);
|
||||
if ( nonRef ) {
|
||||
Assert.assertNotEquals(vc.getAlternateAllele(0), ReferenceConfidenceModel.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertNotEquals(vc.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
} else {
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
Assert.assertEquals(vc.getAlternateAllele(0), ReferenceConfidenceModel.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(vc.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(vc.getAttributeAsInt(GVCFWriter.BLOCK_SIZE_INFO_FIELD, -1), stop - start + 1);
|
||||
Assert.assertEquals(vc.getAttributeAsInt(VCFConstants.END_KEY, -1), stop);
|
||||
Assert.assertTrue(vc.hasGenotypes());
|
||||
|
|
@ -234,8 +233,9 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
Assert.assertEquals(vc.getGenotypes().size(), 1);
|
||||
final Genotype g = vc.getGenotype(SAMPLE_NAME);
|
||||
Assert.assertEquals(g.hasAD(), false);
|
||||
Assert.assertEquals(g.hasLikelihoods(), false);
|
||||
Assert.assertEquals(g.hasPL(), false);
|
||||
Assert.assertEquals(g.hasLikelihoods(), true);
|
||||
Assert.assertEquals(g.hasPL(), true);
|
||||
Assert.assertEquals(g.getPL().length == 3, true);
|
||||
Assert.assertEquals(g.hasDP(), true);
|
||||
Assert.assertEquals(g.hasGQ(), true);
|
||||
}
|
||||
|
|
@ -307,7 +307,7 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "BandPartitionData")
|
||||
public Object[][] makeBandPartitionData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
tests.add(new Object[]{null, false});
|
||||
tests.add(new Object[]{Collections.emptyList(), false});
|
||||
|
|
|
|||
|
|
@ -85,32 +85,34 @@ public class HomRefBlockUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testMinMedian() {
|
||||
//TODO - might be better to make this test use a data provider?
|
||||
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
|
||||
|
||||
int pos = vc.getStart();
|
||||
band.add(pos++, gb.DP(10).GQ(11).make());
|
||||
band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
assertValues(band, 10, 10, 11, 11);
|
||||
|
||||
band.add(pos++, gb.DP(11).GQ(10).make());
|
||||
band.add(pos++, gb.DP(11).GQ(10).PL(new int[]{0,10,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
assertValues(band, 10, 11, 10, 11);
|
||||
|
||||
band.add(pos++, gb.DP(12).GQ(12).make());
|
||||
band.add(pos++, gb.DP(12).GQ(12).PL(new int[]{0,12,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
assertValues(band, 10, 11, 10, 11);
|
||||
|
||||
band.add(pos++, gb.DP(13).GQ(15).make());
|
||||
band.add(pos++, gb.DP(13).GQ(15).PL(new int[]{0,15,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
band.add(pos++, gb.DP(14).GQ(16).make());
|
||||
band.add(pos++, gb.DP(14).GQ(16).PL(new int[]{0,16,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
band.add(pos++, gb.DP(15).GQ(17).make());
|
||||
band.add(pos++, gb.DP(15).GQ(17).PL(new int[]{0,17,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
band.add(pos++, gb.DP(16).GQ(18).make());
|
||||
band.add(pos++, gb.DP(16).GQ(18).PL(new int[]{0,18,100}).make());
|
||||
Assert.assertEquals(band.getStop(), pos - 1);
|
||||
assertValues(band, 10, 13, 10, 15);
|
||||
Assert.assertEquals(band.getSize(), pos - vc.getStart());
|
||||
Assert.assertTrue(Arrays.equals(band.getMinPLs(), new int[]{0,10,100}));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -118,7 +120,7 @@ public class HomRefBlockUnitTest extends BaseTest {
|
|||
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
|
||||
|
||||
band.add(vc.getStart(), gb.DP(1000).GQ(1000).make());
|
||||
band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make());
|
||||
assertValues(band, 1000, 1000, 99, 99);
|
||||
}
|
||||
|
||||
|
|
@ -127,7 +129,7 @@ public class HomRefBlockUnitTest extends BaseTest {
|
|||
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
|
||||
|
||||
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).make());
|
||||
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make());
|
||||
}
|
||||
|
||||
private void assertValues(final HomRefBlock band, final int minDP, final int medianDP, final int minGQ, final int medianGQ) {
|
||||
|
|
@ -140,7 +142,7 @@ public class HomRefBlockUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "ContiguousData")
|
||||
public Object[][] makeContiguousData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final String chrMod : Arrays.asList("", ".mismatch") ) {
|
||||
for ( final int offset : Arrays.asList(-10, -1, 0, 1, 10) ) {
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
private int maxRegionSize = -1;
|
||||
private int minRegionSize = -1;
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<>();
|
||||
|
||||
private TAROrderedReadCache myReads = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
|
|
@ -83,6 +84,7 @@ import java.util.*;
|
|||
@Requires(value={})
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-50,stop=50))
|
||||
@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
@By(DataSource.REFERENCE)
|
||||
public class VariantAnnotator extends RodWalker<Integer, Integer> implements AnnotatorCompatible, TreeReducible<Integer> {
|
||||
|
||||
|
|
@ -132,21 +134,21 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
* See the -list argument to view available annotations.
|
||||
*/
|
||||
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
|
||||
protected List<String> annotationsToUse = new ArrayList<String>();
|
||||
protected List<String> annotationsToUse = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* Note that this argument has higher priority than the -A or -G arguments,
|
||||
* so annotations will be excluded even if they are explicitly included with the other options.
|
||||
*/
|
||||
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
|
||||
protected List<String> annotationsToExclude = new ArrayList<String>();
|
||||
protected List<String> annotationsToExclude = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
|
||||
* Keep in mind that RODRequiringAnnotations are not intended to be used as a group, because they require specific ROD inputs.
|
||||
*/
|
||||
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
|
||||
protected List<String> annotationGroupsToUse = new ArrayList<String>();
|
||||
protected List<String> annotationGroupsToUse = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* This option enables you to add annotations from one VCF to another.
|
||||
|
|
@ -193,8 +195,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
}
|
||||
|
||||
// get the list of all sample names from the variant VCF input rod, if applicable
|
||||
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
|
||||
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
|
||||
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
|
||||
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
|
||||
|
||||
if ( USE_ALL_ANNOTATIONS )
|
||||
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
|
||||
|
|
@ -204,23 +206,23 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
|
||||
// setup the header fields
|
||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
for ( VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) {
|
||||
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) {
|
||||
if ( isUniqueHeaderLine(line, hInfo) )
|
||||
hInfo.add(line);
|
||||
}
|
||||
// for the expressions, pull the info header line from the header of the resource rod
|
||||
for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) {
|
||||
for ( final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) {
|
||||
// special case the ID field
|
||||
if ( expression.fieldName.equals("ID") ) {
|
||||
hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource"));
|
||||
continue;
|
||||
}
|
||||
VCFInfoHeaderLine targetHeaderLine = null;
|
||||
for ( VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
|
||||
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
|
||||
final VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
|
||||
if ( infoline.getID().equals(expression.fieldName) ) {
|
||||
targetHeaderLine = infoline;
|
||||
break;
|
||||
|
|
@ -285,7 +287,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
Map<String, AlignmentContext> stratifiedContexts;
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
|
||||
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
|
||||
annotatedVCs = new ArrayList<>(VCs.size());
|
||||
for ( VariantContext vc : VCs )
|
||||
annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,15 +58,15 @@ public class VariantAnnotatorEngine {
|
|||
public RodBinding<VariantContext> binding;
|
||||
|
||||
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
|
||||
int indexOfDot = fullExpression.lastIndexOf(".");
|
||||
final int indexOfDot = fullExpression.lastIndexOf(".");
|
||||
if ( indexOfDot == -1 )
|
||||
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
|
||||
|
||||
fullName = fullExpression;
|
||||
fieldName = fullExpression.substring(indexOfDot+1);
|
||||
|
||||
String bindingName = fullExpression.substring(0, indexOfDot);
|
||||
for ( RodBinding<VariantContext> rod : bindings ) {
|
||||
final String bindingName = fullExpression.substring(0, indexOfDot);
|
||||
for ( final RodBinding<VariantContext> rod : bindings ) {
|
||||
if ( rod.getName().equals(bindingName) ) {
|
||||
binding = rod;
|
||||
break;
|
||||
|
|
@ -96,7 +96,7 @@ public class VariantAnnotatorEngine {
|
|||
// select specific expressions to use
|
||||
public void initializeExpressions(Set<String> expressionsToUse) {
|
||||
// set up the expressions
|
||||
for ( String expression : expressionsToUse )
|
||||
for ( final String expression : expressionsToUse )
|
||||
requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings()));
|
||||
}
|
||||
|
||||
|
|
@ -113,15 +113,15 @@ public class VariantAnnotatorEngine {
|
|||
if ( annotationsToExclude.size() == 0 )
|
||||
return;
|
||||
|
||||
List<InfoFieldAnnotation> tempRequestedInfoAnnotations = new ArrayList<InfoFieldAnnotation>(requestedInfoAnnotations.size());
|
||||
for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) {
|
||||
final List<InfoFieldAnnotation> tempRequestedInfoAnnotations = new ArrayList<>(requestedInfoAnnotations.size());
|
||||
for ( final InfoFieldAnnotation annotation : requestedInfoAnnotations ) {
|
||||
if ( !annotationsToExclude.contains(annotation.getClass().getSimpleName()) )
|
||||
tempRequestedInfoAnnotations.add(annotation);
|
||||
}
|
||||
requestedInfoAnnotations = tempRequestedInfoAnnotations;
|
||||
|
||||
List<GenotypeAnnotation> tempRequestedGenotypeAnnotations = new ArrayList<GenotypeAnnotation>(requestedGenotypeAnnotations.size());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
final List<GenotypeAnnotation> tempRequestedGenotypeAnnotations = new ArrayList<>(requestedGenotypeAnnotations.size());
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
if ( !annotationsToExclude.contains(annotation.getClass().getSimpleName()) )
|
||||
tempRequestedGenotypeAnnotations.add(annotation);
|
||||
}
|
||||
|
|
@ -143,24 +143,24 @@ public class VariantAnnotatorEngine {
|
|||
variantOverlapAnnotator = new VariantOverlapAnnotator(dbSNPBinding, overlapBindings, engine.getGenomeLocParser());
|
||||
}
|
||||
|
||||
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||
public void invokeAnnotationInitializationMethods( final Set<VCFHeaderLine> headerLines ) {
|
||||
for ( final VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||
annotation.initialize(walker, toolkit, headerLines);
|
||||
}
|
||||
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
for ( final VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.initialize(walker, toolkit, headerLines);
|
||||
}
|
||||
}
|
||||
|
||||
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
||||
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
||||
final Set<VCFHeaderLine> descriptions = new HashSet<>();
|
||||
|
||||
for ( InfoFieldAnnotation annotation : requestedInfoAnnotations )
|
||||
for ( final InfoFieldAnnotation annotation : requestedInfoAnnotations )
|
||||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations )
|
||||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( String db : variantOverlapAnnotator.getOverlapNames() ) {
|
||||
for ( final String db : variantOverlapAnnotator.getOverlapNames() ) {
|
||||
if ( VCFStandardHeaderLines.getInfoLine(db, false) != null )
|
||||
descriptions.add(VCFStandardHeaderLines.getInfoLine(db));
|
||||
else
|
||||
|
|
@ -170,10 +170,10 @@ public class VariantAnnotatorEngine {
|
|||
return descriptions;
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
VariantContext vc) {
|
||||
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc) {
|
||||
return annotateContext(tracker, ref, stratifiedContexts, vc, null);
|
||||
}
|
||||
|
||||
|
|
@ -182,20 +182,20 @@ public class VariantAnnotatorEngine {
|
|||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||
|
||||
// annotate expressions where available
|
||||
annotateExpressions(tracker, ref.getLocus(), infoAnnotations);
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap);
|
||||
for ( final InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
final Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap);
|
||||
if ( annotationsFromCurrentType != null )
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
|
||||
// generate a new annotated VC
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
|
||||
// annotate genotypes, creating another new VC in the process
|
||||
final VariantContext annotated = builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap)).make();
|
||||
|
|
@ -210,11 +210,11 @@ public class VariantAnnotatorEngine {
|
|||
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
for ( final InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
if ( !(annotationType instanceof ActiveRegionBasedAnnotation) )
|
||||
continue;
|
||||
|
||||
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(perReadAlleleLikelihoodMap, vc);
|
||||
final Map<String, Object> annotationsFromCurrentType = annotationType.annotate(perReadAlleleLikelihoodMap, vc);
|
||||
if ( annotationsFromCurrentType != null ) {
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
|
|
@ -244,12 +244,12 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final Map<String, Object> infoAnnotations) {
|
||||
for ( VAExpression expression : requestedExpressions ) {
|
||||
Collection<VariantContext> VCs = tracker.getValues(expression.binding, loc);
|
||||
for ( final VAExpression expression : requestedExpressions ) {
|
||||
final Collection<VariantContext> VCs = tracker.getValues(expression.binding, loc);
|
||||
if ( VCs.size() == 0 )
|
||||
continue;
|
||||
|
||||
VariantContext vc = VCs.iterator().next();
|
||||
final VariantContext vc = VCs.iterator().next();
|
||||
// special-case the ID field
|
||||
if ( expression.fieldName.equals("ID") ) {
|
||||
if ( vc.hasID() )
|
||||
|
|
|
|||
|
|
@ -164,6 +164,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
@Argument(fullName="minimalVCF", shortName="minimalVCF", doc="If true, then the output VCF will contain no INFO or genotype FORMAT fields", required=false)
|
||||
public boolean minimalVCF = false;
|
||||
|
||||
@Argument(fullName="excludeNonVariants", shortName="env", doc="Don't include loci found to be non-variant after the combining procedure", required=false)
|
||||
public boolean EXCLUDE_NON_VARIANTS = false;
|
||||
|
||||
/**
|
||||
* Set to 'null' if you don't want the set field emitted.
|
||||
*/
|
||||
|
|
@ -171,7 +174,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
public String SET_KEY = "set";
|
||||
|
||||
/**
|
||||
* This option allows the user to perform a simple merge (concatenation) to combine the VCFs, drastically reducing the runtime..
|
||||
* This option allows the user to perform a simple merge (concatenation) to combine the VCFs, drastically reducing the runtime.
|
||||
*/
|
||||
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls", required=false)
|
||||
public boolean ASSUME_IDENTICAL_SAMPLES = false;
|
||||
|
|
@ -188,6 +191,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
|
||||
public boolean MERGE_INFO_WITH_MAX_AC = false;
|
||||
|
||||
@Argument(fullName="combineAnnotations", shortName="combineAnnotations", doc="If true, combine the annotation values in some straightforward manner assuming the input callsets are i.i.d.", required=false)
|
||||
public boolean COMBINE_ANNOTATIONS = false;
|
||||
|
||||
private List<String> priority = null;
|
||||
|
||||
/** Optimization to strip out genotypes before merging if we are doing a sites_only output */
|
||||
|
|
@ -238,7 +244,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
|
||||
|
||||
if ( PRIORITY_STRING != null){
|
||||
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||
priority = new ArrayList<>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||
if ( rodNames.size() != priority.size() )
|
||||
throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
|
||||
|
||||
|
|
@ -252,13 +258,16 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
|
||||
final Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
|
||||
// get all of the vcf rods at this locus
|
||||
// Need to provide reference bases to simpleMerge starting at current locus
|
||||
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
|
||||
Collection<VariantContext> potentialRefVCs = tracker.getValues(variants);
|
||||
potentialRefVCs.removeAll(vcs);
|
||||
|
||||
if ( sitesOnlyVCF ) {
|
||||
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
||||
potentialRefVCs = VariantContextUtils.sitesOnlyVariantContexts(potentialRefVCs);
|
||||
}
|
||||
|
||||
if ( ASSUME_IDENTICAL_SAMPLES ) {
|
||||
|
|
@ -270,7 +279,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
}
|
||||
|
||||
int numFilteredRecords = 0;
|
||||
for (VariantContext vc : vcs) {
|
||||
for (final VariantContext vc : vcs) {
|
||||
if (vc.filtersWereApplied() && vc.isFiltered())
|
||||
numFilteredRecords++;
|
||||
}
|
||||
|
|
@ -278,16 +287,16 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||
return 0;
|
||||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
final List<VariantContext> mergedVCs = new ArrayList<>();
|
||||
|
||||
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
|
||||
final Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
|
||||
|
||||
// TODO -- clean this up in a refactoring
|
||||
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
|
||||
if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) {
|
||||
final List<VariantContext> refs = VCsByType.remove(VariantContext.Type.NO_VARIATION);
|
||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
for ( final VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
if ( VCsByType.containsKey(type) ) {
|
||||
VCsByType.get(type).addAll(refs);
|
||||
break;
|
||||
|
|
@ -296,23 +305,27 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
}
|
||||
|
||||
// iterate over the types so that it's deterministic
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (VCsByType.containsKey(type))
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type),
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
for (final VariantContext.Type type : VariantContext.Type.values()) {
|
||||
// make sure that it is a variant or in case it is not, that we want to include the sites with no variants
|
||||
if (!EXCLUDE_NON_VARIANTS || !type.equals(VariantContext.Type.NO_VARIATION)) {
|
||||
if (VCsByType.containsKey(type)) {
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type), potentialRefVCs,
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC, COMBINE_ANNOTATIONS));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs,
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs, potentialRefVCs,
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC, COMBINE_ANNOTATIONS));
|
||||
}
|
||||
else {
|
||||
logger.warn("Ignoring all records at site " + ref.getLocus());
|
||||
}
|
||||
|
||||
for ( VariantContext mergedVC : mergedVCs ) {
|
||||
for ( final VariantContext mergedVC : mergedVCs ) {
|
||||
// only operate at the start of events
|
||||
if ( mergedVC == null )
|
||||
continue;
|
||||
|
|
@ -320,9 +333,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
|
||||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
|
||||
if ( minimalVCF )
|
||||
GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
||||
vcfWriter.add(builder.make());
|
||||
final VariantContext vc = builder.make();
|
||||
if( !EXCLUDE_NON_VARIANTS || vc.isPolymorphicInSamples() )
|
||||
vcfWriter.add(builder.make());
|
||||
}
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
|
|
|
|||
|
|
@ -845,17 +845,29 @@ public class MathUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute the median element of the array of integers
|
||||
* Compute the median element of the list of integers
|
||||
* @param array a list of integers
|
||||
* @return the median element
|
||||
*/
|
||||
public static int median(final List<Integer> array) {
|
||||
public static <T extends Comparable<? super T>> T median(final List<T> array) {
|
||||
/* TODO -- from Valentin
|
||||
the current implementation is not the usual median when the input is of even length. More concretely it returns the ith element of the list where i = floor(input.size() / 2).
|
||||
|
||||
But actually that is not the "usual" definition of a median, as it is supposed to return the average of the two middle values when the sample length is an even number (i.e. median(1,2,3,4,5,6) == 3.5). [Sources: R and wikipedia]
|
||||
|
||||
My suggestion for a solution is then:
|
||||
|
||||
unify median and medianDoubles to public static <T extends Number> T median(Collection<T>)
|
||||
check on null elements and throw an exception if there are any or perhaps return a null; documented in the javadoc.
|
||||
relocate, rename and refactor MathUtils.median(X) to Utils.ithElement(X,X.size()/2)
|
||||
In addition, the current median implementation sorts the whole input list witch is O(n log n). However find out the ith element (thus calculate the median) can be done in O(n)
|
||||
*/
|
||||
if ( array == null ) throw new IllegalArgumentException("Array must be non-null");
|
||||
final int size = array.size();
|
||||
if ( size == 0 ) throw new IllegalArgumentException("Array cannot have size 0");
|
||||
else if ( size == 1 ) return array.get(0);
|
||||
else {
|
||||
final ArrayList<Integer> sorted = new ArrayList<>(array);
|
||||
final ArrayList<T> sorted = new ArrayList<>(array);
|
||||
Collections.sort(sorted);
|
||||
return sorted.get(size / 2);
|
||||
}
|
||||
|
|
@ -1405,7 +1417,7 @@ public class MathUtils {
|
|||
* @return
|
||||
*/
|
||||
public static List<Integer> log10LinearRange(final int start, final int stop, final double eps) {
|
||||
final LinkedList<Integer> values = new LinkedList<Integer>();
|
||||
final LinkedList<Integer> values = new LinkedList<>();
|
||||
final double log10range = Math.log10(stop - start);
|
||||
|
||||
if ( start == 0 )
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ import java.util.List;
|
|||
* Time: 8:54:05 AM
|
||||
*/
|
||||
public class PileupElement implements Comparable<PileupElement> {
|
||||
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<CigarElement>();
|
||||
private final static LinkedList<CigarElement> EMPTY_LINKED_LIST = new LinkedList<>();
|
||||
|
||||
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
|
||||
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.Cigar;
|
|||
/**
|
||||
* Generic interface for SmithWaterman calculations
|
||||
*
|
||||
* This interface allows clients to use a generic SmithWaterman variable, without propogating the specific
|
||||
* This interface allows clients to use a generic SmithWaterman variable, without propagating the specific
|
||||
* implementation of SmithWaterman throughout their code:
|
||||
*
|
||||
* SmithWaterman sw = new SpecificSmithWatermanImplementation(ref, read, params)
|
||||
|
|
|
|||
|
|
@ -45,7 +45,11 @@ public class GATKVariantContextUtils {
|
|||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
protected static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
|
||||
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
|
||||
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
|
|
@ -108,7 +112,7 @@ public class GATKVariantContextUtils {
|
|||
int averageLengthNum = 0;
|
||||
int averageLengthDenom = 0;
|
||||
int refLength = vc.getReference().length();
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
for ( final Allele a : vc.getAlternateAlleles() ) {
|
||||
int numAllele = vc.getCalledChrCount(a);
|
||||
int alleleSize;
|
||||
if ( a.length() == refLength ) {
|
||||
|
|
@ -182,8 +186,8 @@ public class GATKVariantContextUtils {
|
|||
*/
|
||||
public static VariantContext reverseComplement(VariantContext vc) {
|
||||
// create a mapping from original allele to reverse complemented allele
|
||||
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
|
||||
for ( Allele originalAllele : vc.getAlleles() ) {
|
||||
HashMap<Allele, Allele> alleleMap = new HashMap<>(vc.getAlleles().size());
|
||||
for ( final Allele originalAllele : vc.getAlleles() ) {
|
||||
Allele newAllele;
|
||||
if ( originalAllele.isNoCall() )
|
||||
newAllele = originalAllele;
|
||||
|
|
@ -195,8 +199,8 @@ public class GATKVariantContextUtils {
|
|||
// create new Genotype objects
|
||||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
List<Allele> newAlleles = new ArrayList<Allele>();
|
||||
for ( Allele allele : genotype.getAlleles() ) {
|
||||
List<Allele> newAlleles = new ArrayList<>();
|
||||
for ( final Allele allele : genotype.getAlleles() ) {
|
||||
Allele newAllele = alleleMap.get(allele);
|
||||
if ( newAllele == null )
|
||||
newAllele = Allele.NO_CALL;
|
||||
|
|
@ -267,7 +271,7 @@ public class GATKVariantContextUtils {
|
|||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
final ArrayList<Integer> lengths = new ArrayList<>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
|
@ -317,7 +321,7 @@ public class GATKVariantContextUtils {
|
|||
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
|
||||
|
||||
return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
|
||||
return new Pair<>(repetitionCount, repeatUnit);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -528,7 +532,7 @@ public class GATKVariantContextUtils {
|
|||
} else {
|
||||
newLikelihoods = new double[likelihoodIndexesToUse.size()];
|
||||
int newIndex = 0;
|
||||
for ( int oldIndex : likelihoodIndexesToUse )
|
||||
for ( final int oldIndex : likelihoodIndexesToUse )
|
||||
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
|
||||
|
||||
// might need to re-normalize
|
||||
|
|
@ -718,6 +722,7 @@ public class GATKVariantContextUtils {
|
|||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @param combineAnnotations should we merge info field annotations by assuming the incoming VCs are i.i.d.
|
||||
* @return new VariantContext representing the merge of unsortedVCs
|
||||
*/
|
||||
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
|
||||
|
|
@ -728,9 +733,10 @@ public class GATKVariantContextUtils {
|
|||
final boolean printMessages,
|
||||
final String setKey,
|
||||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
final boolean mergeInfoWithMaxAC,
|
||||
final boolean combineAnnotations ) {
|
||||
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
|
||||
return simpleMerge(unsortedVCs, priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC);
|
||||
return simpleMerge(unsortedVCs, Collections.<VariantContext>emptyList(), priorityListOfVCs, originalNumOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, setKey, filteredAreUncalled, mergeInfoWithMaxAC, combineAnnotations);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -738,11 +744,12 @@ public class GATKVariantContextUtils {
|
|||
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name.
|
||||
* simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
|
||||
* SampleUtils.verifyUniqueSamplesNames to check that before using sempleMerge.
|
||||
* SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
|
||||
*
|
||||
* For more information on this method see: http://www.thedistractionnetwork.com/programmer-problem/
|
||||
*
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param potentialRefVCs collection of unsorted VCs that overlap this locus which should only be searched for potential reference records
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
|
|
@ -751,9 +758,11 @@ public class GATKVariantContextUtils {
|
|||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @param combineAnnotations should we merge info field annotations by assuming the incoming VCs are i.i.d.
|
||||
* @return new VariantContext representing the merge of unsortedVCs
|
||||
*/
|
||||
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
|
||||
final Collection<VariantContext> potentialRefVCs,
|
||||
final List<String> priorityListOfVCs,
|
||||
final int originalNumOfVCs,
|
||||
final FilteredRecordMergeType filteredRecordMergeType,
|
||||
|
|
@ -762,7 +771,8 @@ public class GATKVariantContextUtils {
|
|||
final boolean printMessages,
|
||||
final String setKey,
|
||||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
final boolean mergeInfoWithMaxAC,
|
||||
final boolean combineAnnotations ) {
|
||||
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
|
@ -775,12 +785,16 @@ public class GATKVariantContextUtils {
|
|||
|
||||
final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
||||
final List<VariantContext> VCs = new ArrayList<VariantContext>();
|
||||
List<VariantContext> VCs = new ArrayList<>();
|
||||
|
||||
for (final VariantContext vc : preFilteredVCs) {
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(vc);
|
||||
}
|
||||
|
||||
// cycle through and fill in NON_REF_SYMBOLIC_ALLELEs with the actual alternate allele if possible
|
||||
VCs = fillInNonRefSymbolicAlleles(VCs, potentialRefVCs);
|
||||
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
||||
|
|
@ -789,17 +803,18 @@ public class GATKVariantContextUtils {
|
|||
final String name = first.getSource();
|
||||
final Allele refAllele = determineReferenceAllele(VCs);
|
||||
|
||||
final Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
||||
final Set<String> inconsistentAttributes = new HashSet<String>();
|
||||
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
|
||||
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
|
||||
final Set<Allele> alleles = new LinkedHashSet<>();
|
||||
final Set<String> filters = new HashSet<>();
|
||||
final Map<String, Object> attributes = new LinkedHashMap<>();
|
||||
final Set<String> inconsistentAttributes = new HashSet<>();
|
||||
final Set<String> variantSources = new HashSet<>(); // contains the set of sources we found in our set of VCs that are variant
|
||||
final Set<String> rsIDs = new LinkedHashSet<>(1); // most of the time there's one id
|
||||
|
||||
VariantContext longestVC = first;
|
||||
int depth = 0;
|
||||
int maxAC = -1;
|
||||
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
|
||||
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<>();
|
||||
final Map<String, List<Comparable>> annotationMap = new LinkedHashMap<>();
|
||||
double log10PError = CommonInfo.NO_LOG10_PERROR;
|
||||
boolean anyVCHadFiltersApplied = false;
|
||||
VariantContext vcWithMaxAC = null;
|
||||
|
|
@ -811,7 +826,6 @@ public class GATKVariantContextUtils {
|
|||
boolean remapped = false;
|
||||
|
||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
if ( longestVC.getStart() != vc.getStart() )
|
||||
throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
||||
|
|
@ -846,10 +860,10 @@ public class GATKVariantContextUtils {
|
|||
if ( vc.hasID() ) rsIDs.add(vc.getID());
|
||||
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
|
||||
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
|
||||
// lets see if the string contains a , separator
|
||||
// lets see if the string contains a "," separator
|
||||
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
|
||||
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
|
||||
for (String alleleCount : alleleCountArray) {
|
||||
final List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
|
||||
for (final String alleleCount : alleleCountArray) {
|
||||
final int ac = Integer.valueOf(alleleCount.trim());
|
||||
if (ac > maxAC) {
|
||||
maxAC = ac;
|
||||
|
|
@ -866,21 +880,36 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
|
||||
for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
|
||||
String key = p.getKey();
|
||||
// if we don't like the key already, don't go anywhere
|
||||
if ( ! inconsistentAttributes.contains(key) ) {
|
||||
final boolean alreadyFound = attributes.containsKey(key);
|
||||
final Object boundValue = attributes.get(key);
|
||||
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
|
||||
final String key = p.getKey();
|
||||
final Object value = p.getValue();
|
||||
boolean badAnnotation = false;
|
||||
if ( combineAnnotations ) { // add the annotation values to a list for combining later
|
||||
List<Comparable> values = annotationMap.get(key);
|
||||
if( values == null ) {
|
||||
values = new ArrayList<>();
|
||||
annotationMap.put(key, values);
|
||||
}
|
||||
try {
|
||||
final String stringValue = value.toString();
|
||||
values.add(stringValue.contains(".") ? Double.parseDouble(stringValue) : Integer.parseInt(stringValue));
|
||||
} catch (NumberFormatException e) {
|
||||
badAnnotation = true;
|
||||
}
|
||||
}
|
||||
if ( ! combineAnnotations || badAnnotation ) { // only output annotations that have the same value in every input VC
|
||||
// if we don't like the key already, don't go anywhere
|
||||
if ( ! inconsistentAttributes.contains(key) ) {
|
||||
final boolean alreadyFound = attributes.containsKey(key);
|
||||
final Object boundValue = attributes.get(key);
|
||||
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
|
||||
|
||||
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
|
||||
// we found the value but we're inconsistent, put it in the exclude list
|
||||
//System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue());
|
||||
inconsistentAttributes.add(key);
|
||||
attributes.remove(key);
|
||||
} else if ( ! alreadyFound || boundIsMissingValue ) { // no value
|
||||
//if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue());
|
||||
attributes.put(key, p.getValue());
|
||||
if ( alreadyFound && ! boundValue.equals(value) && ! boundIsMissingValue ) {
|
||||
// we found the value but we're inconsistent, put it in the exclude list
|
||||
inconsistentAttributes.add(key);
|
||||
attributes.remove(key);
|
||||
} else if ( ! alreadyFound || boundIsMissingValue ) { // no value
|
||||
attributes.put(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -906,6 +935,12 @@ public class GATKVariantContextUtils {
|
|||
// take the VC with the maxAC and pull the attributes into a modifiable map
|
||||
if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
|
||||
attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
|
||||
} else if ( combineAnnotations ) { // when combining annotations use the median value from all input VCs which had annotations provided
|
||||
for ( final Map.Entry<String, List<Comparable>> p : annotationMap.entrySet() ) {
|
||||
if ( ! p.getValue().isEmpty() ) {
|
||||
attributes.put(p.getKey(), combineAnnotationValues(p.getValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if at least one record was unfiltered and we want a union, clear all of the filters
|
||||
|
|
@ -922,7 +957,7 @@ public class GATKVariantContextUtils {
|
|||
else if ( variantSources.isEmpty() ) // everyone was reference
|
||||
setValue = MERGE_REF_IN_ALL;
|
||||
else {
|
||||
final LinkedHashSet<String> s = new LinkedHashSet<String>();
|
||||
final LinkedHashSet<String> s = new LinkedHashSet<>();
|
||||
for ( final VariantContext vc : VCs )
|
||||
if ( vc.isVariant() )
|
||||
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
|
||||
|
|
@ -950,7 +985,12 @@ public class GATKVariantContextUtils {
|
|||
if ( anyVCHadFiltersApplied ) {
|
||||
builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
|
||||
}
|
||||
builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
|
||||
builder.attributes(new TreeMap<>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
|
||||
if( combineAnnotations ) {
|
||||
// unfortunately some attributes are just too dangerous to try to combine together
|
||||
builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
}
|
||||
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
final VariantContext merged = builder.make();
|
||||
|
|
@ -958,6 +998,68 @@ public class GATKVariantContextUtils {
|
|||
return merged;
|
||||
}
|
||||
|
||||
private static final Comparable combineAnnotationValues( final List<Comparable> array ) {
|
||||
return MathUtils.median(array); // right now we take the median but other options could be explored
|
||||
}
|
||||
|
||||
/**
|
||||
* cycle through and fill in NON_REF_SYMBOLIC_ALLELEs with the actual alternate allele if possible
|
||||
* @param VCs the list of VCs in which to fill in symbolic alleles
|
||||
* @param potentialRefVCs the list of VCs which are overlapping the current locus-- need to look for reference blocks and fill in with alternate alleles
|
||||
* @return the list of VCs to merge in which all the NON_REF_SYMBOLIC_ALLELEs have been replaced with the correct alternate allele
|
||||
*/
|
||||
protected static final List<VariantContext> fillInNonRefSymbolicAlleles( final List<VariantContext> VCs, final Collection<VariantContext> potentialRefVCs ) {
|
||||
if( VCs == null ) { throw new IllegalArgumentException("VCs cannot be null"); }
|
||||
if( potentialRefVCs == null ) { throw new IllegalArgumentException("potentialRefVCs cannot be null"); }
|
||||
|
||||
final List<VariantContext> VCsToReturn = new ArrayList<>(VCs.size());
|
||||
boolean containsNonRefSymbolicAllele = false;
|
||||
VariantContext nonRefVC = null;
|
||||
for( final VariantContext vc : VCs ) {
|
||||
if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) {
|
||||
containsNonRefSymbolicAllele = true;
|
||||
} else if ( nonRefVC == null ) {
|
||||
nonRefVC = vc;
|
||||
}
|
||||
if( nonRefVC != null && containsNonRefSymbolicAllele == true ) {
|
||||
break; // break out so that we don't run over the whole list unnecessarily
|
||||
}
|
||||
}
|
||||
for( final VariantContext vc : potentialRefVCs ) {
|
||||
if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) {
|
||||
containsNonRefSymbolicAllele = true;
|
||||
VCs.add(vc); // add the overlapping non-ref symbolic records to the VCs list in order to be filled in below
|
||||
}
|
||||
}
|
||||
|
||||
if( !containsNonRefSymbolicAllele ) {
|
||||
return VCs;
|
||||
}
|
||||
|
||||
for( final VariantContext vc : VCs ) {
|
||||
if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) { // create a new record based on the current record but instead has the symbolic allele replaced by the alternate allele for this site
|
||||
if( nonRefVC != null ) {
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getSampleNames().size());
|
||||
int depth = 0;
|
||||
for( final String sample : vc.getSampleNames() ) {
|
||||
final Genotype gt = vc.getGenotype(sample);
|
||||
final ArrayList<Allele> refAlleles = new ArrayList<>(2);
|
||||
refAlleles.add(nonRefVC.getReference());
|
||||
refAlleles.add(nonRefVC.getReference());
|
||||
final int[] pl = ( nonRefVC.isBiallelic() ? gt.getPL() : null ); // PLs only works for biallelic sites for now
|
||||
depth += ( gt.hasDP() ? gt.getDP() : Integer.parseInt((String)gt.getAnyAttribute("MIN_DP")) ); // DP is special-cased in CombineVariants so fill it in here
|
||||
genotypes.add(new GenotypeBuilder(gt).alleles(refAlleles).PL(pl).make());
|
||||
}
|
||||
VCsToReturn.add(new VariantContextBuilder(nonRefVC).attributes(null).attribute("DP", depth).genotypes(genotypes).make());
|
||||
}
|
||||
} else {
|
||||
VCsToReturn.add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
return VCsToReturn;
|
||||
}
|
||||
|
||||
private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
|
||||
final Iterator<Allele> it1 = alleleSet1.iterator();
|
||||
final Iterator<Allele> it2 = alleleSet2.iterator();
|
||||
|
|
@ -989,8 +1091,8 @@ public class GATKVariantContextUtils {
|
|||
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
|
||||
Allele ref = null;
|
||||
|
||||
for ( VariantContext vc : VCs ) {
|
||||
Allele myRef = vc.getReference();
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
final Allele myRef = vc.getReference();
|
||||
if ( ref == null || ref.length() < myRef.length() )
|
||||
ref = myRef;
|
||||
else if ( ref.length() == myRef.length() && ! ref.equals(myRef) )
|
||||
|
|
@ -1024,13 +1126,13 @@ public class GATKVariantContextUtils {
|
|||
// System.out.printf("myref %s%n", myRef );
|
||||
// System.out.printf("extrabases %s%n", new String(extraBases));
|
||||
|
||||
Map<Allele, Allele> map = new HashMap<Allele, Allele>();
|
||||
for ( Allele a : vc.getAlleles() ) {
|
||||
Map<Allele, Allele> map = new HashMap<>();
|
||||
for ( final Allele a : vc.getAlleles() ) {
|
||||
if ( a.isReference() )
|
||||
map.put(a, refAllele);
|
||||
else {
|
||||
Allele extended = Allele.extend(a, extraBases);
|
||||
for ( Allele b : allAlleles )
|
||||
for ( final Allele b : allAlleles )
|
||||
if ( extended.equals(b) )
|
||||
extended = b;
|
||||
// System.out.printf(" Extending %s => %s%n", a, extended);
|
||||
|
|
@ -1050,23 +1152,23 @@ public class GATKVariantContextUtils {
|
|||
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
|
||||
|
||||
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
|
||||
return new ArrayList<VariantContext>(unsortedVCs);
|
||||
return new ArrayList<>(unsortedVCs);
|
||||
else {
|
||||
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
|
||||
ArrayList<VariantContext> sorted = new ArrayList<>(unsortedVCs);
|
||||
Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));
|
||||
return sorted;
|
||||
}
|
||||
}
|
||||
|
||||
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
||||
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniquifySamples) {
|
||||
//TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE
|
||||
for ( Genotype g : oneVC.getGenotypes() ) {
|
||||
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
||||
for ( final Genotype g : oneVC.getGenotypes() ) {
|
||||
final String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniquifySamples);
|
||||
if ( ! mergedGenotypes.containsSample(name) ) {
|
||||
// only add if the name is new
|
||||
Genotype newG = g;
|
||||
|
||||
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
|
||||
if ( uniquifySamples || alleleMapping.needsRemapping() ) {
|
||||
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
|
||||
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
|
||||
}
|
||||
|
|
@ -1076,8 +1178,8 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) {
|
||||
return uniqify ? sampleName + "." + trackName : sampleName;
|
||||
public static String mergedSampleName(String trackName, String sampleName, boolean uniquify ) {
|
||||
return uniquify ? sampleName + "." + trackName : sampleName;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1104,8 +1206,8 @@ public class GATKVariantContextUtils {
|
|||
* Trim the alleles in inputVC forward and reverse, as requested
|
||||
*
|
||||
* @param inputVC a non-null input VC whose alleles might need a haircut
|
||||
* @param trimForward should we trim up the alleles from the foward direction?
|
||||
* @param trimReverse shold we trim up the alleles from the reverse direction?
|
||||
* @param trimForward should we trim up the alleles from the forward direction?
|
||||
* @param trimReverse should we trim up the alleles from the reverse direction?
|
||||
* @return a non-null VariantContext (may be == to inputVC) with trimmed up alleles
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
|
|
@ -1140,8 +1242,8 @@ public class GATKVariantContextUtils {
|
|||
if( fwdTrimEnd == -1 && revTrim == 0 ) // nothing to do, so just return inputVC unmodified
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new LinkedList<Allele>();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
final List<Allele> alleles = new LinkedList<>();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
|
|
@ -1300,7 +1402,7 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
|
||||
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
|
||||
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
|
||||
Map<String, Object> attributes = new HashMap<>(keysToPreserve.size());
|
||||
for ( final String key : keysToPreserve ) {
|
||||
if ( igc.hasAttribute(key) )
|
||||
attributes.put(key, igc.getAttribute(key));
|
||||
|
|
@ -1343,7 +1445,7 @@ public class GATKVariantContextUtils {
|
|||
if (!vc1.getReference().equals(vc2.getReference()))
|
||||
return false;
|
||||
|
||||
for (Allele a :vc1.getAlternateAlleles()) {
|
||||
for (final Allele a :vc1.getAlternateAlleles()) {
|
||||
if (!vc2.getAlternateAlleles().contains(a))
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1351,17 +1453,24 @@ public class GATKVariantContextUtils {
|
|||
return true;
|
||||
}
|
||||
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType( final Collection<VariantContext> VCs ) {
|
||||
if( VCs == null ) { throw new IllegalArgumentException("VCs cannot be null."); }
|
||||
|
||||
final HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<>();
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
VariantContext.Type vcType = vc.getType();
|
||||
if( vc.hasAllele(NON_REF_SYMBOLIC_ALLELE) ) {
|
||||
if( vc.getAlternateAlleles().size() > 1 ) { throw new IllegalStateException("Reference records should not have more than one alternate allele"); }
|
||||
vcType = VariantContext.Type.NO_VARIATION;
|
||||
}
|
||||
|
||||
// look at previous variant contexts of different type. If:
|
||||
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
|
||||
// c) neither: do nothing, just add vc to its own list
|
||||
boolean addtoOwnList = true;
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (type.equals(vc.getType()))
|
||||
for (final VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (type.equals(vcType))
|
||||
continue;
|
||||
|
||||
if (!mappedVCs.containsKey(type))
|
||||
|
|
@ -1376,9 +1485,9 @@ public class GATKVariantContextUtils {
|
|||
// avoid having empty lists
|
||||
if (vcList.size() == 0)
|
||||
mappedVCs.remove(type);
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(otherVC);
|
||||
if ( !mappedVCs.containsKey(vcType) )
|
||||
mappedVCs.put(vcType, new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vcType).add(otherVC);
|
||||
break;
|
||||
}
|
||||
else if (allelesAreSubset(vc,otherVC)) {
|
||||
|
|
@ -1390,9 +1499,9 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
}
|
||||
if (addtoOwnList) {
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
if ( !mappedVCs.containsKey(vcType) )
|
||||
mappedVCs.put(vcType, new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vcType).add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1403,10 +1512,10 @@ public class GATKVariantContextUtils {
|
|||
if ( allowedAttributes == null )
|
||||
return vc;
|
||||
|
||||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
final GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
final Map<String, Object> attrs = new HashMap<>();
|
||||
for ( final Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
|
|
@ -1427,8 +1536,8 @@ public class GATKVariantContextUtils {
|
|||
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
|
||||
|
||||
public List<Allele> remap(List<Allele> as) {
|
||||
List<Allele> newAs = new ArrayList<Allele>();
|
||||
for ( Allele a : as ) {
|
||||
List<Allele> newAs = new ArrayList<>();
|
||||
for ( final Allele a : as ) {
|
||||
//System.out.printf(" Remapping %s => %s%n", a, remap(a));
|
||||
newAs.add(remap(a));
|
||||
}
|
||||
|
|
@ -1467,7 +1576,7 @@ public class GATKVariantContextUtils {
|
|||
if ( alleleStrings == null || alleleStrings.isEmpty() )
|
||||
throw new IllegalArgumentException("alleleStrings must be non-empty, non-null list");
|
||||
|
||||
final List<Allele> alleles = new LinkedList<Allele>();
|
||||
final List<Allele> alleles = new LinkedList<>();
|
||||
final int length = alleleStrings.get(0).length();
|
||||
|
||||
boolean first = true;
|
||||
|
|
@ -1503,7 +1612,7 @@ public class GATKVariantContextUtils {
|
|||
if ( ref.length != alt.length )
|
||||
throw new IllegalStateException("ref and alt alleles for MNP have different lengths");
|
||||
|
||||
final List<VariantContext> result = new ArrayList<VariantContext>(ref.length);
|
||||
final List<VariantContext> result = new ArrayList<>(ref.length);
|
||||
|
||||
for ( int i = 0; i < ref.length; i++ ) {
|
||||
|
||||
|
|
@ -1518,7 +1627,7 @@ public class GATKVariantContextUtils {
|
|||
final VariantContextBuilder newVC = new VariantContextBuilder(vc).start(vc.getStart() + i).stop(vc.getStart() + i).alleles(Arrays.asList(newRefAllele, newAltAllele));
|
||||
|
||||
// create new genotypes with updated alleles
|
||||
final Map<Allele, Allele> alleleMap = new HashMap<Allele, Allele>();
|
||||
final Map<Allele, Allele> alleleMap = new HashMap<>();
|
||||
alleleMap.put(vc.getReference(), newRefAllele);
|
||||
alleleMap.put(vc.getAlternateAllele(0), newAltAllele);
|
||||
final GenotypesContext newGenotypes = updateGenotypesWithMappedAlleles(vc.getGenotypes(), new AlleleMapper(alleleMap));
|
||||
|
|
|
|||
|
|
@ -500,7 +500,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "MedianData")
|
||||
public Object[][] makeMedianData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
final List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{Arrays.asList(10), 10});
|
||||
|
|
@ -510,12 +510,16 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
tests.add(new Object[]{values, 1});
|
||||
}
|
||||
|
||||
for ( final List<Double> values : Utils.makePermutations(Arrays.asList(1.1,2.1,-3.1), 3, false) ) {
|
||||
tests.add(new Object[]{values, 1.1});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "MedianData")
|
||||
public void testMedian(final List<Integer> values, final int expected) {
|
||||
final int actual = MathUtils.median(values);
|
||||
public void testMedian(final List<Comparable> values, final Comparable expected) {
|
||||
final Comparable actual = MathUtils.median(values);
|
||||
Assert.assertEquals(actual, expected, "Failed with " + values);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
private MergeAllelesTest(List<Allele>... arg) {
|
||||
super(MergeAllelesTest.class);
|
||||
LinkedList<List<Allele>> all = new LinkedList<List<Allele>>(Arrays.asList(arg));
|
||||
LinkedList<List<Allele>> all = new LinkedList<>(Arrays.asList(arg));
|
||||
expected = all.pollLast();
|
||||
inputs = all;
|
||||
}
|
||||
|
|
@ -185,7 +185,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
inputs, priority,
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false, false);
|
||||
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected);
|
||||
}
|
||||
|
|
@ -243,7 +243,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
inputs, null,
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false, false);
|
||||
Assert.assertEquals(merged.getID(), cfg.expected);
|
||||
}
|
||||
|
||||
|
|
@ -358,7 +358,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
public void testMergeFiltered(MergeFilteredTest cfg) {
|
||||
final List<String> priority = vcs2priority(cfg.inputs);
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false, false);
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||
|
|
@ -485,7 +485,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
public void testMergeGenotypes(MergeGenotypesTest cfg) {
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false, false);
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||
|
|
@ -526,10 +526,10 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false, false);
|
||||
|
||||
// test genotypes
|
||||
Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
|
||||
Assert.assertEquals(merged.getSampleNames(), new HashSet<>(Arrays.asList("s1.1", "s1.2")));
|
||||
}
|
||||
|
||||
// TODO: remove after testing
|
||||
|
|
@ -540,7 +540,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
//
|
||||
// final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
// Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
// VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
|
||||
// VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false, false);
|
||||
// }
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -559,7 +559,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false, false);
|
||||
|
||||
if ( annotate )
|
||||
Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION);
|
||||
|
|
@ -570,7 +570,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
|
||||
final List<String> priority = new ArrayList<String>();
|
||||
final List<String> priority = new ArrayList<>();
|
||||
|
||||
for ( final VariantContext vc : vcs ) {
|
||||
priority.add(vc.getSource());
|
||||
|
|
@ -997,7 +997,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "PrimitiveAlleleSplittingData")
|
||||
public Object[][] makePrimitiveAlleleSplittingData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// no split
|
||||
tests.add(new Object[]{"A", "C", 0, null});
|
||||
|
|
@ -1039,6 +1039,26 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG)
|
||||
public void testFillInNonRefSymbolicAlleles() {
|
||||
final int start = 10;
|
||||
final String ref = "A";
|
||||
final String alt = "C";
|
||||
final VariantContext vcAlt = GATKVariantContextUtils.makeFromAlleles("test", "20", start, Arrays.asList(ref, alt));
|
||||
final VariantContext vcRef = GATKVariantContextUtils.makeFromAlleles("test", "20", start, Arrays.asList(ref, "<"+GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME+">"));
|
||||
|
||||
List<VariantContext> VCs = Arrays.asList(vcAlt, vcRef);
|
||||
VCs = GATKVariantContextUtils.fillInNonRefSymbolicAlleles(VCs, Collections.<VariantContext>emptyList());
|
||||
|
||||
// make sure the non ref symbolic alleles have all been filled in with the appropriate alternate allele
|
||||
for( final VariantContext vc : VCs ) {
|
||||
Assert.assertTrue(vc.getAlternateAlleles().size() == 1);
|
||||
Assert.assertTrue(vc.getAlternateAllele(0).isNonReference());
|
||||
Assert.assertTrue(!vc.getReference().isSymbolic());
|
||||
Assert.assertTrue(!vc.getAlternateAllele(0).isSymbolic());
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// test allele remapping
|
||||
|
|
@ -1047,7 +1067,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "AlleleRemappingData")
|
||||
public Object[][] makeAlleleRemappingData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
final Allele originalBase1 = Allele.create((byte)'A');
|
||||
final Allele originalBase2 = Allele.create((byte)'T');
|
||||
|
|
@ -1055,7 +1075,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
for ( final byte base1 : BaseUtils.BASES ) {
|
||||
for ( final byte base2 : BaseUtils.BASES ) {
|
||||
for ( final int numGenotypes : Arrays.asList(0, 1, 2, 5) ) {
|
||||
Map<Allele, Allele> map = new HashMap<Allele, Allele>(2);
|
||||
Map<Allele, Allele> map = new HashMap<>(2);
|
||||
map.put(originalBase1, Allele.create(base1));
|
||||
map.put(originalBase2, Allele.create(base2));
|
||||
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
Set<String> samples;
|
||||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
samples = new HashSet<>(new ArrayList<>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
VariantContext sub = vc.subContextFromSamples(samples);
|
||||
sub.getNSamples();
|
||||
}
|
||||
|
|
@ -176,7 +176,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
Set<String> samples;
|
||||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
samples = new HashSet<>(new ArrayList<>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
vc.getGenotypes(samples).size();
|
||||
}
|
||||
};
|
||||
|
|
@ -221,7 +221,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
case MERGE:
|
||||
return new FunctionToBenchmark<VariantContext>() {
|
||||
public void run(final VariantContext vc) {
|
||||
List<VariantContext> toMerge = new ArrayList<VariantContext>();
|
||||
List<VariantContext> toMerge = new ArrayList<>();
|
||||
|
||||
for ( int i = 0; i < dupsToMerge; i++ ) {
|
||||
GenotypesContext gc = GenotypesContext.create(vc.getNSamples());
|
||||
|
|
@ -234,7 +234,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
GATKVariantContextUtils.simpleMerge(toMerge, null,
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNSORTED,
|
||||
true, false, "set", false, true);
|
||||
true, false, "set", false, true, false);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -363,7 +363,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
// toMerge, null,
|
||||
// org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
// org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.GenotypeMergeType.UNSORTED,
|
||||
// true, false, "set", false, true);
|
||||
// true, false, "set", false, true, false);
|
||||
// }
|
||||
// };
|
||||
//
|
||||
|
|
|
|||
Loading…
Reference in New Issue