Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
1aa856e0e3
|
|
@ -42,14 +42,12 @@ import java.util.*;
|
||||||
public class GenotypingEngine {
|
public class GenotypingEngine {
|
||||||
|
|
||||||
private final boolean DEBUG;
|
private final boolean DEBUG;
|
||||||
private final int MNP_LOOK_AHEAD;
|
|
||||||
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
||||||
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||||
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
|
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
|
||||||
|
|
||||||
public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
|
public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
|
||||||
this.DEBUG = DEBUG;
|
this.DEBUG = DEBUG;
|
||||||
this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD;
|
|
||||||
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
|
||||||
noCall.add(Allele.NO_CALL);
|
noCall.add(Allele.NO_CALL);
|
||||||
}
|
}
|
||||||
|
|
@ -120,7 +118,7 @@ public class GenotypingEngine {
|
||||||
System.out.println( "> Cigar = " + h.getCigar() );
|
System.out.println( "> Cigar = " + h.getCigar() );
|
||||||
}
|
}
|
||||||
// Walk along the alignment and turn any difference from the reference into an event
|
// Walk along the alignment and turn any difference from the reference into an event
|
||||||
h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
|
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
|
||||||
startPosKeySet.addAll(h.getEventMap().keySet());
|
startPosKeySet.addAll(h.getEventMap().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -203,7 +201,7 @@ public class GenotypingEngine {
|
||||||
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
|
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
|
||||||
for( final Haplotype h : haplotypes ) {
|
for( final Haplotype h : haplotypes ) {
|
||||||
// Walk along the alignment and turn any difference from the reference into an event
|
// Walk along the alignment and turn any difference from the reference into an event
|
||||||
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
|
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
|
||||||
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
|
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
|
||||||
if( DEBUG ) {
|
if( DEBUG ) {
|
||||||
System.out.println( h.toString() );
|
System.out.println( h.toString() );
|
||||||
|
|
@ -521,11 +519,7 @@ public class GenotypingEngine {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
|
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
|
||||||
return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
|
|
||||||
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
|
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
|
||||||
|
|
||||||
int refPos = alignmentStartHapwrtRef;
|
int refPos = alignmentStartHapwrtRef;
|
||||||
|
|
@ -543,7 +537,7 @@ public class GenotypingEngine {
|
||||||
if( BaseUtils.isRegularBase(refByte) ) {
|
if( BaseUtils.isRegularBase(refByte) ) {
|
||||||
insertionAlleles.add( Allele.create(refByte, true) );
|
insertionAlleles.add( Allele.create(refByte, true) );
|
||||||
}
|
}
|
||||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
if( (haplotype.leftBreakPoint != 0 || haplotype.rightBreakPoint != 0) && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||||
} else {
|
} else {
|
||||||
byte[] insertionBases = new byte[]{};
|
byte[] insertionBases = new byte[]{};
|
||||||
|
|
@ -590,39 +584,16 @@ public class GenotypingEngine {
|
||||||
case EQ:
|
case EQ:
|
||||||
case X:
|
case X:
|
||||||
{
|
{
|
||||||
int numSinceMismatch = -1;
|
|
||||||
int stopOfMismatch = -1;
|
|
||||||
int startOfMismatch = -1;
|
|
||||||
int refPosStartOfMismatch = -1;
|
|
||||||
for( int iii = 0; iii < elementLength; iii++ ) {
|
for( int iii = 0; iii < elementLength; iii++ ) {
|
||||||
if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) {
|
final byte refByte = ref[refPos];
|
||||||
// SNP or start of possible MNP
|
final byte altByte = alignment[alignmentPos];
|
||||||
if( stopOfMismatch == -1 ) {
|
if( refByte != altByte ) { // SNP!
|
||||||
startOfMismatch = alignmentPos;
|
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) {
|
||||||
stopOfMismatch = alignmentPos;
|
|
||||||
numSinceMismatch = 0;
|
|
||||||
refPosStartOfMismatch = refPos;
|
|
||||||
} else {
|
|
||||||
stopOfMismatch = alignmentPos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( stopOfMismatch != -1) {
|
|
||||||
numSinceMismatch++;
|
|
||||||
}
|
|
||||||
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
|
|
||||||
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
|
|
||||||
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
|
|
||||||
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
|
|
||||||
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
|
||||||
snpAlleles.add( Allele.create( refBases, true ) );
|
snpAlleles.add( Allele.create( refByte, true ) );
|
||||||
snpAlleles.add( Allele.create( mismatchBases, false ) );
|
snpAlleles.add( Allele.create( altByte, false ) );
|
||||||
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
|
vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());
|
||||||
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
|
|
||||||
}
|
}
|
||||||
numSinceMismatch = -1;
|
|
||||||
stopOfMismatch = -1;
|
|
||||||
startOfMismatch = -1;
|
|
||||||
refPosStartOfMismatch = -1;
|
|
||||||
}
|
}
|
||||||
refPos++;
|
refPos++;
|
||||||
alignmentPos++;
|
alignmentPos++;
|
||||||
|
|
|
||||||
|
|
@ -119,10 +119,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
|
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
|
||||||
protected String keepRG = null;
|
protected String keepRG = null;
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
|
|
||||||
protected int MNP_LOOK_AHEAD = 0;
|
|
||||||
|
|
||||||
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
||||||
protected int MIN_PRUNE_FACTOR = 1;
|
protected int MIN_PRUNE_FACTOR = 1;
|
||||||
|
|
||||||
|
|
@ -135,7 +131,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
|
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false)
|
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
||||||
protected int gcpHMM = 10;
|
protected int gcpHMM = 10;
|
||||||
|
|
||||||
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
||||||
|
|
@ -189,21 +185,21 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
|
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
|
||||||
|
|
||||||
// the calculation arguments
|
|
||||||
private UnifiedGenotyperEngine UG_engine = null;
|
|
||||||
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
|
|
||||||
|
|
||||||
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
|
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
|
||||||
protected boolean DEBUG;
|
protected boolean DEBUG;
|
||||||
|
|
||||||
|
// the UG engines
|
||||||
|
private UnifiedGenotyperEngine UG_engine = null;
|
||||||
|
private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
|
||||||
|
|
||||||
// the assembly engine
|
// the assembly engine
|
||||||
LocalAssemblyEngine assemblyEngine = null;
|
private LocalAssemblyEngine assemblyEngine = null;
|
||||||
|
|
||||||
// the likelihoods engine
|
// the likelihoods engine
|
||||||
LikelihoodCalculationEngine likelihoodCalculationEngine = null;
|
private LikelihoodCalculationEngine likelihoodCalculationEngine = null;
|
||||||
|
|
||||||
// the genotyping engine
|
// the genotyping engine
|
||||||
GenotypingEngine genotypingEngine = null;
|
private GenotypingEngine genotypingEngine = null;
|
||||||
|
|
||||||
// the annotation engine
|
// the annotation engine
|
||||||
private VariantAnnotatorEngine annotationEngine;
|
private VariantAnnotatorEngine annotationEngine;
|
||||||
|
|
@ -289,7 +285,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
|
|
||||||
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
||||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
|
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
|
||||||
genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
|
genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
|
|
@ -337,8 +338,8 @@ public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// order results by start position
|
// order results by start position
|
||||||
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0));
|
final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source1Haplotype), 0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1));
|
||||||
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0));
|
final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(new Haplotype(source2Haplotype), 0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2));
|
||||||
if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
|
if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
|
||||||
// TODO -- handle errors appropriately
|
// TODO -- handle errors appropriately
|
||||||
logger.debug("No source alleles; aborting at " + refContext.getLocus());
|
logger.debug("No source alleles; aborting at " + refContext.getLocus());
|
||||||
|
|
|
||||||
|
|
@ -293,7 +293,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
||||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||||
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
|
||||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||||
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
||||||
|
|
|
||||||
|
|
@ -142,7 +142,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
byte[] ref;
|
byte[] ref;
|
||||||
byte[] hap;
|
byte[] hap;
|
||||||
HashMap<Integer,Byte> expected;
|
HashMap<Integer,Byte> expected;
|
||||||
GenotypingEngine ge = new GenotypingEngine(false, 0, false);
|
|
||||||
|
|
||||||
public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
|
public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
|
||||||
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
|
super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
|
||||||
|
|
@ -153,7 +152,7 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
||||||
|
|
||||||
public HashMap<Integer,VariantContext> calcAlignment() {
|
public HashMap<Integer,VariantContext> calcAlignment() {
|
||||||
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
|
final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
|
||||||
return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0);
|
return GenotypingEngine.generateVCsFromAlignment( new Haplotype(hap), alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,10 @@ import java.util.Arrays;
|
||||||
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
final static String REF = b37KGReference;
|
final static String REF = b37KGReference;
|
||||||
final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
|
||||||
|
final String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||||
final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||||
|
final String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||||
final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||||
//final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr";
|
|
||||||
|
|
||||||
private void HCTest(String bam, String args, String md5) {
|
private void HCTest(String bam, String args, String md5) {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||||
|
|
@ -34,14 +35,35 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3";
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(CEUTRIO_BAM, "", "316a0fb9c66c0a6aa40a170d5d8c0021");
|
HCTestComplexVariants(CEUTRIO_BAM, "", "3424b398a9f47c8ac606a5c56eb7d8a7");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 2";
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
|
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||||
|
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b71cfaea9390136c584c9671b149d573");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||||
|
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||||
|
executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||||
|
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
|
|
||||||
|
|
@ -265,7 +265,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
|
@Argument(fullName="restrictAllelesTo", shortName="restrictAllelesTo", doc="Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC", required=false)
|
||||||
private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
|
private NumberAlleleRestriction alleleRestriction = NumberAlleleRestriction.ALL;
|
||||||
|
|
||||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values in the INFO field after selecting (using keys AC_Orig, AF_Orig, and AN_Orig)", required=false)
|
||||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.activeregion;
|
package org.broadinstitute.sting.utils.activeregion;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||||
|
|
@ -58,9 +59,7 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||||
return referenceReader.getSubsequenceAt( extendedLoc.getContig(),
|
return getReference( referenceReader, padding, extendedLoc );
|
||||||
Math.max(1, extendedLoc.getStart() - padding),
|
|
||||||
Math.min(referenceReader.getSequenceDictionary().getSequence(extendedLoc.getContig()).getSequenceLength(), extendedLoc.getStop() + padding) ).getBases();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||||
|
|
@ -68,11 +67,16 @@ public class ActiveRegion implements HasGenomeLocation {
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||||
return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(),
|
return getReference( referenceReader, padding, fullExtentReferenceLoc );
|
||||||
Math.max(1, fullExtentReferenceLoc.getStart() - padding),
|
|
||||||
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||||
|
final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(),
|
||||||
|
Math.max(1, genomeLoc.getStart() - padding),
|
||||||
|
Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
|
||||||
|
StringUtil.toUpperCase(reference);
|
||||||
|
return reference;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GenomeLoc getLocation() { return activeRegionLoc; }
|
public GenomeLoc getLocation() { return activeRegionLoc; }
|
||||||
|
|
|
||||||
|
|
@ -94,6 +94,7 @@ public class VariantContextBuilder {
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
this.alleles = alleles;
|
this.alleles = alleles;
|
||||||
|
this.attributes = Collections.emptyMap(); // immutable
|
||||||
toValidate.add(VariantContext.Validation.ALLELES);
|
toValidate.add(VariantContext.Validation.ALLELES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -750,6 +750,10 @@ public class VariantContextUnitTest extends BaseTest {
|
||||||
modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make();
|
modified = new VariantContextBuilder(modified).attributes(null).attribute("AC", 1).make();
|
||||||
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
||||||
|
|
||||||
|
// test the behavior when the builder's attribute object is not initialized
|
||||||
|
modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make();
|
||||||
|
|
||||||
|
// test normal attribute modification
|
||||||
modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make();
|
modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make();
|
||||||
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
Assert.assertEquals(modified.getAttribute("AC"), 1);
|
||||||
modified = new VariantContextBuilder(modified).attribute("AC", 2).make();
|
modified = new VariantContextBuilder(modified).attribute("AC", 2).make();
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ my %ref_order;
|
||||||
my $n = 0;
|
my $n = 0;
|
||||||
while ( <DICT> ) {
|
while ( <DICT> ) {
|
||||||
chomp;
|
chomp;
|
||||||
my ($contig, $rest) = split "\t";
|
my ($contig, $rest) = split '\s';
|
||||||
die("Dictionary file is probably corrupt: multiple instances of contig $contig") if ( defined $ref_order{$contig} );
|
die("Dictionary file is probably corrupt: multiple instances of contig $contig") if ( defined $ref_order{$contig} );
|
||||||
|
|
||||||
$ref_order{$contig} = $n;
|
$ref_order{$contig} = $n;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue