Merge pull request #29 from broadinstitute/gda_gga_hc_GSA-722
Gda gga hc gsa 722
This commit is contained in:
commit
961f2533a5
|
|
@ -79,7 +79,25 @@ public class GenotypingEngine {
|
|||
noCall.add(Allele.NO_CALL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Main entry point of class - given a particular set of haplotypes, samples and reference context, compute
|
||||
* genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling
|
||||
*
|
||||
* @param UG_engine UG Engine with basic input parameters
|
||||
* @param haplotypes Haplotypes to assign likelihoods to
|
||||
* @param samples Samples to genotype
|
||||
* @param haplotypeReadMap Map from reads->(haplotypes,likelihoods)
|
||||
* @param perSampleFilteredReadList
|
||||
* @param ref Reference bytes at active region
|
||||
* @param refLoc Corresponding active region genome location
|
||||
* @param activeRegionWindow Active window
|
||||
* @param genomeLocParser GenomeLocParser
|
||||
* @param activeAllelesToGenotype Alleles to genotype
|
||||
* @return List of VC's with genotyped events
|
||||
*/
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
@Ensures("result != null")
|
||||
// TODO - can this be refactored? this is hard to follow!
|
||||
public List<VariantContext> assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
|
|
@ -90,6 +108,25 @@ public class GenotypingEngine {
|
|||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
// sanity check input arguments
|
||||
if (UG_engine == null)
|
||||
throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||
if (haplotypes == null || haplotypes.isEmpty())
|
||||
throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||
if (samples == null || samples.isEmpty())
|
||||
throw new IllegalArgumentException("samples input must be non-empty and non-null, got "+samples);
|
||||
if (haplotypeReadMap == null || haplotypeReadMap.isEmpty())
|
||||
throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap);
|
||||
if (ref == null || ref.length == 0 )
|
||||
throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref);
|
||||
if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length)
|
||||
throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
|
||||
if (activeRegionWindow == null )
|
||||
throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
|
||||
if (activeAllelesToGenotype == null )
|
||||
throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
|
||||
if (genomeLocParser == null )
|
||||
throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
|
||||
|
||||
final List<VariantContext> returnCalls = new ArrayList<VariantContext>();
|
||||
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
||||
|
|
@ -180,7 +217,7 @@ public class GenotypingEngine {
|
|||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles.");
|
||||
}
|
||||
final Map<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
|
||||
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<VariantContext, Allele>();
|
||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
|
||||
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
||||
|
|
@ -214,6 +251,15 @@ public class GenotypingEngine {
|
|||
return returnCalls;
|
||||
}
|
||||
|
||||
/**
|
||||
* For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele
|
||||
* @param samples List of samples to genotype
|
||||
* @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods
|
||||
* @param mergedVC Input VC with event to genotype
|
||||
* @return GenotypesContext object wrapping genotype objects with PLs
|
||||
*/
|
||||
@Requires({"samples != null","alleleReadMap!= null", "mergedVC != null"})
|
||||
@Ensures("result != null")
|
||||
private GenotypesContext calculateGLsForThisEvent( final List<String> samples, final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap, final VariantContext mergedVC ) {
|
||||
final GenotypesContext genotypes = GenotypesContext.create(samples.size());
|
||||
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
||||
|
|
@ -254,7 +300,7 @@ public class GenotypingEngine {
|
|||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final VariantContext call ) {
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final GenomeLoc callLoc = parser.createGenomeLoc(call);
|
||||
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
|
|
@ -283,6 +329,12 @@ public class GenotypingEngine {
|
|||
return returnMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes symbolic events from list of haplotypes
|
||||
* @param haplotypes Input/output list of haplotypes, before/after removal
|
||||
*/
|
||||
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
|
||||
@Requires("haplotypes != null")
|
||||
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
|
||||
final List<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
|
|
@ -308,7 +360,7 @@ public class GenotypingEngine {
|
|||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog ) {
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
for( final Map.Entry<Allele, List<Haplotype>> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele
|
||||
|
|
@ -330,6 +382,15 @@ public class GenotypingEngine {
|
|||
return alleleReadMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* TODO - comment me, clean me, refactor me!
|
||||
* @param haplotypes
|
||||
* @param samples
|
||||
* @param haplotypeReadMap
|
||||
* @param startPosKeySet
|
||||
* @param ref
|
||||
* @param refLoc
|
||||
*/
|
||||
protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
|
|
@ -474,7 +535,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final Map<VariantContext, Allele> mergeMap, final Map<Event, List<Haplotype>> eventMap ) {
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>();
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new LinkedHashMap<Allele, List<Haplotype>>();
|
||||
for( final Map.Entry<VariantContext, Allele> entry : mergeMap.entrySet() ) {
|
||||
alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey())));
|
||||
}
|
||||
|
|
@ -485,7 +546,7 @@ public class GenotypingEngine {
|
|||
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
|
||||
protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
|
||||
|
||||
final Map<Event, List<Haplotype>> eventMapper = new HashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis
|
||||
eventMapper.put(new Event(null), new ArrayList<Haplotype>());
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
|
|
@ -598,7 +659,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
|
||||
final Map<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
|
||||
final Map<Integer,VariantContext> vcs = new LinkedHashMap<Integer,VariantContext>();
|
||||
|
||||
int refPos = alignmentStartHapwrtRef;
|
||||
if( refPos < 0 ) { return null; } // Protection against SW failures
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "042b76d4ba0c8f76e2e9cadd1c20d90d");
|
||||
HCTest(CEUTRIO_BAM, "", "1e49fd927d79594a993ea6c4a1d10004");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
"76d4c4a112cf60080adf74c3e116d1fb");
|
||||
}
|
||||
|
||||
@Test(enabled = false) // TODO -- https://jira.broadinstitute.org/browse/GSA-722
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"23a4bfa0300683d8cf2ec16ce96e89ad");
|
||||
|
|
@ -146,7 +146,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("866406b43d22a262b2d852e7252eb430"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("598d245498c0d0b55e263f0a061a77e3"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,15 +64,13 @@ public class PerReadAlleleLikelihoodMap {
|
|||
if ( a == null ) throw new IllegalArgumentException("Cannot add a null allele to the allele likelihood map");
|
||||
if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null");
|
||||
if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))");
|
||||
Map<Allele,Double> likelihoodMap;
|
||||
if (likelihoodReadMap.containsKey(read)){
|
||||
// seen pileup element before
|
||||
likelihoodMap = likelihoodReadMap.get(read);
|
||||
}
|
||||
else {
|
||||
likelihoodMap = new HashMap<Allele, Double>();
|
||||
likelihoodReadMap.put(read,likelihoodMap);
|
||||
Map<Allele,Double> likelihoodMap = likelihoodReadMap.get(read);
|
||||
if (likelihoodMap == null){
|
||||
// LinkedHashMap will ensure iterating through alleles will be in consistent order
|
||||
likelihoodMap = new LinkedHashMap<Allele, Double>();
|
||||
}
|
||||
likelihoodReadMap.put(read,likelihoodMap);
|
||||
|
||||
likelihoodMap.put(a,likelihood);
|
||||
|
||||
if (!alleles.contains(a))
|
||||
|
|
@ -221,4 +219,27 @@ public class PerReadAlleleLikelihoodMap {
|
|||
}
|
||||
return (maxLike - prevMaxLike > INFORMATIVE_LIKELIHOOD_THRESHOLD ? mostLikelyAllele : Allele.NO_CALL );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Debug method to dump contents of object into string for display
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
sb.append("Alelles in map:");
|
||||
for (Allele a:alleles) {
|
||||
sb.append(a.getDisplayString()+",");
|
||||
|
||||
}
|
||||
sb.append("\n");
|
||||
for (Map.Entry <GATKSAMRecord, Map<Allele, Double>> el : getLikelihoodReadMap().entrySet() ) {
|
||||
for (Map.Entry<Allele,Double> eli : el.getValue().entrySet()) {
|
||||
sb.append("Read "+el.getKey().getReadName()+". Allele:"+eli.getKey().getDisplayString()+" has likelihood="+Double.toString(eli.getValue())+"\n");
|
||||
}
|
||||
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue