Merge branch 'master' of ssh://gsa4/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-09-10 09:56:47 -04:00
commit 10c720cbba
5 changed files with 35 additions and 15 deletions

View File

@ -52,7 +52,11 @@ public class GenotypingEngine {
noCall.add(Allele.NO_CALL);
}
// This function is the streamlined approach, currently not being used
// WARN
// This function is the streamlined approach, currently not being used by default
// WARN
// WARN: This function is currently only being used by Menachem. Slated for removal/merging with the rest of the code.
// WARN
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
@ -184,6 +188,7 @@ public class GenotypingEngine {
return returnCalls;
}
// BUGBUG: Create a class to hold this complicated return type
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
@ -210,13 +215,8 @@ public class GenotypingEngine {
System.out.println( ">> Events = " + h.getEventMap());
}
}
// Create the VC merge priority list
final ArrayList<String> priorityList = new ArrayList<String>();
for( int iii = 0; iii < haplotypes.size(); iii++ ) {
priorityList.add("HC" + iii);
}
cleanUpSymbolicUnassembledEvents( haplotypes, priorityList );
cleanUpSymbolicUnassembledEvents( haplotypes );
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
}
@ -229,13 +229,16 @@ public class GenotypingEngine {
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
if( activeAllelesToGenotype.isEmpty() ) {
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
priorityList.add(vc.getSource());
}
}
} else { // we are in GGA mode!
@ -260,6 +263,22 @@ public class GenotypingEngine {
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
// Sanity check the priority list
for( final VariantContext vc : eventsAtThisLoc ) {
if( !priorityList.contains(vc.getSource()) ) {
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
}
}
for( final String name : priorityList ) {
boolean found = false;
for( final VariantContext vc : eventsAtThisLoc ) {
if(vc.getSource().equals(name)) { found = true; break; }
}
if( !found ) {
throw new ReviewedStingException("Event added to priority list but wasn't found on any haplotype. Something went wrong in the merging of alleles.");
}
}
// Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
@ -299,9 +318,8 @@ public class GenotypingEngine {
return returnCalls;
}
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes, final ArrayList<String> priorityList ) {
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes ) {
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
final ArrayList<String> stringsToRemove = new ArrayList<String>();
for( final Haplotype h : haplotypes ) {
for( final VariantContext vc : h.getEventMap().values() ) {
if( vc.isSymbolic() ) {
@ -309,7 +327,6 @@ public class GenotypingEngine {
for( final VariantContext vc2 : h2.getEventMap().values() ) {
if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) {
haplotypesToRemove.add(h);
stringsToRemove.add(vc.getSource());
break;
}
}
@ -318,7 +335,6 @@ public class GenotypingEngine {
}
}
haplotypes.removeAll(haplotypesToRemove);
priorityList.removeAll(stringsToRemove);
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {

View File

@ -16,7 +16,7 @@ import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
* Note that the base quality rank sum test can not be calculated for homozygous sites.
* Note that the base quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -16,6 +16,10 @@ import java.util.*;
* Date: 6/28/12
*/
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele).
* Note that the clipping rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class ClippingRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ClippingRankSum"); }

View File

@ -17,7 +17,7 @@ import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
* Note that the mapping quality rank sum test can not be calculated for homozygous sites.
* Note that the mapping quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation {

View File

@ -20,7 +20,7 @@ import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele (if the alternate allele is only seen near the ends of reads, this is indicative of error).
* Note that the read position rank sum test can not be calculated for homozygous sites.
* Note that the read position rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {