- Calculation of parent/child pairs corrected

- Separated the reporting of single and double mendelian violations in trios
This commit is contained in:
Laurent Francioli 2011-11-02 18:35:31 +01:00
parent 119ca7d742
commit 19ad5b635a
1 changed files with 120 additions and 57 deletions

View File

@ -107,6 +107,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
private final Byte NUM_TRIO_GENOTYPES_PHASED = 2; private final Byte NUM_TRIO_GENOTYPES_PHASED = 2;
private final Byte NUM_TRIO_HET_HET_HET = 3; private final Byte NUM_TRIO_HET_HET_HET = 3;
private final Byte NUM_TRIO_VIOLATIONS = 4; private final Byte NUM_TRIO_VIOLATIONS = 4;
private final Byte NUM_TRIO_DOUBLE_VIOLATIONS = 10;
private final Byte NUM_PAIR_GENOTYPES_CALLED = 5; private final Byte NUM_PAIR_GENOTYPES_CALLED = 5;
private final Byte NUM_PAIR_GENOTYPES_NOCALL = 6; private final Byte NUM_PAIR_GENOTYPES_NOCALL = 6;
private final Byte NUM_PAIR_GENOTYPES_PHASED = 7; private final Byte NUM_PAIR_GENOTYPES_PHASED = 7;
@ -507,17 +508,14 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
return count; return count;
} }
//Get a Map of genotype likelihoods. If the genotype is NO_CALL or UNAVAILABLE, the Map will contain a single //Get a Map of genotype likelihoods.
//NO_CALL resp. UNAVAILABLE element with a likelihood of 1.0 //In case of null, unavailable or no call, all likelihoods are 1/3.
private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){ private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
if(genotype == null || !genotype.isAvailable()){ if(genotype == null || !genotype.isCalled()){
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class); EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoods.put(Genotype.Type.UNAVAILABLE,1.0); likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
return likelihoods; likelihoods.put(Genotype.Type.HET,1.0/3.0);
} likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
else if(genotype.isNoCall()){
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoods.put(Genotype.Type.NO_CALL,1.0);
return likelihoods; return likelihoods;
} }
return genotype.getLikelihoods().getAsMap(true); return genotype.getLikelihoods().getAsMap(true);
@ -541,67 +539,123 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
* @param finalGenotypes: An ArrayList<Genotype> that will be added the genotypes phased by transmission in the following order: Mother, Father, Child * @param finalGenotypes: An ArrayList<Genotype> that will be added the genotypes phased by transmission in the following order: Mother, Father, Child
* @return * @return
*/ */
private boolean phaseTrioGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child,ArrayList<Genotype> finalGenotypes) { private int phaseTrioGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child,ArrayList<Genotype> finalGenotypes) {
//Get the PL //Check whether it is a pair or trio
Map<Genotype.Type,Double> motherLikelihoods = getLikelihoodsAsMapSafeNull(mother); //Always assign the first parent as the parent having genotype information in pairs
Map<Genotype.Type,Double> fatherLikelihoods = getLikelihoodsAsMapSafeNull(father); //Always assign the mother as the first parent in trios
int parentsCalled = 0;
Map<Genotype.Type,Double> firstParentLikelihoods;
Map<Genotype.Type,Double> secondParentLikelihoods;
Genotype.Type pairSecondParentGenotype = null;
if(mother == null || !mother.isCalled()){
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
if(father != null && father.isCalled())
parentsCalled = 1;
}
else{
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
if(father == null || !father.isCalled()){
parentsCalled = 1;
pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
}else{
parentsCalled = 2;
}
}
Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child); Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
//Prior vars //Prior vars
double bestConfigurationLikelihood = 0.0; double bestConfigurationLikelihood = 0.0;
double norm = 0.0; double norm = 0.0;
int configuration_index =0; int configuration_index =0;
ArrayList<Boolean> isMV = new ArrayList<Boolean>(); ArrayList<Integer> bestMVCount = new ArrayList<Integer>();
isMV.add(false); bestMVCount.add(0);
ArrayList<Genotype.Type> bestMotherGenotype = new ArrayList<Genotype.Type>();
bestMotherGenotype.add(getTypeSafeNull(mother)); ArrayList<Genotype.Type> bestFirstParentGenotype = new ArrayList<Genotype.Type>();
ArrayList<Genotype.Type> bestFatherGenotype = new ArrayList<Genotype.Type>(); ArrayList<Genotype.Type> bestSecondParentGenotype = new ArrayList<Genotype.Type>();
bestFatherGenotype.add(getTypeSafeNull(father));
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>(); ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
bestFirstParentGenotype.add(getTypeSafeNull(mother));
bestSecondParentGenotype.add(getTypeSafeNull(father));
bestChildGenotype.add(getTypeSafeNull(child)); bestChildGenotype.add(getTypeSafeNull(child));
//Get the most likely combination //Get the most likely combination
//Only check for most likely combination if at least a parent and the child have genotypes //Only check for most likely combination if at least a parent and the child have genotypes
if(childLikelihoods.size()>2 && (motherLikelihoods.size() + fatherLikelihoods.size())>3){ if(child.isCalled() && parentsCalled > 0){
int mvCount; int mvCount;
double configurationLikelihood; int cumulativeMVCount = 0;
for(Map.Entry<Genotype.Type,Double> motherGenotype : motherLikelihoods.entrySet()){ double configurationLikelihood = 0;
for(Map.Entry<Genotype.Type,Double> fatherGenotype : fatherLikelihoods.entrySet()){
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){ for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey()); for(Map.Entry<Genotype.Type,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*motherGenotype.getValue()*fatherGenotype.getValue()*childGenotype.getValue(); for(Map.Entry<Genotype.Type,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
if(parentsCalled<2){
cumulativeMVCount += mvCount;
configurationLikelihood += mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
}
//Evaluate configurations of trios
else{
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-11*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
norm += configurationLikelihood; norm += configurationLikelihood;
//Keep this combination if //Keep this combination if
//It has a better likelihood //It has a better likelihood
//Or it has the same likelihood but requires less changes from original genotypes //Or it has the same likelihood but requires less changes from original genotypes
if (configurationLikelihood > bestConfigurationLikelihood){ if (configurationLikelihood > bestConfigurationLikelihood){
bestConfigurationLikelihood = configurationLikelihood; bestConfigurationLikelihood = configurationLikelihood;
isMV.clear(); bestMVCount.clear();
isMV.add(mvCount>0); bestMVCount.add(mvCount);
bestMotherGenotype.clear(); bestFirstParentGenotype.clear();
bestMotherGenotype.add(motherGenotype.getKey()); bestFirstParentGenotype.add(firstParentGenotype.getKey());
bestFatherGenotype.clear(); bestSecondParentGenotype.clear();
bestFatherGenotype.add(fatherGenotype.getKey()); bestSecondParentGenotype.add(secondParentGenotype.getKey());
bestChildGenotype.clear(); bestChildGenotype.clear();
bestChildGenotype.add(childGenotype.getKey()); bestChildGenotype.add(childGenotype.getKey());
} }
else if(configurationLikelihood == bestConfigurationLikelihood) { else if(configurationLikelihood == bestConfigurationLikelihood) {
bestMotherGenotype.add(motherGenotype.getKey()); bestFirstParentGenotype.add(firstParentGenotype.getKey());
bestFatherGenotype.add(fatherGenotype.getKey()); bestSecondParentGenotype.add(secondParentGenotype.getKey());
bestChildGenotype.add(childGenotype.getKey()); bestChildGenotype.add(childGenotype.getKey());
isMV.add(mvCount>0); bestMVCount.add(mvCount);
} }
} }
} }
//Evaluate configurations of parent/child pairs
if(parentsCalled<2){
norm += configurationLikelihood;
//Keep this combination if
//It has a better likelihood
//Or it has the same likelihood but requires less changes from original genotypes
if (configurationLikelihood > bestConfigurationLikelihood){
bestConfigurationLikelihood = configurationLikelihood;
bestMVCount.clear();
bestMVCount.add(cumulativeMVCount/3);
bestChildGenotype.clear();
bestFirstParentGenotype.clear();
bestSecondParentGenotype.clear();
bestChildGenotype.add(childGenotype.getKey());
bestFirstParentGenotype.add(firstParentGenotype.getKey());
bestSecondParentGenotype.add(pairSecondParentGenotype);
}
else if(configurationLikelihood == bestConfigurationLikelihood) {
bestFirstParentGenotype.add(firstParentGenotype.getKey());
bestSecondParentGenotype.add(pairSecondParentGenotype);
bestChildGenotype.add(childGenotype.getKey());
bestMVCount.add(cumulativeMVCount/3);
}
configurationLikelihood = 0;
}
}
} }
//normalize the best configuration probability //normalize the best configuration probability
bestConfigurationLikelihood = bestConfigurationLikelihood / norm; bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
//In case of multiple equally likely combinations, take a random one //In case of multiple equally likely combinations, take a random one
if(bestMotherGenotype.size()>1){ if(bestFirstParentGenotype.size()>1){
configuration_index = rand.nextInt(bestMotherGenotype.size()-1); configuration_index = rand.nextInt(bestFirstParentGenotype.size()-1);
} }
} }
@ -609,16 +663,20 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
bestConfigurationLikelihood = NO_TRANSMISSION_PROB; bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
} }
TrioPhase phasedTrioGenotypes = transmissionMatrix.get(bestMotherGenotype.get(configuration_index)).get(bestFatherGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index)); TrioPhase phasedTrioGenotypes;
if(parentsCalled < 2 && mother == null || !mother.isCalled())
phasedTrioGenotypes = transmissionMatrix.get(bestSecondParentGenotype.get(configuration_index)).get(bestFirstParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
else
phasedTrioGenotypes = transmissionMatrix.get(bestFirstParentGenotype.get(configuration_index)).get(bestSecondParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
//Return the phased genotypes //Return the phased genotypes
phasedTrioGenotypes.getPhasedGenotypes(ref,alt,mother,father,child,bestConfigurationLikelihood,finalGenotypes); phasedTrioGenotypes.getPhasedGenotypes(ref,alt,mother,father,child,bestConfigurationLikelihood,finalGenotypes);
return isMV.get(configuration_index); return bestMVCount.get(configuration_index);
} }
private void updatePairMetricsCounters(Genotype parent, Genotype child, boolean isMV, HashMap<Byte,Integer> counters){ private void updatePairMetricsCounters(Genotype parent, Genotype child, int mvCount, HashMap<Byte,Integer> counters){
//Increment metrics counters //Increment metrics counters
if(parent.isCalled() && child.isCalled()){ if(parent.isCalled() && child.isCalled()){
@ -626,11 +684,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
if(parent.isPhased()) if(parent.isPhased())
counters.put(NUM_PAIR_GENOTYPES_PHASED,counters.get(NUM_PAIR_GENOTYPES_PHASED)+1); counters.put(NUM_PAIR_GENOTYPES_PHASED,counters.get(NUM_PAIR_GENOTYPES_PHASED)+1);
else{ else{
counters.put(NUM_PAIR_VIOLATIONS,counters.get(NUM_PAIR_VIOLATIONS)+mvCount);
if(parent.isHet() && child.isHet()) if(parent.isHet() && child.isHet())
counters.put(NUM_PAIR_HET_HET,counters.get(NUM_PAIR_HET_HET)+1); counters.put(NUM_PAIR_HET_HET,counters.get(NUM_PAIR_HET_HET)+1);
else if(isMV)
counters.put(NUM_PAIR_VIOLATIONS,counters.get(NUM_PAIR_VIOLATIONS)+1);
} }
}else{ }else{
counters.put(NUM_PAIR_GENOTYPES_NOCALL,counters.get(NUM_PAIR_GENOTYPES_NOCALL)+1); counters.put(NUM_PAIR_GENOTYPES_NOCALL,counters.get(NUM_PAIR_GENOTYPES_NOCALL)+1);
@ -638,7 +694,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
} }
private void updateTrioMetricsCounters(Genotype mother, Genotype father, Genotype child, boolean isMV, HashMap<Byte,Integer> counters){ private void updateTrioMetricsCounters(Genotype mother, Genotype father, Genotype child, int mvCount, HashMap<Byte,Integer> counters){
//Increment metrics counters //Increment metrics counters
if(mother.isCalled() && father.isCalled() && child.isCalled()){ if(mother.isCalled() && father.isCalled() && child.isCalled()){
@ -647,11 +703,14 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
counters.put(NUM_TRIO_GENOTYPES_PHASED,counters.get(NUM_TRIO_GENOTYPES_PHASED)+1); counters.put(NUM_TRIO_GENOTYPES_PHASED,counters.get(NUM_TRIO_GENOTYPES_PHASED)+1);
else{ else{
if(mother.isHet() && father.isHet() && child.isHet()) if(mvCount > 0){
counters.put(NUM_TRIO_HET_HET_HET,counters.get(NUM_TRIO_HET_HET_HET)+1); if(mvCount >1)
counters.put(NUM_TRIO_DOUBLE_VIOLATIONS,counters.get(NUM_TRIO_DOUBLE_VIOLATIONS)+1);
else if(isMV) else
counters.put(NUM_TRIO_VIOLATIONS,counters.get(NUM_TRIO_VIOLATIONS)+1); counters.put(NUM_TRIO_VIOLATIONS,counters.get(NUM_TRIO_VIOLATIONS)+1);
}
else if(mother.isHet() && father.isHet() && child.isHet())
counters.put(NUM_TRIO_HET_HET_HET,counters.get(NUM_TRIO_HET_HET_HET)+1);
} }
}else{ }else{
@ -681,14 +740,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0); metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0);
metricsCounters.put(NUM_PAIR_HET_HET,0); metricsCounters.put(NUM_PAIR_HET_HET,0);
metricsCounters.put(NUM_PAIR_VIOLATIONS,0); metricsCounters.put(NUM_PAIR_VIOLATIONS,0);
String mvfLine = ""; metricsCounters.put(NUM_TRIO_DOUBLE_VIOLATIONS,0);
String mvfLine;
if (tracker != null) { if (tracker != null) {
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
Map<String, Genotype> genotypeMap = vc.getGenotypes(); Map<String, Genotype> genotypeMap = vc.getGenotypes();
boolean isMV; int mvCount;
for (Sample sample : trios) { for (Sample sample : trios) {
Genotype mother = vc.getGenotype(sample.getMaternalID()); Genotype mother = vc.getGenotype(sample.getMaternalID());
@ -700,7 +760,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
continue; continue;
ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3); ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
isMV = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes); mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes);
Genotype phasedMother = trioGenotypes.get(0); Genotype phasedMother = trioGenotypes.get(0);
Genotype phasedFather = trioGenotypes.get(1); Genotype phasedFather = trioGenotypes.get(1);
@ -712,23 +772,23 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
genotypeMap.put(phasedMother.getSampleName(), phasedMother); genotypeMap.put(phasedMother.getSampleName(), phasedMother);
if(father != null){ if(father != null){
genotypeMap.put(phasedFather.getSampleName(), phasedFather); genotypeMap.put(phasedFather.getSampleName(), phasedFather);
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,isMV,metricsCounters); updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString()); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
} }
else{ else{
updatePairMetricsCounters(phasedMother,phasedChild,isMV,metricsCounters); updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString()); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
} }
} }
else{ else{
genotypeMap.put(phasedFather.getSampleName(),phasedFather); genotypeMap.put(phasedFather.getSampleName(),phasedFather);
updatePairMetricsCounters(phasedFather,phasedChild,isMV,metricsCounters); updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString()); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
} }
//Report violation if set so //Report violation if set so
//TODO: ADAPT FOR PAIRS TOO!! //TODO: ADAPT FOR PAIRS TOO!!
if(isMV && mvFile != null) if(mvCount>0 && mvFile != null)
mvFile.println(mvfLine); mvFile.println(mvfLine);
} }
@ -759,6 +819,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0); metricsCounters.put(NUM_PAIR_GENOTYPES_PHASED,0);
metricsCounters.put(NUM_PAIR_HET_HET,0); metricsCounters.put(NUM_PAIR_HET_HET,0);
metricsCounters.put(NUM_PAIR_VIOLATIONS,0); metricsCounters.put(NUM_PAIR_VIOLATIONS,0);
metricsCounters.put(NUM_TRIO_DOUBLE_VIOLATIONS,0);
return metricsCounters; return metricsCounters;
} }
@ -781,6 +842,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
sum.put(NUM_PAIR_GENOTYPES_PHASED,value.get(NUM_PAIR_GENOTYPES_PHASED)+sum.get(NUM_PAIR_GENOTYPES_PHASED)); sum.put(NUM_PAIR_GENOTYPES_PHASED,value.get(NUM_PAIR_GENOTYPES_PHASED)+sum.get(NUM_PAIR_GENOTYPES_PHASED));
sum.put(NUM_PAIR_HET_HET,value.get(NUM_PAIR_HET_HET)+sum.get(NUM_PAIR_HET_HET)); sum.put(NUM_PAIR_HET_HET,value.get(NUM_PAIR_HET_HET)+sum.get(NUM_PAIR_HET_HET));
sum.put(NUM_PAIR_VIOLATIONS,value.get(NUM_PAIR_VIOLATIONS)+sum.get(NUM_PAIR_VIOLATIONS)); sum.put(NUM_PAIR_VIOLATIONS,value.get(NUM_PAIR_VIOLATIONS)+sum.get(NUM_PAIR_VIOLATIONS));
sum.put(NUM_TRIO_DOUBLE_VIOLATIONS,value.get(NUM_TRIO_DOUBLE_VIOLATIONS)+sum.get(NUM_TRIO_DOUBLE_VIOLATIONS));
return sum; return sum;
} }
@ -795,7 +857,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
logger.info("Number of trio-genotypes containing no call(s): " + result.get(NUM_TRIO_GENOTYPES_NOCALL)); logger.info("Number of trio-genotypes containing no call(s): " + result.get(NUM_TRIO_GENOTYPES_NOCALL));
logger.info("Number of trio-genotypes phased: " + result.get(NUM_TRIO_GENOTYPES_PHASED)); logger.info("Number of trio-genotypes phased: " + result.get(NUM_TRIO_GENOTYPES_PHASED));
logger.info("Number of resulting Het/Het/Het trios: " + result.get(NUM_TRIO_HET_HET_HET)); logger.info("Number of resulting Het/Het/Het trios: " + result.get(NUM_TRIO_HET_HET_HET));
logger.info("Number of remaining mendelian violations in trios: " + result.get(NUM_TRIO_VIOLATIONS)); logger.info("Number of remaining single mendelian violations in trios: " + result.get(NUM_TRIO_VIOLATIONS));
logger.info("Number of remaining double mendelian violations in trios: " + result.get(NUM_TRIO_DOUBLE_VIOLATIONS));
logger.info("Number of complete pair-genotypes: " + result.get(NUM_PAIR_GENOTYPES_CALLED)); logger.info("Number of complete pair-genotypes: " + result.get(NUM_PAIR_GENOTYPES_CALLED));
logger.info("Number of pair-genotypes containing no call(s): " + result.get(NUM_PAIR_GENOTYPES_NOCALL)); logger.info("Number of pair-genotypes containing no call(s): " + result.get(NUM_PAIR_GENOTYPES_NOCALL));
logger.info("Number of pair-genotypes phased: " + result.get(NUM_PAIR_GENOTYPES_PHASED)); logger.info("Number of pair-genotypes phased: " + result.get(NUM_PAIR_GENOTYPES_PHASED));