From 66f5fe075bd9cd32e9f5e9d3fdee2b2ce7f7f7dc Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Fri, 14 Jul 2017 12:10:10 -0400 Subject: [PATCH] Changed ASRankSum single-sample output so it's combine-able single sample gVCF now gets a "histogram" with one entry --- .../walkers/annotator/AS_RankSumTest.java | 74 ++++++------------- .../HaplotypeCallerGVCFIntegrationTest.java | 10 +-- .../CombineGVCFsIntegrationTest.java | 6 +- .../GenotypeGVCFsIntegrationTest.java | 12 +-- .../tools/walkers/annotator/Histogram.java | 12 ++- 5 files changed, 48 insertions(+), 66 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java index 5181baea8..7fbfb4efa 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java @@ -113,47 +113,13 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn final Map annotations = new HashMap<>(); final AlleleSpecificAnnotationData> myRawData = initializeNewRawAnnotationData(vc.getAlleles()); calculateRawData(vc, perReadAlleleLikelihoodMap, myRawData); - Map> myRankSumStats = calculateRankSum(myRawData.getAttributeMap(), myRawData.getRefAllele()); + Map myRankSumStats = calculateRankSum(myRawData.getAttributeMap(), myRawData.getRefAllele()); final String annotationString = makeRawAnnotationString(vc.getAlleles(), myRankSumStats); annotations.put(getRawKeyName(), annotationString); return annotations; } protected void parseRawDataString(final ReducibleAnnotationData myData) { - final String rawDataString = myData.getRawData(); - String rawDataNoBrackets; - final Map perAlleleValues = new HashMap<>(); - //Initialize maps - for (final Allele current : myData.getAlleles()) { - perAlleleValues.put(current, new Histogram()); - } - //Map gives back list with [] - if (rawDataString.charAt(0) == '[') { - rawDataNoBrackets = rawDataString.substring(1, rawDataString.length() - 1); - } - else { - rawDataNoBrackets = rawDataString; - } - //rawDataPerAllele is a per-sample list of the rank sum statistic for each allele - final String[] rawDataPerAllele = rawDataNoBrackets.split(splitDelim); - for (int i=0; i myData) { final String rawDataString = myData.getRawData(); String rawDataNoBrackets; final Map perAlleleValues = new HashMap<>(); @@ -179,10 +145,11 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn int count; if (!rawListEntriesAsStringVector[j].isEmpty()) { value = Double.parseDouble(rawListEntriesAsStringVector[j].trim()); + if(value.isNaN()) + continue; if (!rawListEntriesAsStringVector[j + 1].isEmpty()) { count = Integer.parseInt(rawListEntriesAsStringVector[j + 1].trim()); - if(!value.isNaN()) - alleleList.add(value,count); + alleleList.add(value,count); } } } @@ -239,18 +206,18 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn } } - protected String makeRawAnnotationString(final List vcAlleles, final Map> perAlleleValues) { + protected String makeRawAnnotationString(final List vcAlleles, final Map perAlleleValues) { String annotationString = ""; for (int i = 0; i< vcAlleles.size(); i++) { if (vcAlleles.get(i).isReference()) continue; - if (i != 0) + if (i != 0) //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations annotationString += printDelim; - final List alleleValue = perAlleleValues.get(vcAlleles.get(i)); + final Double alleleValue = perAlleleValues.get(vcAlleles.get(i)); //can be null if there are no ref reads if (alleleValue == null) continue; - annotationString += formatListAsString(alleleValue); + annotationString += outputSingletonValueAsHistogram(alleleValue); } return annotationString; } @@ -260,11 +227,11 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn for (int i = 0; i< vcAlleles.size(); i++) { if (vcAlleles.get(i).isReference()) continue; - if (i != 0) + if (i != 0) //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations annotationString += printDelim; final Histogram alleleValue = perAlleleValues.get(vcAlleles.get(i)); - //can be null if there are no ref reads - if (alleleValue == null) + //can be empty if there are no ref reads + if (alleleValue.isEmpty()) continue; annotationString += alleleValue.toString(); } @@ -300,7 +267,7 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn final Map annotations = new HashMap<>(); final AlleleSpecificAnnotationData myData = new AlleleSpecificAnnotationData(originalVC.getAlleles(), rawRankSumData); - parseCombinedDataString(myData); + parseRawDataString(myData); final Map perAltRankSumResults = calculateReducedData(myData.getAttributeMap(), myData.getRefAllele()); //shortcut for no ref values @@ -356,8 +323,8 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn } } - public Map> calculateRankSum(final Map> perAlleleValues, final Allele ref) { - final Map> perAltRankSumResults = new HashMap<>(); + public Map calculateRankSum(final Map> perAlleleValues, final Allele ref) { + final Map perAltRankSumResults = new HashMap<>(); //shortcut to not try to calculate rank sum if there are no reads that unambiguously support the ref if (perAlleleValues.get(ref).isEmpty()) return perAltRankSumResults; @@ -389,7 +356,7 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn } // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) final MannWhitneyU.Result result = mannWhitneyU.test(convertToArray(alts), convertToArray(refs), MannWhitneyU.TestType.FIRST_DOMINATES); - perAltRankSumResults.put(alt, Collections.singletonList(result.getZ())); + perAltRankSumResults.put(alt, result.getZ()); } return perAltRankSumResults; } @@ -410,10 +377,17 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn for (int i=0; i