A VariantContext can now be isSymbolic. More importantly, multi-allelic variants are now properly handled in determining their type [using isMixed only if any of the biallelic variant types differ between the alt alleles].

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5937 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
fromer 2011-06-03 18:02:47 +00:00
parent b4c479bcb0
commit e4eb8087bc
1 changed files with 48 additions and 30 deletions

View File

@ -53,11 +53,11 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
@Output(doc = "File to which copy number counts should be written", required = true) @Output(doc = "File to which copy number counts should be written", required = true)
protected PrintStream out; protected PrintStream out;
@Argument(fullName = "alleleCountsCopyCounts", shortName = "AC_CC", doc = "File to which discovered allele copy and copy number counts should be written", required = false) @Argument(fullName = "alleleCountsCopyNumberFreqs", shortName = "AC_CNF", doc = "File to which discovered allele copy and copy number frequencies should be written", required = false)
private PrintStream alleleCountsCopyCounts = null; private PrintStream alleleCountsCopyNumberFreqs = null;
@Argument(fullName = "allowFilteredGenotypes", shortName = "allowFilteredGenotypes", doc = "Include filtered genotypes in the output/calculations", required = false) @Argument(fullName = "minFracPassGt", shortName = "minFracPassGt", doc = "Minimum fraction of callable genotypes required to report any genotypes at all", required = false)
private boolean ALLOW_FILTERED_GENOTYPES = false; private double minFracPassGt = 0.0;
private LinkedList<String> rodNames = null; private LinkedList<String> rodNames = null;
@ -100,7 +100,7 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
boolean requireStartHere = true; // only see each VariantContext once boolean requireStartHere = true; // only see each VariantContext once
boolean takeFirstOnly = false; // take as many entries as the VCF file has boolean takeFirstOnly = false; // take as many entries as the VCF file has
for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) { for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) {
if (vc.isMixed() && vc.isBiallelic()) { if (vc.isSymbolic() && vc.isBiallelic()) {
Allele altAll = vc.getAlternateAllele(0); Allele altAll = vc.getAlternateAllele(0);
if (altAll.isSymbolic() && altAll.getDisplayString().equals(CNV_TAG)) { if (altAll.isSymbolic() && altAll.getDisplayString().equals(CNV_TAG)) {
logger.debug("Found CNV at locus..."); logger.debug("Found CNV at locus...");
@ -113,7 +113,7 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
for (Map.Entry<String, Genotype> gtEntry : vc.getGenotypes().entrySet()) { for (Map.Entry<String, Genotype> gtEntry : vc.getGenotypes().entrySet()) {
Genotype gt = gtEntry.getValue(); Genotype gt = gtEntry.getValue();
Integer copyNum = gt.getAttributeAsIntegerNoException(CN_FIELD); Integer copyNum = gt.getAttributeAsIntegerNoException(CN_FIELD);
if (copyNum != null && (ALLOW_FILTERED_GENOTYPES || gt.isNotFiltered())) { if (copyNum != null && gt.isNotFiltered()) {
cnc.incrementCopyNumber(copyNum); cnc.incrementCopyNumber(copyNum);
if (copyNum == DIPLOID) if (copyNum == DIPLOID)
@ -123,6 +123,11 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
} }
} }
double calledFreq = ((double) cnc.calledCount()) / vc.getNSamples();
if (calledFreq < minFracPassGt) { // reset data as if it did not appear
cnc.resetCounts();
}
else {
if (hasDiploidGt && hasNonDiploidGt) { if (hasDiploidGt && hasNonDiploidGt) {
stats.diploidAndNonDiploidLoci++; stats.diploidAndNonDiploidLoci++;
} }
@ -132,6 +137,7 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
if (hasNonDiploidGt) if (hasNonDiploidGt)
stats.nonDiploidOnlyLoci++; stats.nonDiploidOnlyLoci++;
} }
}
int cnvEnd = vc.getEnd(); int cnvEnd = vc.getEnd();
Integer cnvLength = vc.getAttributeAsIntegerNoException(SVLEN_FIELD); Integer cnvLength = vc.getAttributeAsIntegerNoException(SVLEN_FIELD);
@ -145,12 +151,12 @@ public class CNVstatsWalker extends RodWalker<CNVstatistics, CNVstatistics> {
} }
out.println(); out.println();
if (alleleCountsCopyCounts != null) { if (alleleCountsCopyNumberFreqs != null) {
Integer ac = vc.getAttributeAsIntegerNoException(AC_FIELD); Integer ac = vc.getAttributeAsIntegerNoException(AC_FIELD);
CopyNumberCounts.DeletionDuplicationCounts counts = cnc.deletionDuplicationCounts(); CopyNumberCounts.DeletionDuplicationFreqs freqs = cnc.deletionDuplicationFreqs();
double cnvCount = counts.deletionCount + counts.duplicationCount; double cnvCount = freqs.deletionFreq + freqs.duplicationFreq;
alleleCountsCopyCounts.println(vcLoc + "\t" + ac + "\t" + counts.deletionCount + "\t" + counts.duplicationCount + "\t" + cnvCount); alleleCountsCopyNumberFreqs.println(vcLoc + "\t" + ac + "\t" + freqs.deletionFreq + "\t" + freqs.duplicationFreq + "\t" + cnvCount);
} }
} }
} }
@ -235,9 +241,11 @@ class CNVstatistics {
class CopyNumberCounts { class CopyNumberCounts {
private Map<Integer, Integer> copyNumToCountsMap; private Map<Integer, Integer> copyNumToCountsMap;
private int calledCount;
public CopyNumberCounts() { public CopyNumberCounts() {
this.copyNumToCountsMap = new TreeMap<Integer, Integer>(); this.copyNumToCountsMap = new TreeMap<Integer, Integer>();
this.resetCounts();
} }
public void incrementCopyNumber(int copyNum) { public void incrementCopyNumber(int copyNum) {
@ -246,43 +254,53 @@ class CopyNumberCounts {
count = 0; count = 0;
copyNumToCountsMap.put(copyNum, count + 1); copyNumToCountsMap.put(copyNum, count + 1);
calledCount++;
} }
public Set<Map.Entry<Integer, Integer>> entrySet() { public Set<Map.Entry<Integer, Integer>> entrySet() {
return copyNumToCountsMap.entrySet(); return copyNumToCountsMap.entrySet();
} }
class DeletionDuplicationCounts { public int calledCount() {
public double deletionCount; return calledCount;
public double duplicationCount; }
public DeletionDuplicationCounts() { public void resetCounts() {
this.deletionCount = 0; copyNumToCountsMap.clear();
this.duplicationCount = 0; calledCount = 0;
}
class DeletionDuplicationFreqs {
public double deletionFreq;
public double duplicationFreq;
public DeletionDuplicationFreqs() {
this.deletionFreq = 0;
this.duplicationFreq = 0;
} }
} }
public DeletionDuplicationCounts deletionDuplicationCounts() { public DeletionDuplicationFreqs deletionDuplicationFreqs() {
int total = 0; int total = 0;
DeletionDuplicationCounts counts = new DeletionDuplicationCounts(); DeletionDuplicationFreqs freqs = new DeletionDuplicationFreqs();
for (Map.Entry<Integer, Integer> copyNumEntry : this.entrySet()) { for (Map.Entry<Integer, Integer> copyNumEntry : this.entrySet()) {
int copyNum = copyNumEntry.getKey(); int copyNum = copyNumEntry.getKey();
int count = copyNumEntry.getValue(); int count = copyNumEntry.getValue();
if (copyNum < CNVstatsWalker.DIPLOID) { if (copyNum < CNVstatsWalker.DIPLOID) {
counts.deletionCount += count; freqs.deletionFreq += count;
} }
else if (copyNum > CNVstatsWalker.DIPLOID) { else if (copyNum > CNVstatsWalker.DIPLOID) {
counts.duplicationCount += count; freqs.duplicationFreq += count;
} }
total += count; total += count;
} }
//counts.deletionCount /= total; freqs.deletionFreq /= total;
//counts.duplicationCount /= total; freqs.duplicationFreq /= total;
return counts; return freqs;
} }
} }