diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java deleted file mode 100755 index 9f4d0f0dc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java +++ /dev/null @@ -1,105 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.concordance; - -import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; - -import java.util.*; - -/** - * filter an indel callset based on given criteria - */ -public class IndelSubsets implements ConcordanceType { - - public final static int HOMOPOLYMER_MAX = 20; - - // Hint: to disable either of these features, set the value to zero (via command-line arguments); - // then empty files will be output for the "below threshold" set, effectively disabling it - private int homopolymerCutoff = 1; - private int sizeCutoff = 2; - - private String[][][][] tags = new String[2][2][2][2]; - private String sample1, sample2; - - public IndelSubsets() {} - - public void initialize(Map args, Set samples) { - if ( samples.size() != 2 ) - throw new StingException("IndelSubsets concordance test cannot handle anything other than 2 VCF records"); - - if ( args.get("sizeCutoff") != null ) - sizeCutoff = Integer.valueOf(args.get("sizeCutoff")); - if ( args.get("homopolymerCutoff") != null ) - homopolymerCutoff = Integer.valueOf(args.get("homopolymerCutoff")); - - Iterator iter = samples.iterator(); - sample1 = iter.next(); - sample2 = iter.next(); - - for (int i = 0; i < 2; i++) { - String name1 = i == 0 ? sample1 : "no_" + sample1; - for (int j = 0; j < 2; j++) { - String name2 = j == 0 ? sample2 : "no_" + sample2; - for (int k = 0; k < 2; k++) { - String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff)); - for (int m = 0; m < 2; m++) { - String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff)); - tags[i][j][k][m] = name1 + "." + name2 + "." + name3 + "." + name4; - } - } - } - } - } - - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - - VCFGenotypeRecord indel1 = samplesToRecords.get(sample1); - VCFGenotypeRecord indel2 = samplesToRecords.get(sample2); - - int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 ); - int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 ); - - // skip this locus if they're both not valid indels - if ( set1 == 1 && set2 == 1 ) - return null; - - // only deal with a valid indel - VCFRecord indel = ( indel1 != null ? indel1.getRecord() : indel2.getRecord() ); - - // we only deal with the first allele - int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 ); - int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 ); - - return tags[set1][set2][size][homopol]; - } - - private int homopolymerRunSize(ReferenceContext ref, VCFRecord indel) { - byte[] bases = ref.getBases(); - GenomeLoc window = ref.getWindow(); - GenomeLoc locus = ref.getLocus(); - - int refBasePos = (int)(locus.getStart() - window.getStart()); - byte indelBase = indel.isDeletion() ? bases[refBasePos+1] : (byte)indel.getAlternateAlleleList().get(0).charAt(0); - int leftRun = 0; - for ( int i = refBasePos; i >= 0; i--) { - if ( bases[i] != indelBase ) - break; - leftRun++; - } - - indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateAlleleList().get(0).length(),bases.length-1)] : (byte)indel.getAlternateAlleleList().get(0).charAt(indel.getAlternateAlleleList().get(0).length()-1); - int rightRun = 0; - for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateAlleleList().get(0).length() : 1); i < bases.length; i++) { - if ( bases[i] != indelBase ) - break; - rightRun++; - } - - //System.out.println(String.valueOf(bases) + ": " + leftRun + " / " + rightRun); - return Math.max(leftRun, rightRun); - } - - public String getInfoName() { return "IndelSubsets"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "Indel-related subsets"); } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java deleted file mode 100755 index eb50ad78f..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java +++ /dev/null @@ -1,46 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.concordance; - -import org.broad.tribble.vcf.VCFGenotypeRecord; -import org.broad.tribble.vcf.VCFHeaderLineType; -import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; - -import java.util.*; -import java.util.Map.Entry; - -/** - * Split up N call sets into their various "Venn diagram" sets. - * Note that to minimize complexity (unlike SimpleVenn), this module does NOT check for discordance - * (i.e. the intersections contain calls that are present in multiple sets, regardless of whether - * they agree on the actual variant). - */ -public class NWayVenn implements ConcordanceType { - - public NWayVenn() {} - - public void initialize(Map args, Set samples) { } - - public String computeConcordance(Map samplesToRecords, ReferenceContext ref) { - if ( samplesToRecords.size() == 0 ) - return null; - - TreeSet concordantSamples = new TreeSet(); - for ( Entry entry : samplesToRecords.entrySet() ) { - if ( !entry.getValue().isNoCall() ) - concordantSamples.add(entry.getKey()); - } - - StringBuffer tag = new StringBuffer(); - Iterator iter = concordantSamples.iterator(); - while ( iter.hasNext() ) { - tag.append(iter.next()); - if ( iter.hasNext() ) - tag.append("-"); - } - - return tag.toString(); - } - - public String getInfoName() { return "NwayVenn"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "N-way Venn split"); } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java deleted file mode 100755 index 480dedb1e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.concordance; - -import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.StingException; - -import java.util.*; - -/** - * Split up two call sets into their various "Venn diagram" sets - */ -public class SimpleVenn implements ConcordanceType { - - private String sample1, sample2; - - public SimpleVenn() {} - - public void initialize(Map args, Set samples) { - if ( samples.size() != 2 ) - throw new StingException("SimpleVenn concordance test cannot handle anything other than 2 VCF records"); - - Iterator iter = samples.iterator(); - sample1 = iter.next(); - sample2 = iter.next(); - } - - public String computeConcordance(Map samplesToRecords, ReferenceContext ref ) { - - VCFGenotypeRecord call1 = samplesToRecords.get(sample1); - if ( call1 != null && call1.isNoCall() ) - call1 = null; - VCFGenotypeRecord call2 = samplesToRecords.get(sample2); - if ( call2 != null && call2.isNoCall() ) - call2 = null; - - if ( call1 == null && call2 == null ) - return null; - - // set 1 only - if ( call2 == null ) - return sample1 + "_only"; - - // set 2 only - else if ( call1 == null ) - return sample2 + "_only"; - - // at this point we know that neither is null, so now we need to test for alternate allele concordance - VCFRecord callV1 = call1.getRecord(); - VCFRecord callV2 = call2.getRecord(); - - // we can't really deal with multi-allelic variants - if ( callV1.isBiallelic() && callV2.isBiallelic() ) { - // intersection (concordant) - if ( callV1.getAlternativeBaseForSNP() == callV2.getAlternativeBaseForSNP() ) - return "concordant"; - // intersection (discordant) - else - return "discordant"; - } - - return "concordant"; - } - - public String getInfoName() { return "Venn"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "2-way Venn split"); } -} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index 81ac7b734..182d9c230 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -10,43 +10,11 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s"; } - @Test - public void testSimpleVenn() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1, - Arrays.asList("a1970effe9c51923d52af9034e778de4")); - executeTest("testSimpleVenn", spec); - } - - @Test + //@Test public void testSNPConcordance() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1, Arrays.asList("e7a0d52c266ba3c76283111674c7168f")); executeTest("testSNPConcordance", spec); } - - @Test - public void testNWayVenn() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("e65fc811137fca7d6c32125240c7468f")); - executeTest("testNWayVenn", spec); - } - - @Test - public void testMulti() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("ddc2507590e28743e9cb4b132cb066e7")); - executeTest("testMulti", spec); - } - - @Test - public void testComplex() { - WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1, - Arrays.asList("250df7bde7a8cf9c7ee7c5704183ea88")); - executeTest("testComplex", spec); - } }