git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3775 348d0f76-0448-11de-a6fe-93d51630548a

2010-07-13 00:56:12 +00:00 · 2010-07-13 00:56:12 +00:00 · 1bef7dd170
parent de969f7cc7
commit 1bef7dd170
4 changed files with 1 additions and 250 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java
@ -1,105 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.concordance;
-
-import org.broad.tribble.vcf.*;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.StingException;
-
-import java.util.*;
-
-/**
- * filter an indel callset based on given criteria
- */
-public class IndelSubsets implements ConcordanceType {
-
-    public final static int HOMOPOLYMER_MAX = 20;
-
-    // Hint: to disable either of these features, set the value to zero (via command-line arguments);
-    // then empty files will be output for the "below threshold" set, effectively disabling it
-    private int homopolymerCutoff = 1;
-    private int sizeCutoff = 2;
-
-    private String[][][][] tags = new String[2][2][2][2];
-    private String sample1, sample2;
-
-    public IndelSubsets() {}
-
-    public void initialize(Map<String, String> args, Set<String> samples) {
-        if ( samples.size() != 2 )
-            throw new StingException("IndelSubsets concordance test cannot handle anything other than 2 VCF records");
-
-        if ( args.get("sizeCutoff") != null )
-            sizeCutoff = Integer.valueOf(args.get("sizeCutoff"));
-        if ( args.get("homopolymerCutoff") != null )
-            homopolymerCutoff = Integer.valueOf(args.get("homopolymerCutoff"));
-
-        Iterator<String> iter = samples.iterator();
-        sample1 = iter.next();
-        sample2 = iter.next();
-
-        for (int i = 0; i < 2; i++) {
-            String name1 = i == 0 ? sample1 : "no_" + sample1;
-            for (int j = 0; j < 2; j++) {
-                String name2 = j == 0 ? sample2 : "no_" + sample2;
-                for (int k = 0; k < 2; k++) {
-                    String name3 = "size" + (k == 0 ? Integer.toString(sizeCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(sizeCutoff));
-                    for (int m = 0; m < 2; m++) {
-                        String name4 = "homopolymer" + (m == 0 ? Integer.toString(homopolymerCutoff)+"AndUnder" : "GreaterThan"+Integer.toString(homopolymerCutoff));
-                        tags[i][j][k][m] = name1 + "." + name2 + "." + name3 + "." + name4;
-                    }
-                }
-            }
-        }
-    }
-
-    public String computeConcordance(Map<String, VCFGenotypeRecord> samplesToRecords, ReferenceContext ref) {
-
-        VCFGenotypeRecord indel1 = samplesToRecords.get(sample1);
-        VCFGenotypeRecord indel2 = samplesToRecords.get(sample2);
-
-        int set1 = ( indel1 != null && !indel1.isPointGenotype() ? 0 : 1 );
-        int set2 = ( indel2 != null && !indel2.isPointGenotype() ? 0 : 1 );
-
-        // skip this locus if they're both not valid indels
-        if ( set1 == 1 && set2 == 1 )
-            return null;
-
-        // only deal with a valid indel
-        VCFRecord indel = ( indel1 != null ? indel1.getRecord() : indel2.getRecord() );
-
-        // we only deal with the first allele
-        int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
-        int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
-
-        return tags[set1][set2][size][homopol];
-    }
-
-    private int homopolymerRunSize(ReferenceContext ref, VCFRecord indel) {
-        byte[] bases = ref.getBases();
-        GenomeLoc window = ref.getWindow();
-        GenomeLoc locus = ref.getLocus();
-
-        int refBasePos = (int)(locus.getStart() - window.getStart());
-        byte indelBase = indel.isDeletion() ? bases[refBasePos+1] : (byte)indel.getAlternateAlleleList().get(0).charAt(0);
-        int leftRun = 0;
-        for ( int i = refBasePos; i >= 0; i--) {
-            if ( bases[i] != indelBase )
-                break;
-            leftRun++;
-        }
-
-        indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateAlleleList().get(0).length(),bases.length-1)] : (byte)indel.getAlternateAlleleList().get(0).charAt(indel.getAlternateAlleleList().get(0).length()-1);
-        int rightRun = 0;
-        for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateAlleleList().get(0).length() : 1); i < bases.length; i++) {
-            if ( bases[i] != indelBase )
-                break;
-            rightRun++;
-        }
-
-        //System.out.println(String.valueOf(bases) + ": " + leftRun + " / " + rightRun);
-        return Math.max(leftRun, rightRun);
-    }
-
-    public String getInfoName() { return "IndelSubsets"; }
-    public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "Indel-related subsets"); }
-}
--- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java
@ -1,46 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.concordance;
-
-import org.broad.tribble.vcf.VCFGenotypeRecord;
-import org.broad.tribble.vcf.VCFHeaderLineType;
-import org.broad.tribble.vcf.VCFInfoHeaderLine;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-
-import java.util.*;
-import java.util.Map.Entry;
-
-/**
- * Split up N call sets into their various "Venn diagram" sets.
- * Note that to minimize complexity (unlike SimpleVenn), this module does NOT check for discordance
- * (i.e. the intersections contain calls that are present in multiple sets, regardless of whether
- * they agree on the actual variant).
- */
-public class NWayVenn implements ConcordanceType {
-
-    public NWayVenn() {}
-
-    public void initialize(Map<String, String> args, Set<String> samples) { }
-
-    public String computeConcordance(Map<String, VCFGenotypeRecord> samplesToRecords, ReferenceContext ref) {
-        if ( samplesToRecords.size() == 0 )
-            return null;
-
-        TreeSet<String> concordantSamples = new TreeSet<String>();
-        for ( Entry<String, VCFGenotypeRecord> entry : samplesToRecords.entrySet() ) {
-            if ( !entry.getValue().isNoCall() )
-                concordantSamples.add(entry.getKey());
-        }
-
-        StringBuffer tag = new StringBuffer();
-        Iterator<String> iter = concordantSamples.iterator();
-        while ( iter.hasNext() ) {
-            tag.append(iter.next());
-            if ( iter.hasNext() )
-                tag.append("-");
-        }
-
-        return tag.toString();
-    }
-
-    public String getInfoName() { return "NwayVenn"; }    
-    public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "N-way Venn split"); }
-}
--- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java
@ -1,66 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.concordance;
-
-import org.broad.tribble.vcf.*;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.utils.StingException;
-
-import java.util.*;
-
-/**
- * Split up two call sets into their various "Venn diagram" sets
- */
-public class SimpleVenn implements ConcordanceType {
-
-    private String sample1, sample2;
-
-    public SimpleVenn() {}
-
-    public void initialize(Map<String, String> args, Set<String> samples) {
-        if ( samples.size() != 2 )
-            throw new StingException("SimpleVenn concordance test cannot handle anything other than 2 VCF records");        
-
-        Iterator<String> iter = samples.iterator();
-        sample1 = iter.next();
-        sample2 = iter.next();
-    }
-
-    public String computeConcordance(Map<String, VCFGenotypeRecord> samplesToRecords, ReferenceContext ref ) {
-
-        VCFGenotypeRecord call1 = samplesToRecords.get(sample1);
-        if ( call1 != null && call1.isNoCall() )
-            call1 = null;
-        VCFGenotypeRecord call2 = samplesToRecords.get(sample2);
-        if ( call2 != null && call2.isNoCall() )
-            call2 = null;
-
-        if ( call1 == null && call2 == null )
-            return null;
-
-        // set 1 only
-        if ( call2 == null )
-            return sample1 + "_only";
-
-        // set 2 only
-        else if ( call1 == null )
-            return sample2 + "_only";
-
-        // at this point we know that neither is null, so now we need to test for alternate allele concordance
-        VCFRecord callV1 = call1.getRecord();
-        VCFRecord callV2 = call2.getRecord();
-
-        // we can't really deal with multi-allelic variants
-        if ( callV1.isBiallelic() && callV2.isBiallelic() ) {
-            // intersection (concordant)
-            if ( callV1.getAlternativeBaseForSNP() == callV2.getAlternativeBaseForSNP() )
-                return "concordant";
-            // intersection (discordant)
-            else
-                return "discordant";
-        }
-
-        return "concordant";
-    }
-
-    public String getInfoName() { return "Venn"; }
-    public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFHeaderLineType.String, "2-way Venn split"); }
-}
--- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java
@ -10,43 +10,11 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
        return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s";
    }

-    @Test
-    public void testSimpleVenn() {
-        WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
-                Arrays.asList("a1970effe9c51923d52af9034e778de4"));
-        executeTest("testSimpleVenn", spec);
-    }
-
-    @Test
+    //@Test
    public void testSNPConcordance() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
                Arrays.asList("e7a0d52c266ba3c76283111674c7168f"));
        executeTest("testSNPConcordance", spec);
    }
-
-    @Test
-    public void testNWayVenn() {
-        WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
-                Arrays.asList("e65fc811137fca7d6c32125240c7468f"));
-        executeTest("testNWayVenn", spec);
-    }
-
-    @Test
-    public void testMulti() {
-        WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
-                Arrays.asList("ddc2507590e28743e9cb4b132cb066e7"));
-        executeTest("testMulti", spec);
-    }
-
-    @Test
-    public void testComplex() {
-        WalkerTestSpec spec = new WalkerTestSpec(
-                baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1,
-                Arrays.asList("250df7bde7a8cf9c7ee7c5704183ea88"));
-        executeTest("testComplex", spec);
-    }
 }