AnalyzeAnnotations can now process only variants that are found in samples that match the -sampleName argument. The x-axis of plots no longer uses annoying scientific notation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2684 348d0f76-0448-11de-a6fe-93d51630548a
2010-01-25 20:52:11 +00:00 · 2010-01-25 20:52:11 +00:00 · 24d4082925
parent 022601b1a5
commit 24d4082925
3 changed files with 16 additions and 5 deletions
--- a/R/plot_Annotations_BinnedTiTv.R
+++ b/R/plot_Annotations_BinnedTiTv.R
@ -28,7 +28,8 @@ xmax = max(d$value)
 outfile = paste(outputDir, "binnedTiTv.", annotationName, ".pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
-plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
+plot(all$value,all$titv,xlab=annotationName,ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
 axis(1,axTicks(1), format(axTicks(1), scientific=F))
 m = weighted.mean(all$value,all$numVariants/sum(all$numVariants))
 ma = all[all$value > m,]
 mb = all[all$value < m,]
@ -45,7 +46,8 @@ dev.off()
 outfile = paste(outputDir, "binnedTiTv_log.", annotationName, ".pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
-plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax));
+plot(all$value,all$titv,xlab=annotationName,log="x",ylab="Ti/Tv ratio",pch=20,ylim=c(ymin,ymax),xaxt="n");
 axis(1,axTicks(1), format(axTicks(1), scientific=F))
 abline(v=m,lty=2)
 abline(v=m75,lty=2)
 abline(v=m25,lty=2)
@ -58,5 +60,6 @@ dev.off()
 outfile = paste(outputDir, "binnedTiTv_hist.", annotationName, ".pdf", sep="")
 pdf(outfile, height=7, width=7)
 par(cex=1.1)
-plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h");
+plot(all$value,all$numVariants,xlab=annotationName,ylab="num Variants in bin",type="h",xaxt="n");
 axis(1,axTicks(1), format(axTicks(1), scientific=F))
 dev.off()
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java
@ -56,6 +56,8 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
    private int MIN_VARIANTS_PER_BIN = 1000;
    @Argument(fullName = "max_variants_per_bin", shortName = "maxBinSize", doc = "The maximum number of variants in a bin.", required = false)
    private int MAX_VARIANTS_PER_BIN = 20000;
    @Argument(fullName = "sampleName", shortName = "sampleName", doc = "Only process variants for this sample.", required = false)
    private String SAMPLE_NAME = null;
    /////////////////////////////
@ -95,7 +97,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker<Integer, Integer> {
                if( rod != null && rod instanceof RodVCF ) {
                    RodVCF variant = (RodVCF) rod;
                    if( variant.isSNP() ) {
-                        dataManager.addAnnotations( variant );
+                        dataManager.addAnnotations( variant, SAMPLE_NAME );
                    }
                }
            }
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java
@ -53,7 +53,13 @@ public class AnnotationDataManager {
        dataTruthSet = new HashMap<String, TreeSet<AnnotationDatum>>();
    }
-    public void addAnnotations( RodVCF variant ) {
+    public void addAnnotations( RodVCF variant, String sampleName ) {
        if( sampleName != null ) { // only process variants that are found in the sample with this sampleName
            if( variant.getGenotype(sampleName).isNoCall() ) { // this variant isn't found in this sample so break out
                return;
            }
        } // else, process all samples
        // Loop over each annotation in the vcf record
        final Map<String,String> infoField = variant.getInfoValues();