From 3b5a7c34d74518739ca588be62e21f591d053aa1 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Sun, 4 Mar 2012 10:24:29 -0500
Subject: [PATCH] Added argument to ValidationAmplicons to only output valid
 sequences - useful for not having to post-filter or grep resulting files
 before delivering downstream

---
 .../validation/ValidationAmplicons.java       | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index b27bef265..e812fb53a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
     @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
     boolean lowerCaseSNPs = false;
 
+    /**
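+     * If onlyOutputValidAmplicons is true, amplicons flagged as invalid are skipped, so the output contains only valid sequences.
+     * This applies to both the default fasta output and the sequenom output format. Useful for producing delivery-ready files.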
+     */
+    @Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
+    boolean onlyOutputValidAmplicons = false;
+
     /**
      * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
      * This changes the size of the k-mer used for alignment.
@@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
             valid = "Valid";
         }
 
-        String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
 
-        if (!sequenomOutput)
-            out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
-        else {
-            seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
-            probeName = probeName.replace("amplicon_","a");
-            out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
+        if (!onlyOutputValidAmplicons || !sequenceInvalid) {
+            String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
+            if (!sequenomOutput)
+                out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
+            else {
+                seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
+                probeName = probeName.replace("amplicon_","a");
+                out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
+            }
         }
     }
 }