diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R index 46bbf7eda..876cf5cbc 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R @@ -4,11 +4,9 @@ colnames(d) = tableHeader; for (i in 1:ncol(d)) { - v = suppressWarnings(as.numeric(d[,i])); - - if (length(na.omit(as.numeric(v))) == length(d[,i])) { - d[,i] = v; - } + # use the general type.convert infrastructure of read.table to convert column data to R types + v = type.convert(d[,i]) + d[,i] = v; } usedNames = ls(envir=tableEnv, pattern=tableName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 3f555f780..00968943d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar final ArrayList altQuals = new ArrayList(); if ( vc.isSNP() ) { + final List altAlleles = new ArrayList(); + for ( final Allele a : vc.getAlternateAlleles() ) + altAlleles.add(a.getBases()[0]); + for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) continue; - final List altAlleles = new ArrayList(); - for ( final Allele a : vc.getAlternateAlleles() ) - altAlleles.add(a.getBases()[0]); - fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals); } } else if ( vc.isIndel() || vc.isMixed() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index c192d04e7..d7a48d321 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -36,13 +36,14 @@ import org.broadinstitute.sting.utils.Median; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; +import java.text.DateFormat; import java.util.HashMap; import java.util.Map; /** - * Emits a GATKReport containing read group, sample, library, platform, center, paired end status, - * simple read type name (e.g. 2x76) median insert size and median read length for each read group - * in every provided BAM file + * Emits a GATKReport containing read group, sample, library, platform, center, sequencing date, + * paired end status, simple read type name (e.g. 2x76), median insert size and median read length + * for each read group in every provided BAM file * * Note that this walker stops when all read groups have been observed at least a few thousand times so that * the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate @@ -61,23 +62,23 @@ import java.util.Map; * *
  *      ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
- *      readgroup  sample   library       platform  center  has.any.reads  is.paired.end  n.reads.analyzed  simple.read.type  median.read.length  median.insert.size
- *      20FUK.1    NA12878  Solexa-18483  illumina  BI      true           true                      10100  2x101                            101                 387
- *      20FUK.2    NA12878  Solexa-18484  illumina  BI      true           true                      10115  2x101                            101                 415
- *      20FUK.3    NA12878  Solexa-18483  illumina  BI      true           true                      10090  2x101                            101                 388
- *      20FUK.4    NA12878  Solexa-18484  illumina  BI      true           true                      10081  2x101                            101                 415
- *      20FUK.5    NA12878  Solexa-18483  illumina  BI      true           true                      10078  2x101                            101                 387
- *      20FUK.6    NA12878  Solexa-18484  illumina  BI      true           true                      10072  2x101                            101                 415
- *      20FUK.7    NA12878  Solexa-18483  illumina  BI      true           true                      10086  2x101                            101                 388
- *      20FUK.8    NA12878  Solexa-18484  illumina  BI      true           true                      10097  2x101                            101                 415
- *      20GAV.1    NA12878  Solexa-18483  illumina  BI      true           true                      10135  2x101                            101                 388
- *      20GAV.2    NA12878  Solexa-18484  illumina  BI      true           true                      10172  2x101                            101                 415
- *      20GAV.3    NA12878  Solexa-18483  illumina  BI      true           true                      10141  2x101                            101                 388
- *      20GAV.4    NA12878  Solexa-18484  illumina  BI      true           true                      10251  2x101                            101                 416
- *      20GAV.5    NA12878  Solexa-18483  illumina  BI      true           true                      10145  2x101                            101                 388
- *      20GAV.6    NA12878  Solexa-18484  illumina  BI      true           true                      10184  2x101                            101                 415
- *      20GAV.7    NA12878  Solexa-18483  illumina  BI      true           true                      10174  2x101                            101                 387
- *      20GAV.8    NA12878  Solexa-18484  illumina  BI      true           true                      10141  2x101                            101                 414
+ *      readgroup  sample   library       platform  center  date     has.any.reads  is.paired.end  n.reads.analyzed  simple.read.type  median.read.length  median.insert.size
+ *      20FUK.1    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        498  2x101                            101                 386
+ *      20FUK.2    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        476  2x101                            101                 417
+ *      20FUK.3    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        407  2x101                            101                 387
+ *      20FUK.4    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        389  2x101                            101                 415
+ *      20FUK.5    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        433  2x101                            101                 386
+ *      20FUK.6    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        480  2x101                            101                 418
+ *      20FUK.7    NA12878  Solexa-18483  illumina  BI      2/2/10   true           true                        450  2x101                            101                 386
+ *      20FUK.8    NA12878  Solexa-18484  illumina  BI      2/2/10   true           true                        438  2x101                            101                 418
+ *      20GAV.1    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        490  2x101                            101                 391
+ *      20GAV.2    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        485  2x101                            101                 417
+ *      20GAV.3    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        460  2x101                            101                 392
+ *      20GAV.4    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        434  2x101                            101                 415
+ *      20GAV.5    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        479  2x101                            101                 389
+ *      20GAV.6    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        461  2x101                            101                 416
+ *      20GAV.7    NA12878  Solexa-18483  illumina  BI      1/26/10  true           true                        509  2x101                            101                 386
+ *      20GAV.8    NA12878  Solexa-18484  illumina  BI      1/26/10  true           true                        476  2x101                            101                 410
  *      
*

* @@ -172,6 +173,7 @@ public class ReadGroupProperties extends ReadWalker { final GATKReport report = new GATKReport(); report.addTable(TABLE_NAME, "Table of read group properties"); GATKReportTable table = report.getTable(TABLE_NAME); + DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT); table.addPrimaryKey("readgroup"); //* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and @@ -180,6 +182,7 @@ public class ReadGroupProperties extends ReadWalker { table.addColumn("library", "NA"); table.addColumn("platform", "NA"); table.addColumn("center", "NA"); + table.addColumn("date", "NA"); table.addColumn("has.any.reads", "false"); table.addColumn("is.paired.end", "false"); table.addColumn("n.reads.analyzed", "NA"); @@ -196,18 +199,28 @@ public class ReadGroupProperties extends ReadWalker { final boolean hasAnyReads = info.nReadsSeen > 0; final int readLength = info.readLength.getMedian(0); - table.set(rgID, "sample", rg.getSample()); - table.set(rgID, "library", rg.getLibrary()); - table.set(rgID, "platform", rg.getPlatform()); - table.set(rgID, "center", rg.getSequencingCenter()); - table.set(rgID, "has.any.reads", hasAnyReads); - table.set(rgID, "is.paired.end", isPaired); - table.set(rgID, "n.reads.analyzed", info.nReadsSeen); - table.set(rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA"); - table.set(rgID, "median.read.length", hasAnyReads ? readLength : "NA" ); - table.set(rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" ); + setTableValue(table, rgID, "sample", rg.getSample()); + setTableValue(table, rgID, "library", rg.getLibrary()); + setTableValue(table, rgID, "platform", rg.getPlatform()); + setTableValue(table, rgID, "center", rg.getSequencingCenter()); + try { + setTableValue(table, rgID, "date", rg.getRunDate() != null ? 
dateFormatter.format(rg.getRunDate()) : "NA"); + } catch ( NullPointerException e ) { + // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present + setTableValue(table, rgID, "date", "NA"); + } + setTableValue(table, rgID, "has.any.reads", hasAnyReads); + setTableValue(table, rgID, "is.paired.end", isPaired); + setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen); + setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA"); + setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" ); + setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" ); } report.print(out); } + + private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) { + table.set(rgID, key, value == null ? "NA" : value); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index b27bef265..e812fb53a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker { @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; + /** + * If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences. + * Useful for producing delivery-ready files. + */ + @Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false) + boolean onlyOutputValidAmplicons = false; + /** * BWA single-end alignment is used as a primer specificity proxy. 
Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. * This changes the size of the k-mer used for alignment. @@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker { valid = "Valid"; } - String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); - if (!sequenomOutput) - out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); - else { - seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record - probeName = probeName.replace("amplicon_","a"); - out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); + if (!onlyOutputValidAmplicons || !sequenceInvalid) { + String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); + if (!sequenomOutput) + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + else { + seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record + probeName = probeName.replace("amplicon_","a"); + out.printf("%s_%s %s%n", allelePos != null ? 
allelePos.toString() : "multiple", probeName, seqIdentity); + } } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java index 04c30a8fe..1a4c8db30 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupPropertiesIntegrationTest.java @@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s", 1, - Arrays.asList("8e4d09665c0b65c971bd5dead24f95fe")); + Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56")); executeTest("ReadGroupProperties:", spec); } } \ No newline at end of file