Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
f879daa7d0
|
|
@ -4,11 +4,9 @@
|
||||||
colnames(d) = tableHeader;
|
colnames(d) = tableHeader;
|
||||||
|
|
||||||
for (i in 1:ncol(d)) {
|
for (i in 1:ncol(d)) {
|
||||||
v = suppressWarnings(as.numeric(d[,i]));
|
# use the general type.convert infrastructure of read.table to convert column data to R types
|
||||||
|
v = type.convert(d[,i])
|
||||||
if (length(na.omit(as.numeric(v))) == length(d[,i])) {
|
d[,i] = v;
|
||||||
d[,i] = v;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
usedNames = ls(envir=tableEnv, pattern=tableName);
|
usedNames = ls(envir=tableEnv, pattern=tableName);
|
||||||
|
|
|
||||||
|
|
@ -43,15 +43,15 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar
|
||||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||||
|
|
||||||
if ( vc.isSNP() ) {
|
if ( vc.isSNP() ) {
|
||||||
|
final List<Byte> altAlleles = new ArrayList<Byte>();
|
||||||
|
for ( final Allele a : vc.getAlternateAlleles() )
|
||||||
|
altAlleles.add(a.getBases()[0]);
|
||||||
|
|
||||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
if ( context == null )
|
if ( context == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
|
||||||
for ( final Allele a : vc.getAlternateAlleles() )
|
|
||||||
altAlleles.add(a.getBases()[0]);
|
|
||||||
|
|
||||||
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
||||||
}
|
}
|
||||||
} else if ( vc.isIndel() || vc.isMixed() ) {
|
} else if ( vc.isIndel() || vc.isMixed() ) {
|
||||||
|
|
|
||||||
|
|
@ -36,13 +36,14 @@ import org.broadinstitute.sting.utils.Median;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.text.DateFormat;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Emits a GATKReport containing read group, sample, library, platform, center, paired end status,
|
* Emits a GATKReport containing read group, sample, library, platform, center, sequencing date,
|
||||||
* simple read type name (e.g. 2x76) median insert size and median read length for each read group
|
* paired end status, simple read type name (e.g. 2x76) median insert size and median read length
|
||||||
* in every provided BAM file
|
* for each read group in every provided BAM file
|
||||||
*
|
*
|
||||||
* Note that this walker stops when all read groups have been observed at least a few thousand times so that
|
* Note that this walker stops when all read groups have been observed at least a few thousand times so that
|
||||||
* the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate
|
* the median statistics are well determined. It is safe to run it WG and it'll finish in an appropriate
|
||||||
|
|
@ -61,23 +62,23 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
|
* ##:GATKReport.v0.2 ReadGroupProperties : Table of read group properties
|
||||||
* readgroup sample library platform center has.any.reads is.paired.end n.reads.analyzed simple.read.type median.read.length median.insert.size
|
* readgroup sample library platform center date has.any.reads is.paired.end n.reads.analyzed simple.read.type median.read.length median.insert.size
|
||||||
* 20FUK.1 NA12878 Solexa-18483 illumina BI true true 10100 2x101 101 387
|
* 20FUK.1 NA12878 Solexa-18483 illumina BI 2/2/10 true true 498 2x101 101 386
|
||||||
* 20FUK.2 NA12878 Solexa-18484 illumina BI true true 10115 2x101 101 415
|
* 20FUK.2 NA12878 Solexa-18484 illumina BI 2/2/10 true true 476 2x101 101 417
|
||||||
* 20FUK.3 NA12878 Solexa-18483 illumina BI true true 10090 2x101 101 388
|
* 20FUK.3 NA12878 Solexa-18483 illumina BI 2/2/10 true true 407 2x101 101 387
|
||||||
* 20FUK.4 NA12878 Solexa-18484 illumina BI true true 10081 2x101 101 415
|
* 20FUK.4 NA12878 Solexa-18484 illumina BI 2/2/10 true true 389 2x101 101 415
|
||||||
* 20FUK.5 NA12878 Solexa-18483 illumina BI true true 10078 2x101 101 387
|
* 20FUK.5 NA12878 Solexa-18483 illumina BI 2/2/10 true true 433 2x101 101 386
|
||||||
* 20FUK.6 NA12878 Solexa-18484 illumina BI true true 10072 2x101 101 415
|
* 20FUK.6 NA12878 Solexa-18484 illumina BI 2/2/10 true true 480 2x101 101 418
|
||||||
* 20FUK.7 NA12878 Solexa-18483 illumina BI true true 10086 2x101 101 388
|
* 20FUK.7 NA12878 Solexa-18483 illumina BI 2/2/10 true true 450 2x101 101 386
|
||||||
* 20FUK.8 NA12878 Solexa-18484 illumina BI true true 10097 2x101 101 415
|
* 20FUK.8 NA12878 Solexa-18484 illumina BI 2/2/10 true true 438 2x101 101 418
|
||||||
* 20GAV.1 NA12878 Solexa-18483 illumina BI true true 10135 2x101 101 388
|
* 20GAV.1 NA12878 Solexa-18483 illumina BI 1/26/10 true true 490 2x101 101 391
|
||||||
* 20GAV.2 NA12878 Solexa-18484 illumina BI true true 10172 2x101 101 415
|
* 20GAV.2 NA12878 Solexa-18484 illumina BI 1/26/10 true true 485 2x101 101 417
|
||||||
* 20GAV.3 NA12878 Solexa-18483 illumina BI true true 10141 2x101 101 388
|
* 20GAV.3 NA12878 Solexa-18483 illumina BI 1/26/10 true true 460 2x101 101 392
|
||||||
* 20GAV.4 NA12878 Solexa-18484 illumina BI true true 10251 2x101 101 416
|
* 20GAV.4 NA12878 Solexa-18484 illumina BI 1/26/10 true true 434 2x101 101 415
|
||||||
* 20GAV.5 NA12878 Solexa-18483 illumina BI true true 10145 2x101 101 388
|
* 20GAV.5 NA12878 Solexa-18483 illumina BI 1/26/10 true true 479 2x101 101 389
|
||||||
* 20GAV.6 NA12878 Solexa-18484 illumina BI true true 10184 2x101 101 415
|
* 20GAV.6 NA12878 Solexa-18484 illumina BI 1/26/10 true true 461 2x101 101 416
|
||||||
* 20GAV.7 NA12878 Solexa-18483 illumina BI true true 10174 2x101 101 387
|
* 20GAV.7 NA12878 Solexa-18483 illumina BI 1/26/10 true true 509 2x101 101 386
|
||||||
* 20GAV.8 NA12878 Solexa-18484 illumina BI true true 10141 2x101 101 414
|
* 20GAV.8 NA12878 Solexa-18484 illumina BI 1/26/10 true true 476 2x101 101 410
|
||||||
* </pre>
|
* </pre>
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
|
@ -172,6 +173,7 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||||
final GATKReport report = new GATKReport();
|
final GATKReport report = new GATKReport();
|
||||||
report.addTable(TABLE_NAME, "Table of read group properties");
|
report.addTable(TABLE_NAME, "Table of read group properties");
|
||||||
GATKReportTable table = report.getTable(TABLE_NAME);
|
GATKReportTable table = report.getTable(TABLE_NAME);
|
||||||
|
DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT);
|
||||||
|
|
||||||
table.addPrimaryKey("readgroup");
|
table.addPrimaryKey("readgroup");
|
||||||
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
|
//* Emits a GATKReport containing read group, sample, library, platform, center, median insert size and
|
||||||
|
|
@ -180,6 +182,7 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||||
table.addColumn("library", "NA");
|
table.addColumn("library", "NA");
|
||||||
table.addColumn("platform", "NA");
|
table.addColumn("platform", "NA");
|
||||||
table.addColumn("center", "NA");
|
table.addColumn("center", "NA");
|
||||||
|
table.addColumn("date", "NA");
|
||||||
table.addColumn("has.any.reads", "false");
|
table.addColumn("has.any.reads", "false");
|
||||||
table.addColumn("is.paired.end", "false");
|
table.addColumn("is.paired.end", "false");
|
||||||
table.addColumn("n.reads.analyzed", "NA");
|
table.addColumn("n.reads.analyzed", "NA");
|
||||||
|
|
@ -196,18 +199,28 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||||
final boolean hasAnyReads = info.nReadsSeen > 0;
|
final boolean hasAnyReads = info.nReadsSeen > 0;
|
||||||
final int readLength = info.readLength.getMedian(0);
|
final int readLength = info.readLength.getMedian(0);
|
||||||
|
|
||||||
table.set(rgID, "sample", rg.getSample());
|
setTableValue(table, rgID, "sample", rg.getSample());
|
||||||
table.set(rgID, "library", rg.getLibrary());
|
setTableValue(table, rgID, "library", rg.getLibrary());
|
||||||
table.set(rgID, "platform", rg.getPlatform());
|
setTableValue(table, rgID, "platform", rg.getPlatform());
|
||||||
table.set(rgID, "center", rg.getSequencingCenter());
|
setTableValue(table, rgID, "center", rg.getSequencingCenter());
|
||||||
table.set(rgID, "has.any.reads", hasAnyReads);
|
try {
|
||||||
table.set(rgID, "is.paired.end", isPaired);
|
setTableValue(table, rgID, "date", rg.getRunDate() != null ? dateFormatter.format(rg.getRunDate()) : "NA");
|
||||||
table.set(rgID, "n.reads.analyzed", info.nReadsSeen);
|
} catch ( NullPointerException e ) {
|
||||||
table.set(rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
|
// TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
|
||||||
table.set(rgID, "median.read.length", hasAnyReads ? readLength : "NA" );
|
setTableValue(table, rgID, "date", "NA");
|
||||||
table.set(rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" );
|
}
|
||||||
|
setTableValue(table, rgID, "has.any.reads", hasAnyReads);
|
||||||
|
setTableValue(table, rgID, "is.paired.end", isPaired);
|
||||||
|
setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen);
|
||||||
|
setTableValue(table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
|
||||||
|
setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA" );
|
||||||
|
setTableValue(table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA" );
|
||||||
}
|
}
|
||||||
|
|
||||||
report.print(out);
|
report.print(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final void setTableValue(GATKReportTable table, final String rgID, final String key, final Object value) {
|
||||||
|
table.set(rgID, key, value == null ? "NA" : value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
||||||
boolean lowerCaseSNPs = false;
|
boolean lowerCaseSNPs = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If onlyOutputValidAmplicons is true, the output fasta file will contain only valid sequences.
|
||||||
|
* Useful for producing delivery-ready files.
|
||||||
|
*/
|
||||||
|
@Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false)
|
||||||
|
boolean onlyOutputValidAmplicons = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
||||||
* This changes the size of the k-mer used for alignment.
|
* This changes the size of the k-mer used for alignment.
|
||||||
|
|
@ -486,14 +493,16 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||||
valid = "Valid";
|
valid = "Valid";
|
||||||
}
|
}
|
||||||
|
|
||||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
|
||||||
|
|
||||||
if (!sequenomOutput)
|
if (!onlyOutputValidAmplicons || !sequenceInvalid) {
|
||||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||||
else {
|
if (!sequenomOutput)
|
||||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||||
probeName = probeName.replace("amplicon_","a");
|
else {
|
||||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||||
|
probeName = probeName.replace("amplicon_","a");
|
||||||
|
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ public class ReadGroupPropertiesIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
|
"-T ReadGroupProperties -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-11,000,000 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("8e4d09665c0b65c971bd5dead24f95fe"));
|
Arrays.asList("6b8cce223af28cbadcfe87a3b841fc56"));
|
||||||
executeTest("ReadGroupProperties:", spec);
|
executeTest("ReadGroupProperties:", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue