Allows GatherBqsrReports to accept a .list file as input.

This commit is contained in:
Samuel Lee 2016-06-20 13:30:39 -04:00
parent ae8ab33cdd
commit 76bb8fd9e5
4 changed files with 147 additions and 50 deletions

View File

@ -53,6 +53,8 @@ package org.broadinstitute.gatk.tools;
import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.gatk.engine.recalibration.BQSRGatherer; import org.broadinstitute.gatk.engine.recalibration.BQSRGatherer;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option; import picard.cmdline.Option;
@ -97,10 +99,7 @@ import java.util.List;
* *
*/ */
@CommandLineProgramProperties( @DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_QC)
usage = "Gathers scattered BQSR recalibration reports into a single file",
usageShort = "Gathers scattered BQSR recalibration reports into a single file"
)
public class GatherBqsrReports extends CommandLineProgram { public class GatherBqsrReports extends CommandLineProgram {
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc="List of scattered BQSR files") @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc="List of scattered BQSR files")
public List<File> INPUT; public List<File> INPUT;

View File

@ -59,8 +59,12 @@ import org.broadinstitute.gatk.engine.recalibration.RecalUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.BufferedWriter;
import java.io.File; import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
@ -70,38 +74,36 @@ import java.util.List;
*/ */
public class BQSRGathererUnitTest extends BaseTest { public class BQSRGathererUnitTest extends BaseTest {
private static File recal1 = new File(privateTestDir + "HiSeq.1mb.1RG.sg1.table"); private static File recal1 = new File(privateTestDir, "HiSeq.1mb.1RG.sg1.table");
private static File recal2 = new File(privateTestDir + "HiSeq.1mb.1RG.sg2.table"); private static File recal2 = new File(privateTestDir, "HiSeq.1mb.1RG.sg2.table");
private static File recal3 = new File(privateTestDir + "HiSeq.1mb.1RG.sg3.table"); private static File recal3 = new File(privateTestDir, "HiSeq.1mb.1RG.sg3.table");
private static File recal4 = new File(privateTestDir + "HiSeq.1mb.1RG.sg4.table"); private static File recal4 = new File(privateTestDir, "HiSeq.1mb.1RG.sg4.table");
private static File recal5 = new File(privateTestDir + "HiSeq.1mb.1RG.sg5.table"); private static File recal5 = new File(privateTestDir, "HiSeq.1mb.1RG.sg5.table");
private static File recalEmpty = new File(privateTestDir + "HiSeq.1mb.1RG.empty.table"); private static File recalEmpty = new File(privateTestDir, "HiSeq.1mb.1RG.empty.table");
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table"); private static File recal_original = new File(privateTestDir, "HiSeq.1mb.1RG.noSG.table");
private static File recal_many = new File(privateTestDir, "bqsr.manyObservations.full.table");
@Test(enabled = true) @Test
public void testManyObservations() { public void testManyObservations() {
File recal = new File(privateTestDir + "bqsr.manyObservations.piece.table"); final File recal = new File(privateTestDir, "bqsr.manyObservations.piece.table");
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table"); final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
List<File> recalFiles = new LinkedList<File> (); final List<File> recalFiles = new LinkedList<>();
for ( int i=0; i < 5; i++ ) for ( int i=0; i < 5; i++ )
recalFiles.add(recal); recalFiles.add(recal);
BQSRGatherer gatherer = new BQSRGatherer(); final BQSRGatherer gatherer = new BQSRGatherer();
gatherer.gather(recalFiles, output); gatherer.gather(recalFiles, output);
GATKReport originalReport = new GATKReport(new File(privateTestDir + "bqsr.manyObservations.full.table")); testReports(recal_many, output);
GATKReport calculatedReport = new GATKReport(output);
testReports(originalReport, calculatedReport);
} }
@Test(enabled = true) @Test
public void testGatherBQSR() { public void testGatherBQSR() {
BQSRGatherer gatherer = new BQSRGatherer(); final BQSRGatherer gatherer = new BQSRGatherer();
List<File> recalFiles = new LinkedList<File> (); final List<File> recalFiles = new LinkedList<>();
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table"); final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
recalFiles.add(recal1); recalFiles.add(recal1);
@ -111,16 +113,13 @@ public class BQSRGathererUnitTest extends BaseTest {
recalFiles.add(recal5); recalFiles.add(recal5);
gatherer.gather(recalFiles, output); gatherer.gather(recalFiles, output);
GATKReport originalReport = new GATKReport(recal_original); testReports(recal_original, output);
GATKReport calculatedReport = new GATKReport(output);
testReports(originalReport, calculatedReport);
} }
@Test(enabled = true) @Test
public void testGatherBQSRWithEmptyFile() { public void testGatherBQSRWithEmptyFile() {
BQSRGatherer gatherer = new BQSRGatherer(); final BQSRGatherer gatherer = new BQSRGatherer();
List<File> recalFiles = new LinkedList<File> (); final List<File> recalFiles = new LinkedList<>();
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table"); final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
recalFiles.add(recal1); recalFiles.add(recal1);
@ -131,13 +130,12 @@ public class BQSRGathererUnitTest extends BaseTest {
recalFiles.add(recalEmpty); recalFiles.add(recalEmpty);
gatherer.gather(recalFiles, output); gatherer.gather(recalFiles, output);
GATKReport originalReport = new GATKReport(recal_original); testReports(recal_original, output);
GATKReport calculatedReport = new GATKReport(output);
testReports(originalReport, calculatedReport);
} }
private void testReports(final GATKReport originalReport, final GATKReport calculatedReport) { private void testReports(final File originalFile, final File calculatedFile) {
final GATKReport originalReport = new GATKReport(originalFile);
final GATKReport calculatedReport = new GATKReport(calculatedFile);
// test the Arguments table // test the Arguments table
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
@ -177,11 +175,11 @@ public class BQSRGathererUnitTest extends BaseTest {
* @param calculated the calculated table * @param calculated the calculated table
* @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor) * @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor)
*/ */
private void testTablesWithColumns(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest) { private void testTablesWithColumns(final GATKReportTable original, final GATKReportTable calculated, final List<String> columnsToTest) {
for (int row = 0; row < original.getNumRows(); row++ ) { for (int row = 0; row < original.getNumRows(); row++) {
for (String column : columnsToTest) { for (final String column : columnsToTest) {
Object actual = calculated.get(new Integer(row), column); final Object actual = calculated.get(Integer.valueOf(row), column);
Object expected = original.get(row, column); final Object expected = original.get(row, column);
//if ( !actual.equals(expected) ) //if ( !actual.equals(expected) )
// System.out.println("Row=" + row + " Table=" + original.getTableName() + " Column=" + column + " Expected=" + expected + " Actual=" + actual); // System.out.println("Row=" + row + " Table=" + original.getTableName() + " Column=" + column + " Expected=" + expected + " Actual=" + actual);
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Column=" + column); Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Column=" + column);
@ -196,12 +194,83 @@ public class BQSRGathererUnitTest extends BaseTest {
// TODO: - Doesn't end up in protected / private github // TODO: - Doesn't end up in protected / private github
// TODO: - IS otherwise available for local debugging unlike /humgen NFS mounts // TODO: - IS otherwise available for local debugging unlike /humgen NFS mounts
// Hand modified subset of problematic gather inputs submitted by George Grant // Hand modified subset of problematic gather inputs submitted by George Grant
File input1 = new File(privateTestDir + "NA12878.rg_subset.chr1.recal_data.table"); final File input1 = new File(privateTestDir, "NA12878.rg_subset.chr1.recal_data.table");
File input2 = new File(privateTestDir + "NA12878.rg_subset.chrY_Plus.recal_data.table"); final File input2 = new File(privateTestDir, "NA12878.rg_subset.chrY_Plus.recal_data.table");
GATKReport report12 = BQSRGatherer.gatherReport(Arrays.asList(input1, input2)); final GATKReport report12 = BQSRGatherer.gatherReport(Arrays.asList(input1, input2));
GATKReport report21 = BQSRGatherer.gatherReport(Arrays.asList(input2, input1)); final GATKReport report21 = BQSRGatherer.gatherReport(Arrays.asList(input2, input1));
Assert.assertTrue(report12.equals(report21), "GATK reports are different when gathered in a different order."); Assert.assertTrue(report12.equals(report21), "GATK reports are different when gathered in a different order.");
} }
}
/**
 * Gathers from a single .list file that names recal1 through recal5 (one absolute path per
 * line) and checks the gathered output against recal_original.
 */
@Test
public void testParseInputsAsList() {
    final File inputListFile = BaseTest.createTempFile("BQSRGatherer.parse.input", ".list");
    try (final BufferedWriter bw = new BufferedWriter(new FileWriter(inputListFile))) {
        bw.write(recal1.getAbsolutePath() + "\n");
        bw.write(recal2.getAbsolutePath() + "\n");
        bw.write(recal3.getAbsolutePath() + "\n");
        bw.write(recal4.getAbsolutePath() + "\n");
        bw.write(recal5.getAbsolutePath() + "\n");
    } catch (final IOException ioe) {
        // Pass the cause along so the underlying I/O error shows up in the test report.
        Assert.fail("Could not create temporary list of input files for BQSRGatherer unit test.", ioe);
    }

    final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");

    // The .list file is the only input handed to the gatherer.
    final BQSRGatherer gatherer = new BQSRGatherer();
    gatherer.gather(Collections.singletonList(inputListFile), output);

    testReports(recal_original, output);
}
/**
 * Gathers from recal1 through recal5 passed directly as individual files (no .list file)
 * and checks the gathered output against recal_original.
 */
@Test
public void testParseInputsAsMultipleFiles() {
    final List<File> scatteredReports = Arrays.asList(recal1, recal2, recal3, recal4, recal5);
    final File gathered = BaseTest.createTempFile("BQSRgathererTest", ".table");

    new BQSRGatherer().gather(scatteredReports, gathered);

    testReports(recal_original, gathered);
}
/**
 * Gathers from a mix of plain files and one .list file: recal1, a .list file naming
 * recal2 through recal4, then recal5. Checks the gathered output against recal_original.
 */
@Test
public void testParseInputsMixedSingleList() {
    final File inputListFile = BaseTest.createTempFile("BQSRGatherer.parse.input", ".list");
    try (final BufferedWriter bw = new BufferedWriter(new FileWriter(inputListFile))) {
        bw.write(recal2.getAbsolutePath() + "\n");
        bw.write(recal3.getAbsolutePath() + "\n");
        bw.write(recal4.getAbsolutePath() + "\n");
    } catch (final IOException ioe) {
        // Pass the cause along so the underlying I/O error shows up in the test report.
        Assert.fail("Could not create temporary list of input files for BQSRGatherer unit test.", ioe);
    }

    final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");

    // The .list file sits between two plain report files in the input list.
    final BQSRGatherer gatherer = new BQSRGatherer();
    gatherer.gather(Arrays.asList(recal1, inputListFile, recal5), output);

    testReports(recal_original, output);
}
/**
 * Gathers from plain files interleaved with two .list files: recal1, a list naming recal2
 * and recal3, recal4, then a list naming recal5. Checks the gathered output against
 * recal_original.
 */
@Test
public void testParseInputsMixedMultipleLists() {
    final File inputListFile1 = BaseTest.createTempFile("BQSRGatherer.parse.input.1", ".list");
    final File inputListFile2 = BaseTest.createTempFile("BQSRGatherer.parse.input.2", ".list");
    try (final BufferedWriter bw1 = new BufferedWriter(new FileWriter(inputListFile1));
         final BufferedWriter bw2 = new BufferedWriter(new FileWriter(inputListFile2))) {
        bw1.write(recal2.getAbsolutePath() + "\n");
        bw1.write(recal3.getAbsolutePath() + "\n");
        bw2.write(recal5.getAbsolutePath() + "\n");
    } catch (final IOException ioe) {
        // Pass the cause along so the underlying I/O error shows up in the test report.
        Assert.fail("Could not create temporary lists of input files for BQSRGatherer unit test.", ioe);
    }

    final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");

    // Plain files and .list files alternate in the input list.
    final BQSRGatherer gatherer = new BQSRGatherer();
    gatherer.gather(Arrays.asList(recal1, inputListFile1, recal4, inputListFile2), output);

    testReports(recal_original, output);
}
}

View File

@ -31,9 +31,11 @@ import org.broadinstitute.gatk.utils.commandline.Gatherer;
import org.broadinstitute.gatk.utils.report.GATKReport; import org.broadinstitute.gatk.utils.report.GATKReport;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.text.XReadLines;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -55,7 +57,7 @@ public class BQSRGatherer extends Gatherer {
final PrintStream outputFile; final PrintStream outputFile;
try { try {
outputFile = new PrintStream(output); outputFile = new PrintStream(output);
} catch(FileNotFoundException e) { } catch(final FileNotFoundException e) {
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE); throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
} }
final GATKReport report = gatherReport(inputs); final GATKReport report = gatherReport(inputs);
@ -70,10 +72,13 @@ public class BQSRGatherer extends Gatherer {
*/ */
public static GATKReport gatherReport(final List<File> inputs) { public static GATKReport gatherReport(final List<File> inputs) {
final SortedSet<String> allReadGroups = new TreeSet<String>(); final SortedSet<String> allReadGroups = new TreeSet<String>();
final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<File, Set<String>>(); final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<>();
// Parse the input list for .list files and replace them with the files contained within them
final List<File> parsedInputs = parseInputList(inputs);
// Get the read groups from each input report // Get the read groups from each input report
for (final File input : inputs) { for (final File input : parsedInputs) {
final Set<String> readGroups = RecalibrationReport.getReadGroups(input); final Set<String> readGroups = RecalibrationReport.getReadGroups(input);
inputReadGroups.put(input, readGroups); inputReadGroups.put(input, readGroups);
allReadGroups.addAll(readGroups); allReadGroups.addAll(readGroups);
@ -93,7 +98,7 @@ public class BQSRGatherer extends Gatherer {
} }
RecalibrationReport generalReport = null; RecalibrationReport generalReport = null;
for (File input : inputs) { for (final File input : parsedInputs) {
final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups); final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups);
if( inputReport.isEmpty() ) { continue; } if( inputReport.isEmpty() ) { continue; }
@ -109,4 +114,27 @@ public class BQSRGatherer extends Gatherer {
return generalReport.createGATKReport(); return generalReport.createGATKReport();
} }
/**
 * Expands every .list file in rawFileList into the files named inside it; all other
 * entries are passed through unchanged, in their original position.
 * Identical to parseVariantList method in CatVariants.
 * @param rawFileList the original file list, possibly including .list files
 * @return a new List, with .list files replaced
 */
private static List<File> parseInputList(final List<File> rawFileList) {
    final List<File> expanded = new ArrayList<>(rawFileList.size());
    for (final File candidate : rawFileList) {
        // Anything whose name does not end in ".list" is an ordinary input file.
        final boolean isListFile = candidate.getName().endsWith(".list");
        if (!isListFile) {
            expanded.add(candidate);
            continue;
        }
        try {
            // Each string yielded by XReadLines is treated as a path to an input file.
            for (final String entry : new XReadLines(candidate, true)) {
                expanded.add(new File(entry));
            }
        } catch (final IOException e) {
            throw new UserException.CouldNotReadInputFile(candidate, e);
        }
    }
    return expanded;
}
} }

View File

@ -195,7 +195,8 @@ public class CatVariants extends CommandLineProgram {
} }
/** /**
* Replaces any .list files in rawFileList with the files named in said .list file * Replaces any .list files in rawFileList with the files named in said .list file.
* Identical to {@link org.broadinstitute.gatk.engine.recalibration.BQSRGatherer#parseInputList}.
* @param rawFileList the original file list, possibly including .list files * @param rawFileList the original file list, possibly including .list files
* @return a new List, with .list files replaced * @return a new List, with .list files replaced
*/ */
@ -206,7 +207,7 @@ public class CatVariants extends CommandLineProgram {
try { try {
for (final String line : new XReadLines(rawFile, true)) for (final String line : new XReadLines(rawFile, true))
result.add(new File(line)); result.add(new File(line));
} catch (IOException e) { } catch (final IOException e) {
throw new UserException.CouldNotReadInputFile(rawFile, e); throw new UserException.CouldNotReadInputFile(rawFile, e);
} }
} else { } else {