Merge pull request #1412 from broadinstitute/sl_issue_1285
Allows GatherBqsrReports to accept a .list file as input.
This commit is contained in:
commit
679941c1b1
|
|
@ -53,6 +53,8 @@ package org.broadinstitute.gatk.tools;
|
|||
|
||||
import htsjdk.samtools.util.IOUtil;
|
||||
import org.broadinstitute.gatk.engine.recalibration.BQSRGatherer;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import picard.cmdline.CommandLineProgram;
|
||||
import picard.cmdline.CommandLineProgramProperties;
|
||||
import picard.cmdline.Option;
|
||||
|
|
@ -97,10 +99,7 @@ import java.util.List;
|
|||
*
|
||||
*/
|
||||
|
||||
@CommandLineProgramProperties(
|
||||
usage = "Gathers scattered BQSR recalibration reports into a single file",
|
||||
usageShort = "Gathers scattered BQSR recalibration reports into a single file"
|
||||
)
|
||||
@DocumentedGATKFeature(groupName = HelpConstants.DOCS_CAT_QC)
|
||||
public class GatherBqsrReports extends CommandLineProgram {
|
||||
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc="List of scattered BQSR files")
|
||||
public List<File> INPUT;
|
||||
|
|
|
|||
|
|
@ -59,8 +59,12 @@ import org.broadinstitute.gatk.engine.recalibration.RecalUtils;
|
|||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -70,38 +74,36 @@ import java.util.List;
|
|||
*/
|
||||
public class BQSRGathererUnitTest extends BaseTest {
|
||||
|
||||
private static File recal1 = new File(privateTestDir + "HiSeq.1mb.1RG.sg1.table");
|
||||
private static File recal2 = new File(privateTestDir + "HiSeq.1mb.1RG.sg2.table");
|
||||
private static File recal3 = new File(privateTestDir + "HiSeq.1mb.1RG.sg3.table");
|
||||
private static File recal4 = new File(privateTestDir + "HiSeq.1mb.1RG.sg4.table");
|
||||
private static File recal5 = new File(privateTestDir + "HiSeq.1mb.1RG.sg5.table");
|
||||
private static File recalEmpty = new File(privateTestDir + "HiSeq.1mb.1RG.empty.table");
|
||||
private static File recal1 = new File(privateTestDir, "HiSeq.1mb.1RG.sg1.table");
|
||||
private static File recal2 = new File(privateTestDir, "HiSeq.1mb.1RG.sg2.table");
|
||||
private static File recal3 = new File(privateTestDir, "HiSeq.1mb.1RG.sg3.table");
|
||||
private static File recal4 = new File(privateTestDir, "HiSeq.1mb.1RG.sg4.table");
|
||||
private static File recal5 = new File(privateTestDir, "HiSeq.1mb.1RG.sg5.table");
|
||||
private static File recalEmpty = new File(privateTestDir, "HiSeq.1mb.1RG.empty.table");
|
||||
|
||||
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table");
|
||||
private static File recal_original = new File(privateTestDir, "HiSeq.1mb.1RG.noSG.table");
|
||||
private static File recal_many = new File(privateTestDir, "bqsr.manyObservations.full.table");
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test
|
||||
public void testManyObservations() {
|
||||
File recal = new File(privateTestDir + "bqsr.manyObservations.piece.table");
|
||||
final File recal = new File(privateTestDir, "bqsr.manyObservations.piece.table");
|
||||
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
List<File> recalFiles = new LinkedList<File> ();
|
||||
final List<File> recalFiles = new LinkedList<>();
|
||||
for ( int i=0; i < 5; i++ )
|
||||
recalFiles.add(recal);
|
||||
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(recalFiles, output);
|
||||
|
||||
GATKReport originalReport = new GATKReport(new File(privateTestDir + "bqsr.manyObservations.full.table"));
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
testReports(originalReport, calculatedReport);
|
||||
testReports(recal_many, output);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test
|
||||
public void testGatherBQSR() {
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
List<File> recalFiles = new LinkedList<File> ();
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
final List<File> recalFiles = new LinkedList<>();
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
recalFiles.add(recal1);
|
||||
|
|
@ -111,16 +113,13 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
recalFiles.add(recal5);
|
||||
gatherer.gather(recalFiles, output);
|
||||
|
||||
GATKReport originalReport = new GATKReport(recal_original);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
testReports(originalReport, calculatedReport);
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
@Test
|
||||
public void testGatherBQSRWithEmptyFile() {
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
List<File> recalFiles = new LinkedList<File> ();
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
final List<File> recalFiles = new LinkedList<>();
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
recalFiles.add(recal1);
|
||||
|
|
@ -131,13 +130,12 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
recalFiles.add(recalEmpty);
|
||||
gatherer.gather(recalFiles, output);
|
||||
|
||||
GATKReport originalReport = new GATKReport(recal_original);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
testReports(originalReport, calculatedReport);
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
|
||||
private void testReports(final GATKReport originalReport, final GATKReport calculatedReport) {
|
||||
private void testReports(final File originalFile, final File calculatedFile) {
|
||||
final GATKReport originalReport = new GATKReport(originalFile);
|
||||
final GATKReport calculatedReport = new GATKReport(calculatedFile);
|
||||
|
||||
// test the Arguments table
|
||||
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
|
|
@ -177,11 +175,11 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
* @param calculated the calculated table
|
||||
* @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor)
|
||||
*/
|
||||
private void testTablesWithColumns(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest) {
|
||||
for (int row = 0; row < original.getNumRows(); row++ ) {
|
||||
for (String column : columnsToTest) {
|
||||
Object actual = calculated.get(new Integer(row), column);
|
||||
Object expected = original.get(row, column);
|
||||
private void testTablesWithColumns(final GATKReportTable original, final GATKReportTable calculated, final List<String> columnsToTest) {
|
||||
for (int row = 0; row < original.getNumRows(); row++) {
|
||||
for (final String column : columnsToTest) {
|
||||
final Object actual = calculated.get(Integer.valueOf(row), column);
|
||||
final Object expected = original.get(row, column);
|
||||
//if ( !actual.equals(expected) )
|
||||
// System.out.println("Row=" + row + " Table=" + original.getTableName() + " Column=" + column + " Expected=" + expected + " Actual=" + actual);
|
||||
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Column=" + column);
|
||||
|
|
@ -196,12 +194,83 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
// TODO: - Doesn't end up in protected / private github
|
||||
// TODO: - IS otherwise available for local debugging unlike /humgen NFS mounts
|
||||
// Hand modified subset of problematic gather inputs submitted by George Grant
|
||||
File input1 = new File(privateTestDir + "NA12878.rg_subset.chr1.recal_data.table");
|
||||
File input2 = new File(privateTestDir + "NA12878.rg_subset.chrY_Plus.recal_data.table");
|
||||
final File input1 = new File(privateTestDir, "NA12878.rg_subset.chr1.recal_data.table");
|
||||
final File input2 = new File(privateTestDir, "NA12878.rg_subset.chrY_Plus.recal_data.table");
|
||||
|
||||
GATKReport report12 = BQSRGatherer.gatherReport(Arrays.asList(input1, input2));
|
||||
GATKReport report21 = BQSRGatherer.gatherReport(Arrays.asList(input2, input1));
|
||||
final GATKReport report12 = BQSRGatherer.gatherReport(Arrays.asList(input1, input2));
|
||||
final GATKReport report21 = BQSRGatherer.gatherReport(Arrays.asList(input2, input1));
|
||||
|
||||
Assert.assertTrue(report12.equals(report21), "GATK reports are different when gathered in a different order.");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInputsAsList() {
|
||||
final File inputListFile = BaseTest.createTempFile("BQSRGatherer.parse.input", ".list");
|
||||
try (final BufferedWriter bw = new BufferedWriter(new FileWriter(inputListFile))) {
|
||||
bw.write(recal1.getAbsolutePath() + "\n");
|
||||
bw.write(recal2.getAbsolutePath() + "\n");
|
||||
bw.write(recal3.getAbsolutePath() + "\n");
|
||||
bw.write(recal4.getAbsolutePath() + "\n");
|
||||
bw.write(recal5.getAbsolutePath() + "\n");
|
||||
} catch (final IOException ioe) {
|
||||
Assert.fail("Could not create temporary list of input files for BQSRGatherer unit test.");
|
||||
}
|
||||
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(Collections.singletonList(inputListFile), output);
|
||||
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInputsAsMultipleFiles() {
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(Arrays.asList(recal1, recal2, recal3, recal4, recal5), output);
|
||||
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInputsMixedSingleList() {
|
||||
final File inputListFile = BaseTest.createTempFile("BQSRGatherer.parse.input", ".list");
|
||||
try (final BufferedWriter bw = new BufferedWriter(new FileWriter(inputListFile))) {
|
||||
bw.write(recal2.getAbsolutePath() + "\n");
|
||||
bw.write(recal3.getAbsolutePath() + "\n");
|
||||
bw.write(recal4.getAbsolutePath() + "\n");
|
||||
} catch (final IOException ioe) {
|
||||
Assert.fail("Could not create temporary list of input files for BQSRGatherer unit test.");
|
||||
}
|
||||
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(Arrays.asList(recal1, inputListFile, recal5), output);
|
||||
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInputsMixedMultipleLists() {
|
||||
final File inputListFile1 = BaseTest.createTempFile("BQSRGatherer.parse.input.1", ".list");
|
||||
final File inputListFile2 = BaseTest.createTempFile("BQSRGatherer.parse.input.2", ".list");
|
||||
try (final BufferedWriter bw1 = new BufferedWriter(new FileWriter(inputListFile1));
|
||||
final BufferedWriter bw2 = new BufferedWriter(new FileWriter(inputListFile2))) {
|
||||
bw1.write(recal2.getAbsolutePath() + "\n");
|
||||
bw1.write(recal3.getAbsolutePath() + "\n");
|
||||
bw2.write(recal5.getAbsolutePath() + "\n");
|
||||
} catch (final IOException ioe) {
|
||||
Assert.fail("Could not create temporary lists of input files for BQSRGatherer unit test.");
|
||||
}
|
||||
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
final BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(Arrays.asList(recal1, inputListFile1, recal4, inputListFile2), output);
|
||||
|
||||
testReports(recal_original, output);
|
||||
}
|
||||
}
|
||||
|
|
@ -31,9 +31,11 @@ import org.broadinstitute.gatk.utils.commandline.Gatherer;
|
|||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -55,7 +57,7 @@ public class BQSRGatherer extends Gatherer {
|
|||
final PrintStream outputFile;
|
||||
try {
|
||||
outputFile = new PrintStream(output);
|
||||
} catch(FileNotFoundException e) {
|
||||
} catch(final FileNotFoundException e) {
|
||||
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
|
||||
}
|
||||
final GATKReport report = gatherReport(inputs);
|
||||
|
|
@ -70,10 +72,13 @@ public class BQSRGatherer extends Gatherer {
|
|||
*/
|
||||
public static GATKReport gatherReport(final List<File> inputs) {
|
||||
final SortedSet<String> allReadGroups = new TreeSet<String>();
|
||||
final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<File, Set<String>>();
|
||||
final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<>();
|
||||
|
||||
// Parse the input list for .list files and replace them with the files contained within them
|
||||
final List<File> parsedInputs = parseInputList(inputs);
|
||||
|
||||
// Get the read groups from each input report
|
||||
for (final File input : inputs) {
|
||||
for (final File input : parsedInputs) {
|
||||
final Set<String> readGroups = RecalibrationReport.getReadGroups(input);
|
||||
inputReadGroups.put(input, readGroups);
|
||||
allReadGroups.addAll(readGroups);
|
||||
|
|
@ -93,7 +98,7 @@ public class BQSRGatherer extends Gatherer {
|
|||
}
|
||||
|
||||
RecalibrationReport generalReport = null;
|
||||
for (File input : inputs) {
|
||||
for (final File input : parsedInputs) {
|
||||
final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups);
|
||||
if( inputReport.isEmpty() ) { continue; }
|
||||
|
||||
|
|
@ -109,4 +114,27 @@ public class BQSRGatherer extends Gatherer {
|
|||
|
||||
return generalReport.createGATKReport();
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces any .list files in rawFileList with the files named in said .list file.
|
||||
* Identical to parseVariantList method in CatVariants.
|
||||
* @param rawFileList the original file list, possibly including .list files
|
||||
* @return a new List, with .list files replaced
|
||||
*/
|
||||
private static List<File> parseInputList(final List<File> rawFileList) {
|
||||
final List<File> result = new ArrayList<>(rawFileList.size());
|
||||
for (final File rawFile : rawFileList) {
|
||||
if (rawFile.getName().endsWith(".list")) {
|
||||
try {
|
||||
for (final String line : new XReadLines(rawFile, true))
|
||||
result.add(new File(line));
|
||||
} catch (final IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(rawFile, e);
|
||||
}
|
||||
} else {
|
||||
result.add(rawFile);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -195,7 +195,8 @@ public class CatVariants extends CommandLineProgram {
|
|||
}
|
||||
|
||||
/**
|
||||
* Replaces any .list files in rawFileList with the files named in said .list file
|
||||
* Replaces any .list files in rawFileList with the files named in said .list file.
|
||||
* Identical to {@link org.broadinstitute.gatk.engine.recalibration.BQSRGatherer#parseInputList}.
|
||||
* @param rawFileList the original file list, possibly including .list files
|
||||
* @return a new List, with .list files replaced
|
||||
*/
|
||||
|
|
@ -206,7 +207,7 @@ public class CatVariants extends CommandLineProgram {
|
|||
try {
|
||||
for (final String line : new XReadLines(rawFile, true))
|
||||
result.add(new File(line));
|
||||
} catch (IOException e) {
|
||||
} catch (final IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(rawFile, e);
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Reference in New Issue