Now in Picard, and released to the public

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5439 348d0f76-0448-11de-a6fe-93d51630548a
2011-03-14 19:36:56 +00:00 · 2011-03-14 19:36:56 +00:00 · f75ad0dee3
parent 9dfe4c9cb7
commit f75ad0dee3
2 changed files with 0 additions and 269 deletions
--- a/java/src/org/broadinstitute/sting/playground/tools/ReorderSam.java
+++ b/java/src/org/broadinstitute/sting/playground/tools/ReorderSam.java
@ -1,191 +0,0 @@
-package org.broadinstitute.sting.playground.tools;
-
-import net.sf.picard.PicardException;
-import net.sf.picard.cmdline.CommandLineProgram;
-import net.sf.picard.cmdline.Option;
-import net.sf.picard.cmdline.Usage;
-import net.sf.picard.reference.ReferenceSequenceFile;
-import net.sf.picard.reference.ReferenceSequenceFileFactory;
-import net.sf.samtools.*;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * User: mdepristo
- *
- * Reorders a SAM/BAM input file according to the order of contigs in a second reference sequence
- */
-public class ReorderSam extends CommandLineProgram {
-    @Usage(programVersion="1.0") public String USAGE = "Reorders reads in a SAM/BAM file to match the contig ordering in a provided reference file, as determined by exact name matching of contigs.  Reads mapped to contigs absent in the new reference are dropped";
-    @Option(shortName="I", doc="Input file (bam or sam) to extract reads from.", optional=false)
-    public File IN = null;
-    @Option(shortName="O",doc="Output file (bam or sam) to write extracted reads to.", optional=false)
-    public File OUT = null;
-    @Option(shortName="R", doc="Reference sequence to reorder reads to match", optional=false)
-    public File REFERENCE = null;
-    @Option(shortName="S", doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs.  By default, this tool requires a corresponding contig in the new reference for each read contig", optional=true)
-    public Boolean ALLOW_INCOMPLETE_DICT_CONCORDANCE = false;
-    @Option(shortName="U", doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length.  Highly dangerous, only use if you know what you are doing.", optional=true)
-    public Boolean ALLOW_CONTIG_LENGTH_DISCORDANCE = false;
-
-    /** Required main method implementation. */
-    public static void main(final String[] argv) {
-        System.exit(new ReorderSam().instanceMain(argv));
-    }
-
-    protected int doWork() {
-        SAMFileReader inReader = new SAMFileReader(IN);
-
-        ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE);
-        SAMSequenceDictionary refDict = reference.getSequenceDictionary();
-
-        if ( refDict == null ) {
-        	System.out.println("No reference sequence dictionary found. Aborting.");
-        	inReader.close();
-        	System.exit(1);
-        }
-
-        printDictionary("SAM/BAM file", inReader.getFileHeader().getSequenceDictionary());
-        printDictionary("Reference", refDict);
-        Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, inReader.getFileHeader().getSequenceDictionary());
-
-        // has to be after we create the newOrder
-        SAMFileHeader outHeader = inReader.getFileHeader().clone();
-        outHeader.setSequenceDictionary(refDict);
-        SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, true, OUT) ;
-
-        //
-        // write the reads in contig order
-        //
-        System.out.println("Writing reads...");
-        for ( SAMSequenceRecord contig : refDict.getSequences() ) {
-            SAMRecordIterator it = inReader.query(contig.getSequenceName(), 0, 0, false);
-            writeReads(outWriter, it, newOrder, contig.getSequenceName());
-        }
-        // don't forget the unmapped reads
-        writeReads( outWriter, inReader.queryUnmapped(), newOrder, "unmapped" );
-
-        // cleanup
-        inReader.close();
-        outWriter.close();
-        return 0;
-    }
-
-    /**
-     * Low-level helper function that returns the new reference index for oldIndex according to the
-     * ordering map newOrder.  Read is provided in case an error occurs, so that an informative message
-     * can be made.
-     *
-     * @param read
-     * @param oldIndex
-     * @param newOrder
-     * @return
-     */
-    private static int newOrderIndex(SAMRecord read, int oldIndex, Map<Integer, Integer> newOrder) {
-        if ( oldIndex == -1 )
-            return -1; // unmapped read
-        else if ( ! newOrder.containsKey(oldIndex) )
-            throw new PicardException("BUG: no mapping found for read " + read.format());
-        else
-            return newOrder.get(oldIndex);
-    }
-
-    /**
-     * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
-     * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
-     *
-     * @param out
-     * @param it
-     * @param newOrder
-     * @param name
-     */
-    private static void writeReads(SAMFileWriter out, SAMRecordIterator it, Map<Integer, Integer> newOrder, String name) {
-        long counter = 0;
-        System.out.print("  Processing " + name);
-        System.out.flush();
-
-        while ( it.hasNext() ) {
-            counter++;
-            SAMRecord read = it.next();
-            read.setHeader(out.getFileHeader());
-            //System.out.println("Writing read " + read.format());
-
-            int oldRefIndex = read.getReferenceIndex();
-            int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);
-            read.setReferenceIndex(newRefIndex);
-
-            int oldMateIndex = read.getMateReferenceIndex();
-            int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
-            if ( oldMateIndex != -1 && newMateIndex == -1 ) { // becoming unmapped
-                read.setMateAlignmentStart(0);
-                read.setMateUnmappedFlag(true);
-            }
-            read.setMateReferenceIndex(newMateIndex);
-
-            out.addAlignment(read);
-        }
-
-        it.close();
-        System.out.println(" => Wrote " + counter + " reads");
-    }
-
-    /**
-     * Constructs a mapping from read sequence records index -> new sequence dictionary index for use in
-     * reordering the reference index and mate reference index in each read.  -1 means unmapped.
-     * @param refDict
-     * @param readsDict
-     * @return
-     */
-    private Map<Integer, Integer> buildSequenceDictionaryMap(SAMSequenceDictionary refDict, SAMSequenceDictionary readsDict) {
-        Map<Integer, Integer> newOrder = new HashMap<Integer, Integer>();
-
-        System.out.println("Reordering SAM/BAM file:");
-        for ( SAMSequenceRecord refRec : refDict.getSequences() ) {
-            SAMSequenceRecord readsRec = readsDict.getSequence(refRec.getSequenceName());
-            if ( readsRec != null ) {
-                if ( refRec.getSequenceLength() != readsRec.getSequenceLength() ) {
-                    String msg = String.format("Discordant contig lengths: read %s LN=%d, ref %s LN=%d",
-                            readsRec.getSequenceName(), readsRec.getSequenceLength(),
-                            refRec.getSequenceName(), refRec.getSequenceLength());
-                    if ( ALLOW_CONTIG_LENGTH_DISCORDANCE ) {
-                        System.err.println("WARN: " + msg);
-                    } else {
-                        throw new PicardException(msg);
-                    }
-                }
-
-                System.out.printf("  Reordering read contig %s [index=%d] to => ref contig %s [index=%d]%n",
-                        readsRec.getSequenceName(), readsRec.getSequenceIndex(),
-                        refRec.getSequenceName(), refRec.getSequenceIndex());
-                newOrder.put(readsRec.getSequenceIndex(), refRec.getSequenceIndex());
-            }
-        }
-
-        for ( SAMSequenceRecord readsRec : readsDict.getSequences() ) {
-            if ( ! newOrder.containsKey(readsRec.getSequenceIndex()) ) {
-                if ( ALLOW_INCOMPLETE_DICT_CONCORDANCE )
-                    newOrder.put(readsRec.getSequenceIndex(), -1);
-                else
-                    throw new PicardException("New reference sequence does not contain a matching contig for " + readsRec.getSequenceName());
-            }
-        }
-
-        return newOrder;
-    }
-
-    /**
-     * Helper function to print out a sequence dictionary
-     * @param name
-     * @param dict
-     */
-    private static void printDictionary(String name, SAMSequenceDictionary dict) {
-        System.out.println(name);
-        for ( SAMSequenceRecord contig : dict.getSequences() ) {
-            System.out.printf( "  SN=%s LN=%d%n", contig.getSequenceName(), contig.getSequenceLength());
-        }
-    }
-}
-
-
--- a/java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java
+++ b/java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java
@ -1,78 +0,0 @@
-package org.broadinstitute.sting.playground.tools;
-
-import net.sf.picard.cmdline.CommandLineProgram;
-import net.sf.picard.cmdline.Option;
-import net.sf.picard.cmdline.Usage;
-import net.sf.samtools.*;
-
-import java.io.File;
-import java.util.Arrays;
-
-/**
- * User: mdepristo
- *
- * Replaces read groups in a BAM file
- */
-public class ReplaceReadGroups extends CommandLineProgram {
-    @Usage(programVersion="1.0") public String USAGE = "Creates a new read group, and assigns all reads from the I BAM file to this read group in the O BAM";
-    @Option(shortName="I", doc="Input file (bam or sam).", optional=false)
-    public File IN = null;
-    @Option(shortName="O",doc="Output file (bam or sam).", optional=false)
-    public File OUT = null;
-
-    @Option(shortName="ID",doc="Read Group ID", optional=false)
-    public String RGID = null;
-
-    @Option(shortName="LB",doc="Read Group Library", optional=false)
-    public String RGLB = null;
-
-    @Option(shortName="PL",doc="Read Group platform", optional=false)
-    public String RGPL = null;
-
-    @Option(shortName="SM",doc="Read Group sample", optional=false)
-    public String RGSM = null;
-
-    private static final String RGFIELD = "RG"; // todo -- use binary tag that's private in picard
-
-    // todo -- is it worth supporting these fields?
-    // CN Name of sequencing center producing the read.
-    // DS Description.
-    // DT Date the run was produced (ISO8601 date or date/time).
-    // PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD). Unique identier.
-
-    /** Required main method implementation. */
-    public static void main(final String[] argv) {
-        System.exit(new ReplaceReadGroups().instanceMain(argv));
-    }
-
-    protected int doWork() {
-        SAMFileReader inReader = new SAMFileReader(IN);
-
-        // create the read group we'll be using
-        SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
-        rg.setLibrary(RGLB);
-        rg.setPlatform(RGPL);
-        rg.setSample(RGSM);
-        System.out.printf("Created read group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample());
-
-        // create the new header and output file
-        SAMFileHeader outHeader = inReader.getFileHeader().clone();
-        outHeader.setReadGroups(Arrays.asList(rg));
-        SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, true, OUT) ;
-
-        //
-        // write the reads in contig order
-        //
-        for ( SAMRecord read : inReader ) {
-            read.setAttribute(RGFIELD, rg.getId());
-            outWriter.addAlignment(read);
-        }
-
-        // cleanup
-        inReader.close();
-        outWriter.close();
-        return 0;
-    }
-}
-
-