Tests seem to pass (can't be positive though because I ran before Tad's recent push), so I'm going to push now (this push touches so many files that I don't want to keep it around much longer).
Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
5371613ad1
|
|
@ -0,0 +1,209 @@
|
|||
package org.broadinstitute.sting.tools;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import org.apache.log4j.BasicConfigurator;
|
||||
import org.apache.log4j.Level;
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.writer.Options;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.AppendVariants <reference> <input VCF or BCF files> <outputFile> [sorted (optional)]");
|
||||
* The input files can be of type: VCF (ends in .vcf or .VCF)");
|
||||
* BCF2 (ends in .bcf or .BCF)");
|
||||
* Output file must be vcf or bcf file (.vcf or .bcf)");
|
||||
* If the input files are already sorted, the last argument can indicate that");
|
||||
*/
|
||||
public class CatVariants extends CommandLineProgram {
|
||||
// setup the logging system, used by some codecs
|
||||
private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getRootLogger();
|
||||
|
||||
@Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true)
|
||||
private File refFile = null;
|
||||
|
||||
@Input(fullName="variant", shortName="V", doc="Input VCF file/s named <name>.vcf or <name>.bcf", required = true)
|
||||
private List<File> variant = null;
|
||||
|
||||
@Output(fullName = "outputFile", shortName = "out", doc = "output file name <name>.vcf or <name>.bcf", required = true)
|
||||
private File outputFile = null;
|
||||
|
||||
@Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if he input files are already sorted (based on the position of the variants", required = false)
|
||||
private Boolean assumeSorted = false;
|
||||
|
||||
/*
|
||||
* print usage information
|
||||
*/
|
||||
private static void printUsage() {
|
||||
System.err.println("Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.AppendVariants <reference> <input VCF or BCF files> <outputFile> [sorted (optional)]");
|
||||
System.err.println(" The input files can be of type: VCF (ends in .vcf or .VCF)");
|
||||
System.err.println(" BCF2 (ends in .bcf or .BCF)");
|
||||
System.err.println(" Output file must be vcf or bcf file (.vcf or .bcf)");
|
||||
System.err.println(" if the input files are already sorted, the last argument can indicate that");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int execute() throws Exception {
|
||||
//if(help){
|
||||
// printUsage();
|
||||
// return 1;
|
||||
//}
|
||||
|
||||
BasicConfigurator.configure();
|
||||
logger.setLevel(Level.INFO);
|
||||
|
||||
if ( ! refFile.getName().endsWith(".fasta")) {
|
||||
throw new UserException("Reference file "+refFile+"name must end with .fasta");
|
||||
}
|
||||
|
||||
if ( ! refFile.exists() ) {
|
||||
throw new UserException(String.format("Reference file %s does not exist", refFile.getAbsolutePath()));
|
||||
}
|
||||
|
||||
// Comparator<Pair<Integer,FeatureReader<VariantContext>>> comparator = new PositionComparator();
|
||||
Comparator<Pair<Integer,File>> positionComparator = new PositionComparator();
|
||||
|
||||
|
||||
//PriorityQueue<Pair<Integer,FeatureReader<VariantContext>>> queue =
|
||||
// new PriorityQueue<Pair<Integer,FeatureReader<VariantContext>>>(2000, comparator);
|
||||
Queue<Pair<Integer,File>> priorityQueue;
|
||||
if(assumeSorted)
|
||||
priorityQueue = new LinkedList<Pair<Integer,File>>();
|
||||
else
|
||||
priorityQueue = new PriorityQueue<Pair<Integer,File>>(10000, positionComparator);
|
||||
|
||||
Iterator<File> files = variant.iterator();
|
||||
File file;
|
||||
while (files.hasNext()) {
|
||||
file = files.next();
|
||||
if (!(file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF") || file.getName().endsWith(".bcf") || file.getName().endsWith(".BCF"))){
|
||||
System.err.println("File " + file.getAbsolutePath() + " should be <name>.vcf or <name>.bcf");
|
||||
printUsage();
|
||||
return 1;
|
||||
}
|
||||
if (assumeSorted){
|
||||
priorityQueue.add(new Pair<Integer, File>(0,file));
|
||||
}
|
||||
else{
|
||||
if (!file.exists()) {
|
||||
throw new UserException(String.format("File %s doesn't exist",file.getAbsolutePath()));
|
||||
}
|
||||
FeatureReader<VariantContext> reader;
|
||||
boolean useVCF = (file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF"));
|
||||
if(useVCF)
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
else
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new BCF2Codec(), false);
|
||||
Iterator<VariantContext> it = reader.iterator();
|
||||
if(!it.hasNext()){
|
||||
System.err.println(String.format("File %s is empty. This file will be ignored",file.getAbsolutePath()));
|
||||
continue;
|
||||
}
|
||||
VariantContext vc = it.next();
|
||||
int firstPosition = vc.getStart();
|
||||
reader.close();
|
||||
//queue.add(new Pair<Integer, FeatureReader<VariantContext>>(firstPosition,reader));
|
||||
priorityQueue.add(new Pair<Integer, File>(firstPosition,file));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!(outputFile.getName().endsWith(".vcf") || outputFile.getName().endsWith(".VCF"))){
|
||||
throw new UserException(String.format("Output file %s should be <name>.vcf", outputFile));
|
||||
}
|
||||
ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
|
||||
|
||||
|
||||
FileOutputStream outputStream = new FileOutputStream(outputFile);
|
||||
EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final VariantContextWriter outputWriter = VariantContextWriterFactory.create(outputFile, outputStream, ref.getSequenceDictionary(), options);
|
||||
|
||||
boolean firstFile = true;
|
||||
int count =0;
|
||||
//while(!queue.isEmpty()){
|
||||
while(!priorityQueue.isEmpty() ){
|
||||
count++;
|
||||
//FeatureReader<VariantContext> reader = queue.remove().getSecond();
|
||||
file = priorityQueue.remove().getSecond();
|
||||
if (!file.exists()) {
|
||||
throw new UserException(String.format("File %s doesn't exist",file.getAbsolutePath()));
|
||||
}
|
||||
FeatureReader<VariantContext> reader;
|
||||
boolean useVCF = (file.getName().endsWith(".vcf") || file.getName().endsWith(".VCF"));
|
||||
if(useVCF)
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
else
|
||||
reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new BCF2Codec(), false);
|
||||
|
||||
if(count%10 ==0)
|
||||
System.out.print(count);
|
||||
else
|
||||
System.out.print(".");
|
||||
if (firstFile){
|
||||
VCFHeader header = (VCFHeader)reader.getHeader();
|
||||
outputWriter.writeHeader(header);
|
||||
firstFile = false;
|
||||
}
|
||||
|
||||
Iterator<VariantContext> it = reader.iterator();
|
||||
|
||||
while (it.hasNext()){
|
||||
VariantContext vc = it.next();
|
||||
outputWriter.add(vc);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
outputStream.close();
|
||||
outputWriter.close();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args){
|
||||
try {
|
||||
CatVariants instance = new CatVariants();
|
||||
start(instance, args);
|
||||
System.exit(CommandLineProgram.result);
|
||||
} catch ( UserException e ) {
|
||||
printUsage();
|
||||
exitSystemWithUserError(e);
|
||||
} catch ( Exception e ) {
|
||||
exitSystemWithError(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static class PositionComparator implements Comparator<Pair<Integer,File>> {
|
||||
|
||||
@Override
|
||||
public int compare(Pair<Integer,File> p1, Pair<Integer,File> p2) {
|
||||
int startPositionP1 = p1.getFirst();
|
||||
int startPositionP2 = p2.getFirst();
|
||||
if (startPositionP1 == startPositionP2)
|
||||
return 0;
|
||||
return startPositionP1 < startPositionP2 ? -1 : 1 ;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue