Modified CreatePedFileWalker to output PED file given HLA allele names
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1763 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
56bc4fa21a
commit
98076db6b4
|
|
@ -8,10 +8,6 @@ import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.DataInputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Hashtable;
|
import java.util.Hashtable;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -85,6 +85,9 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
||||||
similarityReader.ReadFile(filterFile);
|
similarityReader.ReadFile(filterFile);
|
||||||
ReadsToDiscard = similarityReader.GetReadsToDiscard();
|
ReadsToDiscard = similarityReader.GetReadsToDiscard();
|
||||||
out.printf("Done! Found %s misaligned reads to discard.\n",ReadsToDiscard.size());
|
out.printf("Done! Found %s misaligned reads to discard.\n",ReadsToDiscard.size());
|
||||||
|
for (int i = 0; i < ReadsToDiscard.size(); i++){
|
||||||
|
out.printf("MISALIGNED %s\n", ReadsToDiscard.get(i).toString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new Pair<Long,Long>(0l,0l);
|
return new Pair<Long,Long>(0l,0l);
|
||||||
|
|
@ -130,12 +133,12 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
|
||||||
|
|
||||||
//consider base in likelihood calculations if it looks good and has high mapping score
|
//consider base in likelihood calculations if it looks good and has high mapping score
|
||||||
G.add(base, qual, read, offset);
|
G.add(base, qual, read, offset);
|
||||||
if (DEBUG){
|
//if (DEBUG){
|
||||||
if (base == 'A'){numAs++;}
|
if (base == 'A'){numAs++;}
|
||||||
else if (base == 'C'){numCs++;}
|
else if (base == 'C'){numCs++;}
|
||||||
else if (base == 'T'){numTs++;}
|
else if (base == 'T'){numTs++;}
|
||||||
else if (base == 'G'){numGs++;}
|
else if (base == 'G'){numGs++;}
|
||||||
}
|
//}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -32,9 +33,14 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
||||||
@Argument(fullName = "debugAlleles", shortName = "debugAlleles", doc = "Print likelihood scores for these alleles", required = false)
|
@Argument(fullName = "debugAlleles", shortName = "debugAlleles", doc = "Print likelihood scores for these alleles", required = false)
|
||||||
public String debugAlleles = "";
|
public String debugAlleles = "";
|
||||||
|
|
||||||
|
@Argument(fullName = "phaseInterval", shortName = "phaseInterval", doc = "Use only these intervals in phase calculation", required = false)
|
||||||
|
public String phaseIntervalFile = "";
|
||||||
|
|
||||||
@Argument(fullName = "onlyfrequent", shortName = "onlyfrequent", doc = "Only consider alleles with frequency > 0.0001", required = false)
|
@Argument(fullName = "onlyfrequent", shortName = "onlyfrequent", doc = "Only consider alleles with frequency > 0.0001", required = false)
|
||||||
public boolean ONLYFREQUENT = false;
|
public boolean ONLYFREQUENT = false;
|
||||||
|
|
||||||
|
GATKArgumentCollection args = this.getToolkit().getArguments();
|
||||||
|
|
||||||
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
String CaucasianAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_CaucasiansUSA.freq";
|
||||||
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
String BlackAlleleFrequencyFile = "/humgen/gsa-scr1/GSA/sjia/454_HLA/HLA/HLA_BlackUSA.freq";
|
||||||
String AlleleFrequencyFile;
|
String AlleleFrequencyFile;
|
||||||
|
|
@ -45,11 +51,13 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
||||||
String[] HLAnames, HLAreads;
|
String[] HLAnames, HLAreads;
|
||||||
ArrayList<String> ReadsToDiscard;
|
ArrayList<String> ReadsToDiscard;
|
||||||
Integer[] HLAstartpos, HLAstoppos, PolymorphicSites;
|
Integer[] HLAstartpos, HLAstoppos, PolymorphicSites;
|
||||||
|
|
||||||
int[][] numObservations, totalObservations;
|
|
||||||
|
int[][] numObservations, totalObservations, intervals;
|
||||||
int[] SNPnumInRead, SNPposInRead;
|
int[] SNPnumInRead, SNPposInRead;
|
||||||
CigarParser cigarparser = new CigarParser();
|
CigarParser cigarparser = new CigarParser();
|
||||||
Hashtable AlleleFrequencies;
|
Hashtable AlleleFrequencies;
|
||||||
|
int numIntervals;
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
|
|
||||||
|
|
@ -109,10 +117,37 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
||||||
PolymorphicSites = FindPolymorphicSites(HLADictionaryReader.GetMinStartPos(),HLADictionaryReader.GetMaxStopPos());
|
PolymorphicSites = FindPolymorphicSites(HLADictionaryReader.GetMinStartPos(),HLADictionaryReader.GetMaxStopPos());
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//Determine intervals
|
||||||
|
if (!phaseIntervalFile.equals("")){
|
||||||
|
TextFileReader fileReader = new TextFileReader();
|
||||||
|
fileReader.ReadFile(phaseIntervalFile);
|
||||||
|
String[] lines = fileReader.GetLines();
|
||||||
|
intervals = new int[lines.length][2];
|
||||||
|
for (int i = 0; i < lines.length; i++) {
|
||||||
|
String[] s = lines[i].split(":");
|
||||||
|
String[] intervalPieces = s[0].split("-");
|
||||||
|
intervals[i][0] = Integer.valueOf(intervalPieces[0]);
|
||||||
|
intervals[i][1] = Integer.valueOf(intervalPieces[1]);
|
||||||
|
}
|
||||||
|
numIntervals = intervals.length;
|
||||||
|
for (int i = 0; i < numIntervals; i++){
|
||||||
|
out.printf("INFO Interval %s: %s-%s\n",i+1,intervals[i][0],intervals[i][1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean IsWithinInterval(int pos){
|
||||||
|
boolean isWithinInterval = false;
|
||||||
|
for (int i = 0; i < numIntervals; i++){
|
||||||
|
if (pos >= intervals[i][0] && pos <= intervals[i][1]){
|
||||||
|
isWithinInterval = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return isWithinInterval;
|
||||||
|
}
|
||||||
|
|
||||||
public Integer map(char[] ref, SAMRecord read) {
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
if (!ReadsToDiscard.contains(read.getReadName())){
|
if (!ReadsToDiscard.contains(read.getReadName())){
|
||||||
|
|
@ -264,7 +299,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
|
||||||
|
|
||||||
//Find all SNPs in read
|
//Find all SNPs in read
|
||||||
for (i = 0; i < numPositions; i++){
|
for (i = 0; i < numPositions; i++){
|
||||||
if (PolymorphicSites[i] > combinedstart && PolymorphicSites[i] < combinedstop){
|
if (PolymorphicSites[i] > combinedstart && PolymorphicSites[i] < combinedstop && IsWithinInterval(PolymorphicSites[i])){
|
||||||
SNPnumInRead[i] = SNPcount;
|
SNPnumInRead[i] = SNPcount;
|
||||||
SNPposInRead[i] = PolymorphicSites[i]-combinedstart;
|
SNPposInRead[i] = PolymorphicSites[i]-combinedstart;
|
||||||
SNPcount++;
|
SNPcount++;
|
||||||
|
|
|
||||||
|
|
@ -4,29 +4,41 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.HLAcaller.ReadCigarFormatter;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.DataInputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Hashtable;
|
import java.util.Hashtable;
|
||||||
import java.util.List;
|
|
||||||
import java.lang.Math;
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @author shermanjia
|
* @author shermanjia
|
||||||
*/
|
*/
|
||||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
public class CreatePedFileWalker extends ReadWalker<Integer, Integer> {
|
public class CreatePedFileWalker extends ReadWalker<Integer, Integer> {
|
||||||
|
@Argument(fullName = "allelesFile", shortName = "allelesFile", doc = "Create ped file for HLA alleles named in this file", required = true)
|
||||||
|
public String alleleNamesFile = "";
|
||||||
|
|
||||||
|
@Argument(fullName = "pedIntervals", shortName = "pedIntervals", doc = "Create genotypes in these intervals", required = false)
|
||||||
|
public String pedIntervalsFile = "";
|
||||||
|
|
||||||
|
String[] HLAnames, HLAreads, inputFileContents;
|
||||||
|
Integer[] HLAstartpos, HLAstoppos;
|
||||||
|
ArrayList<String> HLAnamesAL, HLAreadsAL;
|
||||||
|
ArrayList<Integer> HLAstartposAL, HLAstopposAL;
|
||||||
|
int[][] intervals;
|
||||||
|
int numIntervals;
|
||||||
|
|
||||||
ReadCigarFormatter formatter = new ReadCigarFormatter();
|
ReadCigarFormatter formatter = new ReadCigarFormatter();
|
||||||
char c;
|
char c;
|
||||||
boolean DEBUG = false;
|
boolean DEBUG = false;
|
||||||
|
boolean FilesLoaded = false;
|
||||||
int HLA_A_start = 30018310;
|
int HLA_A_start = 30018310;
|
||||||
int HLA_A_end = 30021211;
|
int HLA_A_end = 30021211;
|
||||||
|
int HLA_B_start = 31430239;
|
||||||
|
int HLA_B_end = 31432914;
|
||||||
|
int HLA_C_start = 31344925;
|
||||||
|
int HLA_C_end = 31347827;
|
||||||
|
|
||||||
String[] SNPnames;
|
String[] SNPnames;
|
||||||
String SNPname;
|
String SNPname;
|
||||||
int start, end;
|
int start, end;
|
||||||
|
|
@ -35,60 +47,155 @@ public class CreatePedFileWalker extends ReadWalker<Integer, Integer> {
|
||||||
Hashtable indexer = new Hashtable();
|
Hashtable indexer = new Hashtable();
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
SNPnames = new String[HLA_A_end-HLA_A_start+1];
|
if (!FilesLoaded){
|
||||||
start = HLA_A_start;
|
FilesLoaded = true;
|
||||||
end = HLA_A_end;
|
HLAnamesAL = new ArrayList<String>();
|
||||||
|
HLAreadsAL = new ArrayList<String>();
|
||||||
|
HLAstartposAL = new ArrayList<Integer>();
|
||||||
|
HLAstopposAL = new ArrayList<Integer>();
|
||||||
|
|
||||||
indexer.put('A', (Integer) 1);
|
TextFileReader fileReader = new TextFileReader();
|
||||||
indexer.put('C', (Integer) 2);
|
fileReader.ReadFile(alleleNamesFile);
|
||||||
indexer.put('G', (Integer) 3);
|
inputFileContents = fileReader.GetLines();
|
||||||
indexer.put('T', (Integer) 4);
|
|
||||||
indexer.put('D', (Integer) 0); // D for deletion
|
|
||||||
out.print("Reads:\n");
|
//Determine intervals
|
||||||
|
if (!pedIntervalsFile.equals("")){
|
||||||
|
fileReader = new TextFileReader();
|
||||||
|
fileReader.ReadFile(pedIntervalsFile);
|
||||||
|
String[] lines = fileReader.GetLines();
|
||||||
|
intervals = new int[lines.length][2];
|
||||||
|
for (int i = 0; i < lines.length; i++) {
|
||||||
|
String[] s = lines[i].split(":");
|
||||||
|
String[] intervalPieces = s[0].split("-");
|
||||||
|
intervals[i][0] = Integer.valueOf(intervalPieces[0]);
|
||||||
|
intervals[i][1] = Integer.valueOf(intervalPieces[1]);
|
||||||
|
}
|
||||||
|
numIntervals = intervals.length;
|
||||||
|
for (int i = 0; i < numIntervals; i++){
|
||||||
|
out.printf("INFO Interval %s: %s-%s\n",i+1,intervals[i][0],intervals[i][1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int BaseCharToInt(char c){
|
||||||
|
switch(c){
|
||||||
|
case 'A': return 1;
|
||||||
|
case 'C': return 2;
|
||||||
|
case 'G': return 3;
|
||||||
|
case 'T': return 4;
|
||||||
|
default: return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Integer map(char[] ref, SAMRecord read) {
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
|
HLAnamesAL.add(read.getReadName());
|
||||||
int readstart = read.getAlignmentStart();
|
HLAreadsAL.add(formatter.FormatRead(read.getCigarString(), read.getReadString()));
|
||||||
int readstop = read.getAlignmentEnd();
|
HLAstartposAL.add(read.getAlignmentStart());
|
||||||
|
HLAstopposAL.add(read.getAlignmentEnd());
|
||||||
if(readstart <= HLA_A_end && readstop >= HLA_A_start){
|
|
||||||
String s = formatter.FormatRead(read.getCigarString(),read.getReadString());
|
|
||||||
String name = read.getReadName();
|
|
||||||
|
|
||||||
out.printf("%s\t%s\t0\t0\tM",name,name);
|
|
||||||
|
|
||||||
for (int i = start; i <= end; i++){
|
|
||||||
|
|
||||||
if (i - readstart < s.length() && i - readstart >= 0){
|
|
||||||
c = s.charAt(i-readstart);
|
|
||||||
I = (Integer) indexer.get(c);
|
|
||||||
out.printf("\t%s %s",I,I);
|
|
||||||
}else{
|
|
||||||
out.print("\t0 0");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out.printf("\n");
|
|
||||||
}
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void PrintGenotypes(String alleleName1, String alleleName2, int startpos, int stoppos){
|
||||||
|
//prints genotypes for allele1 and allele2 at given interval
|
||||||
|
int i1 = GetAlleleIndex(alleleName1);
|
||||||
|
int i2 = GetAlleleIndex(alleleName2);
|
||||||
|
String s1, s2;
|
||||||
|
int start1, start2, stop1, stop2;
|
||||||
|
char c1, c2;
|
||||||
|
|
||||||
|
if (i1 > -1){
|
||||||
|
s1 = HLAreads[i1];
|
||||||
|
start1 = HLAstartpos[i1];
|
||||||
|
stop1 = HLAstoppos[i1];
|
||||||
|
}else{
|
||||||
|
s1 = "";
|
||||||
|
start1 = -1;
|
||||||
|
stop1 = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i2 > -1){
|
||||||
|
s2 = HLAreads[i2];
|
||||||
|
start2 = HLAstartpos[i2];
|
||||||
|
stop2 = HLAstoppos[i2];
|
||||||
|
}else{
|
||||||
|
s2 = "";
|
||||||
|
start2 = -1;
|
||||||
|
stop2 = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int pos = startpos; pos <= stoppos; pos++){
|
||||||
|
if (pos >= start1 && pos <= stop1){
|
||||||
|
c1 = s1.charAt(pos-start1);
|
||||||
|
if (c1 == 'D'){c1 = '0';}
|
||||||
|
}else{
|
||||||
|
c1 = '0';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pos >= start2 && pos <= stop2){
|
||||||
|
c2 = s2.charAt(pos-start2);
|
||||||
|
if (c2 == 'D'){c2 = '0';}
|
||||||
|
}else{
|
||||||
|
c2 = '0';
|
||||||
|
}
|
||||||
|
|
||||||
|
out.printf("\t%s %s",c1,c2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int GetAlleleIndex(String alleleName){
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < HLAnames.length; i++){
|
||||||
|
if (HLAnames[i].equals(alleleName)){
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
|
||||||
return value + sum;
|
return value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Integer value) {
|
public void onTraversalDone(Integer numreads) {
|
||||||
out.print("\nSNP names:\n");
|
HLAnames = HLAnamesAL.toArray(new String[numreads]);
|
||||||
for (int pos = start; pos <= end; pos++){
|
HLAreads = HLAreadsAL.toArray(new String[numreads]);
|
||||||
SNPname = "M CHR6_POS" + String.valueOf(pos);
|
HLAstartpos = HLAstartposAL.toArray(new Integer[numreads]);
|
||||||
SNPnames[pos-start]=SNPname;
|
HLAstoppos = HLAstopposAL.toArray(new Integer[numreads]);
|
||||||
out.printf("%s\n",SNPname);
|
|
||||||
|
//Print individual info and genotypes
|
||||||
|
for (int i = 0; i < inputFileContents.length; i++){
|
||||||
|
String[] s = inputFileContents[i].split(" ");
|
||||||
|
//out.printf("%s\t%s\n",inputFileContents[i],s.length);
|
||||||
|
if (s.length > 10){
|
||||||
|
out.printf("%s\t%s\t%s\t%s\t%s\t%s",s[0],s[1],s[2],s[3],s[4],s[5]);
|
||||||
|
String HLA_A1 = "HLA_A" + s[6];
|
||||||
|
String HLA_A2 = "HLA_A" + s[7];
|
||||||
|
String HLA_B1 = "HLA_B" + s[8];
|
||||||
|
String HLA_B2 = "HLA_B" + s[9];
|
||||||
|
String HLA_C1 = "HLA_C" + s[10];
|
||||||
|
String HLA_C2 = "HLA_C" + s[11];
|
||||||
|
PrintGenotypes(HLA_A1,HLA_A2, HLA_A_start,HLA_A_end);
|
||||||
|
PrintGenotypes(HLA_C1,HLA_C2, HLA_C_start,HLA_C_end);
|
||||||
|
PrintGenotypes(HLA_B1,HLA_B2, HLA_B_start,HLA_B_end);
|
||||||
|
out.printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Prints SNP names for each site
|
||||||
|
PrintSNPS(HLA_A_start,HLA_A_end);
|
||||||
|
PrintSNPS(HLA_C_start,HLA_C_end);
|
||||||
|
PrintSNPS(HLA_B_start,HLA_B_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void PrintSNPS(int startpos, int stoppos){
|
||||||
|
for (int pos = startpos; pos <= stoppos; pos++){
|
||||||
|
SNPname = "CHR6_POS" + String.valueOf(pos);
|
||||||
|
out.printf("6\t%s\t0\t%s\n",SNPname,pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -177,7 +177,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
if (pos >= readstart && pos <= readstop && pos >= allelestart && pos <= allelestop){
|
if (pos >= readstart && pos <= readstop && pos >= allelestart && pos <= allelestop){
|
||||||
c1 = s1.charAt(pos-readstart);
|
c1 = s1.charAt(pos-readstart);
|
||||||
c2 = s2.charAt(pos-allelestart);
|
c2 = s2.charAt(pos-allelestart);
|
||||||
if (c1 != 'D'){
|
if (c1 != 'D'){//allow for deletions (sequencing errors)
|
||||||
numcompared[i]++;
|
numcompared[i]++;
|
||||||
if (c1 == c2){
|
if (c1 == c2){
|
||||||
nummatched[i]++;
|
nummatched[i]++;
|
||||||
|
|
@ -197,7 +197,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
if (pos >= readstart && pos <= readstop && pos >= allelestart && pos <= allelestop){
|
if (pos >= readstart && pos <= readstop && pos >= allelestart && pos <= allelestop){
|
||||||
c1 = s1.charAt(pos-readstart);
|
c1 = s1.charAt(pos-readstart);
|
||||||
c2 = s2.charAt(pos-allelestart);
|
c2 = s2.charAt(pos-allelestart);
|
||||||
if (c1 != c2){
|
if (c1 != c2 && c1 != 'D'){//allow for deletions (sequencing errors)
|
||||||
numcompared[i]++;
|
numcompared[i]++;
|
||||||
if (debugRead.equals(read.getReadName()) && debugAllele.equals(HLAnames[i])){
|
if (debugRead.equals(read.getReadName()) && debugAllele.equals(HLAnames[i])){
|
||||||
out.printf("%s\t%s\t%s\t%s\t%s\n",read.getReadName(), HLAnames[i], j, c1,c2);
|
out.printf("%s\t%s\t%s\t%s\t%s\n",read.getReadName(), HLAnames[i], j, c1,c2);
|
||||||
|
|
@ -241,7 +241,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
|
||||||
String readname = read.getReadName(), allelename = ""; double freq;
|
String readname = read.getReadName(), allelename = ""; double freq;
|
||||||
//For input bam files that contain HLA alleles, find and print allele frequency
|
//For input bam files that contain HLA alleles, find and print allele frequency
|
||||||
freq = GetAlleleFrequency(readname);
|
freq = GetAlleleFrequency(readname);
|
||||||
out.printf("%s\t%.4f", readname,freq);
|
out.printf("%s\t%s-%s", readname,read.getAlignmentStart(),read.getAlignmentEnd());
|
||||||
|
|
||||||
//Find the maximum frequency of the alleles most concordant with the read
|
//Find the maximum frequency of the alleles most concordant with the read
|
||||||
double maxFreq = FindMaxAlleleFrequency(maxConcordance);
|
double maxFreq = FindMaxAlleleFrequency(maxConcordance);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* To change this template, choose Tools | Templates
|
||||||
|
* and open the template in the editor.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author shermanjia
|
||||||
|
*/
|
||||||
|
public class TextFileReader {
|
||||||
|
ArrayList<String> lines = new ArrayList<String>();
|
||||||
|
int numLines = 0;
|
||||||
|
|
||||||
|
public String[] GetLines(){
|
||||||
|
return lines.toArray(new String[lines.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int GetNumLines(){
|
||||||
|
return numLines;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void ReadFile(String filename){
|
||||||
|
try{
|
||||||
|
FileInputStream fstream = new FileInputStream(filename);
|
||||||
|
DataInputStream in = new DataInputStream(fstream);
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||||
|
String strLine;
|
||||||
|
//Read File Line By Line
|
||||||
|
while ((strLine = br.readLine()) != null) {
|
||||||
|
lines.add(strLine);
|
||||||
|
numLines++;
|
||||||
|
}
|
||||||
|
in.close();
|
||||||
|
}catch (Exception e){//Catch exception if any
|
||||||
|
System.err.println("TextFileReader Error: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Loading…
Reference in New Issue