diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index a90f8959d..1fb873e81 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -71,6 +71,13 @@ public class LikelihoodCalculationEngine {
     private final PairHMM pairHMM;
     private final int minReadLength = 20;
 
+    /**
+     * The expected rate of random sequencing errors for a read originating from its true haplotype.
+     *
+     * For example, if this is 0.01, then we'd expect 1 error per 100 bp.
+     */
+    private final double EXPECTED_ERROR_RATE_PER_BASE = 0.02;
+
     public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType ) {
 
         switch (hmmType) {
@@ -127,7 +134,14 @@ public class LikelihoodCalculationEngine {
         for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
             //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
             // evaluate the likelihood of the reads given those haplotypes
-            stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue()));
+            final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue());
+
+            final List<GATKSAMRecord> removedReads = map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
+//            logger.info("Removed " + removedReads.size() + " reads because of bad likelihoods from sample " + sampleEntry.getKey());
+//            for ( final GATKSAMRecord read : removedReads )
+//                logger.info("\tRemoved " + read.getReadName());
+
+            stratifiedReadMap.put(sampleEntry.getKey(), map);
         }
 
         return stratifiedReadMap;
@@ -170,6 +184,7 @@ public class LikelihoodCalculationEngine {
                 perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l);
             }
         }
+
         return perReadAlleleLikelihoodMap;
     }
 
diff --git a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java
index 84bdfd19b..c50849a54 100644
--- a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java
@@ -46,6 +46,8 @@
 
 package org.broadinstitute.sting.utils.genotyper;
 
+import net.sf.samtools.*;
+import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.variant.variantcontext.Allele;
 import org.broadinstitute.sting.utils.BaseUtils;
@@ -54,33 +56,16 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.variant.variantcontext.Allele;
 import org.broadinstitute.sting.utils.Utils;
 import java.util.Map;
 import java.util.List;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
-import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
-import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
-import org.broadinstitute.variant.variantcontext.Allele;
-import org.broadinstitute.variant.variantcontext.VariantContext;
-import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
-import org.broadinstitute.variant.vcf.VCFCodec;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.util.*;
@@ -235,7 +220,82 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
         Assert.assertEquals(downsampledStrat.get(base_A).size(),(int) (pileup.depthOfCoverage()/2) - 1);
         Assert.assertEquals(downsampledStrat.get(base_C).size(),(int) (pileup.depthOfCoverage()/2));
         Assert.assertEquals(downsampledStrat.get(base_T).size(),0);
+    }
+
+    @DataProvider(name = "PoorlyModelledReadData")
+    public Object[][] makePoorlyModelledReadData() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        // this functionality can be adapted to provide input data for whatever you might want in your data
+        tests.add(new Object[]{10, 0.1, false, Arrays.asList(0.0)});
+        tests.add(new Object[]{10, 0.1, true, Arrays.asList(-10.0)});
+        tests.add(new Object[]{10, 0.1, false, Arrays.asList(0.0, -10.0)});
+        tests.add(new Object[]{10, 0.1, true, Arrays.asList(-5.0, -10.0)});
+        tests.add(new Object[]{100, 0.1, false, Arrays.asList(-5.0, -10.0)});
+        tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0)});
+        tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -3.0)});
+        tests.add(new Object[]{100, 0.01, false, Arrays.asList(-5.0, -10.0, -2.0)});
+        tests.add(new Object[]{100, 0.01, true, Arrays.asList(-5.0, -10.0, -4.0)});
+        tests.add(new Object[]{100, 0.001, true, Arrays.asList(-5.0, -10.0)});
+        tests.add(new Object[]{100, 0.001, false, Arrays.asList(-5.0, -10.0, 0.0)});
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test(dataProvider = "PoorlyModelledReadData")
+    public void testPoorlyModelledRead(final int readLen, final double maxErrorRatePerBase, final boolean expected, final List<Double> log10likelihoods) {
+        final byte[] bases = Utils.dupBytes((byte)'A', readLen);
+        final byte[] quals = Utils.dupBytes((byte) 30, readLen);
+
+        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M");
+
+        final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap();
+        final boolean actual = map.readIsPoorlyModelled(read, log10likelihoods, maxErrorRatePerBase);
+        Assert.assertEquals(actual, expected);
+    }
 
 
+    @DataProvider(name = "RemovingPoorlyModelledReadData")
+    public Object[][] makeRemovingPoorlyModelledReadData() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        // this functionality can be adapted to provide input data for whatever you might want in your data
+        final int readLen = 10;
+        for ( int nReads = 0; nReads < 4; nReads++ ) {
+            for ( int nBad = 0; nBad <= nReads; nBad++ ) {
+                final int nGood = nReads - nBad;
+                tests.add(new Object[]{readLen, nReads, nBad, nGood});
+            }
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test(dataProvider = "RemovingPoorlyModelledReadData")
+    public void testRemovingPoorlyModelledReads(final int readLen, final int nReads, final int nBad, final int nGood) {
+        final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap();
+        final Set<GATKSAMRecord> goodReads = new HashSet<GATKSAMRecord>();
+        final Set<GATKSAMRecord> badReads = new HashSet<GATKSAMRecord>();
+        for ( int readI = 0; readI < nReads; readI++ ) {
+            final boolean bad = readI < nBad;
+            final double likelihood = bad ? -100.0 : 0.0;
+
+            final byte[] bases = Utils.dupBytes((byte)'A', readLen);
+            final byte[] quals = Utils.dupBytes((byte) 30, readLen);
+
+            final Allele allele = Allele.create(Utils.dupString("A", readI+1));
+
+            final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M");
+            read.setReadName("readName" + readI);
+            map.add(read, allele, likelihood);
+            (bad ? badReads : goodReads).add(read);
+        }
+
+        final List<GATKSAMRecord> removedReads = map.filterPoorlyModelledReads(0.01);
+        Assert.assertEquals(removedReads.size(), nBad, "nBad " + nBad + " nGood " + nGood);
+        Assert.assertEquals(new HashSet<GATKSAMRecord>(removedReads), badReads, "nBad " + nBad + " nGood " + nGood);
+        Assert.assertEquals(map.size(), nGood, "nBad " + nBad + " nGood " + nGood);
+        Assert.assertTrue(map.getStoredElements().containsAll(goodReads), "nBad " + nBad + " nGood " + nGood);
+        Assert.assertEquals(map.getStoredElements().size(), nGood, "nBad " + nBad + " nGood " + nGood);
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
index 02618100d..201e3b9b4 100644
--- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
@@ -251,4 +251,62 @@ public class PerReadAlleleLikelihoodMap {
         }
         return sb.toString();
     }
+
+    /**
+     * Remove reads from this map that are poorly modelled w.r.t. their per allele likelihoods
+     *
+     * Goes through each read in this map, and if it is poorly modelled removes it from the map.
+     *
+     * @see #readIsPoorlyModelled(org.broadinstitute.sting.utils.sam.GATKSAMRecord, java.util.Collection, double)
+     * for more information about the poorly modelled test.
+     *
+     * @param maxErrorRatePerBase see equivalent parameter in #readIsPoorlyModelled
+     * @return the list of reads removed from this map because they are poorly modelled
+     */
+    public List<GATKSAMRecord> filterPoorlyModelledReads(final double maxErrorRatePerBase) {
+        final List<GATKSAMRecord> removedReads = new LinkedList<GATKSAMRecord>();
+        final Iterator<Map.Entry<GATKSAMRecord, Map<Allele, Double>>> it = likelihoodReadMap.entrySet().iterator();
+        while ( it.hasNext() ) {
+            final Map.Entry<GATKSAMRecord, Map<Allele, Double>> record = it.next();
+            if ( readIsPoorlyModelled(record.getKey(), record.getValue().values(), maxErrorRatePerBase) ) {
+                it.remove();
+                removedReads.add(record.getKey());
+            }
+        }
+
+        return removedReads;
+    }
+
+    /**
+     * Is this read poorly modelled by any of the alleles in this map?
+     *
+     * A read is poorly modeled when it's likelihood is below what would be expected for a read
+     * originating from one of the alleles given the maxErrorRatePerBase of the reads in general.
+     *
+     * This function makes a number of key assumptions.  First, that the likelihoods reflect the total likelihood
+     * of the read.  In other words, that the read would be fully explained by one of the alleles.  This means
+     * that the allele should be something like the full haplotype from which the read might originate.
+     *
+     * It further assumes that each error in the read occurs with likelihood of -3 (Q30 confidence per base).  So
+     * a read with a 10% error rate with Q30 bases that's 100 bp long we'd expect to see 10 real Q30 errors
+     * even against the true haplotype.  So for this read to be well modelled by at least one allele we'd expect
+     * a likelihood to be >= 10 * -3.
+     *
+     * @param read the read we want to evaluate
+     * @param log10Likelihoods a list of the log10 likelihoods of the read against a set of haplotypes.
+     * @param maxErrorRatePerBase the maximum error rate we'd expect for this read per base, in real space.  So
+     *                            0.01 means a 1% error rate
+     * @return true if none of the log10 likelihoods imply that the read truly originated from one of the haplotypes
+     */
+    protected boolean readIsPoorlyModelled(final GATKSAMRecord read, final Collection<Double> log10Likelihoods, final double maxErrorRatePerBase) {
+        final double maxErrorsForRead = Math.ceil(read.getReadLength() * maxErrorRatePerBase);
+        final double log10QualPerBase = -3.0;
+        final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;
+
+        for ( final double log10Likelihood : log10Likelihoods )
+            if ( log10Likelihood >= log10MaxLikelihoodForTrueAllele )
+                return false;
+
+        return true;
+    }
 }