diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9fc6104e6..b0c8f226f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,6 +29,7 @@ test-job: - export ASAN_OPTIONS="detect_leaks=0" - CGL_DEBUG=ultra make -j 8 - CACTUS_BINARIES_MODE=local SON_TRACE_DATASETS=$(pwd)/cactusTestData CACTUS_TEST_CHOICE=normal make test + - pip install -U newick - make -j 8 hal_test # rebuild without all the debug flags - make clean diff --git a/BIN-INSTALL.md b/BIN-INSTALL.md index 3a5dc11bc..d14f05106 100644 --- a/BIN-INSTALL.md +++ b/BIN-INSTALL.md @@ -6,18 +6,18 @@ pre-compile binary, static linked distribution. ## Extracting If you have not already extract the distribution and cd into the cactus directory: ``` -tar -xzf cactus-bin-v2.6.8.tar.gz -cd cactus-bin-v2.6.8 +tar -xzf cactus-bin-v2.6.13.tar.gz +cd cactus-bin-v2.6.13 ``` ## Setup To build a python virtualenv and activate, do the following steps. This requires Python version >= 3.7 (so Ubuntu 18.04 users should use `-p python3.8` below): ``` -virtualenv -p python3 venv-cactus-v2.6.8 -printf "export PATH=$(pwd)/bin:\$PATH\nexport PYTHONPATH=$(pwd)/lib:\$PYTHONPATH\n" >> venv-cactus-v2.6.8/bin/activate -source venv-cactus-v2.6.8/bin/activate -python3 -m pip install -U setuptools pip +virtualenv -p python3 venv-cactus-v2.6.13 +printf "export PATH=$(pwd)/bin:\$PATH\nexport PYTHONPATH=$(pwd)/lib:\$PYTHONPATH\n" >> venv-cactus-v2.6.13/bin/activate +source venv-cactus-v2.6.13/bin/activate +python3 -m pip install -U setuptools pip wheel python3 -m pip install -U . 
python3 -m pip install -U -r ./toil-requirement.txt ``` diff --git a/Dockerfile b/Dockerfile index 9c196e693..412f8fa52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -87,6 +87,9 @@ RUN chmod 777 /opt/cactus/wrapper.sh # log the memory usage (with --realTimeLogging) for local commands ENV CACTUS_LOG_MEMORY 1 +# remember we're in a docker to help with error handling +ENV CACTUS_INSIDE_CONTAINER 1 + # UCSC convention is to work in /data RUN mkdir -p /data WORKDIR /data diff --git a/Makefile b/Makefile index fa7c2afc0..5b6fbc710 100644 --- a/Makefile +++ b/Makefile @@ -92,24 +92,6 @@ unitTests = \ #paffyTests \ # This is removed for now -# these are slow, but added to CI here since hal no longer has its own -halTests = \ - hal4dExtractTest \ - halAlignmentTreesTest \ - halBottomSegmentTest \ - halColumnIteratorTest \ - halGappedSegmentIteratorTest \ - halGenomeTest \ - halHdf5Tests \ - halLiftoverTests \ - halMafTests \ - halMappedSegmentTest \ - halMetaDataTest \ - halRearrangementTest \ - halSequenceTest \ - halTopSegmentTest \ - halValidateTest - # if running travis or gitlab, we want output to go to stdout/stderr so it can # be seen in the log file, as opposed to individual files, which are much # easier to read when running tests in parallel. 
@@ -133,7 +115,8 @@ testLogDir = ${testOutDir}/logs test: ${testModules:%=%_runtest} ${unitTests:%=%_run_unit_test} test_blast: ${testModules:%=%_runtest_blast} test_nonblast: ${testModules:%=%_runtest_nonblast} -hal_test: ${halTests:%=%_run_unit_test} +hal_test: + cd ${CWD}/submodules/hal && make test # run one test and save output %_runtest: ${versionPy} diff --git a/README.md b/README.md index 29b2393a8..7e3007eff 100644 --- a/README.md +++ b/README.md @@ -54,14 +54,14 @@ virtualenv -p python3 cactus_env echo "export PATH=$(pwd)/bin:\$PATH" >> cactus_env/bin/activate echo "export PYTHONPATH=$(pwd)/lib:\$PYTHONPATH" >> cactus_env/bin/activate source cactus_env/bin/activate -python3 -m pip install -U setuptools pip +python3 -m pip install -U setuptools pip wheel python3 -m pip install -U . python3 -m pip install -U -r ./toil-requirement.txt ``` If you have Docker installed, you can now run Cactus. All binaries, such as `lastz` and `cactus-consolidated` will be run via Docker. Singularity binaries can be used in place of docker binaries with the `--binariesMode singularity` flag. Note, you must use Singularity 2.3 - 2.6 or Singularity 3.1.0+. Singularity 3 versions below 3.1.0 are incompatible with cactus (see [issue #55](https://github.com/ComparativeGenomicsToolkit/cactus/issues/55) and [issue #60](https://github.com/ComparativeGenomicsToolkit/cactus/issues/60)). -By default, cactus will use the image, `quay.io/comparative-genomics-toolkit/cactus:` when running binaries. This is usually okay, but can be overridden with the `CACTUS_DOCKER_ORG` and `CACTUS_DOCKER_TAG` environment variables. For example, to use GPU release 2.4.4, run `export CACTUS_DOCKER_TAG=v2.4.4-gpu` before running cactus. +By default, cactus will use the image corresponding to the latest release when running docker binaries. This is usually okay, but can be overridden with the `CACTUS_DOCKER_ORG` and `CACTUS_DOCKER_TAG` environment variables. 
For example, to use GPU release 2.4.4, run `export CACTUS_DOCKER_TAG=v2.4.4-gpu` before running cactus. ### Compiling Binaries Locally In order to compile the binaries locally and not use a Docker image, you need some dependencies installed. On Ubuntu (we've tested on 20.04 and 22.04), you can look at the [Cactus Dockerfile](./Dockerfile) for guidance. To obtain the `apt-get` command: diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 950644411..4f92b5dbf 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,3 +1,51 @@ +# Release 2.6.13 2023-11-15 + +This release fixes an issue where Toil can ask for way too much memory for minigraph construction +- Cut default minigraph construction memory estimate by half +- Add `--mgMemory` option to override minigraph construction memory estimate no matter what +- Exit with a clear error message (instead of more cryptic crash) when user tries to run container binaries in a container +- Fix double Toil delete that seems to cause fatal error in some environments +- Fix `gfaffix` regular expression bug that could cause paths other than the reference to be protected from collapse. + +# Release 2.6.12 2023-11-07 + +The release contains fixes for some recent regressions: + +- Include more portable (at least on Ubuntu) `gfaffix` binary. +- Fix error where gpu support on singularity is completely broken. +- Fix `export_hal` and `export_vg` job memory estimates when `--consMemory` not provided.
+ +# Release 2.6.11 2023-10-31 + +This release fixes a bug introduced in v2.6.10 that prevents diploid samples from working with `cactus-pangenome` + +- Remove stray `assert False` from diploid mash distance that was accidentally included in previous release + +# Release 2.6.10 2023-10-30 + +This release contains bug fixes for MAF export and the pangenome pipeline + +- Patch `taffy` to fix bug where sometimes length fields in output MAF can be wrong when using `cactus-hal2maf --filterGapCausingDupes` +- Fix regression `cactus-graphmap-split / cactus-pangenome` so that small poorly aligned reference contigs (like some tiny unplaced GRCh38 contigs) do not get unintentionally filtered out. These contigs do not help the graph in any way, but the tool should do what it says and make a component for every single reference contig no matter what, which it is now fixed to do. +- Cactus will now terminate with a clear error message if any other `--batchSystem` than `single_machine` is attempted from *inside* a docker container. +- Mash distance order into `minigraph` construction fixed so that haplotypes from the same sample are always added contiguously in the presence of ties. +- CI fixed to run all `hal` tests, and not just a subset. +- `pip install wheel` added to installation instructions, as apparently that's needed to install Cactus with some (newer?) Pythons. + +# Release 2.6.9 2023-10-20 + +This release contains some bug fixes and changes to docker image uploads + +- GFAffix updated to latest release +- CI no longer pushes a docker image to quay.io for every single commit. 
+- CPU docker release now made locally as done for GPU +- `--binariesMode docker` will automatically point to release image (using GPU one as appropriate) +- `--consMemory` overrides `hal2vg` memory as well +- `--defaultMemory` defaults to `4Gi` when using docker binaries +- SegAlign job memory specification increased to something more realistic +- `--lastzMemory` option added to override SegAlign memory -- highly recommended on SLURM +- chromosome (.vg / .og) outputs from pangenome pipeline will have ref paths of form `GRCh38#0#chr1` instead of `GRCh38#chr1` to be more consistent with full-genome indexes (and PanSN in general) + # Release 2.6.8 2023-09-28 This release includes several bug fixes for the pangenome pipeline diff --git a/api/tests/cactusParamsTest.c b/api/tests/cactusParamsTest.c index 938363a85..32e5821e3 100644 --- a/api/tests/cactusParamsTest.c +++ b/api/tests/cactusParamsTest.c @@ -27,7 +27,7 @@ static void testCactusParams(CuTest *testCase) { CuAssertTrue(testCase, length >= 3); CuAssertIntEquals(testCase, l[0], 2); CuAssertIntEquals(testCase, l[1], 32); - CuAssertIntEquals(testCase, l[2], 512); + CuAssertIntEquals(testCase, l[2], 256); // Test moving the root of the search cactusParams_set_root(p, 1, "caf"); diff --git a/build-tools/downloadMafTools b/build-tools/downloadMafTools index 0d8a1a360..8cee67cd5 100755 --- a/build-tools/downloadMafTools +++ b/build-tools/downloadMafTools @@ -51,7 +51,7 @@ export HTSLIB_LIBS="$(pwd)/libhts.a -lbz2 -ldeflate -lm -lpthread -lz -llzma -pt cd ${mafBuildDir} git clone https://github.com/ComparativeGenomicsToolkit/taffy.git cd taffy -git checkout c75ce895b7975e7ac17cb1ce964db3016615de47 +git checkout ee50639be3d86451590de8ea4d3a7a037eeaf427 git submodule update --init --recursive export HALDIR=${CWD}/submodules/hal make -j ${numcpu} @@ -66,7 +66,7 @@ fi cd ${mafBuildDir} git clone https://github.com/ComparativeGenomicsToolkit/mafTools.git cd mafTools -git checkout
837b8f27c7bf781c7cbee3972b94e91aa6a77790 +git checkout b88cd313cb18764d87bc801fbbbb00f982c1a48f find . -name "*.mk" | xargs sed -ie "s/-Werror//g" find . -name "Makefile*" | xargs sed -ie "s/-Werror//g" # hack in flags support diff --git a/build-tools/downloadPangenomeTools b/build-tools/downloadPangenomeTools index 4491ba113..5ebdd7753 100755 --- a/build-tools/downloadPangenomeTools +++ b/build-tools/downloadPangenomeTools @@ -156,7 +156,7 @@ fi cd ${pangenomeBuildDir} git clone https://github.com/ComparativeGenomicsToolkit/cactus-gfa-tools.git cd cactus-gfa-tools -git checkout 9b26caa961d6e72ad3747e5c2ce81cdf1e9b63c3 +git checkout 0c17bc4ae9a7cf174fa40805cde7f8f1f6de8225 make -j ${numcpu} if [[ $STATIC_CHECK -ne 1 || $(ldd paf2lastz | grep so | wc -l) -eq 0 ]] then @@ -279,7 +279,7 @@ fi # vg cd ${pangenomeBuildDir} #wget -q https://github.com/vgteam/vg/releases/download/v1.51.0/vg -wget -q http://public.gi.ucsc.edu/~hickey/vg-patch/vg.9df2a056197cafd817cf48c76cf662dd775d265d -O vg +wget -q http://public.gi.ucsc.edu/~hickey/vg-patch/vg.98e3b7c867eb64178298535b076189ef7fda5031 -O vg chmod +x vg if [[ $STATIC_CHECK -ne 1 || $(ldd vg | grep so | wc -l) -eq 0 ]] then @@ -290,12 +290,12 @@ fi # gfaffix cd ${pangenomeBuildDir} -wget -q https://github.com/marschall-lab/GFAffix/releases/download/0.1.5/GFAffix-0.1.5_linux_x86_64.tar.gz -tar xzf GFAffix-0.1.5_linux_x86_64.tar.gz -chmod +x GFAffix-0.1.5_linux_x86_64/gfaffix -if [[ $STATIC_CHECK -ne 1 || $(ldd GFAffix-0.1.5_linux_x86_64/gfaffix | grep so | wc -l) -eq 0 ]] +wget -q https://github.com/marschall-lab/GFAffix/releases/download/0.1.5b/GFAffix-0.1.5b_linux_x86_64.tar.gz +tar xzf GFAffix-0.1.5b_linux_x86_64.tar.gz +chmod +x GFAffix-0.1.5b_linux_x86_64/gfaffix +if [[ $STATIC_CHECK -ne 1 || $(ldd GFAffix-0.1.5b_linux_x86_64/gfaffix | grep so | wc -l) -eq 0 ]] then - mv GFAffix-0.1.5_linux_x86_64/gfaffix ${binDir} + mv GFAffix-0.1.5b_linux_x86_64/gfaffix ${binDir} else exit 1 fi diff --git 
a/build-tools/makeCpuDockerRelease b/build-tools/makeCpuDockerRelease index e59e05892..57a2f01c3 100755 --- a/build-tools/makeCpuDockerRelease +++ b/build-tools/makeCpuDockerRelease @@ -24,10 +24,11 @@ git checkout "${REL_TAG}" git submodule update --init --recursive docker build . -f Dockerfile -t ${dockname}:${REL_TAG} +docker tag ${dockname}:${REL_TAG} ${dockname}:latest read -p "Are you sure you want to push ${dockname}:${REL_TAG} to quay?" yn case $yn in - [Yy]* ) docker push ${dockname}:${REL_TAG}; break;; + [Yy]* ) docker push ${dockname}:${REL_TAG} && docker push ${dockname}:latest ; break;; [Nn]* ) exit;; * ) echo "Please answer yes or no.";; esac diff --git a/doc/pangenome.md b/doc/pangenome.md index a78934687..3d7ab05a3 100644 --- a/doc/pangenome.md +++ b/doc/pangenome.md @@ -128,6 +128,7 @@ The Minigraph-Cactus pipeline is run via the `cactus-pangenome` command. It cons **Before running large jobs, it is important to consider the following options:** * `--mgCores` the number of cores for `minigraph` construction (default: all available) +* `--mgMemory` the amount of memory for `minigraph` construction. The default estimate can be quite conservative (ie high), so if it is too high for your system, you can lower it with this option (default: estimate based on input size). * `--mapCores` the number of cores for each `minigraph` mapping job (default: up to 6) * `--consCores` the number of cores for each `cactus-consolidated` job (default: all available) * `--consMemory` the amount of memory for each `cactus-consolidated` job. By default, it is estimated from the data but these estimates being wrong can be catastrophic on [SLURM](./progressive.md#running-on-a-cluster). 
Consider setting to the maximum memory you have available when running on a cluster to be extra safe (seems to be more of an issue for non-human data) diff --git a/doc/progressive.md b/doc/progressive.md index b43e32802..c418b5428 100644 --- a/doc/progressive.md +++ b/doc/progressive.md @@ -170,12 +170,12 @@ The Cactus Docker image contains everything you need to run Cactus (python envir ``` wget -q https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactus/master/examples/evolverMammals.txt -O evolverMammals.txt -docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.6.8 cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal +docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.6.13 cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal ``` Or you can proceed interactively by running ``` -docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.6.8 bash +docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.6.13 bash cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal ``` @@ -204,6 +204,16 @@ export TOIL_SLURM_ARGS="--nice=5000" to avoid making too many enemies. +You can (and probably should) use the `--batchLogsDir` option in order to enable more SLURM logging. You must pass it a directory that already exists. Ex. + +``` +mkdir -p batch-logs +cactus ./js ./examples/evolverMammals.txt evolverMammals.hal --batchSystem slurm --batchLogsDir batch-logs +``` + +You'll want to clean out this directory after a successful run. + + You cannot run `cactus --batchSystem slurm` from *inside* the Cactus docker container, because the Cactus docker container doesn't contain slurm. Therefore in order to use slurm, you must be able to `pip install` Cactus inside a virtualenv on the head node. 
You can still use `--binariesMode docker` or `--binariesMode` singularity to run cactus *binaries* from a container, but the Cactus Python module needs to be installed locally. **IMPORTANT** @@ -211,7 +221,7 @@ You cannot run `cactus --batchSystem slurm` from *inside* the Cactus docker cont To run Progressive Cactus with CPU (default) lastz, you should increase the chunk size. This will divide the input assemblies into fewer pieces, resulting in fewer jobs on the cluster. ``` -cp cactus-bin-v2.6.8/src/cactus/cactus_progressive_config.xml ./config-slurm.xml +cp cactus-bin-v2.6.13/src/cactus/cactus_progressive_config.xml ./config-slurm.xml sed -i config-slurm.xml -e 's/blast chunkSize="30000000"/blast chunkSize="90000000"/g' sed -i config-slurm.xml -e 's/dechunkBatchSize="1000"/dechunkBatchSize="200"/g' ``` @@ -340,6 +350,11 @@ We've tested SegAlign on Nvidia V100 and A10G GPUs. See the Terra example above Please [cite SegAlign](https://doi.ieeecomputersociety.org/10.1109/SC41405.2020.00043). +### Using GPU Acceleration on a Cluster + +Since `SegAlign` is only released in the GPU-enabled docker image, that's the easiest way to run it. When running on a cluster, this usually means the best way to use it is with `--binariesMode docker --gpu `. This way cactus is installed locally on your virtual environment and can run slurm commands like `sbatch` (that aren't available in the Cactus container), but SegAlign itself will be run from inside Docker. + +**Important**: Consider using `--lastzMemory` when using GPU acceleration on a cluster. Like `--consMemory`, it lets you override the amount of memory Toil requests which can help with errors if Cactus's automatic estimate is either too low (cluster evicts the job) or too high (cluster cannot schedule the job). 
## Pre-Alignment Checklist diff --git a/examples/evolverPrimates.txt b/examples/evolverPrimates.txt index 54495a980..9c8623807 100644 --- a/examples/evolverPrimates.txt +++ b/examples/evolverPrimates.txt @@ -1,4 +1,6 @@ -simHuman https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simHuman.chr6 +(simOrang:0.00993,((simChimp:0.00272,simHuman:0.00269)cb:0.00415,simGorilla:0.00644)hcb:0.00046); + +simOrang https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simOrang.chr6 simChimp https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simChimp.chr6 +simHuman https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simHuman.chr6 simGorilla https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simGorilla.chr6 -simOrang https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactusTestData/master/evolver/primates/loci1/simOrang.chr6 diff --git a/setup.py b/setup.py index bb1085b1a..d8c482a37 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def run(self): setup( name = "Cactus", - version = "2.6.8", + version = "2.6.13", author = "Benedict Paten", package_dir = {'': 'src'}, packages = find_packages(where='src'), diff --git a/src/cactus/blast/cactus_blast.py b/src/cactus/blast/cactus_blast.py index 96469d769..f4deadccb 100644 --- a/src/cactus/blast/cactus_blast.py +++ b/src/cactus/blast/cactus_blast.py @@ -19,6 +19,7 @@ from cactus.shared.common import enableDumpStack from cactus.shared.common import cactus_override_toil_options from cactus.shared.version import cactus_commit +from cactus.progressive.cactus_prepare import human2bytesN from cactus.paf.local_alignment import sanitize_then_make_paf_alignments @@ -60,8 +61,11 @@ def main(): parser.add_argument("--binariesMode", choices=["docker", "local", 
"singularity"], help="The way to run the Cactus binaries", default=None) parser.add_argument("--gpu", nargs='?', const='all', default=None, help="toggle on GPU-enabled lastz, and specify number of GPUs (all available if no value provided)") - parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz job, only relevant when running with --gpu") - + parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz/segalign job, only relevant when running with --gpu") + parser.add_argument("--lastzMemory", type=human2bytesN, + help="Memory in bytes for each lastz/segalign job (defaults to an estimate based on the input data size). " + "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None) + options = parser.parse_args() setupBinaries(options) @@ -95,7 +99,7 @@ def runCactusBlastOnly(options): # load up the seqfile and figure out the outgroups and schedule config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) # apply gpu override config_wrapper.initGPU(options) mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper) diff --git a/src/cactus/cactus_progressive_config.xml b/src/cactus/cactus_progressive_config.xml index a777118eb..58a6d8c33 100644 --- a/src/cactus/cactus_progressive_config.xml +++ b/src/cactus/cactus_progressive_config.xml @@ -57,6 +57,9 @@ + diff --git a/src/cactus/maf/cactus_hal2chains.py b/src/cactus/maf/cactus_hal2chains.py index 1c3c88840..a3a7c5fe7 100644 --- a/src/cactus/maf/cactus_hal2chains.py +++ b/src/cactus/maf/cactus_hal2chains.py @@ -102,7 +102,7 @@ def main(): #load cactus config configNode = ET.parse(options.configFile).getroot() config = ConfigWrapper(configNode) - config.substituteAllPredefinedConstantsWithLiterals() + 
config.substituteAllPredefinedConstantsWithLiterals(options) logger.info("Importing {}".format(options.halFile)) hal_id = toil.importFile(options.halFile) diff --git a/src/cactus/maf/cactus_hal2maf.py b/src/cactus/maf/cactus_hal2maf.py index 54b07dcdd..331f89c2b 100644 --- a/src/cactus/maf/cactus_hal2maf.py +++ b/src/cactus/maf/cactus_hal2maf.py @@ -179,7 +179,7 @@ def main(): #load cactus config configNode = ET.parse(options.configFile).getroot() config = ConfigWrapper(configNode) - config.substituteAllPredefinedConstantsWithLiterals() + config.substituteAllPredefinedConstantsWithLiterals(options) logger.info("Importing {}".format(options.halFile)) hal_id = toil.importFile(options.halFile) diff --git a/src/cactus/maf/cactus_maf2bigmaf.py b/src/cactus/maf/cactus_maf2bigmaf.py index 04b06dfd2..1677dce9d 100644 --- a/src/cactus/maf/cactus_maf2bigmaf.py +++ b/src/cactus/maf/cactus_maf2bigmaf.py @@ -84,7 +84,7 @@ def main(): #load cactus config configNode = ET.parse(options.configFile).getroot() config = ConfigWrapper(configNode) - config.substituteAllPredefinedConstantsWithLiterals() + config.substituteAllPredefinedConstantsWithLiterals(options) logger.info("Importing {}".format(options.mafFile)) maf_id = toil.importFile(options.mafFile) diff --git a/src/cactus/paf/local_alignment.py b/src/cactus/paf/local_alignment.py index da8716048..876cd9ebe 100755 --- a/src/cactus/paf/local_alignment.py +++ b/src/cactus/paf/local_alignment.py @@ -129,7 +129,8 @@ def combine_chunks(job, chunked_alignment_files, batch_size): outfile=alignment_file, outappend=True) job.fileStore.deleteGlobalFile(chunk) # Cleanup the old files return job.fileStore.writeGlobalFile(alignment_file) # Return the final alignments file copied to the jobstore - + + def merge_combined_chunks(job, combined_chunks): # mege up and return some chunks, deleting them too output_path = job.fileStore.getLocalTempFile() @@ -138,10 +139,10 @@ def merge_combined_chunks(job, combined_chunks): chunk_path = 
job.fileStore.readGlobalFile(chunk, mutable=True) with open(chunk_path, 'r') as chunk_file: shutil.copyfileobj(chunk_file, output_file) - os.remove(chunk_path) job.fileStore.deleteGlobalFile(chunk) return job.fileStore.writeGlobalFile(output_path) + def make_chunked_alignments(job, event_a, genome_a, event_b, genome_b, distance, params): lastz_params_node = params.find("blast") gpu = getOptionalAttrib(lastz_params_node, 'gpu', typeFn=int, default=0) @@ -149,6 +150,7 @@ def make_chunked_alignments(job, event_a, genome_a, event_b, genome_b, distance, # wga-gpu has a 6G limit, so we always override lastz_params_node.attrib['chunkSize'] = '6000000000' lastz_cores = getOptionalAttrib(lastz_params_node, 'cpu', typeFn=int, default=None) + lastz_memory = getOptionalAttrib(lastz_params_node, 'lastz_memory', typeFn=int, default=None) def make_chunks(genome): output_chunks_dir = job.fileStore.getLocalTempDir() @@ -171,16 +173,37 @@ def make_chunks(genome): for j, chunk_b in enumerate(chunks_b): mappers = { "lastz":run_lastz, "minimap2":run_minimap2} mappingFn = mappers[params.find("blast").attrib["mapper"]] + memory = lastz_memory if lastz_memory else max(200000000, 15*(chunk_a.size+chunk_b.size)) chunked_alignment_files.append(job.addChildJobFn(mappingFn, '{}_{}'.format(event_a, i), chunk_a, '{}_{}'.format(event_b, j), chunk_b, distance, params, - cores=lastz_cores, disk=4*(chunk_a.size+chunk_b.size), - memory=max(200000000, 15*(chunk_a.size+chunk_b.size)), + cores=lastz_cores, + disk=max(4*(chunk_a.size+chunk_b.size), memory), + memory=memory, accelerators=accelerators).rv()) dechunk_batch_size = getOptionalAttrib(lastz_params_node, 'dechunkBatchSize', typeFn=int, default=1e9) return job.addFollowOnJobFn(combine_chunks, chunked_alignment_files, dechunk_batch_size).rv() # Combine the chunked alignment files +def invert_alignments(job, alignment_file): + """ Invert the pafs in the alignment_file """ + alignment_file_local = job.fileStore.readGlobalFile(alignment_file) + 
inverted_alignment_file_local = job.fileStore.getLocalTempFile() # Get a temporary file to store the alignments in + cactus_call(parameters=['paffy', 'invert', "-i", alignment_file_local], outfile=inverted_alignment_file_local, outappend=True, + job_memory=job.memory) + job.fileStore.deleteGlobalFile(alignment_file) + return job.fileStore.writeGlobalFile(inverted_alignment_file_local) + + +def make_ingroup_to_outgroup_alignments_0(job, ingroup_event, outgroup_events, event_names_to_sequences, distances, params): + # Generate the alignments fle + alignment_file = job.addChildJobFn(make_ingroup_to_outgroup_alignments_1, ingroup_event, outgroup_events, + event_names_to_sequences, distances, params).rv() + + # Invert the final alignment so that the query is the outgroup and the target is the ingroup + return job.addFollowOnJobFn(invert_alignments, alignment_file).rv() + + def make_ingroup_to_outgroup_alignments_1(job, ingroup_event, outgroup_events, event_names_to_sequences, distances, params): # a job should never set its own follow-on, so we hang everything off root_job here to encapsulate root_job = Job() @@ -265,8 +288,56 @@ def make_ingroup_to_outgroup_alignments_3(job, ingroup_event, ingroup_seq_file, return job.fileStore.writeGlobalFile(merged_alignments) -def chain_alignments(job, alignment_files, alignment_names, reference_event_name, params): - # The following recapitulates the pipeline showing in paffy/tests/paf_pipeline_test.sh +def merge_alignments(job, alignment_file1, alignment_file2): + """" Merge together two alignment files """ + # Get a temporary directory to work in + work_dir = job.fileStore.getLocalTempDir() + + # The merged alignment file + merged_alignments_file = os.path.join(work_dir, 'output_alignments.paf') + + # Read the files to local disk and merge them together, don't bother to log + cactus_call(parameters=['cat', + job.fileStore.readGlobalFile(alignment_file1, os.path.join(work_dir, 'alignment_1.paf')), + 
job.fileStore.readGlobalFile(alignment_file2, os.path.join(work_dir, 'alignment_2.paf'))], + outfile=merged_alignments_file, job_memory=job.memory) + + # Write the merged file back to the job store and return its path + return job.fileStore.writeGlobalFile(merged_alignments_file) + + +def chain_alignments_splitting_ingroups_and_outgroups(job, ingroup_alignment_files, ingroup_alignment_names, + outgroup_alignment_files, outgroup_alignment_names, + reference_event_name, params): + """ Chains/tiles/etc. the ingroup alignments to each other so that every ingroup sequence has a primary alignment to + another ingroup sequence, and separately each outgroup sequence has a primary alignment to an ingroup.""" + + # Check we have the expected number of alignment files + assert len(ingroup_alignment_files) == len(ingroup_alignment_names) + assert len(outgroup_alignment_files) == len(outgroup_alignment_names) + + # Chain and pick the primary alignments of the ingroups to each other + chained_ingroup_alignments = job.addChildJobFn(chain_alignments, ingroup_alignment_files, + ingroup_alignment_names, reference_event_name, params).rv() + + # Separately pick the primary of the outgroups to the ingroups. 
By setting include_inverted_alignments=False + # we only get outgroup-to-ingroup alignments and not ingroup-to-outgroup alignments and therefore primary + # alignments along the outgroup sequences + chained_outgroup_alignments = job.addChildJobFn(chain_alignments, outgroup_alignment_files, + outgroup_alignment_names, reference_event_name, params, + include_inverted_alignments=False).rv() + + # Calculate approximately total alignment file size + total_file_size = sum(alignment_file.size for alignment_file in ingroup_alignment_files + outgroup_alignment_files) + + # Merge the resulting two alignment files into a single set of alignments + return job.addFollowOnJobFn(merge_alignments, chained_ingroup_alignments, chained_outgroup_alignments, + disk=total_file_size*10).rv() + + +def chain_alignments(job, alignment_files, alignment_names, reference_event_name, params, + include_inverted_alignments=True): + """The following recapitulates the pipeline shown in paffy/tests/paf_pipeline_test.sh""" root_job = Job() job.addChild(root_job) @@ -276,7 +347,8 @@ # do the chaining chained_alignment_files = [] for alignment_file, alignment_name in zip(alignment_files, alignment_names): - chained_alignment_files.append(root_job.addChildJobFn(chain_one_alignment, alignment_file, alignment_name, params, + chained_alignment_files.append(root_job.addChildJobFn(chain_one_alignment, alignment_file, alignment_name, + params, include_inverted_alignments, disk=6*alignment_file.size, memory=32*alignment_file.size).rv()) @@ -285,8 +357,11 @@ disk=6*sum([alignment_file.size for alignment_file in alignment_files]), memory=32*sum([alignment_file.size for alignment_file in alignment_files])).rv() -def chain_one_alignment(job, alignment_file, alignment_name, params): - # run paffy chain on one PAF + +def chain_one_alignment(job,
alignment_file, alignment_name, params, include_inverted_alignments): + """ run paffy chain on one PAF. include_inverted_alignemnts is a flag to control if we additionally include + the inverted paf records for chaining. + """ work_dir = job.fileStore.getLocalTempDir() alignment_path = os.path.join(work_dir, alignment_name + '.paf') @@ -296,13 +371,15 @@ def chain_one_alignment(job, alignment_file, alignment_name, params): # Copy the alignments from the job store job.fileStore.readGlobalFile(alignment_file, alignment_path) - # Get the forward and reverse versions of each alignment for symmetry with chaining - shutil.copyfile(alignment_path, alignment_inv_path) - cactus_call(parameters=['paffy', 'invert', "-i", alignment_path], outfile=alignment_inv_path, outappend=True, - job_memory=job.memory) + # Get the forward and reverse versions of each alignment for symmetry with chaining if include_inverted_alignments + # is set + if include_inverted_alignments: + shutil.copyfile(alignment_path, alignment_inv_path) + cactus_call(parameters=['paffy', 'invert', "-i", alignment_inv_path], outfile=alignment_path, outappend=True, + job_memory=job.memory) # Now chain the alignments - cactus_call(parameters=['paffy', 'chain', "-i", alignment_inv_path, + cactus_call(parameters=['paffy', 'chain', "-i", alignment_path, "--maxGapLength", params.find("blast").attrib["chainMaxGapLength"], "--chainGapOpen", params.find("blast").attrib["chainGapOpen"], "--chainGapExtend", params.find("blast").attrib["chainGapExtend"], @@ -400,6 +477,7 @@ def sanitize_then_make_paf_alignments(job, event_tree_string, event_names_to_seq paf_job = sanitize_job.addFollowOnJobFn(make_paf_alignments, event_tree_string, sanitize_job.rv(), ancestor_event_string, params) return paf_job.rv() + def make_paf_alignments(job, event_tree_string, event_names_to_sequences, ancestor_event_string, params): # a job should never set its own follow-on, so we hang everything off the root_job here to encapsulate root_job = Job() 
@@ -441,11 +519,12 @@ def make_paf_alignments(job, event_tree_string, event_names_to_sequences, ancest # for each ingroup make alignments to the outgroups if int(params.find("blast").attrib["trimIngroups"]): # Trim the ingroup sequences - outgroup_alignments = [root_job.addChildJobFn(make_ingroup_to_outgroup_alignments_1, ingroup, outgroup_events, + outgroup_alignments = [root_job.addChildJobFn(make_ingroup_to_outgroup_alignments_0, ingroup, outgroup_events, dict(event_names_to_sequences), distances, params).rv() for ingroup in ingroup_events] if len(outgroup_events) > 0 else [] else: outgroup_alignments = [root_job.addChildJobFn(make_chunked_alignments, + # Ingroup will be the target, outgroup the query ingroup.iD, event_names_to_sequences[ingroup.iD], outgroup.iD, event_names_to_sequences[outgroup.iD], distances[ingroup, outgroup], params, @@ -454,10 +533,18 @@ def make_paf_alignments(job, event_tree_string, event_names_to_sequences, ancest # for better logs outgroup_alignment_names = ['{}-og_{}'.format(ancestor_event_string, i) for i in range(len(outgroup_alignments))] - # Now do the chaining - return root_job.addFollowOnJobFn(chain_alignments, ingroup_alignments + outgroup_alignments, - ingroup_alignment_names + outgroup_alignment_names, - ancestor_event_string, params).rv() + # Now do the chaining, either getting ingroup primary alignments separately to primary ingroup to outgroup + # alignments if the following option is true, otherwise get every sequence to pick its primary alignment without + # regard to whether the other sequence is an ingroup or an outgroup + if int(params.find("blast").attrib["pickIngroupPrimaryAlignmentsSeparatelyToOutgroups"]): + return root_job.addFollowOnJobFn(chain_alignments_splitting_ingroups_and_outgroups, + ingroup_alignments, ingroup_alignment_names, + outgroup_alignments, outgroup_alignment_names, + ancestor_event_string, params).rv() + + return job.addFollowOnJobFn(chain_alignments, ingroup_alignments + outgroup_alignments, + 
ingroup_alignment_names + outgroup_alignment_names, + ancestor_event_string, params).rv() def trim_unaligned_sequences(job, sequences, alignments, params): diff --git a/src/cactus/preprocessor/cactus_preprocessor.py b/src/cactus/preprocessor/cactus_preprocessor.py index 82ab661df..0f73bd91f 100644 --- a/src/cactus/preprocessor/cactus_preprocessor.py +++ b/src/cactus/preprocessor/cactus_preprocessor.py @@ -43,11 +43,12 @@ from cactus.preprocessor.dnabrnnMasking import DnabrnnMaskJob, loadDnaBrnnModel from cactus.preprocessor.cutHeaders import CutHeadersJob from cactus.preprocessor.fileMasking import maskJobOverride, FileMaskingJob +from cactus.progressive.cactus_prepare import human2bytesN class PreprocessorOptions: def __init__(self, chunkSize, memory, cpu, check, proportionToSample, unmask, preprocessJob, checkAssemblyHub=None, lastzOptions=None, minPeriod=None, - gpu=0, dnabrnnOpts=None, + gpu=0, lastz_memory=None, dnabrnnOpts=None, dnabrnnAction=None, eventName=None, minLength=None, cutBefore=None, cutBeforeOcc=None, cutAfter=None, inputBedID=None): self.chunkSize = chunkSize @@ -64,6 +65,7 @@ def __init__(self, chunkSize, memory, cpu, check, proportionToSample, unmask, self.gpuLastzInterval = self.chunkSize if self.gpu: self.chunkSize = 0 + self.lastz_memory= lastz_memory self.dnabrnnOpts = dnabrnnOpts self.dnabrnnAction = dnabrnnAction assert dnabrnnAction in ('softmask', 'hardmask', 'clip') @@ -146,6 +148,7 @@ def getChunkedJobForCurrentStage(self, seqIDs, proportionSampled, inChunkID, chu lastzOpts=self.prepOptions.lastzOptions, gpu=self.prepOptions.gpu, cpu=self.prepOptions.cpu, + lastz_memory=self.prepOptions.lastz_memory, gpuLastzInterval=self.prepOptions.gpuLastzInterval, eventName='{}_{}'.format(self.prepOptions.eventName, chunk_i)) return LastzRepeatMaskJob(repeatMaskOptions=repeatMaskOptions, @@ -256,6 +259,7 @@ def run(self, fileStore): minPeriod = getOptionalAttrib(prepNode, "minPeriod", typeFn=int, default=0), checkAssemblyHub = 
getOptionalAttrib(prepNode, "checkAssemblyHub", typeFn=bool, default=False), gpu = getOptionalAttrib(prepNode, "gpu", typeFn=int, default=0), + lastz_memory = getOptionalAttrib(prepNode, "lastz_memory", typeFn=int, default=None), dnabrnnOpts = getOptionalAttrib(prepNode, "dna-brnnOpts", default=""), dnabrnnAction = getOptionalAttrib(prepNode, "action", typeFn=str, default="softmask"), eventName = getOptionalAttrib(prepNode, "eventName", typeFn=str, default=None), @@ -341,7 +345,7 @@ def run(self, fileStore): def stageWorkflow(outputSequenceDir, configNode, inputSequences, toil, restart=False, outputSequences = [], maskMode=None, maskAction=None, maskFile=None, minLength=None, inputEventNames=None, brnnCores=None, - gpu_override=False): + gpu_override=False, options=None): #Replace any constants if not outputSequences: outputSequences = CactusPreprocessor.getOutputSequenceFiles(inputSequences, outputSequenceDir) @@ -352,7 +356,7 @@ def stageWorkflow(outputSequenceDir, configNode, inputSequences, toil, restart=F loadDnaBrnnModel(toil, configNode, maskAlpha = maskMode == 'brnn') if configNode.find("constants") != None: - ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals() + ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals(options) if maskMode: lastz = maskMode == 'lastz' brnn = maskMode == 'brnn' @@ -406,7 +410,7 @@ def runCactusPreprocessor(outputSequenceDir, configFile, inputSequences, toilDir toilOptions.logLevel = "INFO" toilOptions.disableCaching = True with Toil(toilOptions) as toil: - stageWorkflow(outputSequenceDir, ET.parse(options.configFile).getroot(), inputSequences, toil) + stageWorkflow(outputSequenceDir, ET.parse(options.configFile).getroot(), inputSequences, toil, options=toilOptions) def main(): parser = ArgumentParser() @@ -432,7 +436,11 @@ def main(): parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"], help="The way to run the Cactus binaries", default=None) 
parser.add_argument("--gpu", nargs='?', const='all', default=None, help="toggle on GPU-enabled lastz, and specify number of GPUs (all available if no value provided)") - parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz job, only relevant when running with --gpu") + parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz/segalign job, only relevant when running with --gpu") + parser.add_argument("--lastzMemory", type=human2bytesN, + help="Memory in bytes for each lastz/segalign job (defaults to an estimate based on the input data size). " + "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None) + parser.add_argument("--pangenome", action="store_true", help='Do not mask. Just add Cactus-style unique prefixes and strip anything up to and including last #') options = parser.parse_args() @@ -552,7 +560,8 @@ def main(): maskAction=options.maskAction, minLength=options.minLength, inputEventNames=inNames, - brnnCores=options.brnnCores) + brnnCores=options.brnnCores, + options=options) if __name__ == '__main__': main() diff --git a/src/cactus/preprocessor/lastzRepeatMasking/cactus_lastzRepeatMask.py b/src/cactus/preprocessor/lastzRepeatMasking/cactus_lastzRepeatMask.py index a8ed28b5d..4d37e5633 100644 --- a/src/cactus/preprocessor/lastzRepeatMasking/cactus_lastzRepeatMask.py +++ b/src/cactus/preprocessor/lastzRepeatMasking/cactus_lastzRepeatMask.py @@ -25,6 +25,7 @@ def __init__(self, proportionSampled=1.0, gpu=0, cpu=None, + lastz_memory=None, gpuLastzInterval=3000000, eventName='seq'): self.fragment = fragment @@ -35,6 +36,7 @@ def __init__(self, self.proportionSampled = proportionSampled self.gpu = gpu self.cpu = cpu + self.lastz_memory = lastz_memory self.gpuLastzInterval = gpuLastzInterval self.eventName = eventName @@ -48,8 +50,13 @@ def __init__(self, class LastzRepeatMaskJob(RoundedJob): def __init__(self, repeatMaskOptions, queryID, 
targetIDs): targetsSize = sum(targetID.size for targetID in targetIDs) - memory = 4*1024*1024*1024 - disk = 4*(queryID.size + targetsSize) + if repeatMaskOptions.lastz_memory: + memory = repeatMaskOptions.lastz_memory + elif repeatMaskOptions.gpu: + memory = min(40 * targetsSize, 512e9) + else: + memory = 4*1024*1024*1024 + disk = max(4*(queryID.size + targetsSize), memory) cores = repeatMaskOptions.cpu accelerators = ['cuda:{}'.format(repeatMaskOptions.gpu)] if repeatMaskOptions.gpu else None RoundedJob.__init__(self, memory=memory, disk=disk, cores=cores, accelerators=accelerators, preemptable=True) diff --git a/src/cactus/progressive/cactus_prepare.py b/src/cactus/progressive/cactus_prepare.py index dae86c615..267b7c569 100644 --- a/src/cactus/progressive/cactus_prepare.py +++ b/src/cactus/progressive/cactus_prepare.py @@ -25,7 +25,7 @@ from cactus.progressive.progressive_decomposition import compute_outgroups, get_subtree, check_branch_lengths from cactus.shared.common import cactusRootPath from cactus.shared.common import enableDumpStack, setupBinaries -from cactus.shared.common import getDockerImage, getDockerRelease +from cactus.shared.common import getDockerImage from cactus.shared.configWrapper import ConfigWrapper from cactus.shared.version import cactus_commit from cactus.shared.common import findRequiredNode @@ -746,7 +746,7 @@ def wdl_task_preprocess(options): s += ' gpuCount: {}\n'.format(options.gpu) s += ' bootDiskSizeGb: 20\n' s += ' nvidiaDriverVersion: \"{}\"\n'.format(options.nvidiaDriver) - s += ' docker: \"{}\"\n'.format(getDockerRelease(gpu=True)) + s += ' docker: \"{}\"\n'.format(getDockerImage(gpu=True)) s += ' zones: \"{}\"\n'.format(options.gpuZone) else: s += ' docker: \"{}\"\n'.format(options.dockerImage) @@ -828,7 +828,7 @@ def wdl_task_blast(options): s += ' gpuCount: {}\n'.format(options.gpu) s += ' bootDiskSizeGb: 20\n' s += ' nvidiaDriverVersion: \"{}\"\n'.format(options.nvidiaDriver) - s += ' docker: 
\"{}\"\n'.format(getDockerRelease(gpu=True)) + s += ' docker: \"{}\"\n'.format(getDockerImage(gpu=True)) s += ' zones: \"{}\"\n'.format(options.gpuZone) else: s += ' docker: \"{}\"\n'.format(options.dockerImage) diff --git a/src/cactus/progressive/cactus_progressive.py b/src/cactus/progressive/cactus_progressive.py index dedfd6f69..0d39dba54 100755 --- a/src/cactus/progressive/cactus_progressive.py +++ b/src/cactus/progressive/cactus_progressive.py @@ -206,7 +206,7 @@ def progressive_step_2(job, trimmed_outgroups_and_alignments, options, config_no def export_hal(job, mc_tree, config_node, seq_id_map, og_map, results, event=None, cacheBytes=None, cacheMDC=None, cacheRDC=None, cacheW0=None, chunk=None, deflate=None, inMemory=False, - checkpointInfo=None, acyclicEvent=None, has_resources=False): + checkpointInfo=None, acyclicEvent=None, has_resources=False, memory_override=None): # todo: going through list nonsense because (i think) it helps with promises, should at least clean up work_dir = job.fileStore.getLocalTempDir() @@ -268,8 +268,8 @@ def export_hal(job, mc_tree, config_node, seq_id_map, og_map, results, event=Non disk = 3 * sum([file_id.size for file_id in fa_file_ids + c2h_file_ids]) mem = 5 * (max([file_id.size for file_id in fa_file_ids]) + max([file_id.size for file_id in c2h_file_ids])) # allows pass-through of memory override from --consMemory - if job.memory: - mem = job.memory + if memory_override: + mem = memory_override return job.addChildJobFn(export_hal, mc_tree, config_node, seq_id_map, og_map, results, event=event, cacheBytes=cacheBytes, cacheMDC=cacheMDC, cacheRDC=cacheRDC, cacheW0=cacheW0, chunk=chunk, deflate=deflate, inMemory=inMemory, checkpointInfo=checkpointInfo, @@ -310,7 +310,7 @@ def progressive_workflow(job, options, config_node, mc_tree, og_map, input_seq_i # then do the hal export hal_export_job = progressive_job.addFollowOnJobFn(export_hal, mc_tree, config_node, seq_id_map, og_map, - progressive_job.rv(), event=root_event, 
memory=options.consMemory) + progressive_job.rv(), event=root_event, memory_override=options.consMemory) return hal_export_job.rv() @@ -340,7 +340,10 @@ def main(): parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"], help="The way to run the Cactus binaries", default=None) parser.add_argument("--gpu", nargs='?', const='all', default=None, help="toggle on GPU-enabled lastz, and specify number of GPUs (all available if no value provided)") - parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz job, only relevant when running with --gpu") + parser.add_argument("--lastzCores", type=int, default=None, help="Number of cores for each lastz/segalign job, only relevant when running with --gpu") + parser.add_argument("--lastzMemory", type=human2bytesN, + help="Memory in bytes for each lastz/segalign job (defaults to an estimate based on the input data size). " + "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None) parser.add_argument("--consCores", type=int, help="Number of cores for each cactus_consolidated job (defaults to all available / maxCores on single_machine)", default=None) parser.add_argument("--consMemory", type=human2bytesN, @@ -398,7 +401,7 @@ def main(): # load up the seqfile and figure out the outgroups and schedule config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) config_wrapper.setSystemMemory(options) # apply gpu override diff --git a/src/cactus/progressive/outgroup.py b/src/cactus/progressive/outgroup.py index 873ab61bc..a222c0a55 100644 --- a/src/cactus/progressive/outgroup.py +++ b/src/cactus/progressive/outgroup.py @@ -253,7 +253,7 @@ def main(): # can't use progressive_decomposition.py for circular deps... 
config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) seq_file = SeqFile(args[0]) mc_tree = MultiCactusTree(seq_file.tree) mc_tree.nameUnlabeledInternalNodes(config_wrapper.getDefaultInternalNodePrefix()) diff --git a/src/cactus/refmap/cactus_graphmap.py b/src/cactus/refmap/cactus_graphmap.py index 68447333e..077ba4884 100644 --- a/src/cactus/refmap/cactus_graphmap.py +++ b/src/cactus/refmap/cactus_graphmap.py @@ -111,7 +111,7 @@ def graph_map(options): paf_id, gfa_fa_id, gaf_id, unfiltered_paf_id, paf_filter_log = toil.restart() else: # load up the seqfile and figure out the outgroups and schedule - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper, pangenome=True) og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates)) event_set = get_event_set(mc_tree, config_wrapper, og_map, mc_tree.getRootName()) @@ -429,7 +429,7 @@ def extract_paf_from_gfa(job, gfa_id, gfa_path, ref_event, graph_event, ignore_p cactus_call(parameters=cmd, outfile=paf_path) return job.fileStore.writeGlobalFile(paf_path) -def filter_paf(job, paf_id, config): +def filter_paf(job, paf_id, config, reference=None): """ run basic paf-filtering. 
these are quick filters that are best to do on-the-fly when reading the paf and as such, they are called by cactus-graphmap-split and cactus-align, not here """ work_dir = job.fileStore.getLocalTempDir() @@ -444,6 +444,7 @@ def filter_paf(job, paf_id, config): with open(paf_path, 'r') as paf_file, open(filter_paf_path, 'w') as filter_paf_file: for line in paf_file: toks = line.split('\t') + is_ref = reference and toks[0].startswith('id={}|'.format(reference)) mapq = int(toks[11]) query_len = int(toks[1]) ident = float(toks[9]) / (float(toks[10]) + 0.00000001) @@ -456,7 +457,7 @@ def filter_paf(job, paf_id, config): # we can also get the identity of the parent gaf block if tok.startswith('gi:i:'): ident = min(ident, float(toks[5:])) - if mapq >= min_mapq and (bl is None or query_len <= min_block or bl >= min_block) and ident >= min_ident: + if is_ref or (mapq >= min_mapq and (bl is None or query_len <= min_block or bl >= min_block) and ident >= min_ident): filter_paf_file.write(line) overlap_ratio = getOptionalAttrib(findRequiredNode(config.xmlRoot, "graphmap"), "PAFOverlapFilterRatio", typeFn=float, default=0) diff --git a/src/cactus/refmap/cactus_graphmap_join.py b/src/cactus/refmap/cactus_graphmap_join.py index a42724ce3..8a6b4e78d 100644 --- a/src/cactus/refmap/cactus_graphmap_join.py +++ b/src/cactus/refmap/cactus_graphmap_join.py @@ -320,7 +320,7 @@ def graphmap_join(options): #load cactus config configNode = ET.parse(options.configFile).getroot() config = ConfigWrapper(configNode) - config.substituteAllPredefinedConstantsWithLiterals() + config.substituteAllPredefinedConstantsWithLiterals(options) # load up the vgs vg_ids = [] @@ -613,7 +613,7 @@ def clip_vg(job, options, config, vg_path, vg_id, phase): cactus_call(parameters=['vg', 'convert', '-W', '-f', vg_path], outfile=gfa_in_path, job_memory=job.memory) fix_cmd = ['gfaffix', gfa_in_path, '--output_refined', gfa_out_path, '--check_transformation'] if options.reference: - fix_cmd += ['--dont_collapse', 
options.reference[0] + '*'] + fix_cmd += ['--dont_collapse', options.reference[0] + '#[.]*'] cactus_call(parameters=fix_cmd, job_memory=job.memory) # GFAffix strips the header, until this is fixed we need to add it back (for the RS tag) gfa_header = cactus_call(parameters=['head', '-1', gfa_in_path], check_output=True).strip() diff --git a/src/cactus/refmap/cactus_graphmap_split.py b/src/cactus/refmap/cactus_graphmap_split.py index f08f333c6..1ab36bf31 100644 --- a/src/cactus/refmap/cactus_graphmap_split.py +++ b/src/cactus/refmap/cactus_graphmap_split.py @@ -100,7 +100,7 @@ def cactus_graphmap_split(options): #load cactus config config_node = ET.parse(options.configFile).getroot() config = ConfigWrapper(config_node) - config.substituteAllPredefinedConstantsWithLiterals() + config.substituteAllPredefinedConstantsWithLiterals(options) #Run the workflow if options.restart: @@ -225,7 +225,8 @@ def graphmap_split_workflow(job, options, config, seq_id_map, seq_name_map, gfa_ # do some basic paf filtering paf_filter_mem = max(paf_id.size * 10, 2**32) - paf_filter_job = root_job.addFollowOnJobFn(filter_paf, paf_id, config, disk = paf_id.size * 10, memory=paf_filter_mem) + paf_filter_job = root_job.addFollowOnJobFn(filter_paf, paf_id, config, reference=options.reference, + disk = paf_id.size * 10, memory=paf_filter_mem) paf_id = paf_filter_job.rv() root_job = paf_filter_job diff --git a/src/cactus/refmap/cactus_minigraph.py b/src/cactus/refmap/cactus_minigraph.py index 60c8a7011..2039f04e1 100644 --- a/src/cactus/refmap/cactus_minigraph.py +++ b/src/cactus/refmap/cactus_minigraph.py @@ -24,12 +24,14 @@ from cactus.shared.common import cactus_call from cactus.shared.common import getOptionalAttrib, findRequiredNode from cactus.shared.version import cactus_commit +from cactus.progressive.cactus_prepare import human2bytesN from cactus.preprocessor.checkUniqueHeaders import sanitize_fasta_headers from toil.job import Job from toil.common import Toil from toil.statsAndLogging 
import logger from toil.statsAndLogging import set_logging_from_options from toil.realtimeLogger import RealtimeLogger +from toil.lib.conversions import bytes2human from cactus.shared.common import cactus_cpu_count from cactus.progressive.multiCactusTree import MultiCactusTree from sonLib.bioio import getTempDirectory @@ -43,7 +45,10 @@ def main(): parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference genome name(s) (added to minigraph first). Mash distance to 1st reference to determine order of other genomes (use minigraphSortInput in the config xml to toggle this behavior).") parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).") - + parser.add_argument("--mgMemory", type=human2bytesN, + help="Memory in bytes for the minigraph construction job (defaults to an estimate based on the input data size). " + "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None) + #Progressive Cactus Options parser.add_argument("--configFile", dest="configFile", help="Specify cactus configuration file", @@ -79,7 +84,7 @@ def main(): # load up the seqfile config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) graph_event = getOptionalAttrib(findRequiredNode(config_node, "graphmap"), "assemblyName", default="_MINIGRAPH_") # load the seqfile @@ -241,6 +246,8 @@ def parse_mash_output(mash_output): catFiles(query_paths, cat_path) cat_mash_output = cactus_call(parameters=['mash', 'dist', cat_path, ref_sketch_path], check_output=True) cat_mash_dist = parse_mash_output(cat_mash_output) + # we want samples to stay together in the event of ties (which happen). 
So add a a little bit to make unique + cat_mash_dist += float(sorted(seq_id_map.keys()).index(query_seqs[0])) * sys.float_info.epsilon # make the individual distance for query_seq, query_path in zip(query_seqs, query_paths): @@ -293,8 +300,11 @@ def minigraph_construct(job, options, config_node, seq_id_map, seq_order, gfa_pa max_size = max([x.size for x in seq_id_map.values()]) total_size = sum([x.size for x in seq_id_map.values()]) disk = total_size * 2 - mem = 128 * max_size + int(total_size / 4) - mem = max(mem, 2**30) + mem = 60 * max_size + int(total_size / 4) + mem = max(mem, 2**31) + if options.mgMemory is not None: + RealtimeLogger.info('Overriding minigraph_construct memory estimate of {} with {} value {} from --mgMemory'.format(bytes2human(mem), 'greater' if options.mgMemory > mem else 'lesser', bytes2human(options.mgMemory))) + mem = options.mgMemory return job.addChildJobFn(minigraph_construct, options, config_node, seq_id_map, seq_order, gfa_path, has_resources=True, disk=disk, memory=mem, cores=options.mgCores).rv() diff --git a/src/cactus/refmap/cactus_pangenome.py b/src/cactus/refmap/cactus_pangenome.py index dbef90d82..4b15b26e5 100644 --- a/src/cactus/refmap/cactus_pangenome.py +++ b/src/cactus/refmap/cactus_pangenome.py @@ -57,6 +57,9 @@ def main(): # cactus-minigraph options parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).") + parser.add_argument("--mgMemory", type=human2bytesN, + help="Memory in bytes for the minigraph construction job (defaults to an estimate based on the input data size). " + "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None) # cactus-graphmap options parser.add_argument("--mapCores", type=int, help = "Number of cores for minigraph map. 
Overrides graphmap cpu in configuration") @@ -167,7 +170,7 @@ def main(): # load up the seqfile config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) graph_event = getOptionalAttrib(findRequiredNode(config_node, "graphmap"), "assemblyName", default="_MINIGRAPH_") # load the seqfile diff --git a/src/cactus/refmap/cactus_refmap.py b/src/cactus/refmap/cactus_refmap.py index 081cda301..f6f451bd3 100644 --- a/src/cactus/refmap/cactus_refmap.py +++ b/src/cactus/refmap/cactus_refmap.py @@ -295,7 +295,7 @@ def main(): # load up the seqfile and figure out the outgroups and schedule config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper) og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates)) event_set = get_event_set(mc_tree, config_wrapper, og_map, mc_tree.getRootName()) diff --git a/src/cactus/setup/cactus_align.py b/src/cactus/setup/cactus_align.py index 28371a8d8..82a8978fb 100644 --- a/src/cactus/setup/cactus_align.py +++ b/src/cactus/setup/cactus_align.py @@ -248,7 +248,7 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): else: config_node = ET.parse(options.configFile).getroot() config_wrapper = ConfigWrapper(config_node) - config_wrapper.substituteAllPredefinedConstantsWithLiterals() + config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) config_wrapper.initGPU(options) config_wrapper.setSystemMemory(options) @@ -387,12 +387,12 @@ def cactus_align(job, config_wrapper, mc_tree, input_seq_map, input_seq_id_map, # run the hal export hal_job = cons_job.addFollowOnJobFn(export_hal, 
sub_tree, config_wrapper.xmlRoot, new_seq_id_map, og_map, results, event=root_name, inMemory=True, checkpointInfo=checkpointInfo, acyclicEvent=referenceEvents[0] if referenceEvents else None, - memory=cons_memory) + memory_override=cons_memory) # optionally create the VG if doVG or doGFA: vg_export_job = hal_job.addFollowOnJobFn(export_vg, hal_job.rv(), config_wrapper, doVG, doGFA, referenceEvents, - checkpointInfo=checkpointInfo, memory=cons_memory) + checkpointInfo=checkpointInfo, memory_override=cons_memory) vg_file_id, gfa_file_id = vg_export_job.rv(0), vg_export_job.rv(1) else: vg_file_id, gfa_file_id = None, None @@ -400,7 +400,8 @@ def cactus_align(job, config_wrapper, mc_tree, input_seq_map, input_seq_id_map, return hal_job.rv(), vg_file_id, gfa_file_id -def export_vg(job, hal_id, config_wrapper, doVG, doGFA, referenceEvents, checkpointInfo=None, resource_spec = False): +def export_vg(job, hal_id, config_wrapper, doVG, doGFA, referenceEvents, checkpointInfo=None, resource_spec = False, + memory_override=None): """ use hal2vg to convert the HAL to vg format """ if not resource_spec: @@ -410,7 +411,7 @@ def export_vg(job, hal_id, config_wrapper, doVG, doGFA, referenceEvents, checkpo resource_spec = True, disk=hal_id.size * 3, # allow override with cons_memory - memory=hal_id.size * 60 if not job.memory else job.memory).rv() + memory=hal_id.size * 60 if not memory_override else memory_override).rv() work_dir = job.fileStore.getLocalTempDir() hal_path = os.path.join(work_dir, "out.hal") diff --git a/src/cactus/shared/common.py b/src/cactus/shared/common.py index 6afbdcf92..3d650ad1e 100644 --- a/src/cactus/shared/common.py +++ b/src/cactus/shared/common.py @@ -40,7 +40,7 @@ from toil.common import Toil from toil.job import Job from toil.realtimeLogger import RealtimeLogger -from toil.lib.humanize import bytes2human +from toil.lib.humanize import bytes2human, human2bytes from toil.lib.threading import cpu_count from sonLib.bioio import popenCatch from 
sonLib.bioio import getTempDirectory @@ -77,6 +77,26 @@ def cactus_override_toil_options(options): # lead to weird toil errors? # https://github.com/DataBiosphere/toil/issues/4218 options.disableCaching = True + + if 'CACTUS_INSIDE_CONTAINER' in os.environ and str(os.environ['CACTUS_INSIDE_CONTAINER']) == '1': + # some people get confused when trying to use their cluster from inside the cactus + # docker. it doesn't work (without tons of hackery) since slurm isnt in the image + if options.batchSystem.lower() not in ['single_machine', 'singleMachine']: + e = RuntimeError('"--batchSystem {}" is not supported from inside the Cactus Docker container. In order to use this batch system, you must run directly from a Python virutalenv on your head node. You can use --binariesMode docker to execute the various binaries using docker, but the cactus command itself must be run directly on your system where it will have access to your cluster environment (ex: sbatch).'.format(options.batchSystem)) + raise e + if options.binariesMode in ['docker', 'singularity']: + e = RuntimeError('"--binariesMode {}" is not supported from inside the Cactus Docker container, which does not support nested containerization. So if you are running Cactus from inside a Docker or Singularity container, please do not use the --binariesMode option.'.format(options.binariesMode)) + raise e + + if options.binariesMode in ['docker', 'singularity']: + # jobs, even when seemingly not using docker, sometimes disappear on + # slurm when using non-local binaries. to date, the solution has been + # to boost memory. 
this override is to help apply this globally to all + # the little jobs + opt_default_mem = options.defaultMemory if options.defaultMemory else 0 + if isinstance(opt_default_mem, str): + opt_default_mem = human2bytes(opt_default_mem) + options.defaultMemory = int(max(opt_default_mem, 2**32)) if not options.realTimeLogging: # Too much valuable debugging information to pass up @@ -268,20 +288,6 @@ def runGetChunks(sequenceFiles, chunksDir, chunkSize, overlapSize, work_dir=None "--dir", chunksDir] + sequenceFiles) return [chunk for chunk in chunks.split("\n") if chunk != ""] -def pullCactusImage(): - """Ensure that the cactus Docker image is pulled.""" - if os.environ.get('CACTUS_DOCKER_MODE') == "0": - return - if os.environ.get('CACTUS_USE_LOCAL_IMAGE', 0) == "1": - return - image = getDockerImage() - call = ["docker", "pull", image] - process = subprocess.Popen(call, stdout=subprocess.PIPE, - stderr=sys.stderr, bufsize=-1) - output, _ = process.communicate() - if process.returncode != 0: - raise RuntimeError("Command %s failed with output: %s" % (call, output)) - def getDockerOrg(): """Get where we should find the cactus containers.""" if "CACTUS_DOCKER_ORG" in os.environ: @@ -289,27 +295,22 @@ def getDockerOrg(): else: return "quay.io/comparative-genomics-toolkit" -def getDockerTag(): +def getDockerTag(gpu=False): """Get what docker tag we should use for the cactus image - (either forced to be latest or the current cactus commit).""" + Default: latest release (which is all there is on quay these days) + otherwise, either forced to be latest or the current cactus commit.""" if 'CACTUS_DOCKER_TAG' in os.environ: return os.environ['CACTUS_DOCKER_TAG'] elif 'CACTUS_USE_LATEST' in os.environ: return "latest" else: - return cactus_commit + # must be manually kept current with each release + return 'v2.6.13' + ('-gpu' if gpu else '') -def getDockerImage(): +def getDockerImage(gpu=False): """Get fully specified Docker image name.""" return "%s/cactus:%s" % 
(getDockerOrg(), getDockerTag()) -def getDockerRelease(gpu=False): - """Get the most recent docker release.""" - r = "quay.io/comparative-genomics-toolkit/cactus:v2.6.8" - if gpu: - r += "-gpu" - return r - def maxMemUsageOfContainer(containerInfo): """Return the max RSS usage (in bytes) of a container, or None if something failed.""" if containerInfo['id'] is None: @@ -435,13 +436,14 @@ def importSingularityImage(options): oldCWD = os.getcwd() os.chdir(os.path.dirname(imgPath)) # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now. + gpu = bool(options.gpu) if 'gpu' in options else False try: subprocess.check_call(["singularity", "pull", "--size", "2000", "--name", os.path.basename(imgPath), - "docker://" + getDockerImage()], stderr=subprocess.PIPE) + "docker://" + getDockerImage(gpu=gpu)], stderr=subprocess.PIPE) except subprocess.CalledProcessError: # Call failed, try without --size, required for singularity 3+ subprocess.check_call(["singularity", "pull", "--name", os.path.basename(imgPath), - "docker://" + getDockerImage()]) + "docker://" + getDockerImage(gpu=gpu)]) os.chdir(oldCWD) else: logger.info("Using pre-built singularity image: '{}'".format(imgPath)) @@ -498,7 +500,7 @@ def singularityCommand(tool=None, # hack to transform back to docker image if tool == 'cactus': - tool = getDockerImage() + tool = getDockerImage(gpu = bool(gpus)) # not a url or local file? 
try it as a Docker specifier if not tool.startswith('/') and '://' not in tool: tool = 'docker://' + tool @@ -600,7 +602,7 @@ def dockerCommand(tool=None, if rm: base_docker_call.append('--rm') - docker_tag = getDockerTag() + docker_tag = getDockerTag(gpu=bool(gpus)) tool = "%s/%s:%s" % (dockstore, tool, docker_tag) call = base_docker_call + [tool] + parameters return call, containerInfo diff --git a/src/cactus/shared/configWrapper.py b/src/cactus/shared/configWrapper.py index b40299f39..b902e1dcc 100644 --- a/src/cactus/shared/configWrapper.py +++ b/src/cactus/shared/configWrapper.py @@ -150,9 +150,17 @@ def getDefaultMemory(self): constantsElem = self.xmlRoot.find("constants") return int(constantsElem.attrib["defaultMemory"]) - def substituteAllPredefinedConstantsWithLiterals(self): + def substituteAllPredefinedConstantsWithLiterals(self, options): constants = findRequiredNode(self.xmlRoot, "constants") defines = constants.find("defines") + if options.binariesMode in ['docker', 'singularity']: + # hack to boost default memory to 4 Gigs when using docker + for attrib_name in defines.attrib: + if attrib_name.endswith('Memory'): + mem_val = int(defines.attrib[attrib_name]) + assert not options.defaultMemory or isinstance(options.defaultMemory, int) + mem_val = max(mem_val, options.defaultMemory if options.defaultMemory else 2**32) + defines.attrib[attrib_name] = str(mem_val) def replaceAllConstants(node, defines): for attrib in node.attrib: if node.attrib[attrib] in defines.attrib: @@ -232,7 +240,9 @@ def initGPU(self, options): findRequiredNode(self.xmlRoot, "blast").attrib["gpu"] = str(options.gpu) for node in self.xmlRoot.findall("preprocessor"): if getOptionalAttrib(node, "preprocessJob") == "lastzRepeatMask": - node.attrib["gpu"] = str(options.gpu) + node.attrib["gpu"] = str(options.gpu) + if 'latest' in options and options.latest: + raise RuntimeError('--latest cannot be used with --gpu') # we need to make sure that gpu is set to an integer (replacing 'all' 
with a count) def get_gpu_count(): @@ -272,13 +282,16 @@ def get_gpu_count(): raise RuntimeError('Invalid value for repeatmask gpu count, {}. Please specify a numeric value with --gpu'.format(lastz_gpu)) node.attrib["gpu"] = str(pp_gpu) - # segalign still can't contorl the number of cores it uses (!). So we give all available on - # single machine. + if 'lastzCores' not in options: + options.lastzCores = None + if 'lastzMemory' not in options: + options.lastzMemory = None + lastz_cores = options.lastzCores if options.lastzCores else None + if getOptionalAttrib(findRequiredNode(self.xmlRoot, "blast"), 'gpu', typeFn=int, default=0): - if options.lastzCores: - lastz_cores = options.lastzCores - else: - # single machine: we give all the cores to segalign + if not lastz_cores: + # segalign still can't contorl the number of cores it uses (!). So we give all available on + # single machine. if options.batchSystem.lower() in ['single_machine', 'singlemachine']: if options.maxCores is not None: lastz_cores = options.maxCores @@ -286,10 +299,21 @@ def get_gpu_count(): lastz_cores = cactus_cpu_count() else: raise RuntimeError('--lastzCores must be used with --gpu on non-singlemachine batch systems') + + # override blast cores and memory if specified + if lastz_cores: findRequiredNode(self.xmlRoot, "blast").attrib["cpu"] = str(lastz_cores) - for node in self.xmlRoot.findall("preprocessor"): - if getOptionalAttrib(node, "preprocessJob") == "lastzRepeatMask": - node.attrib["cpu"] = str(lastz_cores) + if options.lastzMemory: + findRequiredNode(self.xmlRoot, "blast").attrib["lastz_memory"] = str(options.lastzMemory) + + # override preprocess-repeatmask cores and memory if specified + for node in self.xmlRoot.findall("preprocessor"): + if getOptionalAttrib(node, "preprocessJob") == "lastzRepeatMask": + if lastz_cores: + node.attrib["cpu"] = str(lastz_cores) + if options.lastzMemory: + # there is already a general "memory" option, but we need something lastz-specific + 
node.attrib["lastz_memory"] = str(options.lastzMemory) # make absolutely sure realign is never turned on with the gpu. they don't work together because # realign requires small chunks, and segalign needs big chunks diff --git a/submodules/hal b/submodules/hal index afe2d3267..f2068109d 160000 --- a/submodules/hal +++ b/submodules/hal @@ -1 +1 @@ -Subproject commit afe2d3267a64e1ebe7bdbbbb6ed75e273c1f5083 +Subproject commit f2068109dd4b95fa50516f52aa1627686a346d31 diff --git a/test/evolverTest.py b/test/evolverTest.py index 631345169..83ffe90c4 100644 --- a/test/evolverTest.py +++ b/test/evolverTest.py @@ -12,7 +12,7 @@ def setUp(self): self.tempDir = getTempDirectory(os.getcwd()) unittest.TestCase.setUp(self) self.cromwell = False - + def tearDown(self): unittest.TestCase.tearDown(self) if self.cromwell: @@ -55,7 +55,7 @@ def _update_evolver_add_to_node(self, binariesMode, configFile = None, new_leaf= children = subprocess.check_output(['halStats', hal_path, '--children', new_root]).strip().split() children = [c.decode("utf-8") for c in children] genomes = children + [new_root] - + # make the fasta files fa_paths = [os.path.join(self.tempDir, '{}.fa'.format(genome)) for genome in genomes] for genome, genome_path in zip(genomes, fa_paths): @@ -69,7 +69,7 @@ def _update_evolver_add_to_node(self, binariesMode, configFile = None, new_leaf= assert new_child_line # make the seqfile # todo: we could fish branch lengths out of the tree, but it's not really relevant to the test - tree_string = '({}){};'.format(','.join(children + [new_leaf]), new_root) + tree_string = '({}){};'.format(','.join(children + [new_leaf]), new_root) seq_file_path = os.path.join(self.tempDir, 'seqfile.{}'.format(new_root)) with open(seq_file_path, 'w') as seq_file: seq_file.write(tree_string + '\n') @@ -90,7 +90,7 @@ def _update_evolver_add_to_node(self, binariesMode, configFile = None, new_leaf= cmd += ['--latest'] sys.stderr.write('Running {}\n'.format(' '.format(cmd))) 
subprocess.check_call(cmd) - + cmd = ['cactus-align', self._job_store(binariesMode), seq_file_path, out_paf, out_hal, '--includeRoot', '--root', new_root, '--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir] if configFile: @@ -101,7 +101,7 @@ def _update_evolver_add_to_node(self, binariesMode, configFile = None, new_leaf= sys.stderr.write('Running {}\n'.format(' '.format(cmd))) subprocess.check_call(cmd) else: - cmd = ['cactus', self._job_store(binariesMode), seq_file_path, out_hal, + cmd = ['cactus', self._job_store(binariesMode), seq_file_path, out_hal, '--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir] if configFile: cmd += ['--configFile', configFile] @@ -139,29 +139,29 @@ def genome_path(genome): bottom_seq_file.write('{}\t{}\n'.format(genome, genome_path(genome))) # make the bottom hal - bottom_hal_path = os.path.join(self.tempDir, 'bottom.hal') + bottom_hal_path = os.path.join(self.tempDir, 'bottom.hal') subprocess.check_call(['cactus', self._job_store(binariesMode), bottom_seq_path, bottom_hal_path, '--root', 'AncGorilla', '--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir]) # dump the new ancestor with open(genome_path('AncGorilla'), 'w') as anc_file: subprocess.check_call(['hal2fasta', bottom_hal_path, 'AncGorilla'], stdout=anc_file) - + # make the top seq_ile top_seq_path = os.path.join(self.tempDir, 'top.txt') with open(top_seq_path, 'w') as top_seq_file: top_seq_file.write('((simHuman_chr6:0.144018,AncGorilla:0.1)Anc1:0.020593,(simCow_chr6:0.18908,simDog_chr6:0.16303)Anc2:0.032898)Anc0;\n') for genome in ['simCow_chr6', 'simDog_chr6', 'simHuman_chr6', 'AncGorilla', 'Anc0', 'Anc1', 'Anc2']: top_seq_file.write('{}\t{}\n'.format(genome, genome_path(genome))) - + # make the top hal top_hal_path = os.path.join(self.tempDir, 'top.hal') subprocess.check_call(['cactus', self._job_store(binariesMode), top_seq_path, top_hal_path, '--root', 
'Anc1', '--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir]) - + # update the hal subprocess.check_call(['halAddToBranch', hal_path, bottom_hal_path, top_hal_path, 'Anc1', 'AncGorilla', 'mr', 'simGorilla', '0.1', '0.2']) - + def _run_evolver_in_docker(self, seqFile = './examples/evolverMammals.txt'): out_hal = self._out_hal('in_docker') @@ -170,11 +170,11 @@ def _run_evolver_in_docker(self, seqFile = './examples/evolverMammals.txt'): 'cactus /data/js /workdir/{} /data/{}'.format(seqFile, os.path.basename(out_hal))] sys.stderr.write('Running {}\n'.format(' '.format(cmd))) subprocess.check_call(' '.join(cmd), shell=True) - + def _write_primates_seqfile(self, seq_file_path): """ create the primates seqfile at given path""" with open(seq_file_path, 'w') as seq_file: - seq_file.write('simHuman\thttps://raw.githubusercontent.com/UCSantaCruzComputationalGenomicsLab/cactusTestData/master/evolver/primates/loci1/simHuman.chr6\n') + seq_file.write('simHuman\thttps://raw.githubusercontent.com/UCSantaCruzComputationalGenomicsLab/cactusTestData/master/evolver/primates/loci1/simHuman.chr6\n') seq_file.write('simChimp\thttps://raw.githubusercontent.com/UCSantaCruzComputationalGenomicsLab/cactusTestData/master/evolver/primates/loci1/simChimp.chr6\n') seq_file.write('simGorilla\thttps://raw.githubusercontent.com/UCSantaCruzComputationalGenomicsLab/cactusTestData/master/evolver/primates/loci1/simGorilla.chr6\n') seq_file.write('simOrang\thttps://raw.githubusercontent.com/UCSantaCruzComputationalGenomicsLab/cactusTestData/master/evolver/primates/loci1/simOrang.chr6\n') @@ -220,7 +220,7 @@ def _run_evolver_decomposed_wdl(self, name): # hack to use docker to remove filetree in teardown, as cromwell # can leave root files hanging around there self.cromwell = True - + out_seqfile = os.path.join(self.tempDir, 'evolverMammalsOut.txt') in_seqfile = './examples/evolverMammals.txt' out_wdl = os.path.join(self.tempDir, 'prepared.wdl') @@ -310,13 +310,13 
@@ def _run_evolver_primates_refmap(self, binariesMode): # todo: it'd be nice to have an interface for setting tag to something not latest or commit if binariesMode == 'docker': cactus_opts += ['--latest'] - + subprocess.check_call(['cactus-refmap', self._job_store(binariesMode), seq_file_path, "simHuman", cigar_path] + cactus_opts) # do the alignment subprocess.check_call(['cactus-align', self._job_store(binariesMode), seq_file_path, cigar_path, self._out_hal(binariesMode), - '--pangenome'] + cactus_opts) - + '--pangenome'] + cactus_opts) + def _run_evolver_primates_graphmap(self, binariesMode): """ primates star test but using graphmap pangenome pipeline @@ -328,7 +328,7 @@ def _run_evolver_primates_graphmap(self, binariesMode): # use the same logic cactus does to get default config config_path = 'src/cactus/cactus_progressive_config.xml' - + xml_root = ET.parse(config_path).getroot() bar_elem = xml_root.find("bar") bar_elem.attrib["partialOrderAlignment"] = "1" @@ -345,7 +345,7 @@ def _run_evolver_primates_graphmap(self, binariesMode): subprocess.check_call(['cactus-prepare', seq_file_path, '--outDir', os.path.dirname(out_seq_file_path)]) cactus_opts = ['--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir, '--configFile', poa_config_path] - + # preprocess with dna-brnn subprocess.check_call(['cactus-preprocess', self._job_store(binariesMode), seq_file_path, out_seq_file_path, '--maskMode', 'brnn'] + cactus_opts) @@ -368,7 +368,7 @@ def _run_evolver_primates_graphmap(self, binariesMode): # todo: it'd be nice to have an interface for setting tag to something not latest or commit if binariesMode == 'docker': cactus_opts += ['--latest'] - + subprocess.check_call(['cactus-graphmap', self._job_store(binariesMode), out_seq_file_path, mg_path, paf_path, '--outputFasta', fa_path, '--mapCores', '4'] + cactus_opts) @@ -404,7 +404,7 @@ def _run_yeast_pangenome_step_by_step(self, binariesMode): # make a config that replaces stuff like 
SC4.chrI with id=SC4|chrI # also run dna brnn with softmasking orig_seq_file_path = './examples/yeastPangenome.txt' - seq_file_path = os.path.join(self.tempDir, 'pp', os.path.basename(orig_seq_file_path)) + seq_file_path = os.path.join(self.tempDir, 'pp', os.path.basename(orig_seq_file_path)) subprocess.check_call(['cactus-prepare', orig_seq_file_path, '--outDir', os.path.join(self.tempDir, 'pp'), '--seqFileOnly']) cactus_opts = ['--binariesMode', binariesMode, '--logInfo', '--realTimeLogging', '--workDir', self.tempDir, '--maxCores', '4'] subprocess.check_call(['cactus-preprocess', self._job_store(binariesMode), @@ -417,18 +417,18 @@ def _run_yeast_pangenome_step_by_step(self, binariesMode): toks = line.strip().split() if len(toks) == 2: events.append(toks[0]) - + # build the minigraph mg_path = os.path.join(self.tempDir, 'yeast.sv.gfa.gz') mg_cmd = ['cactus-minigraph', self._job_store(binariesMode), seq_file_path, mg_path, '--reference', 'S288C'] + cactus_opts subprocess.check_call(mg_cmd) - + # run graphmap in base mode paf_path = os.path.join(self.tempDir, 'yeast.paf') - fa_path = os.path.join(self.tempDir, 'yeast.sv.gfa.fa') + fa_path = os.path.join(self.tempDir, 'yeast.sv.gfa.fa') subprocess.check_call(['cactus-graphmap', self._job_store(binariesMode), seq_file_path, mg_path, paf_path, '--outputFasta', fa_path, '--delFilter', '2000000'] + cactus_opts + ['--mapCores', '1']) - + # split into chromosomes chroms = ['chrI', 'chrII', 'chrIII', 'chrIV', 'chrV', 'chrVI', 'chrVII', 'chrVIII', 'chrIX', 'chrX', 'chrXI', 'chrXIV', 'chrXV'] gm_out_dir = os.path.join(self.tempDir, 'chroms') @@ -451,7 +451,7 @@ def _run_yeast_pangenome_step_by_step(self, binariesMode): # join up the graphs and index for giraffe join_path = os.path.join(self.tempDir, 'join') subprocess.check_call(['cactus-graphmap-join', self._job_store(binariesMode), '--outDir', join_path, '--outName', 'yeast', - '--reference', 'S288C', '--vg'] + vg_files + ['--hal'] + hal_files + + '--reference', 'S288C', 
'--vg'] + vg_files + ['--hal'] + hal_files + ['--vcf', '--giraffe', 'clip', 'filter'] + cactus_opts + ['--indexCores', '4']) def _run_yeast_pangenome(self, binariesMode): @@ -466,13 +466,13 @@ def _run_yeast_pangenome(self, binariesMode): chroms = ['chrI', 'chrII', 'chrIII', 'chrIV', 'chrV', 'chrVI', 'chrVII', 'chrVIII', 'chrIX', 'chrX', 'chrXI', 'chrXIV', 'chrXV'] cactus_pangenome_cmd = ['cactus-pangenome', self._job_store(binariesMode), orig_seq_file_path, '--outDir', join_path, '--outName', 'yeast', - '--refContigs'] + chroms + ['--reference', 'S288C', 'DBVPG6044', '--vcf', '--vcfReference','DBVPG6044', 'S288C', + '--refContigs'] + chroms + ['--reference', 'S288C', 'DBVPG6044', '--vcf', '--vcfReference','DBVPG6044', 'S288C', '--giraffe', 'clip', 'filter', '--chrom-vg', 'clip', 'filter', '--viz', '--chrom-og', 'clip', 'full', '--odgi', '--haplo', 'clip', '--rgfa', '--indexCores', '4', '--consCores', '2'] subprocess.check_call(cactus_pangenome_cmd + cactus_opts) - #compatibility with older test + #compatibility with older test subprocess.check_call(['mkdir', '-p', os.path.join(self.tempDir, 'chroms')]) subprocess.check_call(['mv', os.path.join(join_path, 'chrom-subproblems', 'contig_sizes.tsv'), os.path.join(self.tempDir, 'chroms')]) @@ -535,7 +535,7 @@ def _check_yeast_pangenome(self, binariesMode, other_ref=None, expect_odgi=False for giraffe_idx in ['yeast.gbz', 'yeast.dist', 'yeast.min', 'yeast.d2.gbz', 'yeast.d2.dist', 'yeast.d2.min']: idx_bytes = os.path.getsize(os.path.join(join_path, giraffe_idx)) self.assertGreaterEqual(idx_bytes, 500000) - + if expect_haplo: # make sure we have the haplo indexes: for haplo_idx in ['yeast.ri', 'yeast.hapl']: @@ -576,13 +576,13 @@ def _check_yeast_pangenome(self, binariesMode, other_ref=None, expect_odgi=False self.assertEqual(len(sizes), 10) self.assertGreaterEqual(int(sizes[9]), 200000) - # make sure the gbz stats are sane + # make sure the gbz stats are sane clip_degree_dist = subprocess.check_output(['vg', 'stats', 
'-D', os.path.join(join_path, 'yeast.gbz')]).strip().decode('utf-8') - clip_degree_dist = len(clip_degree_dist.strip().split('\n')) + clip_degree_dist = len(clip_degree_dist.strip().split('\n')) self.assertGreaterEqual(clip_degree_dist, 6) filter_degree_dist = subprocess.check_output(['vg', 'stats', '-D', os.path.join(join_path, 'yeast.d2.gbz')]).strip().decode('utf-8') - filter_degree_dist = len(filter_degree_dist.strip().split('\n')) + filter_degree_dist = len(filter_degree_dist.strip().split('\n')) self.assertGreaterEqual(filter_degree_dist, 6) self.assertLess(filter_degree_dist, clip_degree_dist) @@ -615,8 +615,8 @@ def _check_yeast_pangenome(self, binariesMode, other_ref=None, expect_odgi=False self.assertGreaterEqual(os.path.getsize(os.path.join(join_path, 'yeast.viz', chr + '.full.viz.png')), 700) self.assertGreaterEqual(os.path.getsize(os.path.join(join_path, 'yeast.chroms', chr + '.full.og')), 3000000) self.assertGreaterEqual(os.path.getsize(os.path.join(join_path, 'yeast.chroms', chr + '.og')), 3000000) - - + + def _csvstr_to_table(self, csvstr, header_fields): """ Hacky csv parse """ output_stats = {} @@ -736,7 +736,7 @@ def _check_maf_accuracy(self, halPath, delta, dataset="mammals"): # run it with --dupeMode consensus just to make sure it doesn't crash subprocess.check_call(['cactus-hal2maf', self._job_store('h2m'), halPath, halPath + '.consensus.maf.gz', '--chunkSize', '10000', '--batchCount', '2', '--refGenome', 'Anc0', '--dupeMode', 'consensus'], shell=False) - + # cactus-hal2maf doesnt support --onlySequenceNames because the genome names are needed by taf_add_gap_bases # so we manually filter here for genome in subprocess.check_output(['halStats', halPath, '--genomes']).strip().decode('utf-8').split(): @@ -747,7 +747,7 @@ def _check_maf_accuracy(self, halPath, delta, dataset="mammals"): def parse_mafcomp_output(xml_path): xml_root = ET.parse(xml_path).getroot() comp_roots = xml_root.findall("homologyTests") - assert len(comp_roots) == 2 + assert 
len(comp_roots) == 2 first, second = None, None for comp_root in comp_roots: avg_val = float(comp_root.find("aggregateResults").find("all").attrib["average"]) @@ -773,7 +773,7 @@ def _check_valid_hal(self, halPath, expected_tree=None): hal_tree = subprocess.check_output(['halStats', halPath, '--tree']).strip().decode('utf-8') if expected_tree: self.assertEqual(hal_tree, expected_tree) - + def testEvolverLocal(self): """ Check that the output of halStats on a hal file produced by running cactus with --binariesMode local is is reasonable @@ -785,20 +785,20 @@ def testEvolverLocal(self): # check the output #self._check_stats(self._out_hal(name), delta_pct=0.25) #self._check_coverage(self._out_hal(name), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.14)) + self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.13)) def testEvolverUpdateNodeLocal(self): """ Check that the "add a genome to ancestor" modification steps give sane/valid results """ name = "local" self._run_evolver(name) - + self._update_evolver_add_to_node(name, new_leaf='simChimp', new_root='Anc1', step_by_step = True) self._update_evolver_add_to_node(name, new_leaf='simOrang', new_root='mr', step_by_step = False) self._check_valid_hal(self._out_hal(name), expected_tree='((simHuman_chr6:0.144018,(simMouse_chr6:0.084509,simRat_chr6:0.091589,simOrang:1)mr:0.271974,simChimp:1)Anc1:0.020593,(simCow_chr6:0.18908,simDog_chr6:0.16303)Anc2:0.032898)Anc0;') - self._check_maf_accuracy(self._out_hal(name), delta=(0.1,0.14)) + self._check_maf_accuracy(self._out_hal(name), delta=(0.1,0.13)) def testEvolverUpdateBranchLocal(self): """ Check that the "add a genome to branch" modification steps give sane/valid results """ @@ -809,7 +809,7 @@ def testEvolverUpdateBranchLocal(self): self._check_valid_hal(self._out_hal(name), 
expected_tree='((simHuman_chr6:0.144018,((simMouse_chr6:0.084509,simRat_chr6:0.091589)mr:0.171974,simGorilla:0.2)AncGorilla:0.1)Anc1:0.020593,(simCow_chr6:0.18908,simDog_chr6:0.16303)Anc2:0.032898)Anc0;') - self._check_maf_accuracy(self._out_hal(name), delta=(0.1,0.14)) + self._check_maf_accuracy(self._out_hal(name), delta=(0.1,0.13)) def testEvolverPrepareWDL(self): @@ -820,7 +820,7 @@ def testEvolverPrepareWDL(self): # check the output #self._check_stats(self._out_hal("wdl"), delta_pct=0.25) #self._check_coverage(self._out_hal("wdl"), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal("wdl"), delta=(0.05,0.14)) + self._check_maf_accuracy(self._out_hal("wdl"), delta=(0.05,0.13)) def testEvolverPrepareToil(self): @@ -831,7 +831,7 @@ def testEvolverPrepareToil(self): # check the output #self._check_stats(self._out_hal(name), delta_pct=0.25) #self._check_coverage(self._out_hal(name), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.14)) + self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.13)) def testEvolverDecomposedLocal(self): """ Check that the output of halStats on a hal file produced by running cactus with --binariesMode local is @@ -844,7 +844,7 @@ def testEvolverDecomposedLocal(self): # check the output #self._check_stats(self._out_hal(name), delta_pct=0.25) #self._check_coverage(self._out_hal(name), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.14)) + self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.13)) def testEvolverDecomposedDocker(self): """ Check that the output of halStats on a hal file produced by running cactus with --binariesMode docker is @@ -857,8 +857,8 @@ def testEvolverDecomposedDocker(self): # check the output #self._check_stats(self._out_hal(name), delta_pct=0.25) #self._check_coverage(self._out_hal(name), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.14)) - + self._check_maf_accuracy(self._out_hal(name), delta=(0.05,0.13)) 
+ def testEvolverDocker(self): """ Check that the output of halStats on a hal file produced by running cactus with --binariesMode docker is is reasonable. Note: the local image being tested should be set up via CACTUS_DOCKER_ORG (with tag==latest) @@ -869,7 +869,7 @@ def testEvolverDocker(self): # check the output #self._check_stats(self._out_hal("docker"), delta_pct=0.25) #self._check_coverage(self._out_hal("docker"), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal("docker"), delta=(0.05,0.14)) + self._check_maf_accuracy(self._out_hal("docker"), delta=(0.05,0.13)) def testEvolverInDocker(self): """ Check that the output of halStats on a hal file produced by running cactus in docker @@ -881,8 +881,8 @@ def testEvolverInDocker(self): # check the output #self._check_stats(self._out_hal("docker"), delta_pct=0.25) #self._check_coverage(self._out_hal("docker"), delta_pct=0.20) - self._check_maf_accuracy(self._out_hal("in_docker"), delta=(0.05,0.14)) - + self._check_maf_accuracy(self._out_hal("in_docker"), delta=(0.05,0.13)) + def testEvolverPrepareNoOutgroupDocker(self): # run cactus step by step via the plan made by cactus-prepare @@ -907,7 +907,7 @@ def testEvolverPOALocal(self): """ # use the same logic cactus does to get default config config_path = 'src/cactus/cactus_progressive_config.xml' - + xml_root = ET.parse(config_path).getroot() bar_elem = xml_root.find("bar") bar_elem.attrib["partialOrderAlignment"] = "1" @@ -953,13 +953,13 @@ def testEvolverPrimatesPangenomeLocal(self): # check the output # todo: tune config so that delta can be reduced - self._check_maf_accuracy(self._out_hal("local"), delta=(0.025,0.025), dataset='primates') - + self._check_maf_accuracy(self._out_hal("local"), delta=(0.025,0.025), dataset='primates') + def testYeastPangenomeStepByStepLocal(self): """ Run pangenome pipeline (including contig splitting!) 
on yeast dataset step by step""" name = "local" self._run_yeast_pangenome_step_by_step(name) - + # check the output self._check_yeast_pangenome(name) @@ -967,7 +967,7 @@ def testYeastPangenomeLocal(self): """ Run pangenome pipeline (including contig splitting!) on yeast dataset using cactus-pangenome """ name = "local" self._run_yeast_pangenome(name) - + # check the output self._check_yeast_pangenome(name, other_ref='DBVPG6044', expect_odgi=True, expect_haplo=True, expect_rgfa=True)