diff --git a/.dag.png b/.dag.png index d8806e5..f3e4928 100644 Binary files a/.dag.png and b/.dag.png differ diff --git a/.github/workflows/test_apptainer.yml b/.github/workflows/test_apptainer.yml index d70f359..452af0d 100644 --- a/.github/workflows/test_apptainer.yml +++ b/.github/workflows/test_apptainer.yml @@ -26,7 +26,7 @@ jobs: environment-file: .test/environment_apptainer.yaml - name: Run test pipeline shell: bash -el {0} - run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml -c1 --sdm apptainer conda + run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml -c1 --sdm apptainer conda --keep-going --notemp - name: Pack logs if: success() || failure() shell: bash -el {0} diff --git a/.github/workflows/test_v7.yml b/.github/workflows/test_v7.yml index 93c6a4e..3416131 100644 --- a/.github/workflows/test_v7.yml +++ b/.github/workflows/test_v7.yml @@ -26,7 +26,7 @@ jobs: environment-file: .test/environment_v7.yaml - name: Run test pipeline shell: bash -el {0} - run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --use-conda -c1 --conda-frontend conda + run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --use-conda -c1 --conda-frontend conda --keep-going --notemp - name: Pack logs if: success() || failure() shell: bash -el {0} diff --git a/.github/workflows/test_v8.yml b/.github/workflows/test_v8.yml index 0043c36..8fd912d 100644 --- a/.github/workflows/test_v8.yml +++ b/.github/workflows/test_v8.yml @@ -26,7 +26,7 @@ jobs: environment-file: .test/environment_v8.yaml - name: Run test pipeline shell: bash -el {0} - run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --use-conda -c1 --conda-frontend conda + run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml --use-conda -c1 --conda-frontend conda --keep-going --notemp - name: Pack logs if: success() || failure() shell: bash -el {0} diff --git a/.github/workflows/test_v9.yml b/.github/workflows/test_v9.yml index 1f5371a..a3e211c 100644 --- a/.github/workflows/test_v9.yml +++ b/.github/workflows/test_v9.yml @@ -26,7 +26,7 @@ jobs: environment-file: .test/environment_v9.yaml - name: Run test pipeline shell: bash -el {0} - run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml -c1 --sdm conda + run: snakemake --snakefile .test/Snakefile --configfile config/config.yaml .test/targets.yaml -c1 --sdm conda --keep-going --notemp - name: Pack logs if: success() || failure() shell: bash -el {0} diff --git a/.rulegraph.png b/.rulegraph.png index dc85134..d3bcf3c 100644 Binary files a/.rulegraph.png and b/.rulegraph.png differ diff --git a/.rulegraph_sv.svg b/.rulegraph_sv.svg new file mode 100644 index 0000000..d7ff0c7 --- /dev/null +++ b/.rulegraph_sv.svg @@ -0,0 +1 @@ 
+[snakevision rule-graph SVG: single-line XML markup lost in extraction; its text nodes name every workflow rule, from read_bam_refs and demix_barcode_update through report]
\ No newline at end of file
diff --git a/.test/Snakefile b/.test/Snakefile
index 66eaf84..d3ff278 100644
--- a/.test/Snakefile
+++ b/.test/Snakefile
@@ -8,7 +8,7 @@ import subprocess
 min_version("7.19")

 # Workflow version
-__version__ = "1.2.1"
+__version__ = "1.3.0"

 # Rules
 include: "../workflow/core.smk"
diff --git a/.test/targets.yaml b/.test/targets.yaml
index b9e4784..bac704f 100644
--- a/.test/targets.yaml
+++ b/.test/targets.yaml
@@ -23,11 +23,9 @@ PLOTS:
 PROBLEMATIC_VCF:
   "https://raw.githubusercontent.com/W-L/ProblematicSites_SARS-CoV2/da322c32004f7b16bdaa6a8ee7fd24d56e79d9dc/problematic_sites_sarsCov2.vcf"
 VC:
-  MAX_DEPTH: 0
   MIN_QUALITY: 0
-  IVAR_QUALITY: 0
-  IVAR_FREQ: 0.05
-  IVAR_DEPTH: 5
+  MIN_FREQ: 0.05
+  MIN_DEPTH: 5
 DEMIX:
   MIN_QUALITY: 0
   COV_CUTOFF: 0
@@ -40,7 +38,5 @@ PLOT_GENOME_REGIONS:
   "config/nsp_annotation.csv"
 REPORT_QMD:
   "template.qmd"
-FEATURES_JSON:
-  "config/sarscov2_features.json"
 GENETIC_CODE_JSON:
   "config/standard_genetic_code.json"
diff --git a/Dockerfile b/Dockerfile
index 7f39843..7f6ac6a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,23 @@
 FROM condaforge/miniforge3:latest
 LABEL io.github.snakemake.containerized="true"
-LABEL io.github.snakemake.conda_env_hash="bd9b69b913e7b4e17d7d5ca7169a5815c3145ea775af87163f18c0b92abc1bf8"
+LABEL io.github.snakemake.conda_env_hash="7071b22b1161190c06be3ac061ab0019a1a8d3038532c9134e070a3875414ef5"

 # Step 2: Retrieve conda environments
+# Conda environment:
+#   source: workflow/envs/afwdist.yaml
+#   prefix: /conda-envs/9c24a867826615972cc288081976e7fc
+#   channels:
+#     - bioconda
+#     - conda-forge
+#   dependencies:
+#     - afwdist==1.0.0
+RUN mkdir -p /conda-envs/9c24a867826615972cc288081976e7fc
+COPY workflow/envs/afwdist.yaml /conda-envs/9c24a867826615972cc288081976e7fc/environment.yaml
+
 # Conda environment:
 #   source: workflow/envs/biopython.yaml
-#   prefix: /conda-envs/bd81c49fcb540d7706807c1683ba7200
+#   prefix: /conda-envs/162796cecea22d99c8702138f0c48e2f
 #   channels:
 #     - conda-forge
 #     - bioconda
 #   dependencies:
 #     - python==3.10
 #     - biopython==1.81
 #     - pandas==2.0.3
-#     - pip==23.2.1
-#     - mafft==7.525
-#     - pip:
-#       - gb2seq==0.2.20
-RUN mkdir -p /conda-envs/bd81c49fcb540d7706807c1683ba7200
-COPY workflow/envs/biopython.yaml /conda-envs/bd81c49fcb540d7706807c1683ba7200/environment.yaml
+RUN mkdir -p 
/conda-envs/162796cecea22d99c8702138f0c48e2f +COPY workflow/envs/biopython.yaml /conda-envs/162796cecea22d99c8702138f0c48e2f/environment.yaml # Conda environment: # source: workflow/envs/fetch.yaml @@ -35,14 +42,14 @@ COPY workflow/envs/fetch.yaml /conda-envs/9439457f932a4fbca3665c9ea1ac2f0a/envir # Conda environment: # source: workflow/envs/freyja.yaml -# prefix: /conda-envs/ee7a2e1b4ec9a7a9999f34dddaea0605 +# prefix: /conda-envs/bb4c5f3a509433cc08861582fab4a705 # channels: # - conda-forge # - bioconda # dependencies: -# - freyja==1.4.2 -RUN mkdir -p /conda-envs/ee7a2e1b4ec9a7a9999f34dddaea0605 -COPY workflow/envs/freyja.yaml /conda-envs/ee7a2e1b4ec9a7a9999f34dddaea0605/environment.yaml +# - freyja==2.0.1 +RUN mkdir -p /conda-envs/bb4c5f3a509433cc08861582fab4a705 +COPY workflow/envs/freyja.yaml /conda-envs/bb4c5f3a509433cc08861582fab4a705/environment.yaml # Conda environment: # source: workflow/envs/gisaidr.yaml @@ -93,56 +100,52 @@ COPY workflow/envs/pangolin.yaml /conda-envs/fd645c541ee7a3d43fb9167441b77888/en # Conda environment: # source: workflow/envs/quarto_render.yaml -# prefix: /conda-envs/f2a098519cf1f8c4cecb3c13f8c92883 +# prefix: /conda-envs/96f3c1cec4b3ce5d72f708992272e9c1 # channels: # - conda-forge -# - bioconda # dependencies: -# - r-base==4.3.1 -# - r-gt==0.9.0 -# - quarto==1.3.450 -# - r-jsonlite==1.8.5 +# - r-base==4.5.2 +# - r-gt==1.1.0 +# - quarto==1.8.25 +# - deno==2.3.1 # - r-tidyverse==2.0.0 -# - r-quarto==1.2 -# - r-heatmaply==1.4.2 -# - r-readr==2.1.4 -RUN mkdir -p /conda-envs/f2a098519cf1f8c4cecb3c13f8c92883 -COPY workflow/envs/quarto_render.yaml /conda-envs/f2a098519cf1f8c4cecb3c13f8c92883/environment.yaml +# - r-heatmaply==1.6.0 +RUN mkdir -p /conda-envs/96f3c1cec4b3ce5d72f708992272e9c1 +COPY workflow/envs/quarto_render.yaml /conda-envs/96f3c1cec4b3ce5d72f708992272e9c1/environment.yaml # Conda environment: # source: workflow/envs/renv.yaml -# prefix: /conda-envs/4b57bfc237ddc217c1f0b04d34dc06ef +# prefix: /conda-envs/8ad6cdcf265d30289788da99d5bf9fff # channels: # - conda-forge # - bioconda # dependencies: -# - r-base=4.1.3 +# - r-base=4.3.2 # - r-tidyverse==2.0.0 # - r-ggrepel==0.9.3 -# - r-stringi==1.7.12 # - r-ggpubr==0.6.0 -# - bioconductor-ggtree==3.2.0 -# - r-ape==5.7 +# - bioconductor-ggtree==3.10.0 +# - r-ape==5.8 # - r-adephylo==1.1_13 # - r-pegas==1.2 # - r-data.table==1.14.8 # - r-future.apply==1.11.0 -# - r-scales==1.2.1 +# - r-scales==1.3.0 # - r-showtext==0.9_6 -# - r-jsonlite==1.8.5 # - r-logger==0.2.2 -RUN mkdir -p /conda-envs/4b57bfc237ddc217c1f0b04d34dc06ef -COPY workflow/envs/renv.yaml /conda-envs/4b57bfc237ddc217c1f0b04d34dc06ef/environment.yaml +RUN mkdir -p /conda-envs/8ad6cdcf265d30289788da99d5bf9fff +COPY workflow/envs/renv.yaml /conda-envs/8ad6cdcf265d30289788da99d5bf9fff/environment.yaml # Conda environment: # source: workflow/envs/snpeff.yaml -# prefix: /conda-envs/1934df0e4df02a7ee33c52f53f9e3c30 +# prefix: /conda-envs/0adafb79cb1bec58ef4c77bf4cca4f95 # channels: # - bioconda # dependencies: # - snpeff==5.1d -RUN mkdir -p /conda-envs/1934df0e4df02a7ee33c52f53f9e3c30 -COPY workflow/envs/snpeff.yaml /conda-envs/1934df0e4df02a7ee33c52f53f9e3c30/environment.yaml +# - snpsift==5.1d +RUN mkdir -p /conda-envs/0adafb79cb1bec58ef4c77bf4cca4f95 +COPY workflow/envs/snpeff.yaml /conda-envs/0adafb79cb1bec58ef4c77bf4cca4f95/environment.yaml # Conda environment: # source: workflow/envs/var_calling.yaml @@ -158,16 +161,17 @@ COPY workflow/envs/var_calling.yaml /conda-envs/5150d0f0a91d7f7a789a06f453d63479 # Step 3: Generate conda environments -RUN 
conda env create --prefix /conda-envs/bd81c49fcb540d7706807c1683ba7200 --file /conda-envs/bd81c49fcb540d7706807c1683ba7200/environment.yaml && \ +RUN conda env create --prefix /conda-envs/9c24a867826615972cc288081976e7fc --file /conda-envs/9c24a867826615972cc288081976e7fc/environment.yaml && \ + conda env create --prefix /conda-envs/162796cecea22d99c8702138f0c48e2f --file /conda-envs/162796cecea22d99c8702138f0c48e2f/environment.yaml && \ conda env create --prefix /conda-envs/9439457f932a4fbca3665c9ea1ac2f0a --file /conda-envs/9439457f932a4fbca3665c9ea1ac2f0a/environment.yaml && \ - conda env create --prefix /conda-envs/ee7a2e1b4ec9a7a9999f34dddaea0605 --file /conda-envs/ee7a2e1b4ec9a7a9999f34dddaea0605/environment.yaml && \ + conda env create --prefix /conda-envs/bb4c5f3a509433cc08861582fab4a705 --file /conda-envs/bb4c5f3a509433cc08861582fab4a705/environment.yaml && \ conda env create --prefix /conda-envs/3fad3c9cdfa40bee9404f6a2e8fda69f --file /conda-envs/3fad3c9cdfa40bee9404f6a2e8fda69f/environment.yaml && \ conda env create --prefix /conda-envs/0a608afb24723cb6fa8aef748f5efbc8 --file /conda-envs/0a608afb24723cb6fa8aef748f5efbc8/environment.yaml && \ conda env create --prefix /conda-envs/04a3347f94ddf7e21c34bc49e5246076 --file /conda-envs/04a3347f94ddf7e21c34bc49e5246076/environment.yaml && \ conda env create --prefix /conda-envs/fd645c541ee7a3d43fb9167441b77888 --file /conda-envs/fd645c541ee7a3d43fb9167441b77888/environment.yaml && \ - conda env create --prefix /conda-envs/f2a098519cf1f8c4cecb3c13f8c92883 --file /conda-envs/f2a098519cf1f8c4cecb3c13f8c92883/environment.yaml && \ - conda env create --prefix /conda-envs/4b57bfc237ddc217c1f0b04d34dc06ef --file /conda-envs/4b57bfc237ddc217c1f0b04d34dc06ef/environment.yaml && \ - conda env create --prefix /conda-envs/1934df0e4df02a7ee33c52f53f9e3c30 --file /conda-envs/1934df0e4df02a7ee33c52f53f9e3c30/environment.yaml && \ + conda env create --prefix /conda-envs/96f3c1cec4b3ce5d72f708992272e9c1 --file /conda-envs/96f3c1cec4b3ce5d72f708992272e9c1/environment.yaml && \ + conda env create --prefix /conda-envs/8ad6cdcf265d30289788da99d5bf9fff --file /conda-envs/8ad6cdcf265d30289788da99d5bf9fff/environment.yaml && \ + conda env create --prefix /conda-envs/0adafb79cb1bec58ef4c77bf4cca4f95 --file /conda-envs/0adafb79cb1bec58ef4c77bf4cca4f95/environment.yaml && \ conda env create --prefix /conda-envs/5150d0f0a91d7f7a789a06f453d63479 --file /conda-envs/5150d0f0a91d7f7a789a06f453d63479/environment.yaml && \ conda clean --all -y diff --git a/config/README.md b/config/README.md index 23e0367..f5cc413 100644 --- a/config/README.md +++ b/config/README.md @@ -12,16 +12,28 @@ For example, existing [profiles](https://snakemake.readthedocs.io/en/stable/gett are cross-compatible as well, but note that the `--use-conda` flag is deprecated starting with Snakemake 8. Instead, use `--software-deployment-method conda`. +## tl;dr + +- **Needs**: + - Snakemake 7.19 or later. + - One FASTA per sample. + - One BAM per sample. + - One metadata CSV with columns `ID`, `CollectionDate` (YYYY-MM-DD), `ResidenceCity` and `GISAIDEPI` (can be empty). +- **Setup**: edit [targets.yaml](/config/targets.yaml) (set `SAMPLES` and `METADATA`, at least) or build it using [`build_targets.py`](/build_targets.py). Leave `CONTEXT_FASTA: null` to auto-download from GISAID (needs `config/gisaid.yaml` with your username and password) or set a local FASTA path if download fails (see GISAID disclaimer). +- **Run**: `snakemake (--use-conda | --sdm conda) -c4`. 
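+
+A minimal metadata CSV (illustrative values only; the sample IDs, city and accession below are placeholders) could look like:
+
+```csv
+ID,CollectionDate,ResidenceCity,GISAIDEPI
+sample01,2021-01-05,Valencia,EPI_ISL_0000001
+sample02,2021-01-19,Valencia,
+```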
+
 ## Inputs and outputs

 The workflow requires a set of FASTA files (one per target sample),
 a corresponding set of BAM files (also one per target sample), and
 a metadata table in CSV format with one row per
-sample. The metadata must include the following columns: unique sample identifier (default column `ID`,
-used to match sequencing files with metadata), the date the sample was collected (default `CollectionDate`),
-the location where the sample was collected (default `ResidenceCity`), and GISAID accession (default `GISAIDEPI`).
-The default column names but can be customized if needed via the workflow parameters.
+sample. The metadata must include at least the following columns:

-These parameters are set in two configuration files in YAML format:
+- `ID`: unique sample identifier, used to match sequencing files with metadata.
+- `CollectionDate`: the date the sample was collected (YYYY-MM-DD).
+- `ResidenceCity`: the location where the sample was collected.
+- `GISAIDEPI`: GISAID accession identifier (`EPI_ISL_...` or empty).
+
+The paths to these input files are set in two configuration files in YAML format:
 [config.yaml](/config/config.yaml) (for general workflow settings)
 and [targets.yaml](/config/targets.yaml) (for specific dataset-related
 settings). The latter must be modified by the user to point the `SAMPLES` and `METADATA`
@@ -130,24 +142,50 @@ All of the following variables are pre-defined in [config.yaml](/config/config.y

 - `ALIGNMENT_REFERENCE`: NCBI accession number of the reference record for sequence alignment.
 - `PROBLEMATIC_VCF`: URL or path of a VCF file containing problematic genome positions for masking.
-- `FEATURES_JSON`: path of a JSON file containing name equivalences of genome features for data visualization.
 - `GENETIC_CODE_JSON`: path of a JSON file containing a genetic code for gene translation.
 - `TREE_MODEL`: substitution model used by IQTREE (see [docs](http://www.iqtree.org/doc/Substitution-Models)).
-- `UFBOOT_REPS`: ultrafast bootstrap replicates for IQTREE (see [UFBoot](https://doi.org/10.1093/molbev/msx281)).
-- `SHALRT_REPS`: Shimodaira–Hasegawa approximate likelihood ratio test bootstrap replicates for IQTREE (see [SH-aLRT](https://doi.org/10.1093/sysbio/syq010)).
+- `UFBOOT` & `SHALRT`: settings for ultrafast bootstrap (see [UFBoot](https://doi.org/10.1093/molbev/msx281))
+  and Shimodaira–Hasegawa approximate likelihood ratio test bootstrap
+  (see [SH-aLRT](https://doi.org/10.1093/sysbio/syq010)) in IQTREE runs:
+  - `REPS`: number of replicates.
+  - `THRESHOLD`: value cutoff for visualization.
 - `VC`: variant calling configuration:
-  - `MAX_DEPTH`: maximum depth at a position for `samtools mpileup` (option `-d`).
-  - `MIN_QUALITY`: minimum base quality for `samtools mpileup` (option `-Q`).
-  - `IVAR_QUALITY`: minimum base quality for `ivar variants` (option `-q`).
-  - `IVAR_FREQ`: minimum frequency threshold for `ivar variants` (option `-t`).
-  - `IVAR_DEPTH`: minimum read depth for `ivar variants` (option `-m`).
-- `DEMIX`: demixing configuration:
+  - `MIN_QUALITY`: minimum base quality for `ivar variants` (option `-q`).
+  - `MIN_FREQ`: minimum frequency threshold for `ivar variants` (option `-t`).
+  - `MIN_DEPTH`: minimum read depth for `ivar variants` (option `-m`).
+  - `MAX_DEPTH`: maximum read depth for `samtools mpileup` (option `-d`).
+- `DEMIX`: demixing configuration (uses [Freyja](https://github.com/andersen-lab/Freyja), see also [its docs](https://andersen-lab.github.io/Freyja/index.html)):
+  - `PATHOGEN`: pathogen of interest for `freyja update` (option `--pathogen`); must be 'SARS-CoV-2'.
-  - `MIN_QUALITY`: minimum quality for `freyja variants` (option `--minq`).
-  - `COV_CUTOFF`: minimum depth for `freyja demix` (option `--covcut`).
+  - `MIN_QUALITY`: minimum base quality for the demixing pileup (`samtools mpileup` option `-Q` and `ivar variants` option `-q`).
+  - `MAX_DEPTH`: maximum read depth for `samtools mpileup` (option `-d`).
+  - `COV_CUTOFF`: minimum depth to calculate the reported "coverage" (percent of sites with that depth) for `freyja demix` (option `--covcut`).
   - `MIN_ABUNDANCE`: minimum lineage estimated abundance for `freyja demix` (option `--eps`).
+  - `CONFIRMED_ONLY`: exclude unconfirmed lineages in `freyja demix` (option `--confirmedonly`).
+  - `DEPTH_CUTOFF`: minimum depth at each site for `freyja demix` (option `--depthcutoff`).
+  - `RELAXED_MRCA`: assign clusters using relaxed (as opposed to strict) MRCA, used with `DEPTH_CUTOFF`, for `freyja demix` (option `--relaxedmrca`).
+  - `RELAXED_MRCA_THRESH`: `RELAXED_MRCA` threshold for `freyja demix` (option `--relaxedthresh`).
+  - `AUTO_ADAPT`: use error profile to set adaptive lasso penalty parameter for `freyja demix` (option `--autoadapt`).
 - `WINDOW`: sliding window of nucleotide variants per site configuration:
   - `WIDTH`: number of sites within windows.
   - `STEP`: number of sites between windows.
+- `GB_FEATURES`: optional mapping to filter which features from the GenBank file are used
+  by some analyses (e.g. rules `window` and `n_s_sites`).
+  If `GB_FEATURES` is empty or unset, all features are used.
+  Filtering is applied in order:
+  - `INCLUDE`: mapping of qualifier names to sequences of values.
+    If present, only features that match at least one key/value pair in `INCLUDE`
+    are included in the analyses. For example, having `INCLUDE: {gene: [S, N]}` keeps features whose `gene`
+    qualifier equals `S` or `N`.
+  - `EXCLUDE`: mapping of qualifier names to sequences of values.
+    After `INCLUDE` is applied, any feature that matches any key/value pair in `EXCLUDE`
+    is omitted. For example, having `EXCLUDE: {gene: [S, N]}` removes features whose
+    `gene` qualifier equals `S` or `N`.
+- `ANNOTATION`: settings for variant annotation and functional effect prediction using [SnpEff](https://pcingola.github.io/SnpEff/), which uses `ALIGNMENT_REFERENCE` to select the annotation database.
+  - `SNPEFF_COLS`: mapping of column names (which appear in result tables) to VCF fields (extracted after annotation with [SnpSift](https://pcingola.github.io/SnpEff/#snpsift)). Some columns are hard-coded in the code, so removing them is not advised. Additional columns can be added as needed.
+  - `FILTER_INCLUDE` & `FILTER_EXCLUDE`: mappings of column names (from `SNPEFF_COLS`) to lists of values used for filtering the annotated variants table. `FILTER_INCLUDE` is applied first, then `FILTER_EXCLUDE`.
+    - `FILTER_INCLUDE`: keeps variants that match at least one listed value.
+    - `FILTER_EXCLUDE`: removes variants that match any listed value.
+  - `VARIANT_NAME_PATTERN`: string template used to build the variant name shown in the results table (column `VARIANT_NAME`). The template is interpreted with [glue](https://glue.tidyverse.org/) and can use any column name from `SNPEFF_COLS` and some R functions. For example, `"{GENE}:{coalesce(HGVS_P, HGVS_C)}"` creates names like `S:p.D614G` (using `HGVS_P` when available, otherwise `HGVS_C`).
 - `GISAID`: automatic context download configuration.
 - `CREDENTIALS`: path of the GISAID credentials in YAML format.
 - `DATE_COLUMN`: name of the column that contains sampling dates (YYYY-MM-DD) in the input target metadata.
@@ -160,27 +198,35 @@ All of the following variables are pre-defined in [config.yaml](/config/config.y
 - `EXACT`: boolean flag indicating whether to compute an exact p-value when possible. This option applies only to certain methods and may be set to `null` (default) to let R decide automatically.
 - `LOG_PY_FMT`: logging format string for Python scripts.
 - `PLOTS`: path of the R script that sets the design and style of data visualizations.
-- `PLOT_GENOME_REGIONS`: path of a CSV file containing genome regions, e.g. SARS-CoV-2 non-structural protein (NSP) coordinates, for data visualization.
+- `PLOT_GENOME_REGIONS`: path of a CSV file containing genome regions, e.g. SARS-CoV-2 non-structural protein (NSP) coordinates, for data visualization (columns: `region`, `start`, `end`).
 - `REPORT_QMD`: path of the report template in Quarto markdown (QMD) format.
+- `REPORT_CSS`: path of the report stylesheet definition in CSS format.

-## Workflow graphs
+## Workflow visualization

-To generate a simplified rule graph, run:
+Snakemake enables easy visualization of workflows and rule relationships. The `--rulegraph` option outputs a DOT file that describes dependencies between rules. The example below produces an image using Graphviz:

 ```shell
-snakemake --rulegraph | dot -Tpng > .rulegraph.png
+snakemake --forceall --rulegraph | dot -Tpng >.rulegraph.png
 ```

 ![Snakemake rule graph](/.rulegraph.png)

-To generate the directed acyclic graph (DAG) of all rules
-to be executed, run:
+The same graph can also be rendered with other tools such as [snakevision](https://github.com/OpenOmics/snakevision) (v0.1.0).
+
+```shell
+snakemake --forceall --rulegraph | snakevision -s all -o .rulegraph_sv.svg
+```
+
+![Snakemake rule graph using snakevision](/.rulegraph_sv.svg)
+
+The `--dag` option emits a directed acyclic graph (DAG) that corresponds to the rule instances that would be executed for the current dataset.
The example below produces an image using Graphviz: ```shell -snakemake --forceall --dag | dot -Tpng > .dag.png +snakemake --forceall --dag | dot -Tpng >.dag.png ``` -![Snakemake rule graph](/.dag.png) +![Snakemake DAG](/.dag.png) ## Run modes diff --git a/config/config.yaml b/config/config.yaml index 7bb2fbe..54d5097 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,29 +2,82 @@ ALIGNMENT_REFERENCE: "NC_045512.2" PROBLEMATIC_VCF: "https://raw.githubusercontent.com/W-L/ProblematicSites_SARS-CoV2/da322c32004f7b16bdaa6a8ee7fd24d56e79d9dc/problematic_sites_sarsCov2.vcf" -FEATURES_JSON: - "config/sarscov2_features.json" GENETIC_CODE_JSON: "config/standard_genetic_code.json" TREE_MODEL: "GTR+F+I+G4" -UFBOOT_REPS: - 1000 -SHALRT_REPS: - 1000 +UFBOOT: + REPS: 1000 + THRESHOLD: 0.95 +SHALRT: + REPS: 1000 + THRESHOLD: 0.80 VC: - MAX_DEPTH: 0 - MIN_QUALITY: 0 - IVAR_QUALITY: 20 - IVAR_FREQ: 0.05 - IVAR_DEPTH: 30 + MIN_QUALITY: 20 + MIN_FREQ: 0.05 + MIN_DEPTH: 30 + MAX_DEPTH: 1000000 DEMIX: + PATHOGEN: "SARS-CoV-2" MIN_QUALITY: 20 + MAX_DEPTH: 1000000 COV_CUTOFF: 30 MIN_ABUNDANCE: 0.0001 + CONFIRMED_ONLY: false + DEPTH_CUTOFF: 0 + RELAXED_MRCA: false + RELAXED_MRCA_THRESH: 0.9 + AUTO_ADAPT: false + SOLVER: "CLARABEL" WINDOW: WIDTH: 1000 STEP: 50 +GB_FEATURES: + INCLUDE: # any + product: + - "ORF1ab polyprotein" + - "surface glycoprotein" + - "ORF3a protein" + - "envelope protein" + - "membrane glycoprotein" + - "ORF6 protein" + - "ORF7a protein" + - "ORF7b" + - "ORF8 protein" + - "nucleocapsid phosphoprotein" + - "ORF10 protein" + # EXCLUDE: ... # all +ANNOTATION: + # see: https://pcingola.github.io/SnpEff/adds/VCFannotationformat_v1.0.pdf + SNPEFF_COLS: + CHROM: CHROM + POS: POS + REF: REF + ALT: ALT + EFFECT: "ANN[*].EFFECT" + IMPACT: "ANN[*].IMPACT" + BIOTYPE: "ANN[*].BIOTYPE" + GENE: "ANN[*].GENE" + GENEID: "ANN[*].GENEID" + FEATURE: "ANN[*].FEATURE" + FEATUREID: "ANN[*].FEATUREID" + HGVS_P: "ANN[*].HGVS_P" + HGVS_C: "ANN[*].HGVS_C" + ERRORS: "ANN[*].ERRORS" + FILTER_INCLUDE: + # IMPACT: [HIGH, MODERATE, LOW] + FILTER_EXCLUDE: + EFFECT: [upstream_gene_variant, downstream_gene_variant] + ERRORS: + - ERROR_CHROMOSOME_NOT_FOUND + - ERROR_OUT_OF_CHROMOSOME_RANGE + - WARNING_REF_DOES_NOT_MATCH_GENOME + - WARNING_SEQUENCE_NOT_AVAILABLE + - WARNING_TRANSCRIPT_INCOMPLETE + - WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS + - WARNING_TRANSCRIPT_NO_START_CODON + - WARNING_TRANSCRIPT_NO_STOP_CODON + VARIANT_NAME_PATTERN: "{GENE}:{coalesce(HGVS_P, HGVS_C)}" GISAID: CREDENTIALS: "config/gisaid.yaml" DATE_COLUMN: "CollectionDate" @@ -44,3 +97,5 @@ PLOT_GENOME_REGIONS: "config/nsp_annotation.csv" REPORT_QMD: "template.qmd" +REPORT_CSS: + "config/report.styles.css" diff --git a/config/design_plots.R b/config/design_plots.R index 971265c..74e87d8 100644 --- a/config/design_plots.R +++ b/config/design_plots.R @@ -1,6 +1,6 @@ # Jordi Sevilla -library(tidyverse) +library(ggplot2) library(showtext) # Ajustes #### @@ -8,13 +8,13 @@ showtext_auto(enable = FALSE) showtext_opts(dpi = 200) # Tema -font_add_google("Montserrat", "Montserrat") +font_add_google("Noto Sans", "Noto Sans") showtext_auto() theme_set(theme_minimal()) theme_update( - text = element_text(size = 16, family = "Montserrat"), + text = element_text(size = 16, family = "Noto Sans"), axis.title = element_text(size = 16), axis.line = element_line( linewidth = 0.5, @@ -25,12 +25,11 @@ theme_update( panel.grid = element_line(linewidth = 0.17, color = "lightgray") ) - -# CONFIG -gene_colors = c( +# Gene palette +GENE_PALETTE <- c( M = "#B4D4B4", N = 
"#B7B7B8", - orf1ab = "#9CC4DC", + ORF1ab = "#9CC4DC", ORF3a = "#ECB4B7", ORF8 = "#996D2B", S = "#F5CC9E", @@ -40,20 +39,32 @@ gene_colors = c( ORF10 = "#CACB5D" ) +# Nucleotide diversity +DIVERSITY_PALETTE <- c( + density_fill = "#fcbf49", + density_color = "#ffaa00", + value_color = "#D944AA", + dnorm_color = "#f77f00" +) # M-L tree colors and labels -tree_colors = c( +TREE_PALETTE <- c( tip_label = "#D944AA99", boot_alrt_pass = "#64ACEEB2" ) -node.size <- c( +TREE_NODE_SIZE <- c( tip_label = 2, boot_alrt_pass = 0.8 ) +TREE_LEGEND_NAMES <- c( + tip_label = "Target samples", + boot_alrt_pass = "UFBoot ≥ %s%s & SH-aLRT ≥ %s%s" +) + # Nucleotide variants classification colors and labels -NV_colors <- c( +NV_TYPE_PALETTE <- c( Frameshift = "#568D63", "In frame" = "black", Intergenic = "#B27CF9", @@ -61,7 +72,7 @@ NV_colors <- c( Yes = "#0248FD" ) -NV_names <- c( +NV_TYPE_NAMES <- c( Frameshift = "Frameshift", "In frame" = "Inframe", Intergenic = "Intergenic", @@ -70,17 +81,23 @@ NV_names <- c( ) # dn ds colors and labels -dnds.labels <- c( +DNDS_LABELS <- c( dn = "dN", ds = "dS" ) -dnds.colors <- c( - dn = "#E53E47", - ds = "#2C47F5" +DNDS_COLORS <- c( + dN = "#E53E47", + dS = "#2C47F5" ) -dnds.shapes <- c( - dn = 2, - ds = 4 +DNDS_SHAPES <- c( + dN = 2, + dS = 4 ) + +# Allele frequency trajectories panel color +ALL.COLORS <- grDevices::colors() +TRAJECTORY.PANEL.COLORS <- ALL.COLORS[ + !grepl("(gray|grey|white|snow|azure|beige)", ALL.COLORS) +] diff --git a/config/nsp_annotation.csv b/config/nsp_annotation.csv index 02126d0..78ab294 100644 --- a/config/nsp_annotation.csv +++ b/config/nsp_annotation.csv @@ -1,17 +1,7 @@ region,start,end -nsp1,266,805 -nsp2,806,2719 -nsp3,2720,8554 -nsp4,8555,10054 -nsp5,10055,10972 -nsp6,10973,11842 -nsp7,11843,12091 -nsp8,12092,12685 -nsp9,12686,13024 -nsp10,13025,13441 -nsp11,13442,13468 -nsp12,13468,16236 -nsp13,16237,18039 -nsp14,18040,19620 -nsp15,19621,20658 -nsp16,20659,21552 +NSP2,806,2719 +NSP3,2720,8554 +NSP4-11,8555,13468 +RdRP,13468,16236 +NSP13,16237,18039 +NSP14-16,18040,21552 diff --git a/config/report.styles.css b/config/report.styles.css index f81e0a9..9ea2783 100644 --- a/config/report.styles.css +++ b/config/report.styles.css @@ -1,35 +1,42 @@ -.quarto-title-block .quarto-title-banner { - background: #27445C; - background-image: url(logo_pathogenomics.png); - background-size: 100px; - background-position: right; - background-repeat: no-repeat; - padding-right: 10px; - background-origin: content-box; - } +html { + font-family: "Noto Sans"; + text-align: justify; +} -.title { - font-family: Montserrat; - color: #FFFFFF +body { + color: black; } -.quarto-title p { - font-family: Montserrat; - color: #FFFFFF + +.quarto-title-block .quarto-title-banner { + background-color: #27445C; + background-image: url("logo_pathogenomics.png"); + background-size: 100px; + background-position: right; + background-repeat: no-repeat; + background-origin: content-box; + padding-right: 10px; + background-blend-mode: normal; + filter: none; + mix-blend-mode: normal; + color: white; } -html { - font-family: Montserrat; - color: #000000; - text-align: justify; +.quarto-title-block .quarto-title-banner .title, +.quarto-title-block .quarto-title-banner p { + color: inherit; + -webkit-text-fill-color: inherit; + opacity: 1; } h1, h2, h3 { text-align: left; font-weight: bold; + color: #27445C } + #quarto-sidebar-toc-left { text-align: left; } .header-section-number { - color:#000000 -} \ No newline at end of file + color: #27445C +} diff --git 
a/config/sarscov2_features.json b/config/sarscov2_features.json deleted file mode 100644 index 2031f32..0000000 --- a/config/sarscov2_features.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "ORF1ab polyprotein": "orf1ab", - "ORF1a polyprotein": "orf1ab", - "surface glycoprotein": "S", - "ORF3a protein": "ORF3a", - "envelope protein": "E", - "membrane glycoprotein": "M", - "ORF6 protein": "ORF6", - "ORF7a protein": "ORF7", - "ORF7b": "ORF7", - "ORF8 protein": "ORF8", - "nucleocapsid phosphoprotein": "N", - "ORF10 protein": "ORF10" -} diff --git a/template.qmd b/template.qmd index eb5b82b..3c2d044 100644 --- a/template.qmd +++ b/template.qmd @@ -4,8 +4,8 @@ subtitle: "Workflow version: `r params$workflow_version`" date: last-modified date-format: "YYYY-MM-DD" title-block-banner: true -format: - html: +format: + html: page-layout: article embed-resources: true smooth-scroll: true @@ -15,41 +15,48 @@ format: toc-location: left toc-title: Summary number-sections: true -css: config/report.styles.css -editor: visual +css: __CSSPLACEHOLDER__ +editor: source params: - ufboot_reps: "" - shalrt_reps: "" - min_ivar_freq: "" - workflow_version: "" - use_bionj: "" - cor_method: "" - div: "" - freyja: "" - tree: "" - tempest: "" - SNV: "" - SNV_s: "" - evo: "" - div_value: "" - panel: "" - volcano: "" - tree_ml: "" - fig_cor_snp: "" - stats_lm: "" - table: "" - sum_nv: "" - heat_tab: "" - omega_plot: "" - name: "" + ufboot_reps: + shalrt_reps: + min_ivar_freq: + workflow_version: + use_bionj: + cor_method: + div: + demix: + tree: + tempest: + SNV: + SNV_s: + evo: + div_value: + panel: + volcano: + tree_ml: + fig_cor_snp: + stats_lm: + stats_ml: + table: + sum_nv: + heat_tab: + omega_plot: + name: + freyja_ts: output-file: report.html --- - + +```{r setup, echo = F, message = F, include = F} +display.num <- function(n, digits) { + format(n, digits = digits, nsmall = digits) +} +``` ```{r read, echo = F, message = F, include = F} library(jsonlite) @@ -62,11 +69,14 @@ library(readr) # Diversity div_values <- fromJSON(params$div_value) -# Temporal signal -stats <- fromJSON(params$stats_lm) +# Temporal signal and context tree +stats <- append( + fromJSON(params$stats_lm), + fromJSON(params$stats_ml) +) correlation <- stats[["r2"]] sub_rate <- stats[["sub_rate"]] -sub_rate <- round(sub_rate, digits = 2) +sub_rate <- display.num(sub_rate, 2) p_value_lm <- stats[["pvalue"]] # NV counts @@ -79,10 +89,10 @@ table <- read.csv(params$table) n.samples <- table %>% pull(Sample) %>% unique() %>% length() # Heatmap -vcf <- read_csv(params$heat_tab) -row.names(vcf) <- vcf$`...1` -vcf <- vcf[-1] -cor.mat <- cor(vcf, method = params$cor_method) +heatmap.df <- read.csv(params$heat_tab) +row.names(heatmap.df) <- heatmap.df$`...1` +heatmap.df[1] <- NULL +cor.mat <- cor(heatmap.df, method = params$cor_method) cor.mat[is.na(cor.mat)] <- 0 if (params$cor_method == "pearson") { cor.method.name <- "Pearson's" @@ -100,12 +110,16 @@ if (params$use_bionj) { } else { dist.tree.algo <- "neighbor-joining (NJ)" } + +# Freyja timestamp +freyja.timestamp <- read_file(params$freyja_ts) %>% + as.POSIXct(., format = "%m_%d_%Y-%H-%M") %>% + strftime(., format = "%Y-%m-%d at %H:%M") ``` ## Summary of the target samples dataset -```{r summary_table, echo = F,message = F} -#| label: tbl-summary +```{r tbl-summary, echo = F,message = F} #| tbl-cap: Target dataset summary table %>% @@ -133,7 +147,7 @@ table %>% columns = everything() ) %>% opt_table_font( - font = google_font("Montserrat") + font = google_font("Noto Sans") ) ``` @@ -141,11 +155,11 @@ 
table %>%

 ### Lineage admixture

-The estimated lineage admixture for each sample has been calculated
-using [Freyja](https://github.com/andersen-lab/Freyja).
+The lineage admixture for each sample has been estimated
+using [Freyja](https://github.com/andersen-lab/Freyja) (@fig-demix).

-![Estimated lineage admixture of each sample.
-Samples in the X-axis are ordered chronologically, from more ancient to newer.](`r params$freyja`){#fig-freyja}
+![Estimated lineage admixture of each sample (Freyja barcode timestamp: `r freyja.timestamp`).
+Samples on the X-axis are ordered chronologically, from oldest to newest.](`r params$demix`){#fig-demix}

 ### Phylogenetic reconstruction

@@ -159,26 +173,25 @@ $`r stats[["boot"]]`$% and a SH-aLRT score of $`r stats[["alrt"]]`$% (@fig-tree_

 ![Maximum-likelihood phylogeny with $`r params$ufboot_reps`$ UFBoot
 and $`r params$shalrt_reps`$ SH-aLRT support replicates of the target
 dataset and its context samples. The clade that contains the target
-samples is squared in red.](`r params$tree_ml`){#fig-tree_ml}
+samples is boxed.](`r params$tree_ml`){#fig-tree_ml}

 ### Nucleotide diversity comparison

 Nucleotide diversity (π) has been calculated for $`r div_values[["boot.reps"]]`$ random
-sample subsets of size $`r div_values[["sample.size"]]`$, extracted
-from the context dataset. The distribution of the nuclotide diversity is assumed to
-`r div_values[["norm.text"]]` be normal after performing a Shapiro-Wilk test
-(p-value of $`r div_values[["normal.pvalue"]]`$).
-
-The nucleotide diversity of the target samples is $`r div_values[["diversity"]] `$ (red line in @fig-div).
-Assuming the independence of the context samples, the `r div_values[["type.test"]]`
-p-value of the context samples having a nucleotide diversity (in orange in @fig-div)
-as low as that of the target dataset is $`r div_values[["p.value"]]`$.
-
-![Analysis of the nucleotide diversity (π). The orange line describes
-a normal distribution with the same mean and standard deviation as the distribution
-of π from $`r div_values[["boot.reps"]]`$ subsets of $`r div_values[["sample.size"]]`$
-sequences from the context. The red vertical line indicates the π value of the
-target samples.](`r params$div`){#fig-div}
+subsets of size $`r div_values[["sample.size"]]`$ drawn from the context dataset.
+A Shapiro–Wilk test indicates the distribution is `r div_values[["norm.text"]]`
+normal (p-value of $`r div_values[["normal.pvalue"]]`$).
+
+The nucleotide diversity of the target samples is $`r display.num(div_values[["diversity"]], 3)`$
+(vertical line in @fig-div). Assuming the context subsets are independent, the
+`r div_values[["type.test"]]` test gives a p-value of $`r display.num(div_values[["p.value"]], 3)`$
+for observing a nucleotide diversity as low as that of the target samples.
+
+![Distribution of nucleotide diversity (π) calculated from $`r div_values[["boot.reps"]]`$
+random subsets of $`r div_values[["sample.size"]]`$ sequences from the context dataset.
+The shaded area shows the empirical density; the overlaid curve is the normal distribution
+with the same mean and standard deviation.
+The vertical line marks the π value of the target samples.](`r params$div`){#fig-div}

 ## Evolutionary trajectory of the serially-sampled SARS-CoV-2 infection

@@ -186,7 +199,7 @@ target samples.](`r params$div`){#fig-div}

 Sites with minor allele frequency $> `r params$min_ivar_freq`$ are considered polymorphic.
The linear association between the collection date of the samples and the number of -polymorphic sites has an $R^2$ of $`r nv.counts[["r2"]]`$ and a p-value of +polymorphic sites has an $R^2$ of $`r display.num(nv.counts[["r2"]], 4)`$ and a p-value of $`r nv.counts[["value"]]`$ (@fig-fig_cor_snp). ![Number of polymorphic sites along time. The @@ -194,7 +207,7 @@ blue line shows the linear model fit.](`r params$fig_cor_snp`){#fig-fig_cor_snp} ### Description of intra-host nucleotide variants -A total of $`r n_SNV`$ different single nucleotide variants (SNV) and $`r n_INDELS`$ +A total of $`r n_SNV`$ different nucleotide variants (NV) and $`r n_INDELS`$ insertions and deletions (indels) have been detected along the genome (@fig-SNV). ::: {.panel-tabset} @@ -203,7 +216,7 @@ insertions and deletions (indels) have been detected along the genome (@fig-SNV) ![Summary of the intra-host accumulation of nucleotide variants, using the reconstructed dataset ancestor as reference. A) Nucleotide -variants per site along the SARS-CoV-2 genome. Relative abundance of NVs is calculated +variants per site along the SARS-CoV-2 genome. Relative abundance of variants is calculated with a sliding window of width $`r nv.counts[["window"]]`$ nucleotides and a step of $`r nv.counts[["step"]]`$. Labels indicate the coding regions of the non structural proteins (NSP) within ORF1ab. B) Genome variation along the genome for each sample. @@ -214,7 +227,7 @@ at the bottom, and the latest, at the top.](`r params$SNV`){#fig-SNV} ![Summary of the intra-host accumulation of nucleotide variants in the spike sequence, using the reconstructed dataset ancestor as reference. A) Nucleotide -variants per site along the S gene. Relative abundance of NVs is calculated +variants per site along the S gene. Relative abundance of variants is calculated with a sliding window of width $`r nv.counts[["window"]]`$ nucleotides and a step of $`r nv.counts[["step"]]`$. B) Genome variation along the S gene for each sample. The Y-axis displays samples in chronological order, with the earliest collection date @@ -224,12 +237,12 @@ at the bottom, and the latest, at the top.](`r params$SNV_s`){#fig-SNV_s} ### Temporal signal of the intra-host mutations -The correlation of the allele frequency of each NV with the time since the +`r cor.method.name` correlation of the allele frequency of each variant with the time since the initial sampling has been calculated (@fig-volcano). ![`r cor.method.name` correlation coefficients and adjusted p-values of -allele frequencies with time. Red dashed line indicates adjusted $p = 0.05$. -Labeled dots represent nucleotide variants correlated with time +allele frequencies with time. The dashed line indicates adjusted (false discovery rate) $p = 0.05$. +Labeled dots represent nucleotide variants whose allele frequency is correlated with time (adjusted $p < 0.05$).](`r params$volcano`){#fig-volcano} Significantly correlated nucleotide variants are described in more detail in @fig-panel. @@ -248,28 +261,31 @@ frequency-weighted distances.](`r params$tree`){#fig-tree} To estimate the evolutionary rate, root-to-tip distances measured on the previous tree (@fig-tree) have been correlated with time, obtaining a $R^2$ of -$`r correlation`$ and a p-value of $`r p_value_lm`$. The estimated evolutionary +$`r display.num(correlation, 4)`$ and a p-value of $`r p_value_lm`$. The estimated evolutionary rate is $`r sub_rate`$ number of changes per year (@fig-tempest). 
![Scatterplot depicting the relationship between root-to-tip -distances and the number of days passed since the first sample. The red -line shows the linear model fit.](`r params$tempest`){#fig-tempest} +distances and the number of days passed since the first sample. The solid +line represents the linear model fit.](`r params$tempest`){#fig-tempest} ### Correlation between alternative alleles -To detect possible interactions between mutations, pairwise correlation between allele -frequencies are calculated (@fig-heatmap). The heatmap is an interactive figure that allows -zooming in on specific regions. +`r cor.method.name` correlation coefficients of allele frequencies between pairs +of variants were calculated to detect interactions between them (@fig-heatmap). +The heatmap is interactive and allows zooming in on specific regions. -```{r heatmap, echo = F, message = F, fig.align = 'center'} -#| label: fig-heatmap -#| fig-cap: "Interactive hierarchically clustered heatmap of the pairwise correlation coefficients between the time series of allele frequencies in the case study." +```{r fig-heatmap, echo = F, message = F, warning = F, fig.align = 'center'} +#| fig-cap: "Interactive heatmap with hierarchical clustering of the pairwise correlation coefficients between the time series of allele frequencies in the case study." heatmaply_cor( cor.mat, - fontsize_row = 8, - fontsize_col = 8, - column_text_angle = 90 + grid_gap = 0.25, + fontsize_row = 7, + fontsize_col = 7, + column_text_angle = 45, + label_names = c("row", "column", "coefficient"), + width = 600, + height = 600 ) ``` diff --git a/workflow/Snakefile b/workflow/Snakefile index e8456e1..97381f5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -6,7 +6,7 @@ import subprocess min_version("7.19") -__version__ = "1.2.2" +__version__ = "1.3.0" containerized: "docker://ahmig/vipera:v" + __version__ diff --git a/workflow/envs/afwdist.yaml b/workflow/envs/afwdist.yaml new file mode 100644 index 0000000..8e29d0d --- /dev/null +++ b/workflow/envs/afwdist.yaml @@ -0,0 +1,5 @@ +channels: + - bioconda + - conda-forge +dependencies: + - afwdist==1.0.0 diff --git a/workflow/envs/biopython.yaml b/workflow/envs/biopython.yaml index 172f7bf..aaafd09 100644 --- a/workflow/envs/biopython.yaml +++ b/workflow/envs/biopython.yaml @@ -5,7 +5,3 @@ dependencies: - python==3.10 - biopython==1.81 - pandas==2.0.3 - - pip==23.2.1 - - mafft==7.525 - - pip: - - gb2seq==0.2.20 diff --git a/workflow/envs/freyja.yaml b/workflow/envs/freyja.yaml index 94d9be6..6673ab1 100644 --- a/workflow/envs/freyja.yaml +++ b/workflow/envs/freyja.yaml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - freyja==1.4.2 + - freyja==2.0.1 diff --git a/workflow/envs/quarto_render.yaml b/workflow/envs/quarto_render.yaml index 71fa4cb..a1ab3ba 100644 --- a/workflow/envs/quarto_render.yaml +++ b/workflow/envs/quarto_render.yaml @@ -1,12 +1,9 @@ channels: - conda-forge - - bioconda dependencies: - - r-base==4.3.1 - - r-gt==0.9.0 - - quarto==1.3.450 - - r-jsonlite==1.8.5 + - r-base==4.5.2 + - r-gt==1.1.0 + - quarto==1.8.25 + - deno==2.3.1 - r-tidyverse==2.0.0 - - r-quarto==1.2 - - r-heatmaply==1.4.2 - - r-readr==2.1.4 + - r-heatmaply==1.6.0 diff --git a/workflow/envs/renv.yaml b/workflow/envs/renv.yaml index 43568d8..ceb6a99 100644 --- a/workflow/envs/renv.yaml +++ b/workflow/envs/renv.yaml @@ -5,7 +5,6 @@ dependencies: - r-base=4.3.2 - r-tidyverse==2.0.0 - r-ggrepel==0.9.3 - - r-stringi==1.7.12 - r-ggpubr==0.6.0 - bioconductor-ggtree==3.10.0 - r-ape==5.8 @@ -15,5 
+14,4 @@ dependencies: - r-future.apply==1.11.0 - r-scales==1.3.0 - r-showtext==0.9_6 - - r-jsonlite==1.8.5 - r-logger==0.2.2 diff --git a/workflow/envs/snpeff.yaml b/workflow/envs/snpeff.yaml index 1423a3c..612ab76 100644 --- a/workflow/envs/snpeff.yaml +++ b/workflow/envs/snpeff.yaml @@ -2,3 +2,4 @@ channels: - bioconda dependencies: - snpeff==5.1d + - snpsift==5.1d diff --git a/workflow/rules/asr.smk b/workflow/rules/asr.smk index 5acaded..d20c6d4 100644 --- a/workflow/rules/asr.smk +++ b/workflow/rules/asr.smk @@ -2,6 +2,7 @@ rule reconstruct_ancestral_sequence: threads: 4 conda: "../envs/iqtree.yaml" params: + seed = 7291, seqtype = "DNA", name = OUTPUT_NAME, outgroup = config["ALIGNMENT_REFERENCE"], @@ -14,13 +15,11 @@ rule reconstruct_ancestral_sequence: log: LOGDIR / "reconstruct_ancestral_sequence" / "log.txt" shell: - """ - mkdir -p {output.folder} - iqtree2 \ - -asr \ - -o {params.outgroup} -T AUTO --threads-max {threads} -s {input.fasta} \ - --seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/{params.name} >{log} 2>&1 - """ + "mkdir -p {output.folder} && " + "iqtree2 -seed {params.seed} " + "-asr " + "-o {params.outgroup} -T AUTO --threads-max {threads} -s {input.fasta} " + "--seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/{params.name} >{log} 2>&1" rule ancestor_fasta: @@ -29,7 +28,7 @@ rule ancestor_fasta: params: node_id = "Node1", indeterminate_char = "N", - name = OUTPUT_NAME + name = "case_ancestor", input: state_file = OUTDIR/"tree"/f"{OUTPUT_NAME}.state" output: diff --git a/workflow/rules/context.smk b/workflow/rules/context.smk index 623060c..9f8fa05 100644 --- a/workflow/rules/context.smk +++ b/workflow/rules/context.smk @@ -74,12 +74,14 @@ rule ml_context_tree: conda: "../envs/iqtree.yaml" shadow: "shallow" params: + seed = 7291, seqtype = "DNA", name = OUTPUT_NAME, - ufboot = config["UFBOOT_REPS"], - alrt = config["SHALRT_REPS"], + ufboot = config["UFBOOT"]["REPS"], + alrt = config["SHALRT"]["REPS"], outgroup = config["ALIGNMENT_REFERENCE"], - model = config["TREE_MODEL"] + model = config["TREE_MODEL"], + max_iterations_convergence = 1000, input: fasta = OUTDIR/"nextalign"/f"{OUTPUT_NAME}.aligned.masked.fasta", outgroup_aln = OUTDIR/"context"/"nextalign"/"context_sequences.aligned.masked.fasta" @@ -89,14 +91,11 @@ rule ml_context_tree: log: LOGDIR / "ml_context_tree" / "log.txt" shell: - """ - exec >{log} - exec 2>&1 - - awk '/^>/{{p=seen[$0]++}}!p' {input.fasta} {input.outgroup_aln} > aln.fasta - mkdir -p {output.folder} - iqtree2 \ - -B {params.ufboot} -alrt {params.alrt} \ - -o {params.outgroup} -T AUTO --threads-max {threads} -s aln.fasta \ - --seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/{params.name} - """ + "exec >{log} && exec 2>&1; " + "awk '/^>/{{p=seen[$0]++}}!p' {input.fasta} {input.outgroup_aln} >aln.fasta && " + "mkdir -p {output.folder} && " + "iqtree2 -seed {params.seed} " + "-bb {params.ufboot} -alrt {params.alrt} " + "-o {params.outgroup} -T AUTO --threads-max {threads} -s aln.fasta " + "-nm {params.max_iterations_convergence} " + "--seqtype {params.seqtype} -m {params.model} --prefix {output.folder}/{params.name}" diff --git a/workflow/rules/demix.smk b/workflow/rules/demix.smk index db6f6c2..4973b3e 100644 --- a/workflow/rules/demix.smk +++ b/workflow/rules/demix.smk @@ -1,26 +1,47 @@ +rule demix_barcode_update: + threads: 1 + shadow: "shallow" + conda: + "../envs/freyja.yaml" + params: + pathogen = config["DEMIX"]["PATHOGEN"] + output: + folder = 
directory(OUTDIR/"demixing"/"freyja_data"), + curated_lineages = OUTDIR/"demixing"/"freyja_data"/"curated_lineages.json", + last_barcode_update = OUTDIR/"demixing"/"freyja_data"/"last_barcode_update.txt", + lineage_mutations = OUTDIR/"demixing"/"freyja_data"/"lineage_mutations.json", + lineage_yml = OUTDIR/"demixing"/"freyja_data"/"lineages.yml", + pathogens = OUTDIR/"demixing"/"freyja_data"/"pathogen_config.yml", + usher_barcodes = OUTDIR/"demixing"/"freyja_data"/"usher_barcodes.feather" + log: + LOGDIR / "demix_barcode_update" / "log.txt" + shell: + "mkdir -p {output.folder:q} && " + "freyja update --outdir {output.folder:q} --pathogen {params.pathogen:q} >{log} 2>&1" + + rule demix_preprocessing: threads: 1 - conda: "../envs/freyja.yaml" + conda: "../envs/var_calling.yaml" shadow: "minimal" input: bam = get_input_bam, ref_fasta = lambda wildcards: select_mapping_references_fasta() params: - minq = config["DEMIX"]["MIN_QUALITY"] + max_depth = config["DEMIX"]["MAX_DEPTH"], + minq = config["DEMIX"]["MIN_QUALITY"], output: depth_file = OUTDIR/"demixing"/"{sample}/{sample}_depth.txt", - variants_file = OUTDIR/"demixing"/"{sample}/{sample}_variants.tsv" + variants_file = OUTDIR/"demixing"/"{sample}/{sample}_variants.tsv", log: - LOGDIR / "demix_preprocessing" / "{sample}.log.txt" + pileup = LOGDIR / "demix_preprocessing" / "{sample}_pileup.log.txt", + ivar = LOGDIR / "demix_preprocessing" / "{sample}_ivar.log.txt", shell: - """ - freyja variants \ - "{input.bam}" \ - --variants {output.variants_file} \ - --depths {output.depth_file} \ - --minq {params.minq} \ - --ref {input.ref_fasta} >{log} 2>&1 - """ + "set -euo pipefail && " + "samtools mpileup -aa -A -d {params.max_depth} -Q {params.minq} -q 0 -B -f {input.ref_fasta:q} {input.bam:q} >sample.pileup 2>{log.pileup:q} && " + "ivar variants -p variants -q {params.minq} -r {input.ref_fasta:q} >{log.ivar:q} 2>&1 {output.depth_file:q} && " + "mv variants.tsv {output.variants_file:q}" rule demix: @@ -29,34 +50,52 @@ rule demix: shadow: "minimal" input: depth_file = OUTDIR/"demixing"/"{sample}/{sample}_depth.txt", - variants_file = OUTDIR/"demixing"/"{sample}/{sample}_variants.tsv" + variants_file = OUTDIR/"demixing"/"{sample}/{sample}_variants.tsv", + barcodes = OUTDIR/"demixing"/"freyja_data"/"usher_barcodes.feather", + curated_lineages = OUTDIR/"demixing"/"freyja_data"/"curated_lineages.json", + lineage_yml = OUTDIR/"demixing"/"freyja_data"/"lineages.yml", params: coverage_cutoff = config["DEMIX"]["COV_CUTOFF"], - minimum_abundance = config["DEMIX"]["MIN_ABUNDANCE"] + minimum_abundance = config["DEMIX"]["MIN_ABUNDANCE"], + confirmed_only = "--confirmedonly " if config["DEMIX"]["CONFIRMED_ONLY"] else "", + depth_cutoff = config["DEMIX"]["DEPTH_CUTOFF"], + auto_adapt = "--autoadapt " if config["DEMIX"]["AUTO_ADAPT"] else "", + relaxed_mrca = "--relaxedmrca " if config["DEMIX"]["RELAXED_MRCA"] else "", + relaxed_mrca_thresh = config["DEMIX"]["RELAXED_MRCA_THRESH"], + solver = config["DEMIX"]["SOLVER"], output: - demix_file = OUTDIR/"demixing"/"{sample}/{sample}_demixed.tsv" + demix_file = OUTDIR/"demixing"/"samples"/"{sample}/{sample}_demixed.tsv" log: LOGDIR / "demix" / "{sample}.log.txt" shell: - """ - freyja demix \ - "{input.variants_file}" \ - "{input.depth_file}" \ - --eps {params.minimum_abundance} \ - --covcut {params.coverage_cutoff} \ - --output {output.demix_file} >{log} 2>&1 - """ + "freyja demix " + "{input.variants_file:q} " + "{input.depth_file:q} " + "--barcodes {input.barcodes:q} " + "--meta {input.curated_lineages:q} " + 
"--lineageyml {input.lineage_yml:q} " + "--eps {params.minimum_abundance} " + "--covcut {params.coverage_cutoff} " + "--depthcutoff {params.depth_cutoff} " + "{params.confirmed_only}" + "{params.auto_adapt}" + "{params.relaxed_mrca}" + "--relaxedthresh {params.relaxed_mrca_thresh} " + "--solver {params.solver} " + "--output {output.demix_file} " + "--max-solver-threads {threads} " + ">{log} 2>&1" -rule summarise_demixing: +rule summarise_demix: threads: 1 conda: "../envs/renv.yaml" shadow: "shallow" input: - tables = expand(OUTDIR/"demixing"/"{sample}/{sample}_demixed.tsv", sample=iter_samples()) + tables = expand(OUTDIR/"demixing"/"samples"/"{sample}/{sample}_demixed.tsv", sample=iter_samples()) output: - summary_df = report(OUTDIR/"summary_freyja_demixing.csv") + summary_df = report(OUTDIR/"demixing"/"summary.csv") log: - LOGDIR / "summarise_demixing" / "log.txt" + LOGDIR / "summarise_demix" / "log.txt" script: - "../scripts/summary_demixing.R" + "../scripts/summarise_demix.R" diff --git a/workflow/rules/distances.smk b/workflow/rules/distances.smk index 4161df9..11fae38 100644 --- a/workflow/rules/distances.smk +++ b/workflow/rules/distances.smk @@ -1,17 +1,52 @@ -rule weighted_distances: - threads: 1 +rule extract_afwdist_variants: conda: "../envs/biopython.yaml" params: - samples = expand("{sample}", sample = iter_samples()), - mask_class = ["mask"] + sample_col = "SAMPLE", + position_col = "POS", + sequence_col = "ALT", + frequency_col = "ALT_FREQ", + mask_class = ["mask"], input: - tsv = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv", - vcf = lambda wildcards: select_problematic_vcf(), + variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv", + mask_vcf = lambda wildcards: select_problematic_vcf(), ancestor = OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta", - reference = OUTDIR/"reference.fasta" + reference = OUTDIR/"reference.fasta", output: - distances = REPORT_DIR_TABLES/f"figure_8.csv" + variants = temp(OUTDIR/f"{OUTPUT_NAME}.variants.afwdist.csv"), log: - LOGDIR / "weighted_distances" / "log.txt" + LOGDIR/"extract_afwdist_variants"/"log.txt" script: - "../scripts/weighted_distances.py" + "../scripts/extract_afwdist_variants.py" + + +rule afwdist_weighted_distances: + conda: "../envs/afwdist.yaml" + params: + extra_args = "", + input: + variants = OUTDIR/f"{OUTPUT_NAME}.variants.afwdist.csv", + reference = OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta", + output: + distances = temp(OUTDIR/f"{OUTPUT_NAME}.distances.raw.csv"), + log: + LOGDIR/"afwdist_weighted_distances"/"log.txt" + shell: + "afwdist " + "-i {input.variants:q} " + "-r {input.reference:q} " + "-o {output.distances:q} " + "{params.extra_args} >{log:q} 2>&1" + + +rule format_afwdist_results: + conda: "../envs/biopython.yaml" + params: + samples = sorted(iter_samples()) + [config["ALIGNMENT_REFERENCE"]], + input: + distances = OUTDIR/f"{OUTPUT_NAME}.distances.raw.csv", + output: + distances = OUTDIR/f"{OUTPUT_NAME}.distances.csv", + log: + LOGDIR/"format_afwdist_results"/"log.txt" + script: + "../scripts/format_afwdist_results.py" diff --git a/workflow/rules/evolution.smk b/workflow/rules/evolution.smk index e89d358..0380940 100644 --- a/workflow/rules/evolution.smk +++ b/workflow/rules/evolution.smk @@ -1,14 +1,45 @@ -rule N_S_sites: +rule filter_genbank_features: threads: 1 conda: "../envs/biopython.yaml" + params: + included = config.get("GB_FEATURES", {}).get("INCLUDE", {}), + excluded = config.get("GB_FEATURES", {}).get("EXCLUDE", {}), input: - fasta = OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta", gb = OUTDIR/"reference.gb", - features = 
Path(config["FEATURES_JSON"]).resolve(), - genetic_code = Path(config["GENETIC_CODE_JSON"]).resolve() output: - csv = temp(OUTDIR/f"{OUTPUT_NAME}.ancestor.N_S.sites.csv") + gb = OUTDIR/"reference.cds.gb", + log: + LOGDIR / "filter_genbank_features" / "log.txt" + script: + "../scripts/filter_genbank_features.py" + + +rule n_s_sites: + threads: 1 + conda: "../envs/biopython.yaml" + params: + gb_qualifier_display = "gene", + input: + fasta = OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta", + gb = OUTDIR/"reference.cds.gb", + genetic_code = Path(config["GENETIC_CODE_JSON"]).resolve(), + output: + csv = temp(OUTDIR/f"{OUTPUT_NAME}.ancestor.n_s.sites.csv"), + log: + LOGDIR / "n_s_sites" / "log.txt" + script: + "../scripts/n_s_sites_from_fasta.py" + + +rule calculate_dnds: + conda: "../envs/renv.yaml" + input: + n_s_sites = OUTDIR/f"{OUTPUT_NAME}.ancestor.n_s.sites.csv", + variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv", + metadata = config["METADATA"] + output: + table = OUTDIR/f"{OUTPUT_NAME}.dnds.csv", log: - LOGDIR / "N_S_sites" / "log.txt" + LOGDIR / "calculate_dnds" / "log.txt" script: - "../scripts/N_S_sites_from_fasta.py" + "../scripts/calculate_dnds.R" diff --git a/workflow/rules/fetch.smk b/workflow/rules/fetch.smk index 9f290be..3e4860c 100644 --- a/workflow/rules/fetch.smk +++ b/workflow/rules/fetch.smk @@ -66,3 +66,8 @@ rule fetch_problematic_vcf: select_problematic_vcf() shell: "curl {params.url} -o {output} -s 2> {log}" + + +rule create_empty_file: + output: + temp(touch(OUTDIR/"empty.txt")) diff --git a/workflow/rules/report.smk b/workflow/rules/report.smk index df45ea1..77aa6aa 100644 --- a/workflow/rules/report.smk +++ b/workflow/rules/report.smk @@ -1,160 +1,395 @@ -rule heatmap: +rule demix_plot_data: conda: "../envs/renv.yaml" input: - vcf = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv", + summary_demixing = OUTDIR/"demixing"/"summary.csv", metadata = config["METADATA"] output: - table = report(REPORT_DIR_TABLES/"figure_10.csv") + data = REPORT_DIR_TABLES/"demix.csv" log: - LOGDIR / "heatmap" / "log.txt" + LOGDIR / "demix_plot_data" / "log.txt" script: - "../scripts/report/heatmap.R" + "../scripts/report/demix_plot_data.R" -rule window: - conda: "../envs/biopython.yaml" +rule demix_plot: + conda: "../envs/renv.yaml" params: - window = config["WINDOW"]["WIDTH"], - step = config["WINDOW"]["STEP"] + design = config["PLOTS"], + plot_width_mm = 159.2, + plot_height_mm = 119.4, input: - vcf = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv", - gb = OUTDIR/"reference.gb", - features = config["FEATURES_JSON"] + data = REPORT_DIR_TABLES/"demix.csv" output: - window_df = temp(OUTDIR/f"{OUTPUT_NAME}.window.csv"), + plot = report(REPORT_DIR_PLOTS/"demix.png") log: - LOGDIR / "window" / "log.txt" + LOGDIR / "demix_plot" / "log.txt" script: - "../scripts/window.py" + "../scripts/report/demix_plot.R" -rule diversity: +rule diversity_data: threads: 4 conda: "../envs/renv.yaml" params: - design = config["PLOTS"], bootstrap_reps = config["DIVERSITY_REPS"], - plot_width = 159.2, - plot_height = 119.4 + aln_reference = config["ALIGNMENT_REFERENCE"], + seed = 7291, input: study_fasta = OUTDIR/"nextalign"/f"{OUTPUT_NAME}.aligned.masked.fasta", - context_fasta = OUTDIR/"context"/"nextalign"/"context_sequences.aligned.masked.fasta" + context_fasta = OUTDIR/"context"/"nextalign"/"context_sequences.aligned.masked.fasta", + output: + divs = REPORT_DIR_TABLES/"diversity.txt", + json = REPORT_DIR_TABLES/"diversity.json", + log: + LOGDIR / "diversity_data" / "log.txt" + script: + "../scripts/report/diversity_data.R" + + 
+rule diversity_plot:
+    threads: 1
+    conda: "../envs/renv.yaml"
+    params:
+        design = config["PLOTS"],
+        plot_width_mm = 159.2,
+        plot_height_mm = 119.4,
+    input:
+        divs = REPORT_DIR_TABLES/"diversity.txt",
+        json = REPORT_DIR_TABLES/"diversity.json",
     output:
-        fig = report(REPORT_DIR_PLOTS/"figure_3.png"),
-        json = temp(OUTDIR/"diversity.json"),
-        table = REPORT_DIR_TABLES/"figure_3.csv"
+        plot = report(REPORT_DIR_PLOTS/"diversity.png"),
     log:
-        LOGDIR / "diversity" / "log.txt"
+        LOGDIR / "diversity_plot" / "log.txt"
     script:
         "../scripts/report/diversity_plot.R"
 
 
-rule freyja_plot:
+rule extract_genbank_regions:
+    conda: "../envs/biopython.yaml"
+    params:
+        gb_qualifier = "gene",
+    input:
+        gb = OUTDIR/"reference.cds.gb",
+    output:
+        regions = temp(REPORT_DIR_TABLES/"genbank_regions.json"),
+    log:
+        LOGDIR / "extract_genbank_regions" / "log.txt"
+    script:
+        "../scripts/report/extract_genbank_regions.py"
+
+
+rule polymorphic_sites_over_time_data:
     conda: "../envs/renv.yaml"
     params:
-        design = config["PLOTS"]
+        max_alt_freq = 1.0 - config["VC"]["MIN_FREQ"],
     input:
-        summary_demixing = OUTDIR/"summary_freyja_demixing.csv",
-        metadata = config["METADATA"]
+        variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
+        metadata = config["METADATA"],
     output:
-        fig = report(REPORT_DIR_PLOTS/"figure_1.png"),
-        table = report(REPORT_DIR_TABLES/"figure_1.csv")
+        table = REPORT_DIR_TABLES/"polymorphic_sites_over_time.csv",
+        json = temp(REPORT_DIR_TABLES/"polymorphic_sites_over_time.json"),
     log:
-        LOGDIR / "freyja_plot" / "log.txt"
+        LOGDIR / "polymorphic_sites_over_time_data" / "log.txt"
     script:
-        "../scripts/report/freyja_plot.R"
+        "../scripts/report/polymorphic_sites_over_time_data.R"
 
 
-rule general_NV_description:
+rule polymorphic_sites_over_time_plot:
     conda: "../envs/renv.yaml"
     params:
-        samples = expand("{sample}", sample = iter_samples()),
         design = config["PLOTS"],
-        regions = config["PLOT_GENOME_REGIONS"],
+        plot_width_mm = 159.2,
+        plot_height_mm = 119.4,
+    input:
+        table = REPORT_DIR_TABLES/"polymorphic_sites_over_time.csv",
+    output:
+        plot = report(REPORT_DIR_PLOTS/"polymorphic_sites_over_time.png"),
+    log:
+        LOGDIR / "polymorphic_sites_over_time_plot" / "log.txt"
+    script:
+        "../scripts/report/polymorphic_sites_over_time_plot.R"
+
+
+rule window_data:
+    conda: "../envs/biopython.yaml"
+    params:
         window = config["WINDOW"]["WIDTH"],
         step = config["WINDOW"]["STEP"],
-        max_alt_freq = 1.0 - config["VC"]["IVAR_FREQ"]
+        features = config.get("GB_FEATURES", {}),
+        gb_qualifier_display = "gene"
     input:
-        window = OUTDIR/f"{OUTPUT_NAME}.window.csv",
-        vcf = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv",
-        metadata = config["METADATA"]
+        variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
+        gb = OUTDIR/"reference.gb",
     output:
-        fig = report(REPORT_DIR_PLOTS/"figure_5a.png"),
-        fig_s = report(REPORT_DIR_PLOTS/"figure_5b.png"),
-        fig_cor = report(REPORT_DIR_PLOTS/"figure_4.png"),
-        json = temp(OUTDIR/"summary_nv.json"),
-        table_1 = report(REPORT_DIR_TABLES/"figure_5a.csv"),
-        table_2 = report(REPORT_DIR_TABLES/"figure_5b.csv"),
-        table_3 = report(REPORT_DIR_TABLES/"figure_4.csv")
+        window_df = REPORT_DIR_TABLES/"window.csv",
+        json = temp(REPORT_DIR_TABLES/"window.json"),
     log:
-        LOGDIR / "general_NV_description" / "log.txt"
+        LOGDIR / "window_data" / "log.txt"
     script:
-        "../scripts/report/NV_description.R"
+        "../scripts/report/window_data.py"
 
 
-rule phylo_plots:
+rule nv_panel_data:
+    conda: "../envs/renv.yaml"
+    input:
+        variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
+        metadata = config["METADATA"],
+    output:
+        table = 
REPORT_DIR_TABLES/"nv_panel.csv", + json = temp(REPORT_DIR_TABLES/"nv_panel.json"), + log: + LOGDIR / "nv_panel_data" / "log.txt" + script: + "../scripts/report/nv_panel_data.R" + + +rule nv_panel_zoom_on_feature_data: + input: + table = REPORT_DIR_TABLES/"nv_panel.csv", + regions = REPORT_DIR_TABLES/"genbank_regions.json", + output: + table = temp(REPORT_DIR_TABLES/"nv_panel.{region_name}.csv"), + log: + LOGDIR / "nv_panel_zoom_on_feature_data" / "{region_name}.log.txt" + script: + "../scripts/report/nv_panel_zoom_on_feature_data.py" + + +rule window_zoom_on_feature_data: + input: + table = REPORT_DIR_TABLES/"window.csv", + regions = REPORT_DIR_TABLES/"genbank_regions.json", + output: + table = temp(REPORT_DIR_TABLES/"window.{region_name}.csv"), + log: + LOGDIR / "window_zoom_on_feature_data" / "{region_name}.log.txt" + script: + "../scripts/report/window_zoom_on_feature_data.py" + + +rule nv_panel_plot: + conda: "../envs/renv.yaml" + params: + design = config["PLOTS"], + window_step = config["WINDOW"]["STEP"], + plot_height_mm = 250.0, + plot_width_mm = 240.0, + input: + panel = REPORT_DIR_TABLES/"nv_panel.csv", + window = REPORT_DIR_TABLES/"window.csv", + regions = REPORT_DIR_TABLES/"genbank_regions.json", + highlight_window_regions = config["PLOT_GENOME_REGIONS"], + output: + plot = report(REPORT_DIR_PLOTS/"nv_panel.png"), + log: + LOGDIR / "nv_panel_plot" / "log.txt" + script: + "../scripts/report/nv_panel_plot.R" + + +use rule nv_panel_plot as nv_panel_plot_S with: + input: + panel = REPORT_DIR_TABLES/"nv_panel.S.csv", + window = REPORT_DIR_TABLES/"window.S.csv", + regions = REPORT_DIR_TABLES/"genbank_regions.json", + highlight_window_regions = OUTDIR/"empty.txt", + output: + plot = report(REPORT_DIR_PLOTS/"nv_panel.S.png"), + log: + LOGDIR / "nv_panel_plot_S" / "log.txt" + + +rule merge_json_files: + input: + REPORT_DIR_TABLES/"nv_panel.json", + REPORT_DIR_TABLES/"polymorphic_sites_over_time.json", + REPORT_DIR_TABLES/"window.json", + output: + json = REPORT_DIR_TABLES/"nv_panel_summary.json", + run: + import json + result = {} + for path in input: + with open(path) as f: + d = json.load(f) + result |= d # will replace existing keys + with open(output.json, "w") as fw: + json.dump(result, fw, indent=2) + + +rule context_phylogeny_data: conda: "../envs/renv.yaml" params: design = config["PLOTS"], ref_name = config["ALIGNMENT_REFERENCE"], - boot_th = 95, - alrt_th = 80, + boot_th = config["UFBOOT"]["THRESHOLD"], + alrt_th = config["SHALRT"]["THRESHOLD"], + input: + target_fasta = OUTDIR/f"{OUTPUT_NAME}.fasta", + tree = OUTDIR/f"tree_context/{OUTPUT_NAME}.treefile", + output: + json = REPORT_DIR_TABLES/"context_phylogeny.json", + annotation = REPORT_DIR_TABLES/"context_phylogeny.csv", + log: + LOGDIR / "context_phylogeny_data" / "log.txt" + script: + "../scripts/report/context_phylogeny_data.R" + + +rule context_phylogeny_plot: + conda: "../envs/renv.yaml" + params: + design = config["PLOTS"], plot_height_mm = 119.4, plot_width_mm = 159.2, - use_bionj = config["USE_BIONJ"] + boot_th = config["UFBOOT"]["THRESHOLD"], + alrt_th = config["SHALRT"]["THRESHOLD"], + input: + tree = OUTDIR/f"tree_context/{OUTPUT_NAME}.treefile", + json = REPORT_DIR_TABLES/"context_phylogeny.json", + annotation = REPORT_DIR_TABLES/"context_phylogeny.csv" + output: + plot = report(REPORT_DIR_PLOTS/"context_phylogeny.png"), + log: + LOGDIR / "context_phylogeny_plot" / "log.txt" + script: + "../scripts/report/context_phylogeny_plot.R" + + +rule allele_freq_tree_data: + conda: "../envs/renv.yaml" + params: + 
use_bionj = config["USE_BIONJ"],
+        outgroup_id = config["ALIGNMENT_REFERENCE"],
     input:
-        dist = REPORT_DIR_TABLES/f"figure_8.csv",
+        dist = OUTDIR/f"{OUTPUT_NAME}.distances.csv",
+    output:
+        tree = REPORT_DIR_TABLES/"allele_freq_tree.nwk",
+    log:
+        LOGDIR / "allele_freq_tree_data" / "log.txt"
+    script:
+        "../scripts/report/allele_freq_tree_data.R"
+
+
+rule allele_freq_tree_plot:
+    conda: "../envs/renv.yaml"
+    params:
+        design = config["PLOTS"],
+        outgroup_id = config["ALIGNMENT_REFERENCE"],
+        plot_height_mm = 119.4,
+        plot_width_mm = 159.2,
+    input:
+        tree = REPORT_DIR_TABLES/"allele_freq_tree.nwk",
         study_fasta = OUTDIR/f"{OUTPUT_NAME}.fasta",
-        ml = OUTDIR/f"tree_context/{OUTPUT_NAME}.treefile",
-        metadata = config["METADATA"]
+        metadata = config["METADATA"],
+    output:
+        plot = report(REPORT_DIR_PLOTS/"allele_freq_tree.png"),
+    log:
+        LOGDIR / "allele_freq_tree_plot" / "log.txt"
+    script:
+        "../scripts/report/allele_freq_tree_plot.R"
+
+
+rule time_signal_data:
+    conda: "../envs/renv.yaml"
+    params:
+        outgroup_id = config["ALIGNMENT_REFERENCE"],
+    input:
+        tree = REPORT_DIR_TABLES/"allele_freq_tree.nwk",
+        metadata = config["METADATA"],
+    output:
+        table = report(REPORT_DIR_TABLES/"time_signal.csv"),
+        json = REPORT_DIR_TABLES/"time_signal.json",
+    log:
+        LOGDIR / "time_signal_data" / "log.txt"
+    script:
+        "../scripts/report/time_signal_data.R"
+
+
+rule time_signal_plot:
+    conda: "../envs/renv.yaml"
+    params:
+        design = config["PLOTS"],
+        plot_height_mm = 119.4,
+        plot_width_mm = 159.2,
+    input:
+        table = REPORT_DIR_TABLES/"time_signal.csv",
     output:
-        temest = report(REPORT_DIR_PLOTS/"figure_9.png"),
-        tree = report(REPORT_DIR_PLOTS/"figure_8.png"),
-        tree_ml = report(REPORT_DIR_PLOTS/"figure_2.png"),
-        table = report(REPORT_DIR_TABLES/"figure_9.csv"),
-        json = temp(OUTDIR/"stats.lm.json")
+        plot = report(REPORT_DIR_PLOTS/"time_signal.png"),
     log:
-        LOGDIR / "phylo_plots" / "log.txt"
+        LOGDIR / "time_signal_plot" / "log.txt"
     script:
-        "../scripts/report/phylo_plots.R"
+        "../scripts/report/time_signal_plot.R"
 
 
-rule evo_plots:
+rule dnds_plots:
     conda: "../envs/renv.yaml"
     params:
-        design = config["PLOTS"]
+        design = config["PLOTS"],
+        plot_height_mm = 119.4,
+        plot_width_mm = 159.2,
     input:
-        N_S = OUTDIR/f"{OUTPUT_NAME}.ancestor.N_S.sites.csv",
-        vcf = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv",
-        metadata = config["METADATA"]
+        table = OUTDIR/f"{OUTPUT_NAME}.dnds.csv",
     output:
-        plot = report(REPORT_DIR_PLOTS/"figure_11.png"),
-        plot_omega = report(REPORT_DIR_PLOTS/"figure_12.png"),
-        table = report(REPORT_DIR_TABLES/"figure_11.csv")
+        plot_dn_ds = report(REPORT_DIR_PLOTS/"dn_and_ds.png"),
+        plot_omega = report(REPORT_DIR_PLOTS/"dnds.png"),
     log:
-        LOGDIR / "evo_plots" / "log.txt"
+        LOGDIR / "dnds_plots" / "log.txt"
     script:
-        "../scripts/report/evo_plots.R"
+        "../scripts/report/dnds_plots.R"
 
 
-rule snp_plots:
+rule af_time_correlation_data:
     conda: "../envs/renv.yaml"
     params:
-        design = config["PLOTS"],
         cor_method = config["COR"]["METHOD"],
-        cor_exact = config["COR"]["EXACT"]
+        cor_exact = config["COR"]["EXACT"],
     input:
-        vcf = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv",
-        metadata = config["METADATA"]
+        variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv",
+        metadata = config["METADATA"],
     output:
-        pseudovolcano = report(REPORT_DIR_PLOTS/"figure_6.png"),
-        snp_panel = report(REPORT_DIR_PLOTS/"figure_7.png"),
-        table_1 = report(REPORT_DIR_TABLES/"figure_6.csv"),
-        table_2 = report(REPORT_DIR_TABLES/"figure_7.csv")
+        fmt_variants = temp(REPORT_DIR_TABLES/"variants.filled.dated.tsv"),
+        correlations = 
report(REPORT_DIR_TABLES/"af_time_correlation.csv"), + subset = REPORT_DIR_TABLES/"af_time_correlation.subset.txt", log: - LOGDIR / "snp_plots" / "log.txt" + LOGDIR / "af_time_correlation_data" / "log.txt" script: - "../scripts/report/snp_plots.R" + "../scripts/report/af_time_correlation_data.R" + + +rule af_time_correlation_plot: + conda: "../envs/renv.yaml" + params: + design = config["PLOTS"], + plot_height_mm = 119.4, + plot_width_mm = 159.2, + input: + correlations = REPORT_DIR_TABLES/"af_time_correlation.csv", + output: + plot = report(REPORT_DIR_PLOTS/"af_time_correlation.png"), + log: + LOGDIR / "af_time_correlation_plot" / "log.txt" + script: + "../scripts/report/af_time_correlation_plot.R" + + +rule af_trajectory_panel_plot: + conda: "../envs/renv.yaml" + params: + design = config["PLOTS"], + n_plot_columns = 3, + plot_row_height_mm = 35, + plot_width_mm = 159.2, + random_color_seed = 7291, + input: + fmt_variants = REPORT_DIR_TABLES/"variants.filled.dated.tsv", + subset = REPORT_DIR_TABLES/"af_time_correlation.subset.txt" + output: + plot = report(REPORT_DIR_PLOTS/"af_trajectory_panel.png"), + log: + LOGDIR / "af_trajectory_panel_plot" / "log.txt" + script: + "../scripts/report/af_trajectory_panel_plot.R" rule summary_table: @@ -163,7 +398,7 @@ rule summary_table: report = report(OUTDIR/f"{OUTPUT_NAME}.lineage_report.csv"), metadata = config["METADATA"] output: - table = temp(OUTDIR/"summary_table.csv") + table = REPORT_DIR_TABLES/"summary_table.csv" log: LOGDIR / "summary_table" / "log.txt" script: @@ -172,67 +407,77 @@ rule summary_table: rule report: conda: "../envs/quarto_render.yaml" - shadow: "shallow" + shadow: "minimal" input: qmd = Path(config["REPORT_QMD"]).resolve(), - freyja = report(REPORT_DIR_PLOTS/"figure_1.png"), - tree_ml = report(REPORT_DIR_PLOTS/"figure_2.png"), - diversity = report(REPORT_DIR_PLOTS/"figure_3.png"), - fig_cor = report(REPORT_DIR_PLOTS/"figure_4.png"), - SNV = report(REPORT_DIR_PLOTS/"figure_5a.png"), - SNV_spike = report(REPORT_DIR_PLOTS/"figure_5b.png"), - volcano = report(REPORT_DIR_PLOTS/"figure_6.png"), - panel = report(REPORT_DIR_PLOTS/"figure_7.png"), - tree = report(REPORT_DIR_PLOTS/"figure_8.png"), - temest = report(REPORT_DIR_PLOTS/"figure_9.png"), - heat_table = report(REPORT_DIR_TABLES/"figure_10.csv"), - evo = report(REPORT_DIR_PLOTS/"figure_11.png"), - omega_plot = report(REPORT_DIR_PLOTS/"figure_12.png"), - value = OUTDIR/"diversity.json", - stats_lm = OUTDIR/"stats.lm.json", - table = OUTDIR/"summary_table.csv", - sum_nv = OUTDIR/"summary_nv.json" + css = Path(config["REPORT_CSS"]).resolve(), + demix = report(REPORT_DIR_PLOTS/"demix.png"), + tree_ml = report(REPORT_DIR_PLOTS/"context_phylogeny.png"), + diversity = report(REPORT_DIR_PLOTS/"diversity.png"), + fig_cor = report(REPORT_DIR_PLOTS/"polymorphic_sites_over_time.png"), + SNV = report(REPORT_DIR_PLOTS/"nv_panel.png"), + SNV_spike = report(REPORT_DIR_PLOTS/"nv_panel.S.png"), + volcano = report(REPORT_DIR_PLOTS/"af_time_correlation.png"), + panel = report(REPORT_DIR_PLOTS/"af_trajectory_panel.png"), + tree = report(REPORT_DIR_PLOTS/"allele_freq_tree.png"), + temest = report(REPORT_DIR_PLOTS/"time_signal.png"), + evo = report(REPORT_DIR_PLOTS/"dn_and_ds.png"), + omega_plot = report(REPORT_DIR_PLOTS/"dnds.png"), + heat_table = report(OUTDIR/"vaf"/"pairwise_trajectory_correlation.csv"), + freyja_ts = OUTDIR/"demixing"/"freyja_data"/"last_barcode_update.txt", + value = REPORT_DIR_TABLES/"diversity.json", + stats_lm = REPORT_DIR_TABLES/"time_signal.json", + stats_ml = 
REPORT_DIR_TABLES/"context_phylogeny.json", + table = REPORT_DIR_TABLES/"summary_table.csv", + sum_nv = REPORT_DIR_TABLES/"nv_panel_summary.json", params: workflow_version = get_repo_version(BASE_PATH.as_posix(), __version__), - min_ivar_freq = config["VC"]["IVAR_FREQ"], - ufboot_reps = config["UFBOOT_REPS"], - shalrt_reps = config["SHALRT_REPS"], + min_ivar_freq = config["VC"]["MIN_FREQ"], + ufboot_reps = config["UFBOOT"]["REPS"], + shalrt_reps = config["SHALRT"]["REPS"], name = config["OUTPUT_NAME"], use_bionj = config["USE_BIONJ"], - cor_method = config["COR"]["METHOD"] + cor_method = config["COR"]["METHOD"], output: html = report(OUTDIR/f"{OUTPUT_NAME}.report.html") log: LOGDIR / "report" / "log.txt" shell: - "set +o pipefail; " - "Rscript -e 'library(quarto)' " - "-e \"quarto_render(" - "input = '{input.qmd}', " - "execute_params=list(" - "ufboot_reps='{params.ufboot_reps}', " - "shalrt_reps='{params.shalrt_reps}', " - "min_ivar_freq='{params.min_ivar_freq}', " - "workflow_version='{params.workflow_version}', " - "use_bionj='{params.use_bionj}', " - "cor_method='{params.cor_method}', " - "div='{input.diversity}', " - "freyja ='{input.freyja}', " - "tree = '{input.tree}', " - "tempest = '{input.temest}', " - "SNV = '{input.SNV}', " - "SNV_s = '{input.SNV_spike}', " - "evo = '{input.evo}', " - "div_value = '{input.value}', " - "panel = '{input.panel}', " - "volcano = '{input.volcano}', " - "tree_ml = '{input.tree_ml}', " - "fig_cor_snp = '{input.fig_cor}', " - "stats_lm = '{input.stats_lm}', " - "table = '{input.table}', " - "sum_nv = '{input.sum_nv}', " - "heat_tab = '{input.heat_table}', " - "omega_plot = '{input.omega_plot}', " - "name = '{params.name}'))\" " - ">{log} 2>&1 && " - 'mv "$(dirname {input.qmd:q})/report.html" {output.html:q}' + """ + set +o pipefail + exec >{log} && exec 2>&1 + + printf "%s\n" \ + "ufboot_reps: '{params.ufboot_reps}'" \ + "shalrt_reps: '{params.shalrt_reps}'" \ + "min_ivar_freq: '{params.min_ivar_freq}'" \ + "workflow_version: '{params.workflow_version}'" \ + "use_bionj: '{params.use_bionj}'" \ + "cor_method: '{params.cor_method}'" \ + "div: '{input.diversity}'" \ + "demix: '{input.demix}'" \ + "tree: '{input.tree}'" \ + "tempest: '{input.temest}'" \ + "SNV: '{input.SNV}'" \ + "SNV_s: '{input.SNV_spike}'" \ + "evo: '{input.evo}'" \ + "div_value: '{input.value}'" \ + "panel: '{input.panel}'" \ + "volcano: '{input.volcano}'" \ + "tree_ml: '{input.tree_ml}'" \ + "fig_cor_snp: '{input.fig_cor}'" \ + "stats_lm: '{input.stats_lm}'" \ + "stats_ml: '{input.stats_ml}'" \ + "table: '{input.table}'" \ + "sum_nv: '{input.sum_nv}'" \ + "heat_tab: '{input.heat_table}'" \ + "omega_plot: '{input.omega_plot}'" \ + "freyja_ts: '{input.freyja_ts}'" \ + "name: '{params.name}'" \ + >params.yaml + + sed "s|__CSSPLACEHOLDER__|{input.css}|g" {input.qmd:q} >report.qmd + + quarto render report.qmd --execute-params params.yaml + mv report.html {output.html:q} + """ diff --git a/workflow/rules/vaf.smk b/workflow/rules/vaf.smk index bd16219..2a2560b 100644 --- a/workflow/rules/vaf.smk +++ b/workflow/rules/vaf.smk @@ -1,20 +1,20 @@ rule snps_to_ancestor: - threads: 1 + threads: 2 retries: 3 shadow: "minimal" conda: "../envs/var_calling.yaml" params: - max_depth = config["VC"]["MAX_DEPTH"], - min_quality = config["VC"]["MIN_QUALITY"], - ivar_quality = config["VC"]["IVAR_QUALITY"], - ivar_freq = config["VC"]["IVAR_FREQ"], - ivar_depth = config["VC"]["IVAR_DEPTH"] + mpileup_depth = config["VC"]["MAX_DEPTH"], + mpileup_quality = 0, + ivar_quality = config["VC"]["MIN_QUALITY"], + 
ivar_freq = config["VC"]["MIN_FREQ"], + ivar_depth = config["VC"]["MIN_DEPTH"], input: reference_fasta = OUTDIR/f"{OUTPUT_NAME}.ancestor.fasta", bam = get_input_bam, gff = OUTDIR/"reference.gff3" output: - tsv = temp(OUTDIR/"{sample}.tsv") + tsv = temp(OUTDIR/"vaf"/"{sample}.tsv") log: LOGDIR / "snps_to_ancestor" / "{sample}.log.txt" shell: @@ -35,58 +35,20 @@ rule snps_to_ancestor: samtools mpileup \ -aa \ --ignore-overlaps \ - -d {params.max_depth} \ + -d {params.mpileup_depth} \ --count-orphans \ --no-BAQ \ - -Q {params.min_quality} \ + -Q {params.mpileup_quality} \ -f renamed_reference.fasta \ {input.bam} \ | ivar variants \ - -p {wildcards.sample} \ + -p variants \ -q {params.ivar_quality} \ -t {params.ivar_freq} \ -m {params.ivar_depth} \ -g {input.gff} \ -r renamed_reference.fasta - - sed 's/'$ref'/'{wildcards.sample}'/g' {wildcards.sample}.tsv | cat > {output.tsv} - """ - - -rule annotation: - threads:1 - conda: "../envs/biopython.yaml" - shadow: "shallow" - input: - gb = OUTDIR/"reference.gb", - ref = OUTDIR/"reference.fasta", - features = Path(config["FEATURES_JSON"]).resolve() - output: - df = temp(OUTDIR/"annotation.csv") - log: - LOGDIR / "annotation" / "log.txt" - script: - "../scripts/report/get_annotation.py" - - -rule format_tsv: - threads:1 - shadow: "shallow" - input: - expand(OUTDIR/"{sample}.tsv", sample = iter_samples()) - output: - tsv = OUTDIR/f"{OUTPUT_NAME}.tsv" - log: - LOGDIR / "format_tsv" / "log.txt" - shell: - """ - path=`echo {input} | awk '{{print $1}}'` - grep "^REGION" "$path" > header - for tsv in {input}; do - tail -n +2 "$tsv" >> body - done - cat header body > "{output.tsv}" - rm header + mv variants.tsv {output.tsv:q} """ @@ -96,12 +58,12 @@ rule mask_tsv: params: mask_class = ["mask"] input: - tsv = OUTDIR/f"{OUTPUT_NAME}.tsv", + tsv = OUTDIR/"vaf"/"{sample}.tsv", vcf = lambda wildcards: select_problematic_vcf() output: - masked_tsv = temp(OUTDIR/f"{OUTPUT_NAME}.masked.tsv") + masked_tsv = temp(OUTDIR/"vaf"/"{sample}.masked.tsv") log: - LOGDIR / "mask_tsv" / "log.txt" + LOGDIR / "mask_tsv" / "{sample}.log.txt" script: "../scripts/mask_tsv.py" @@ -109,13 +71,16 @@ rule mask_tsv: rule filter_tsv: threads: 1 conda: "../envs/renv.yaml" + params: + min_depth = 20, + min_alt_rv = 2, + min_alt_dp = 2, input: - tsv = OUTDIR/f"{OUTPUT_NAME}.masked.tsv", - annotation = OUTDIR/"annotation.csv" + tsv = OUTDIR/"vaf"/"{sample}.masked.tsv" output: - filtered_tsv = temp(OUTDIR/f"{OUTPUT_NAME}.masked.prefiltered.tsv") + filtered_tsv = temp(OUTDIR/"vaf"/"{sample}.masked.prefiltered.tsv") log: - LOGDIR / "filter_tsv" / "log.txt" + LOGDIR / "filter_tsv" / "{sample}.log.txt" script: "../scripts/filter_tsv.R" @@ -123,12 +88,14 @@ rule filter_tsv: rule tsv_to_vcf: threads: 1 conda: "../envs/biopython.yaml" + params: + ref_name = config["ALIGNMENT_REFERENCE"], input: - tsv = OUTDIR/f"{OUTPUT_NAME}.masked.prefiltered.tsv", + tsv = OUTDIR/"vaf"/"{sample}.masked.prefiltered.tsv", output: - vcf = temp(OUTDIR/f"{OUTPUT_NAME}.vcf") + vcf = temp(OUTDIR/"vaf"/"{sample}.vcf") log: - LOGDIR / "tsv_to_vcf" / "log.txt" + LOGDIR / "tsv_to_vcf" / "{sample}.log.txt" script: "../scripts/tsv_to_vcf.py" @@ -141,11 +108,11 @@ rule variants_effect: ref_name = config["ALIGNMENT_REFERENCE"], snpeff_data_dir = (BASE_PATH / "config" / "snpeff").resolve() input: - vcf = OUTDIR/f"{OUTPUT_NAME}.vcf" + vcf = OUTDIR/"vaf"/"{sample}.vcf" output: - ann_vcf = temp(OUTDIR/f"{OUTPUT_NAME}.annotated.vcf") + ann_vcf = OUTDIR/"vaf"/"{sample}.annotated.vcf" log: - LOGDIR / "variants_effect" / "log.txt" + 
LOGDIR / "variants_effect" / "{sample}.log.txt" retries: 2 shell: """ @@ -159,19 +126,93 @@ rule variants_effect: echo "Local database not found at '{params.snpeff_data_dir}', downloading from repository" fi - snpEff eff -dataDir {params.snpeff_data_dir} -noStats {params.ref_name} {input.vcf} > {output.ann_vcf} + snpEff eff -dataDir {params.snpeff_data_dir} -noStats {params.ref_name} {input.vcf} >{output.ann_vcf} """ -rule vcf_to_tsv: +rule extract_vcf_fields: threads: 1 + conda: "../envs/snpeff.yaml" + params: + extract_columns = [f"'{col}'" for col in config["ANNOTATION"]["SNPEFF_COLS"].values()], + sep = ",", + input: + vcf = OUTDIR/"vaf"/"{sample}.annotated.vcf" + output: + tsv = OUTDIR/"vaf"/"{sample}.vcf_fields.tsv" + log: + LOGDIR / "tsv_to_vcf" / "{sample}.log.txt" + shell: + "SnpSift extractFields -e 'NA' -s {params.sep:q} {input.vcf:q} {params.extract_columns} >{output.tsv:q} 2>{log:q}" + + +rule format_vcf_fields_longer: + conda: "../envs/renv.yaml" + params: + sample = "{sample}", + colnames_mapping = config["ANNOTATION"]["SNPEFF_COLS"], + filter_include = config["ANNOTATION"]["FILTER_INCLUDE"], + filter_exclude = config["ANNOTATION"]["FILTER_EXCLUDE"], + variant_name_pattern = lambda wildcards: config["ANNOTATION"]["VARIANT_NAME_PATTERN"], # lambda to deactivate automatic wildcard expansion in pattern + sep = ",", + input: + tsv = OUTDIR/"vaf"/"{sample}.vcf_fields.tsv", + output: + tsv = OUTDIR/"vaf"/"{sample}.vcf_fields.longer.tsv", + log: + LOGDIR / "format_vcf_fields_longer" / "{sample}.log.txt" + script: + "../scripts/format_vcf_fields_longer.R" + + +rule concat_vcf_fields: + params: + sep = "\t", + input: + expand(OUTDIR/"vaf"/"{sample}.vcf_fields.longer.tsv", sample=iter_samples()), + output: + OUTDIR/f"{OUTPUT_NAME}.vcf_fields.longer.tsv", + run: + import pandas as pd + from functools import reduce + reduce( + lambda a, b: pd.concat((a, b), axis="rows", ignore_index=True), + (pd.read_csv(path, sep=params.sep) for path in input) + ).to_csv(output[0], sep=params.sep, index=False) + + +rule merge_annotation: + threads: 1 + conda: "../envs/renv.yaml" + params: + sample = "{sample}", + ref_name = config["ALIGNMENT_REFERENCE"], + input: + tsv = OUTDIR/"vaf"/"{sample}.masked.prefiltered.tsv", + annot = OUTDIR/"vaf"/"{sample}.vcf_fields.longer.tsv", + output: + tsv = OUTDIR/"vaf"/"{sample}.variants.tsv" + log: + LOGDIR / "merge_annotation" / "{sample}.log.txt" + script: + "../scripts/merge_annotation.R" + + +use rule concat_vcf_fields as concat_variants with: + input: + expand(OUTDIR/"vaf"/"{sample}.variants.tsv", sample=iter_samples()), + output: + OUTDIR/f"{OUTPUT_NAME}.variants.tsv", + + +rule pairwise_trajectory_correlation: conda: "../envs/renv.yaml" input: - ann_vcf = OUTDIR/f"{OUTPUT_NAME}.annotated.vcf", - pre_tsv = OUTDIR/f"{OUTPUT_NAME}.masked.prefiltered.tsv" + variants = OUTDIR/f"{OUTPUT_NAME}.variants.tsv", + metadata = config["METADATA"], output: - tsv = OUTDIR/f"{OUTPUT_NAME}.masked.filtered.tsv" + table = report(OUTDIR/"vaf"/"pairwise_trajectory_correlation.csv"), log: - LOGDIR / "vcf_to_tsv" / "log.txt" + LOGDIR / "pairwise_trajectory_correlation" / "log.txt" script: - "../scripts/vcf_to_tsv.R" + "../scripts/pairwise_trajectory_correlation.R" diff --git a/workflow/scripts/N_S_sites_from_fasta.py b/workflow/scripts/N_S_sites_from_fasta.py deleted file mode 100644 index e98ac94..0000000 --- a/workflow/scripts/N_S_sites_from_fasta.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import json -import itertools as it -import pandas as pd 
-from gb2seq.alignment import Gb2Alignment -from gb2seq.features import Features -from dark.fasta import FastaReads - - -def split_into_codons(seq: str) -> list: - """Split the complete CDS feature in to a list of codons""" - codons = [seq[i:i + 3] for i in range(0, len(seq), 3) if "N" not in seq[i:i + 3]] - return codons - - -def calculate_potential_changes(genetic_code: dict) -> dict: - """Generate a dictionary with S and N pre-calculated for all possible codons""" - # Initialize structure - nts = set(["A", "G", "T", "C"]) - potential_changes = {"S": {}, "N": {}} - for codon in it.product(nts, repeat=3): - potential_changes["S"]["".join(codon)] = 0. - potential_changes["N"]["".join(codon)] = 0. - # Mutate (substitutions) all possible codons in the given genetic code - # and count proportions of mutations that are synonymous and non-synonmyous - for codon in genetic_code.keys(): - for codon_p in range(0, 3): - nts = ["A", "G", "T", "C"] - # Do not consider self substitutions, e.g. A->A - nts.remove(codon[codon_p]) - for nt in nts: - codon_mutated = list(codon) - # Mutate the basepair - codon_mutated[codon_p] = nt - # Count how many of them are synonymous - if genetic_code[codon] == genetic_code["".join(codon_mutated)]: - potential_changes["S"][codon] += 1/3. - else: - potential_changes["N"][codon] += 1/3. - return potential_changes - - -def get_feature_codons(alignment: Gb2Alignment, annotation: list) -> dict: - dct = {key:alignment.ntSequences(key)[1].sequence for key in annotation} - return {key:split_into_codons(item) for key,item in dct.items()} - - -def get_df(codons: dict, genetic_code: dict) -> pd.DataFrame: - keys = [] - N_sites = [] - S_sites = [] - values = calculate_potential_changes(genetic_code) - for key, item in codons.items(): - keys.append(key) - N = sum([values["N"][x] for x in item if x in values["N"].keys()]) - S = sum([values["S"][x] for x in item if x in values["S"].keys()]) - N_sites.append(N) - S_sites.append(S) - return pd.DataFrame({"gene": keys, "N": N_sites, "S": S_sites}) - - -def main(): - - logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO) - - logging.info("Reading features") - with open(snakemake.input.features) as f: - feature_list = list(json.load(f).keys()) - - logging.info("Reading genetic code") - with open(snakemake.input.genetic_code) as f: - genetic_code = json.load(f) - - logging.info("Create alignment object") - features = Features(snakemake.input.gb) - seq = list(FastaReads(snakemake.input.fasta))[0] - aln = Gb2Alignment(seq, features) - - logging.info("Splitting ancestral sequence into codons") - codons_dict = get_feature_codons(aln, feature_list) - - logging.info("Calculating synonymous and non synonymous sites") - df = get_df(codons_dict, genetic_code) - - logging.info("Saving results") - df.to_csv(snakemake.output.csv,index= False) - - -if __name__ == "__main__": - main() diff --git a/workflow/scripts/calculate_dnds.R b/workflow/scripts/calculate_dnds.R new file mode 100644 index 0000000..f545c27 --- /dev/null +++ b/workflow/scripts/calculate_dnds.R @@ -0,0 +1,71 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(tidyr) +library(logger) + +log_threshold(INFO) + +# Read inputs +log_info("Reading variants table") +variants <- read_delim( + snakemake@input[["variants"]], + col_select = c( + "SAMPLE", + "VARIANT_NAME", + "REGION", 
+ "ALT_FREQ", + "SYNONYMOUS", + "POS" + ) +) + +log_info("Reading metadata table") +metadata <- read_delim(snakemake@input[["metadata"]]) %>% + mutate( + interval = as.numeric( + as.Date(CollectionDate) - min(as.Date(CollectionDate)) + ) + ) %>% + select(ID, interval) %>% + rename(SAMPLE = ID) + +log_debug("Adding metadata to variants table") +variants <- left_join(variants, metadata) + +log_info("Reading N/S sites") +n_s_position <- read_delim(snakemake@input[["n_s_sites"]]) + +log_info("Computing dN/dS over time (NG86)") +dn.ds <- variants %>% + group_by(SAMPLE, SYNONYMOUS) %>% + summarise( + Freq = sum(ALT_FREQ, na.rm = TRUE) + ) %>% + pivot_wider( + names_from = SYNONYMOUS, + values_from = Freq, + values_fill = 0 + ) %>% + transmute( + dn = No / sum(n_s_position$N), + ds = Yes / sum(n_s_position$S) + ) %>% + ungroup() %>% + left_join(unique(select(variants, SAMPLE, interval))) %>% + transmute( + sample = SAMPLE, + day = interval, + dN = dn, + dS = ds, + w = dn / ds + ) + +log_info("Writing results") +write_csv(dn.ds, snakemake@output[["table"]]) diff --git a/workflow/scripts/download_context.R b/workflow/scripts/download_context.R index fefc7f6..8f0e119 100644 --- a/workflow/scripts/download_context.R +++ b/workflow/scripts/download_context.R @@ -6,9 +6,12 @@ sink(log, type = "message") sink(log, type = "output") library(GISAIDR) -library(tidyverse) +library(dplyr) +library(readr) library(glue) +library(lubridate) library(logger) + log_threshold(INFO) CHKPT.ERROR.MSG <- paste( @@ -34,9 +37,7 @@ needed.columns <- c( ) needed.columns.mask <- needed.columns %in% colnames(sample.metadata) if (!all(needed.columns.mask)) { - print(glue( - "Please ensure column '{needed.columns[!needed.columns.mask]}' is present" - )) + log_error("Please ensure column '{needed.columns[!needed.columns.mask]}' is present") stop(glue( "Missing columns in '{snakemake@input[['metadata']]}'. 
Alternatively:\n{CHKPT.ERROR.MSG}" )) @@ -51,24 +52,20 @@ log_info("Getting time windows for context samles") # Get time windows dates <- sample.metadata %>% pull(snakemake@params[["date_column"]]) %>% - as.numeric + as.numeric() window.quantile.offset <- (1 - snakemake@params[["date_window_span"]]) / 2 min.date <- as_date(quantile(dates, window.quantile.offset, na.rm = TRUE)) max.date <- as_date(quantile(dates, 1 - window.quantile.offset, na.rm = TRUE)) padded.min.date <- min.date - snakemake@params[["date_window_paddding_days"]] padded.max.date <- max.date + snakemake@params[["date_window_paddding_days"]] -print(glue( - "Time window (span={snakemake@params[['date_window_span']]}): {round(interval(min.date, max.date) / days(1))} days (from {min.date} to {max.date})" -)) -print(glue( - "Padded time window (padding={snakemake@params[['date_window_paddding_days']]} days): {round(interval(padded.min.date, padded.max.date) / days(1))} days (from {padded.min.date} to {padded.max.date})" -)) +log_info("Time window (span={snakemake@params[['date_window_span']]}): {round(interval(min.date, max.date) / days(1))} days (from {min.date} to {max.date})") +log_info("Padded time window (padding={snakemake@params[['date_window_paddding_days']]} days): {round(interval(padded.min.date, padded.max.date) / days(1))} days (from {padded.min.date} to {padded.max.date})") log_info("Getting locations for context samples") # Get locations (if there are multiple, sample from all of them) locations <- sample.metadata %>% pull(snakemake@params[["location_column"]]) %>% - unique + unique() # GISAID login creds.list <- yaml::read_yaml(snakemake@params[["gisaid_creds"]]) @@ -105,7 +102,7 @@ samples.accids <- sample.metadata %>% pull(snakemake@params[["samples_gisaid_accession_column"]]) filtered.accids <- metadata %>% filter(accession_id %in% samples.accids) metadata <- metadata %>% filter(!accession_id %in% samples.accids) -print(glue("{nrow(metadata)} accession_ids remaining after GISAID ID filter")) +log_info("{nrow(metadata)} accession_ids remaining after GISAID ID filter") # Checkpoint: enforce a minimum number of samples to have at least # as many possible combinations as random subsample replicates. diff --git a/workflow/scripts/extract_afwdist_variants.py b/workflow/scripts/extract_afwdist_variants.py new file mode 100644 index 0000000..6a7bf3f --- /dev/null +++ b/workflow/scripts/extract_afwdist_variants.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +import logging +from typing import List + +import pandas as pd +from Bio import SeqIO +from Bio.SeqRecord import SeqRecord +from Bio.Seq import Seq + + +def read_monofasta(path: str) -> SeqRecord: + fasta = SeqIO.parse(path, "fasta") + record = next(fasta) + if next(fasta, None) is not None: + logging.warning(f"There are unread records left in '{path}'") + return record + + +def read_masked_sites(vcf_path: str, mask_classes: List[str]) -> List[int]: + """ + Parse a VCF containing positions for masking. Assumes the VCF file is + formatted as in: + github.com/W-L/ProblematicSites_SARS-CoV2/blob/master/problematic_sites_sarsCov2.vcf + with a "mask" or "caution" recommendation in column 7. + Masked sites are specified with params. 
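+    Returns the list of 1-based positions (the VCF POS column) whose
+    FILTER value is one of the given mask classes.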
+    """
+    vcf = pd.read_csv(
+        vcf_path,
+        sep=r"\s+",
+        comment="#",
+        names=("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO")
+    )
+    return vcf.loc[vcf.FILTER.isin(mask_classes), "POS"].tolist()
+
+
+def build_ancestor_variant_table(ancestor: Seq, reference: Seq, reference_name: str, masked_positions: List[int]) -> pd.DataFrame:
+    pos = []
+    alt = []
+    for i in range(1, len(ancestor) + 1):
+        if i not in masked_positions and ancestor[i-1] != reference[i-1]:
+            pos.append(i)
+            alt.append(reference[i-1])
+    df = pd.DataFrame({snakemake.params.position_col: pos, snakemake.params.sequence_col: alt})
+    df[snakemake.params.frequency_col] = 1  # As a reference genome, we assume all positions have fixed alleles
+    df[snakemake.params.sample_col] = reference_name
+    return df
+
+
+DTYPES = {
+    "sample": "object",
+    "position": "int64",
+    "sequence": "object",
+    "frequency": "float64"
+}
+
+
+if __name__ == "__main__":
+
+    logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO)
+
+    colnames = {
+        snakemake.params.sample_col: "sample",
+        snakemake.params.position_col: "position",
+        snakemake.params.sequence_col: "sequence",
+        snakemake.params.frequency_col: "frequency"
+    }
+
+    logging.info("Reading input tables")
+    # Variants
+    variants = pd.read_table(snakemake.input.variants, sep="\t")
+    logging.info(f"Read {len(variants)} variant records")
+    # VCF with sites to mask
+    masked_sites = read_masked_sites(snakemake.input.mask_vcf, snakemake.params.mask_class)
+    logging.info(f"Read {len(masked_sites)} masked positions")
+
+    logging.info("Reading input FASTA files")
+    # Case ancestor
+    ancestor = read_monofasta(snakemake.input.ancestor)
+    logging.info(f"Ancestor: '{ancestor.description}', length={len(ancestor.seq)}")
+    # Alignment reference
+    reference = read_monofasta(snakemake.input.reference)
+    logging.info(f"Reference: '{reference.description}', length={len(reference.seq)}")
+
+    logging.info("Processing ancestor variants")
+    ancestor_table = build_ancestor_variant_table(ancestor.seq, reference.seq, reference.id, masked_sites)
+    logging.info(f"Ancestor has {len(ancestor_table)} variants")
+    all_variants = pd.concat([variants, ancestor_table], ignore_index=True)
+    logging.info(f"Combined table has {len(all_variants)} variants")
+
+    logging.info("Renaming and selecting columns")
+    output = (
+        all_variants
+        .rename(columns=colnames)
+        [list(colnames.values())]
+        .astype(DTYPES)
+    )
+    logging.info("Filtering sites")
+    output = output[~output.position.isin(masked_sites)]
+    logging.info(f"There are {len(output)} rows left")
+
+    logging.info("Writing results")
+    output.to_csv(snakemake.output.variants, index=False)
diff --git a/workflow/scripts/filter_genbank_features.py b/workflow/scripts/filter_genbank_features.py
new file mode 100644
index 0000000..959a24e
--- /dev/null
+++ b/workflow/scripts/filter_genbank_features.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+import logging
+from typing import Dict, Iterable, List
+
+from Bio import SeqIO
+from Bio.SeqFeature import SeqFeature
+
+
+def iter_features_filtering(features: Iterable[SeqFeature], included: Dict[str, List[str]], excluded: Dict[str, List[str]]) -> Iterable[SeqFeature]:
+    # No filters
+    if len(included) == 0 and len(excluded) == 0:
+        logging.debug("Selecting all features")
+        return iter(features)
+    # Only exclusion filter
+    elif len(included) == 0 and len(excluded) != 0:
+        logging.debug(f"Selecting features excluding all of {excluded}")
+        return (
+            feature for feature in features
+            if all(
+                (qualifier_value not in excluded.get(qualifier_key, []))
+                for qualifier_key in excluded.keys()
+                for qualifier_value in feature.qualifiers.get(qualifier_key, [])
+            )
+        )
+    # Only inclusion filter
+    elif len(included) != 0 and len(excluded) == 0:
+        logging.debug(f"Selecting features including any of {included}")
+        return (
+            feature for feature in features
+            if any(
+                (qualifier_value in included.get(qualifier_key, []))
+                for qualifier_key in included.keys()
+                for qualifier_value in feature.qualifiers.get(qualifier_key, [])
+            )
+        )
+    # Inclusion then exclusion filter
+    else:
+        logging.debug(f"Selecting features including any of {included} and then excluding all of {excluded}")
+        included_features = (
+            feature for feature in features
+            if any(
+                (qualifier_value in included.get(qualifier_key, []))
+                for qualifier_key in included.keys()
+                for qualifier_value in feature.qualifiers.get(qualifier_key, [])
+            )
+        )
+        return (
+            feature for feature in included_features
+            if all(
+                (qualifier_value not in excluded.get(qualifier_key, []))
+                for qualifier_key in excluded.keys()
+                for qualifier_value in feature.qualifiers.get(qualifier_key, [])
+            )
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"],
+        level=logging.INFO
+    )
+
+    logging.info("Reading GenBank file")
+    gb = SeqIO.read(snakemake.input.gb, format="gb")
+
+    logging.info("Extracting CDS")
+    features = []
+    for feature in iter_features_filtering(gb.features, snakemake.params.included, snakemake.params.excluded):
+        logging.debug(
+            "Processing SeqFeature: "
+            f"ID={feature.id} type={feature.type} location={feature.location} "
+            f"gene={feature.qualifiers.get('gene', [])} "
+            f"locus_tag={feature.qualifiers.get('locus_tag', [])} "
+            f"product={feature.qualifiers.get('product', [])}"
+        )
+        features.append(feature)
+
+    logging.info("Replacing features")
+    gb.features = features
+
+    logging.info("Writing filtered GenBank file")
+    SeqIO.write(gb, snakemake.output.gb, "gb")
diff --git a/workflow/scripts/filter_tsv.R b/workflow/scripts/filter_tsv.R
index 55dc216..502ed78 100644
--- a/workflow/scripts/filter_tsv.R
+++ b/workflow/scripts/filter_tsv.R
@@ -5,38 +5,39 @@ log <- file(snakemake@log[[1]], open = "wt")
 sink(log, type = "message")
 sink(log, type = "output")
 
-library(tidyverse)
+library(dplyr)
+library(readr)
+library(stringr)
 library(logger)
+
 log_threshold(INFO)
 
 # Read inputs
 data <- read_tsv(snakemake@input[["tsv"]])
 
-# Filtering criteria:
-# - P-value < 0.05
-# - Depth >= 20
-# - For SNP more than 2 reads in each strand
+# Filtering
 is.deletion <- str_detect(
     data$ALT,
     "^[A-Z]",
     negate = TRUE
 )
-inBothStrands <- data$ALT_RV > 2 & data$ALT_DP > 2
-Depth <- (data$ALT_RV + data$ALT_DP) >= 20
+strand.mask <- data$ALT_RV > snakemake@params$min_alt_rv &
+    data$ALT_DP > snakemake@params$min_alt_dp
+depth.mask <- (data$ALT_RV + data$ALT_DP) >= snakemake@params$min_depth
 
 log_info("Filtering variants")
 data <- filter(
     data,
     as.logical(PASS),
-    Depth,
-    inBothStrands | is.deletion
+    depth.mask,
+    strand.mask | is.deletion
 )
 
 log_info("Finding synonymous and non synonymous variants")
 # Adding synonymous variable
 data <- mutate(
     data,
-    synonimous = case_when(
+    SYNONYMOUS = case_when(
         REF_AA == ALT_AA ~ "Yes",
         TRUE ~ "No"
     )
@@ -45,14 +46,6 @@
 # Remove duplicated features
 data <- distinct(data, pick(!GFF_FEATURE), .keep_all = TRUE)
 
-# Change annotation to gb2seq annotation
-features <- read_csv(snakemake@input[["annotation"]])
-
-data <- data %>%
-    select(!GFF_FEATURE) %>%
-    left_join(features) %>%
-    rename(GFF_FEATURE = GEN)
-
 log_info("Saving results")
 write_tsv(
     data,
diff --git a/workflow/scripts/format_afwdist_results.py b/workflow/scripts/format_afwdist_results.py
new file mode 100644
index 0000000..2f42ca6
--- /dev/null
+++ b/workflow/scripts/format_afwdist_results.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+import logging
+import pandas as pd
+
+
+if __name__ == "__main__":
+
+    logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO)
+
+    logging.info("Read pairwise distances")
+    df = pd.read_csv(snakemake.input.distances)
+
+    logging.info("Initializing formatted output")
+    output = pd.DataFrame(
+        data=0.0,
+        columns=snakemake.params.samples,
+        index=snakemake.params.samples,
+        dtype="float64"
+    )
+
+    logging.info("Filling table")
+    for _, row in df.iterrows():
+        output.loc[row.sample_m, row.sample_n] = row.distance
+        output.loc[row.sample_n, row.sample_m] = row.distance
+
+    logging.info("Writing formatted results")
+    output.to_csv(snakemake.output.distances)
diff --git a/workflow/scripts/format_vcf_fields_longer.R b/workflow/scripts/format_vcf_fields_longer.R
new file mode 100644
index 0000000..26c5512
--- /dev/null
+++ b/workflow/scripts/format_vcf_fields_longer.R
@@ -0,0 +1,75 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(readr)
+library(dplyr)
+library(tidyr)
+library(stringr)
+library(purrr)
+library(rlang)
+library(glue)
+library(logger)
+
+empty.to.na <- function(x) {
+    x[x == ""] <- NA
+    x
+}
+
+# Replace "" values with NA in R filter list
+# Snakemake passes filters like: list(ERRORS = c(""))
+log_info("Preprocessing data")
+filter.include <- lapply(snakemake@params$filter_include, empty.to.na)
+filter.exclude <- lapply(snakemake@params$filter_exclude, empty.to.na)
+
+# Process input table
+log_info("Applying filters and writing results")
+read_tsv(snakemake@input$tsv) %>%
+
+    # Split sep-delimited "...[*]..." columns (e.g. ANN[*].EFFECT) into rows
+    separate_rows(
+        contains("[*]"),
+        sep = snakemake@params$sep,
+        convert = TRUE
+    ) %>%
+
+    # Rename "...[*]..." columns using the provided lookup via Snakemake config
+    rename(all_of(unlist(snakemake@params$colnames_mapping))) %>%
+
+    # Separate &-delimited error column (more than one error/warning/info message per row is possible)
+    mutate(split_errors = strsplit(ERRORS, "&")) %>%
+    # Keep rows with none of the excluded ERRORS terms, if any
+    filter(map_lgl(split_errors, ~ !any(.
%in% filter.exclude[["ERRORS"]]))) %>% + select(-split_errors) %>% + + # Apply filters + filter( + # Keep variants that include required values in each field + !!!map2( + names(filter.include), + filter.include, + ~ expr(.data[[!!.x]] %in% !!.y) + ), + # Keep variants that exclude required values in each field + !!!map2( + names(filter.exclude), + filter.exclude, + ~ expr(!(.data[[!!.x]] %in% !!.y)) + ) + ) %>% + + # Keep unique rows + distinct() %>% + + mutate( + # Assign variant name using the pattern defined via Snakemake config + VARIANT_NAME = str_glue(snakemake@params$variant_name_pattern), + # Assign sample name + SAMPLE = snakemake@params$sample + ) %>% + + # Write output file + write_tsv(snakemake@output$tsv) diff --git a/workflow/scripts/merge_annotation.R b/workflow/scripts/merge_annotation.R new file mode 100644 index 0000000..9c322c4 --- /dev/null +++ b/workflow/scripts/merge_annotation.R @@ -0,0 +1,84 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(logger) + +log_threshold(INFO) + +log_info("Reading variants table, replacing REGION with the reference name") +variants <- read_tsv( + snakemake@input$tsv, + col_types = list( + REGION = col_character(), + POS = col_integer(), + REF = col_character(), + ALT = col_character() + ) +) %>% + mutate( + REGION = snakemake@params$ref_name, + is_insertion = startsWith(ALT, "+"), + is_deletion = startsWith(ALT, "-"), + REF_VCF = case_when( + is_deletion ~ paste0(REF, substring(ALT, 2)), + TRUE ~ REF + ), + ALT_VCF = case_when( + is_insertion ~ paste0(REF, substring(ALT, 2)), + is_deletion ~ REF, + TRUE ~ ALT + ) + ) %>% + select(-is_insertion, -is_deletion) + +log_info("Reading annotation table") +annotation <- read_tsv( + snakemake@input$annot, + col_select = c( + "CHROM", + "POS", + "REF", + "ALT", + "EFFECT", + "IMPACT", + "VARIANT_NAME" + ), + col_types = list( + CHROM = col_character(), + POS = col_integer(), + REF = col_character(), + ALT = col_character(), + VARIANT_NAME = col_character() + ) +) %>% + distinct() %>% + group_by(CHROM, POS, REF, ALT) %>% + mutate(VARIANT_NAME = paste(VARIANT_NAME, collapse = "|")) %>% + ungroup() + +log_info("Merging tables") +merged <- left_join( + variants, + annotation, + by = c( + "REGION" = "CHROM", + "POS" = "POS", + "REF_VCF" = "REF", + "ALT_VCF" = "ALT" + ) +) %>% + mutate( + SAMPLE = snakemake@params$sample + ) + +log_info("Saving results") +write_tsv( + merged, + snakemake@output$tsv +) diff --git a/workflow/scripts/n_s_sites_from_fasta.py b/workflow/scripts/n_s_sites_from_fasta.py new file mode 100644 index 0000000..cb7fa6f --- /dev/null +++ b/workflow/scripts/n_s_sites_from_fasta.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +import logging +import json +import itertools as it +from typing import Dict + +import pandas as pd +from Bio import SeqIO +from Bio.SeqRecord import SeqRecord +from Bio.Seq import Seq + + +NTS = ("A", "C", "G", "T") + + +def read_monofasta(path: str) -> SeqRecord: + return SeqIO.read(path, format="fasta") + + +def split_into_codons(seq: Seq) -> list: + """Split the complete CDS feature in to a list of codons""" + return [ + seq[i:i + 3] for i in range(0, len(seq)-2, 3) if all(char in NTS for char in seq[i:i + 3]) + ] + + +def calculate_potential_changes(genetic_code: dict) -> dict: + """Generate a dictionary with S and N pre-calculated for all possible codons""" + # Initialize structure + 
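# Note (added): for every codon, the N and S totals below sum to 3 (three positions x three alternative bases, each weighted 1/3).
+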
potential_changes = {"S": {}, "N": {}} + for codon in it.product(NTS, repeat=3): + potential_changes["S"]["".join(codon)] = 0.0 + potential_changes["N"]["".join(codon)] = 0.0 + # Mutate (substitutions) all possible codons in the given genetic code + # and count proportions of mutations that are synonymous and non-synonmyous + for codon in genetic_code.keys(): + for codon_p in range(0, 3): + nts = list(NTS) + # Do not consider self substitutions, e.g. A->A + nts.remove(codon[codon_p]) + for nt in nts: + codon_mutated = list(codon) + # Mutate the basepair + codon_mutated[codon_p] = nt + # Count how many of them are synonymous + if genetic_code[codon] == genetic_code["".join(codon_mutated)]: + potential_changes["S"][codon] += 1 / 3.0 + else: + potential_changes["N"][codon] += 1 / 3.0 + return potential_changes + + +def get_feature_codons(coding_records: Dict[str, SeqRecord]) -> dict: + return {name: split_into_codons(record.seq) for name, record in coding_records.items()} + + +def calculate_ns_sites(codons: dict, genetic_code: dict) -> pd.DataFrame: + features = [] + N_sites, S_sites = [], [] + values = calculate_potential_changes(genetic_code) + for key, item in codons.items(): + features.append(key) + N = sum([values["N"][x] for x in item if x in values["N"].keys()]) + S = sum([values["S"][x] for x in item if x in values["S"].keys()]) + N_sites.append(N) + S_sites.append(S) + return pd.DataFrame({"feature": features, "N": N_sites, "S": S_sites}) + + +def main(): + + logging.basicConfig( + filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], + level=logging.INFO + ) + + logging.info("Reading genetic code") + with open(snakemake.input.genetic_code) as f: + genetic_code = json.load(f) + + logging.info("Reading GenBank file") + gb = SeqIO.read(snakemake.input.gb, format="gb") + + logging.info("Reading input FASTA") + record = read_monofasta(snakemake.input.fasta) + + logging.info("Extracting CDS") + coding_records = {} + for feature in gb.features: + identifier = "|".join(feature.qualifiers.get(snakemake.params.gb_qualifier_display, [])) + if identifier == "": + logging.error(f"Feature at {feature.location} has no qualifier '{snakemake.params.gb_qualifier_display}'") + elif identifier in coding_records: + logging.warning(f"Identifier '{identifier}' is already among coding records and will not be replaced by feature at {feature.location}") + else: + logging.debug(f"Adding feature") + coding_records[identifier] = feature.extract(record) + + logging.info(f"Splitting {len(coding_records)} records into codons") + codons = get_feature_codons(coding_records) + + logging.info("Calculating synonymous and non synonymous sites") + df = calculate_ns_sites(codons, genetic_code) + + logging.info("Saving results") + df.to_csv(snakemake.output.csv, index=False) + + +if __name__ == "__main__": + main() diff --git a/workflow/scripts/pairwise_trajectory_correlation.R b/workflow/scripts/pairwise_trajectory_correlation.R new file mode 100644 index 0000000..27a8dfc --- /dev/null +++ b/workflow/scripts/pairwise_trajectory_correlation.R @@ -0,0 +1,41 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(tidyr) +library(tibble) +library(stringr) +library(logger) + +log_threshold(INFO) + +variants <- read_tsv(snakemake@input[["variants"]]) + +# Obtain sample names ordered by CollectionDate +date_order <- read_csv(snakemake@input[["metadata"]]) %>% + 
arrange(CollectionDate) %>% + pull(ID) %>% + unique() + +# Create SNP variable and select useful variables +variants <- variants %>% select(VARIANT_NAME, SAMPLE, ALT_FREQ) + +variants <- variants %>% + pivot_wider( + names_from = VARIANT_NAME, + values_from = ALT_FREQ, + values_fill = 0, + values_fn = sum + ) %>% + arrange(factor(SAMPLE, levels = date_order)) %>% + # Removes "|"-separated annotations, keeping the first one + ellipsis (clarifies heatmap) + rename_with(~ str_replace(., "^([^|]+)\\|.*$", "\\1(...)"), -SAMPLE) %>% + column_to_rownames(var = "SAMPLE") + +log_info("Saving table to create heatmap") +write.csv(variants, snakemake@output[["table"]]) diff --git a/workflow/scripts/report/NV_description.R b/workflow/scripts/report/NV_description.R deleted file mode 100644 index d3e72fd..0000000 --- a/workflow/scripts/report/NV_description.R +++ /dev/null @@ -1,505 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(stringi) -library(ggpubr) -library(jsonlite) -library(logger) -log_threshold(INFO) - - -# Import file with plots style -source(snakemake@params[["design"]]) - -# SARS-CoV-2 anotation for genome scheme -SCov2_annotation <- list( - "five_prime_UTR" = c(1:265), - "orf1ab" = c(266:21555), - "Intergenic_1" = c(21556:21562), - "S" = c(21563:25384), - "Intergenic_2" = c(25385:25392), - "ORF3a" = c(25393:26220), - "Intergenic_3" = c(26221:26244), - "E" = c(26245:26472), - "Intergenic_4" = c(26473:26522), - "M" = c(26523:27191), - "Intergenic_5" = c(27192:27201), - "ORF6" = c(27202:27387), - "Intergenic_6" = c(27388:27393), - "ORF7" = c(27394:27759), - "Intergenic_7" = c(27760:27893), - "ORF8" = c(27894:28259), - "Intergenic_8" = c(28260:28273), - "N" = c(28274:29533), - "Intergenic_9" = c(29534:29557), - "ORF10" = c(29558:29674), - "three_prime_UTR" = c(29675:29903) -) - -vcf <- read_delim(snakemake@input[["vcf"]]) -vcf_snp <- vcf -window <- read_csv(snakemake@input[["window"]]) -metadata <- read_csv(snakemake@input[["metadata"]]) - -# DATA PROCESSING -# Obtain sample names ordered by CollectionDate -date_order <- metadata %>% - filter(ID %in% snakemake@params[["samples"]]) %>% - arrange(CollectionDate) %>% - pull(ID) %>% - unique() - -empty_vcf <- tibble( - REGION = date_order, - variant = as.character(NA), - ALT_FREQ = as.numeric(NA), - GFF_FEATURE = as.character(NA), - synonimous = as.character(NA), - POS = as.numeric(NA), - ALT = as.character(NA), - NV_class = as.character(NA), - group = as.character(NA) -) - -# Create SNP variable and select useful variables -vcf <- vcf %>% - dplyr::select( - variant, - REGION, - ALT_FREQ, - GFF_FEATURE, - synonimous, - POS, - ALT - ) - -# Df with gene length for scheme -notation_empty <- data.frame( - gene = "", - len = 0 -) %>% - filter(len != 0) - -notation <- lapply( - names(SCov2_annotation), - function(x) { - add_row( - notation_empty, - gene = x, - len = length(SCov2_annotation[[x]]) - ) - } -) %>% - bind_rows() - -# Classifying variants -log_info("Classifying variants") -vcf <- vcf %>% - mutate( - NV_class = case_when( - str_detect(ALT, fixed("-")) | - str_detect(ALT, fixed("+")) ~ - "INDEL", - TRUE ~ "SNP" - ), - Class = case_when( - GFF_FEATURE == "Intergenic" ~ "Intergenic", - TRUE ~ synonimous - ), - POS = as.numeric(POS) - ) %>% - rowwise() %>% - mutate( - indel_len = case_when( - NV_class == "INDEL" ~ str_length(ALT) - 1 - ), - indel_class = case_when( - GFF_FEATURE == 
"Intergenic" ~ "Intergenic", - NV_class == "INDEL" & - indel_len %% 3 == 0 ~ - "In frame", - NV_class == "INDEL" & - indel_len %% 3 > 0 ~ - "Frameshift" - ) - ) %>% - ungroup() %>% - mutate( - group = case_when( - GFF_FEATURE == "Intergenic" ~ "Intergenic", - NV_class == "SNP" ~ Class, - NV_class == "INDEL" ~ indel_class - ) - ) - -# NSP data -npc <- read_csv(snakemake@params[["regions"]]) %>% - mutate( - summary_nsp = case_when( - region %in% paste("nsp", seq(4, 12, 1), sep = "") ~ "nsp4-12", - region %in% paste("nsp", seq(14, 16, 1), sep = "") ~ "nsp14-16", - TRUE ~ region - ), - summary_start = case_when( - region %in% paste("nsp", seq(4, 12, 1), sep = "") ~ 8555, - region %in% paste("nsp", seq(14, 16, 1), sep = "") ~ 18040, - TRUE ~ start - ), - summary_end = case_when( - region %in% paste("nsp", seq(4, 12, 1), sep = "") ~ 16236, - region %in% paste("nsp", seq(14, 16, 1), sep = "") ~ 21552, - TRUE ~ end - ) - ) %>% - filter(region != "nsp1") - - -# PLOTS -## SUMMARY FIGURE FOR WHOLE GENOME -log_info("Plotting summary figure for whole genome") - -if (nrow(vcf) == 0) { - log_warn("Whole-genome VCF has no rows") - vcf <- empty_vcf -} - -variants <- vcf %>% - filter(ALT_FREQ > 0) %>% - ggplot() + - aes( - x = POS, - y = factor(REGION, date_order), - shape = factor(NV_class, c("SNP", "INDEL")), - color = group, - alpha = ALT_FREQ - ) + - geom_point(size = 3) + - geom_col( - data = notation, - aes( - x = len, - y = 0.3, - fill = factor(gene, rev(names(SCov2_annotation))) - ), - inherit.aes = FALSE, - width = 0.3 - ) + - scale_fill_manual(values = gene_colors) + - xlim(c(0, 29903)) + - scale_color_manual( - labels = NV_names, - values = NV_colors - ) + - scale_y_discrete(drop = FALSE) + - labs( - x = "SARS-CoV-2 genome position", - y = "Sample", - shape = "Variant class", - color = "Classification", - alpha = "Frequency", - fill = "Region" - ) + - guides( - fill = guide_legend(reverse = TRUE) - ) - -# Window plot -window_plot <- window %>% - ggplot() + - aes( - x = position, - y = fractions, - color = gen - ) + - geom_point() + - geom_line( - aes(group = 1), - colour = "black", - alpha = 0.3 - ) + - scale_y_continuous( - label = scales::percent, - limits = c(0, max(window$fractions) + 0.005) - ) + - xlim(c(0, 29903)) + - scale_color_manual(values = gene_colors) + - labs( - y = "Proportion of \n sites with NV", - x = "", - color = "Gen" - ) - -# Add nsp info -window_plot_nsp <- window_plot + - geom_vline( - data = npc, - aes(xintercept = summary_start), - color = "red" - ) + - geom_vline( - data = npc, - aes(xintercept = summary_end), - color = "red" - ) + - geom_text( - data = npc, - aes( - x = (summary_start + summary_end) / 2, - y = max(window$fractions) + 0.002, - label = summary_nsp - ), - inherit.aes = FALSE, - size = 5, - angle = 60 - ) - -figura <- ggarrange( - window_plot_nsp, - variants, - nrow = 2, - align = "v", - legend.grob = get_legend(variants), - heights = c(2, 6), - legend = "right", - labels = c("A", "B") -) - -ggsave( - filename = snakemake@output[["fig"]], - plot = figura, - width = 250, - height = 240, - units = "mm", - dpi = 250 -) - - -# Zoom in in spike -log_info("Plotting summary for variants in the spike") - -spike_pos <- window %>% - filter(gen == "S") %>% - pull(position) - -vcf_spike <- vcf %>% - filter( - ALT_FREQ > 0, - POS %in% c(min(spike_pos):max(spike_pos)) - ) - -window_plot_spike <- window %>% - filter(gen == "S") %>% - ggplot() + - aes( - x = position, - y = fractions, - color = gen - ) + - geom_point() + - geom_line( - aes(group = 1), - colour = "black", 
- alpha = 0.3 - ) + - scale_y_continuous( - label = scales::percent, - limits = c(0, max(window$fractions) + 0.005) - ) + - xlim(c(min(spike_pos), max(spike_pos))) + - scale_color_manual( - values = gene_colors, - guide = "none" - ) + - labs( - y = "Proportion of \n sites with NV", - x = "" - ) - -if (nrow(vcf_spike) == 0) { - log_warn("Spike VCF has no rows") - vcf_spike <- empty_vcf -} - -variants_spike <- vcf_spike %>% - ggplot() + - aes( - x = POS, - y = factor(REGION, date_order), - shape = factor(NV_class, c("SNP", "INDEL")), - color = group, - alpha = ALT_FREQ - ) + - geom_point(size = 3) + - xlim(c(min(spike_pos), max(spike_pos))) + - scale_color_manual( - labels = NV_names, - values = NV_colors - ) + - scale_y_discrete(drop = FALSE) + - labs( - x = "SARS-CoV-2 genome position", - y = "Sample", - shape = "Variant class", - color = "Classification", - alpha = "Frequency", - fill = "Region" - ) + - guides( - fill = guide_legend(reverse = TRUE) - ) - -figura_spike <- ggarrange( - window_plot_spike, - variants_spike, - nrow = 2, - align = "v", - legend.grob = get_legend(variants), - heights = c(2, 6), - legend = "right", - labels = c("A", "B") -) - -ggsave( - filename = snakemake@output[["fig_s"]], - plot = figura_spike, - width = 250, - height = 240, - units = "mm", - dpi = 250 -) - - -# Figure for no of heterozygous sites for each sample -log_info("Plotting no. of heterozygous sites for each sample") -figur_SNP_table <- vcf_snp %>% - filter(ALT_FREQ <= snakemake@params$max_alt_freq) %>% - left_join( - metadata, - by = c("REGION" = "ID") - ) %>% - group_by(REGION) %>% - summarise( - CollectionDate = min(as.Date(CollectionDate)), - n = n_distinct(POS) - ) %>% - ungroup() - -if (nrow(figur_SNP_table) == 0) { - log_warn("Filtered SNP table has no rows") - figur_SNP_table <- empty_vcf -} - -figur_SNP_time <- figur_SNP_table %>% - ggplot() + - aes( - x = CollectionDate, - y = n - ) + - geom_smooth( - method = "lm", - fill = "gray95", - alpha = 0.6 - ) + - geom_point() + - labs( - x = "Collection date", - y = "No. 
of polimorphic sites" - ) - -ggsave( - filename = snakemake@output[["fig_cor"]], - plot = figur_SNP_time, - width = 250, - height = 119.4, - units = "mm", - dpi = 250 -) - - -# PLOT TABLES -log_info("Saving plot tables") - -# Variants summary table -vcf %>% - select( - REGION, - POS, - variant, - ALT_FREQ, - NV_class, - group - ) %>% - rename( - sample = REGION, - Variant = variant, - Class = group - ) %>% - filter(ALT_FREQ > 0) %>% - mutate( - Class = case_when( - Class == "Yes" ~ "synonymous", - Class == "No" ~ "non_synonymous", - TRUE ~ Class - ) - ) %>% - write.csv(snakemake@output[["table_2"]], row.names = FALSE) - -# Window plot table -window %>% - transmute( - POS = position, - feature = gen, - prop_PolymorphicSites = fractions - ) %>% - write.csv(snakemake@output[["table_1"]], row.names = FALSE) - -# Heterozygous sites per sample table -vcf_snp %>% - filter(ALT_FREQ <= snakemake@params$max_alt_freq) %>% - select(!GFF_FEATURE) %>% - left_join( - metadata, - by = c("REGION" = "ID") - ) %>% - group_by(REGION) %>% - summarise( - CollectionDate = min(as.Date(CollectionDate)), - n_PolymorphicSites = n_distinct(POS) - ) %>% - ungroup() %>% - rename(sample = REGION) %>% - unique() %>% - write.csv(snakemake@output[["table_3"]], row.names = FALSE) - -# Stats for reporting -n_indels <- vcf %>% - filter(NV_class == "INDEL") %>% - pull(variant) %>% - unique() %>% - length() - -n_snv <- length(unique(vcf$variant)) - n_indels -model <- lm(n ~ CollectionDate, data = figur_SNP_table) - -# Calculate correlation, if possible -if (nrow(figur_SNP_table) > 2) { - p.value <- summary(model)$coefficients[2,4] -} else { - p.value <- NA -} - -list( - "INDELS" = n_indels, - "SNV" = n_snv, - "window" = snakemake@params[["window"]], - "step" = snakemake@params[["step"]], - "r2" = summary(model)$r.squared[[1]], - "value" = ifelse(p.value < 0.001, "< 0.001", p.value) -) %>% - toJSON() %>% - write(snakemake@output[["json"]]) diff --git a/workflow/scripts/report/af_time_correlation_data.R b/workflow/scripts/report/af_time_correlation_data.R new file mode 100644 index 0000000..d3415c0 --- /dev/null +++ b/workflow/scripts/report/af_time_correlation_data.R @@ -0,0 +1,141 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(tidyr) +library(tibble) +library(logger) +log_threshold(INFO) + +# Read data +log_info("Reading variants table") +variants <- read_delim( + snakemake@input[["variants"]], + col_select = c( + "VARIANT_NAME", + "SAMPLE", + "REGION", + "ALT_FREQ", + "POS" + ) +) %>% + # Fill positions without alt frequency with 0 + complete( + nesting(REGION, VARIANT_NAME, POS), + SAMPLE, + fill = list(ALT_FREQ = 0) + ) + +log_info("Reading metadata") +metadata <- read_csv(snakemake@input[["metadata"]]) %>% + filter( + ID %in% variants$SAMPLE + ) %>% + select( + ID, + CollectionDate + ) + +log_info("Dating variants") +variants <- left_join(variants, metadata, by = c("SAMPLE" = "ID")) %>% + # Calculate days since first sample + arrange( + CollectionDate + ) %>% + mutate( + interval = as.numeric(difftime(CollectionDate, min(CollectionDate), units = "days")) + ) + +# Save processed input +log_info("Writing dated and frequency-filled variants") +write_csv(variants, snakemake@output$fmt_variants) + +log_info("Calculating correlations") +log_debug("Calculating unique SNPs") +# Get list with all different polymorphisms +unique.snps <- unique(variants$VARIANT_NAME) + 
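+# For each variant, the correlation between alternative allele frequency
+# and sampling time (days since the first sample) is tested below, and
+# p-values are adjusted for multiple testing (Benjamini-Hochberg) across
+# all tested variants.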
+
+# Create an empty dataframe to be filled
+cor.df <- data.frame(
+  variant = "",
+  coefficient = 0,
+  p.value = 0,
+  p.value.adj = 0
+) %>%
+  filter(p.value != 0)
+
+log_debug("Calculating correlation using method = {snakemake@params$cor_method} and exact p-value = {snakemake@params$cor_exact}")
+correlations <- lapply(
+  unique.snps,
+  function(snp) {
+    # Select SNP
+    df <- filter(
+      variants,
+      VARIANT_NAME == snp
+    )
+    # Perform calculation
+    test <- cor.test(
+      df$ALT_FREQ,
+      df$interval,
+      method = snakemake@params$cor_method,
+      exact = snakemake@params$cor_exact
+    )
+    # Adjust p-value
+    p.value.adj <- p.adjust(
+      test$p.value,
+      method = "BH",
+      n = length(unique.snps)
+    )
+    # Add row to dataframe
+    add_row(
+      cor.df,
+      variant = snp,
+      coefficient = test$estimate,
+      p.value = test$p.value,
+      p.value.adj = p.value.adj
+    )
+  }
+) %>%
+  bind_rows()
+
+log_info("Writing correlations table")
+write_csv(
+  correlations,
+  snakemake@output[["correlations"]]
+)
+
+log_info("Selecting variants whose allele frequency is significantly correlated with time")
+significant.variants <- correlations %>%
+  filter(p.value.adj < 0.05) %>%
+  pull(variant) %>%
+  unique()
+
+log_info("Significant: {significant.variants}")
+
+log_info("Selecting variants in positions with more than one alternative allele")
+mult.alt.variants <- variants %>%
+  select(
+    VARIANT_NAME,
+    POS
+  ) %>%
+  distinct() %>%
+  group_by(POS) %>%
+  filter(n() > 1) %>%
+  ungroup() %>%
+  pull(VARIANT_NAME) %>%
+  unique()
+
+log_info("Multi-allelic: {mult.alt.variants}")
+
+# Build selected subset to represent
+variant.selection <- unique(c(significant.variants, mult.alt.variants))
+
+log_info("Selection: {variant.selection}")
+
+log_info("Writing selected variants subset")
+write_lines(variant.selection, snakemake@output$subset)
diff --git a/workflow/scripts/report/af_time_correlation_plot.R b/workflow/scripts/report/af_time_correlation_plot.R
new file mode 100644
index 0000000..42f5b75
--- /dev/null
+++ b/workflow/scripts/report/af_time_correlation_plot.R
@@ -0,0 +1,57 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(dplyr)
+library(readr)
+library(ggplot2)
+library(ggrepel)
+library(logger)
+log_threshold(INFO)
+
+# Import file with plots style
+source(snakemake@params[["design"]])
+
+log_info("Reading correlation data")
+correlations <- read_csv(snakemake@input$correlations)
+
+log_info("Plotting correlation coefficients and p-values for each variant")
+p <- correlations %>%
+  mutate(
+    trans.p = -log10(p.value.adj),
+    label = ifelse(p.value.adj < 0.05, variant, NA)
+  ) %>%
+  ggplot() +
+  aes(
+    x = coefficient,
+    y = trans.p
+  ) +
+  geom_text_repel(aes(label = label), max.overlaps = 10000, direction = "x") +
+  geom_point(
+    data = function(x) subset(x, !is.na(label)),
+    color = "orange",
+    size = 2
+  ) +
+  geom_point(size = 2, shape = 1) +
+  xlim(c(-1, 1)) +
+  geom_hline(
+    aes(yintercept = -log10(0.05)),
+    linetype = 2,
+    color = "orange"
+  ) +
+  labs(
+    x = "Correlation coefficient",
+    y = "-log10(p-value)"
+  )
+
+ggsave(
+  filename = snakemake@output$plot,
+  plot = p,
+  width = snakemake@params$plot_width_mm,
+  height = snakemake@params$plot_height_mm,
+  units = "mm",
+  dpi = 250
+)
diff --git a/workflow/scripts/report/af_trajectory_panel_plot.R b/workflow/scripts/report/af_trajectory_panel_plot.R
new file mode 100644
index 0000000..61e7190
--- /dev/null
+++ b/workflow/scripts/report/af_trajectory_panel_plot.R
@@ -0,0 +1,78 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+set.seed(snakemake@params$random_color_seed)  # seed for sampling colors
+
+library(dplyr)
+library(readr)
+library(ggplot2)
+library(glue)
+library(logger)
+log_threshold(INFO)
+
+# Import file with plots style
+source(snakemake@params[["design"]])
+
+log_info("Reading formatted variants table")
+variants <- read_csv(snakemake@input$fmt_variants)
+
+log_info("Reading subset of variants to represent")
+selected.variants <- read_lines(snakemake@input$subset)
+
+# Set plot height depending on the number of SNPs and the configured number of plot columns
+plot.rows <- ceiling(
+  length(selected.variants) / snakemake@params$n_plot_columns
+)
+plot.height <- max(100, plot.rows * snakemake@params$plot_row_height_mm)
+log_debug("Setting total plot height to {plot.height} mm with {plot.rows} rows")
+
+log_info("Plotting allele frequency trajectories over time for {length(selected.variants)} SNPs")
+selected.colors <- sample(TRAJECTORY.PANEL.COLORS, length(selected.variants))
+log_debug("Selected colors: {selected.colors}")
+p <- variants %>%
+  filter(VARIANT_NAME %in% selected.variants) %>%
+  mutate(
+    gPOS = paste0("g.", POS),
+    gPOS = reorder(gPOS, POS)
+  ) %>%
+  ggplot() +
+  aes(
+    x = interval,
+    y = ALT_FREQ,
+    color = reorder(glue("{gPOS}\n{VARIANT_NAME}"), POS)
+  ) +
+  scale_color_manual(values = selected.colors) +
+  geom_point() +
+  geom_line() +
+  theme(
+    legend.position = "bottom",
+    legend.text = element_text(size = 9),
+    legend.title = element_blank(),
+    legend.spacing.y = unit(3, "mm")
+  ) +
+  labs(
+    x = "Days since first sample",
+    y = "Frequency"
+  ) +
+  guides(color = guide_legend(ncol = 3))
+
+if (length(selected.variants) > 1) {
+  p <- p +
+    facet_wrap(
+      vars(gPOS),
+      ncol = snakemake@params$n_plot_columns
+    )
+}
+
+ggsave(
+  filename = snakemake@output[["plot"]],
+  plot = p,
+  width = snakemake@params$plot_width_mm,
+  height = plot.height,
+  units = "mm",
+  dpi = 250
+)
diff --git a/workflow/scripts/report/allele_freq_tree_data.R b/workflow/scripts/report/allele_freq_tree_data.R
new file mode 100644
index 0000000..9b20ce1
--- /dev/null
+++ b/workflow/scripts/report/allele_freq_tree_data.R
@@ -0,0 +1,56 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(dplyr)
+library(readr)
+library(tibble)
+library(data.table)
+library(ape)
+library(logger)
+
+fix_negative_edge_length <- function(nj.tree) {
+  edge_infos <- cbind(
+    nj.tree$edge,
+    nj.tree$edge.length
+  ) %>%
+    as.data.table()
+  colnames(edge_infos) <- c("from", "to", "length")
+  nega_froms <- edge_infos[length < 0, sort(unique(from))]
+  for (nega_from in nega_froms) {
+    minus_length <- edge_infos[from == nega_from, ][order(length)][1, length]
+    edge_infos[from == nega_from, length := length - minus_length]
+    edge_infos[to == nega_from, length := length + minus_length]
+  }
+  nj.tree$edge.length <- edge_infos$length
+  nj.tree
+}
+
+log_threshold(INFO)
+
+matrix <- read_csv(snakemake@input$dist)
+
+# Tree construction
+if (snakemake@params$use_bionj) {
+  log_info("Constructing BIONJ tree based on distances")
+  tree_method <- bionj
+} else {
+  log_info("Constructing NJ tree based on distances")
+  tree_method <- nj
+}
+tree <- matrix %>%
+  column_to_rownames(var = "...1") %>%
+  as.matrix() %>%
+  as.dist() %>%
+  tree_method() %>%
+  root(snakemake@params$outgroup_id, resolve.root = TRUE)
+
+# Resolve possible negative edge lengths in tree
+log_info("Resolving negative edge lengths")
+tree <- fix_negative_edge_length(tree)
+
+log_info("Saving tree")
+write.tree(tree, snakemake@output$tree)
diff --git a/workflow/scripts/report/allele_freq_tree_plot.R b/workflow/scripts/report/allele_freq_tree_plot.R
new file mode 100644
index 0000000..2a08851
--- /dev/null
+++ b/workflow/scripts/report/allele_freq_tree_plot.R
@@ -0,0 +1,89 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(dplyr)
+library(readr)
+library(ape)
+library(ggplot2)
+library(ggtree)
+library(logger)
+
+log_threshold(INFO)
+
+# Import file with plots style
+source(snakemake@params[["design"]])
+
+log_info("Building tree annotation")
+
+# Read metadata
+log_debug("Reading metadata")
+metadata <- read_csv(snakemake@input$metadata)
+
+# Extract names from records
+log_debug("Reading FASTA")
+study_records <- read.dna(
+  snakemake@input$study_fasta,
+  format = "fasta",
+  as.matrix = FALSE
+)
+log_debug("Extracting names")
+study_records <- study_records[
+  !startsWith(names(study_records), snakemake@params$outgroup_id)
+]
+study_names <- names(study_records)
+
+# Obtain sample names ordered by CollectionDate
+log_debug("Sorting names by collection date")
+date_order <- metadata %>%
+  arrange(CollectionDate) %>%
+  filter(ID %in% study_names) %>%
+  pull(ID) %>%
+  unique()
+
+log_debug("Building annotation dataframe")
+tree_tiplab <- data.frame(
+  ID = date_order,
+  order = seq(1, length(date_order), 1)
+) %>%
+  rowwise() %>%
+  mutate(
+    tip_label = sprintf(
+      "(%s)-%s",
+      order,
+      ID
+    )
+  ) %>%
+  ungroup() %>%
+  add_row(
+    ID = snakemake@params$outgroup_id,
+    order = 0,
+    tip_label = snakemake@params$outgroup_id
+  )
+
+# Read distance tree and root
+log_info("Reading tree")
+tree <- read.tree(snakemake@input$tree) %>%
+  root(snakemake@params$outgroup_id, resolve.root = TRUE)
+
+# Plot
+log_info("Plotting distance tree")
+max.tip.length <- max(
+  node.depth.edgelength(tree)[seq_along(tree$tip.label)]
+)
+p <- ggtree(tree) %<+% tree_tiplab +
+  geom_tiplab(aes(label = tip_label)) +
+  geom_treescale(1.1 * max.tip.length) +
+  geom_rootedge(0.05 * max.tip.length)
+
+ggsave(
+  filename = snakemake@output$plot,
+  plot = p,
+  width = snakemake@params$plot_width_mm,
+  height = snakemake@params$plot_height_mm,
+  units = "mm",
+  dpi = 250
+)
diff --git a/workflow/scripts/report/context_phylogeny_data.R b/workflow/scripts/report/context_phylogeny_data.R
new file mode 100644
index 0000000..a23587d
--- /dev/null
+++ b/workflow/scripts/report/context_phylogeny_data.R
@@ -0,0 +1,116 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(dplyr)
+library(readr)
+library(tibble)
+library(jsonlite)
+library(ape)
+library(logger)
+
+log_threshold(INFO)
+
+# Import file with plots style
+source(snakemake@params[["design"]])
+
+# Format tree label for well supported nodes
+TREE_LEGEND_NAMES["boot_alrt_pass"] <- sprintf(
+  TREE_LEGEND_NAMES["boot_alrt_pass"],
+  100 * snakemake@params[["boot_th"]], "%",
+  100 * snakemake@params[["alrt_th"]], "%"
+)
+
+log_info("Reading tree")
+tree_ml <- read.tree(snakemake@input[["tree"]]) %>%
+  root(
snakemake@params[["ref_name"]], + resolve.root = TRUE + ) +log_debug("Read tree with {length(tree_ml$node.label)} labels") + +log_info("Reading target names from FASTA") +target_names <- read.dna( + snakemake@input[["target_fasta"]], + format = "fasta", + as.matrix = FALSE, +) +log_debug("Read {length(target_names)} records") + +log_info("Processing target names") +target_names <- target_names[ + !startsWith(names(target_names), snakemake@config[["ALIGNMENT_REFERENCE"]]) +] +target_names <- names(target_names) +log_debug("{length(target_names)} records remaining after processing") + +# ML tree with context data +# Internal nodes color +# Node labels contain SH-aLRT/UFboot values +log_info("Reading support values from labels") +labels <- strsplit(tree_ml$node.label, "/") +aLRT.values <- sapply( + labels, + function(x) as.numeric(x[1]) +) +bootstrap.values <- sapply( + labels, + function(x) as.numeric(x[2]) +) + +log_info("Calculating support mask for the given thresholds") +aLRT.mask <- aLRT.values >= snakemake@params[["alrt_th"]] +boot.mask <- bootstrap.values >= snakemake@params[["boot_th"]] + +# MRCA for target samples +log_info("Calculating MRCA of target samples") +target.mrca <- getMRCA(tree_ml, target_names) +target.mrca.clade <- extract.clade(tree_ml, target.mrca) +target.mrca.clade.ntips <- Ntip(target.mrca.clade) + +log_info("Building M-L tree annotation") +tree_ml.ntips <- length(tree_ml$tip.label) +tree_ml.nodes <- tree_ml$Nnode + tree_ml.ntips +tree_ml.labels <- tree_ml$tip.label[1:tree_ml.nodes] +tree_ml.node.pass <- c(rep(FALSE, tree_ml.ntips), aLRT.mask & boot.mask) + +ml.tree.annot <- tibble( + node = 1:tree_ml.nodes, +) %>% + mutate( + Class = case_when( + tree_ml.labels %in% target_names ~ TREE_LEGEND_NAMES["tip_label"], + tree_ml.node.pass ~ TREE_LEGEND_NAMES["boot_alrt_pass"], + TRUE ~ NA + ) + ) + +# Write output files +log_info("Writing tree annotation") +write_csv(ml.tree.annot, snakemake@output$annotation) + +log_info("Writing JSON data") +target.node <- tree_ml$node.label[target.mrca - length(tree_ml$tip.label)] +list( + "boot" = strsplit(target.node, "/")[[1]][2] %>% as.numeric(), + "alrt" = strsplit(target.node, "/")[[1]][1] %>% as.numeric(), + "monophyly" = ifelse( + is.monophyletic(tree_ml, target_names), + "are", + "are not" + ), + "target_mrca" = target.mrca, + "clade_tips" = target.mrca.clade.ntips, + "max_tip_length" = max(node.depth.edgelength(tree_ml)[ + 1:length(tree_ml$tip.label) + ]), + "root" = snakemake@params[["ref_name"]] +) %>% + write_json( + snakemake@output$json, + auto_unbox = TRUE, + digits = NA + ) diff --git a/workflow/scripts/report/context_phylogeny_plot.R b/workflow/scripts/report/context_phylogeny_plot.R new file mode 100644 index 0000000..742bb24 --- /dev/null +++ b/workflow/scripts/report/context_phylogeny_plot.R @@ -0,0 +1,71 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(jsonlite) +library(ape) +library(ggplot2) +library(ggtree) +library(logger) + +log_threshold(INFO) + +# Import file with plots style +source(snakemake@params[["design"]]) + +# Format tree label for well supported nodes +TREE_LEGEND_NAMES["boot_alrt_pass"] <- sprintf( + TREE_LEGEND_NAMES["boot_alrt_pass"], + 100 * snakemake@params[["boot_th"]], "%", + 100 * snakemake@params[["alrt_th"]], "%" +) + +log_info("Reading JSON data") +json <- read_json(snakemake@input$json) + +log_info("Reading tree") +tree_ml 
<- read.tree(snakemake@input[["tree"]]) %>% + root( + json$root, + resolve.root = TRUE + ) + +log_info("Reading tree annotation") +annotation <- read_csv(snakemake@input$annotation) + +log_info("Plotting M-L tree with context samples") +p <- ggtree(tree_ml, layout = "circular") %<+% + annotation + + geom_highlight(node = json$target_mrca, colour = "orange", alpha = 0) + + geom_point(aes(color = Class, size = Class)) + + geom_treescale(1.05 * json$max_tip_length) + + geom_rootedge(0.05 * json$max_tip_length) + + xlim(-json$max_tip_length / 3, NA) + + scale_color_manual( + values = setNames( + TREE_PALETTE[names(TREE_LEGEND_NAMES)], + TREE_LEGEND_NAMES + ), + na.translate = FALSE + ) + + scale_size_manual( + values = setNames( + TREE_NODE_SIZE[names(TREE_LEGEND_NAMES)], + TREE_LEGEND_NAMES + ), + na.translate = FALSE + ) + +ggsave( + filename = snakemake@output[["plot"]], + plot = p, + width = snakemake@params[["plot_width_mm"]], + height = snakemake@params[["plot_height_mm"]], + units = "mm", + dpi = 250 +) diff --git a/workflow/scripts/report/demix_plot.R b/workflow/scripts/report/demix_plot.R new file mode 100644 index 0000000..1cf1c3d --- /dev/null +++ b/workflow/scripts/report/demix_plot.R @@ -0,0 +1,56 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(ggplot2) +library(logger) + +log_threshold(INFO) + +# Import file with plots style +source(snakemake@params[["design"]]) + +# Read inputs +log_info("Plotting") +p <- read_csv(snakemake@input$data) %>% + mutate( + sample = reorder(sample, CollectionDate), + lineage = ifelse(is_main, lineage, NA) + ) %>% + ggplot() + + aes( + x = sample, + y = abundance, + fill = lineage + ) + + scale_fill_viridis_d( + na.value = "gray50", + labels = function(x) { + ifelse(is.na(x), "Other", x) + } + ) + + geom_col() + + theme( + axis.text.x = element_text(angle = 60, hjust = 1), + legend.position = "bottom" + ) + + labs( + x = "Sample", + y = "Relative abundance", + fill = "Lineage" + ) + +log_info("Saving plot") +ggsave( + filename = snakemake@output[["plot"]], + plot = p, + width = snakemake@params$plot_width_mm, + height = snakemake@params$plot_height_mm, + units = "mm", + dpi = 250 +) diff --git a/workflow/scripts/report/demix_plot_data.R b/workflow/scripts/report/demix_plot_data.R new file mode 100644 index 0000000..cb0c08b --- /dev/null +++ b/workflow/scripts/report/demix_plot_data.R @@ -0,0 +1,58 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(logger) + +log_threshold(INFO) + +# Read inputs +demix <- read_csv(snakemake@input[["summary_demixing"]]) + +# Data processing +log_info("Obtaining main lineages") +main_lineages <- demix %>% + group_by(sample) %>% + top_n(1, abundance) %>% + ungroup() %>% + pull(lineage) %>% + unique() + +# Obtain sample names ordered by CollectionDate +log_info("Sorting dates") +metadata <- read_csv(snakemake@input[["metadata"]]) +date_order <- metadata %>% + arrange(CollectionDate) %>% + filter(ID %in% demix$sample) %>% + pull(ID) %>% + unique() + +# Build plot data +log_info("Building plot data") +plot.data <- demix %>% + group_by(lineage, sample) %>% + summarize(abundance = sum(abundance)) %>% + ungroup() %>% + left_join( + select( + metadata, + ID, + CollectionDate + ), + by = c("sample" = "ID") + 
) %>%
+  mutate(
+    is_main = lineage %in% main_lineages
+  )
+
+# Save plot data
+log_info("Saving plot data")
+write_csv(
+  plot.data,
+  snakemake@output[["data"]]
+)
diff --git a/workflow/scripts/report/diversity_data.R b/workflow/scripts/report/diversity_data.R
new file mode 100644
index 0000000..9ca5918
--- /dev/null
+++ b/workflow/scripts/report/diversity_data.R
@@ -0,0 +1,116 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+set.seed(snakemake@params$seed)
+
+library(ape)
+library(pegas)
+library(future)
+library(future.apply)
+library(readr)
+library(jsonlite)
+library(logger)
+
+log_threshold(INFO)
+
+# Pi calculation
+nucleotide.diversity <- function(dna_object, record.names, sample.size) {
+  sample <- sample(record.names, sample.size, replace = FALSE)
+  dna_subset <- dna_object[record.names %in% sample]
+  nuc.div(dna_subset)
+}
+
+# Parallel bootstrapping
+boot.nd.parallel <- function(aln, sample.size, reps) {
+  record.names <- names(aln)
+  future_sapply(
+    1:reps,
+    function(x) {
+      nucleotide.diversity(aln, record.names, sample.size)
+    },
+    future.seed = TRUE
+  )
+}
+
+# Read outgroup/context alignment
+log_info("Reading context")
+gene_ex <- read.dna(
+  snakemake@input[["context_fasta"]],
+  format = "fasta",
+  as.matrix = FALSE
+)
+gene_ex <- gene_ex[
+  !startsWith(names(gene_ex), snakemake@params$aln_reference)
+]
+
+# Read target (study) alignment
+log_info("Reading target alignment")
+study_aln <- read.dna(
+  snakemake@input[["study_fasta"]],
+  format = "fasta",
+  as.matrix = FALSE
+)
+study_aln <- study_aln[
+  !startsWith(names(study_aln), snakemake@params$aln_reference)
+]
+
+# Diversity value for our samples
+log_info("Calculating diversity value for studied samples")
+diversity <- nuc.div(study_aln)
+
+# Perform bootstrap
+log_info("Performing calculation for nucleotide diversity in context samples")
+plan(multisession, workers = snakemake@threads)
+divs <- boot.nd.parallel(
+  gene_ex,
+  length(study_aln),
+  snakemake@params[["bootstrap_reps"]]
+)
+plan(sequential)
+
+# Test normality
+log_info("Normality test for nucleotide diversity values")
+st <- shapiro.test(divs)
+
+# Calculate p-value (assuming normal distribution)
+log_info("Calculating p-value (assuming normal distribution)")
+test <- t.test(
+  divs,
+  alternative = "greater",
+  mu = diversity,
+  conf.level = 0.95
+)
+pvalue.norm <- test$p.value
+
+# Estimate p-value empirically
+log_info("Estimating p-value empirically")
+empirical.probs <- ecdf(divs)
+pvalue.emp <- empirical.probs(diversity)
+
+# Data for JSON file
+log_info("Building JSON data")
+p.value <- ifelse(st$p.value >= 0.05, pvalue.norm, pvalue.emp)
+list.div <- list(
+  "diversity" = diversity,
+  "p.value" = ifelse(p.value >= 0.001, p.value, "< 0.001"),
+  "normal.pvalue" = ifelse(st$p.value >= 0.001, st$p.value, "< 0.001"),
+  "norm.text" = ifelse(st$p.value >= 0.05, "", "not"),
+  "type.test" = ifelse(st$p.value >= 0.05, "", "empirical"),
+  "boot.reps" = snakemake@params[["bootstrap_reps"]],
+  "sample.size" = length(study_aln)
+)
+
+log_info("Writing diversity distribution")
+write_lines(divs, snakemake@output[["divs"]])
+
+log_info("Writing results JSON")
+write_json(
+  list.div,
+  snakemake@output[["json"]],
+  auto_unbox = TRUE,
+  digits = NA # maximum precision
+)
diff --git a/workflow/scripts/report/diversity_plot.R b/workflow/scripts/report/diversity_plot.R
index 3ad0131..0c42118 100644
--- 
a/workflow/scripts/report/diversity_plot.R +++ b/workflow/scripts/report/diversity_plot.R @@ -5,145 +5,57 @@ log <- file(snakemake@log[[1]], open = "wt") sink(log, type = "message") sink(log, type = "output") -library(ape) -library(pegas) -library(future.apply) -library(tidyverse) +library(dplyr) +library(readr) +library(ggplot2) library(jsonlite) library(logger) -log_threshold(INFO) - -# Pi calculation -nucleotide.diversity <- function(dna_object, record.names, sample.size) { - sample <- sample(record.names, sample.size, replace = FALSE) - dna_subset <- dna_object[record.names %in% sample] - nuc.div(dna_subset) -} -# Parallel bootstrapping -boot.nd.parallel <- function(aln, sample.size = 12, reps = 100) { - record.names <- names(aln) - future_sapply( - 1:reps, - function(x) { - nucleotide.diversity(aln, record.names, sample.size) - }, - future.seed = TRUE - ) -} +log_threshold(INFO) # Import file with plots style source(snakemake@params[["design"]]) -# Outgroup/context alignment -gene_ex <- read.dna( - snakemake@input[["context_fasta"]], - format = "fasta", - as.matrix = FALSE -) -gene_ex <- gene_ex[ - !startsWith(names(gene_ex), snakemake@config[["ALIGNMENT_REFERENCE"]]) -] - -# Study alignment -study_aln <- read.dna( - snakemake@input[["study_fasta"]], - format = "fasta", - as.matrix = FALSE -) -study_aln <- study_aln[ - !startsWith(names(study_aln), snakemake@config[["ALIGNMENT_REFERENCE"]]) -] - -# Diversity value for our samples -log_info("Calculating diversity value for studied samples") -diversity <- nuc.div(study_aln) - - -# Perform bootstrap -log_info("Performing calculation for nucleotide diversity in context samples") -plan(multisession, workers = snakemake@threads) -divs <- boot.nd.parallel( - gene_ex, - length(study_aln), - snakemake@params[["bootstrap_reps"]] -) -plan(sequential) - -# Test normality -log_info("Normality test for nucleotide diversity values") -st <- shapiro.test(divs) - -# Calculate p-value (assuming normal distribution) - -log_info("Calculating p-value (assuming normal distribution)") - -test <- t.test( - divs, - alternative = "greater", - mu = diversity, - conf.level = 0.95 -) -pvalue.norm <- test$p.value - -# Estimate p-value empirically -log_info("Estimating p-value empirically") -empirical.probs <- ecdf(divs) -pvalue.emp <- empirical.probs(diversity) +# Read data +log_info("Reading diversity results") +divs <- read_lines(snakemake@input$divs) %>% + as.numeric() +json <- read_json(snakemake@input$json) # Plot and save -log_info("Plotting diversity plot") +log_info("Plotting") p <- data.frame(pi = divs) %>% ggplot() + geom_density( aes(x = pi), - fill = "#fcbf49", + fill = DIVERSITY_PALETTE["density_fill"], alpha = 0.7, bw = 0.000001, - color = "#eae2b7" - ) + - geom_vline( - aes(xintercept = diversity), - color = "#d62828" + linewidth = 0.5, + color = DIVERSITY_PALETTE["density_color"] ) + stat_function( fun = dnorm, args = list(mean = mean(divs), sd = sd(divs)), - color = "#f77f00" + color = DIVERSITY_PALETTE["dnorm_color"], + linewidth = 1 + ) + + geom_vline( + xintercept = json$diversity, + color = DIVERSITY_PALETTE["value_color"], + linewidth = 1 ) + labs( x = "π", y = "Density" - ) + ) + + xlim(0, NA) ggsave( - filename = snakemake@output[["fig"]], + filename = snakemake@output[["plot"]], plot = p, - width = snakemake@params[["plot_width"]], - height = snakemake@params[["plot_height"]], + width = snakemake@params[["plot_width_mm"]], + height = snakemake@params[["plot_height_mm"]], units = "mm", dpi = 250 ) - -# DATA JSON ##### -p.value <- 
ifelse(st$p.value >= 0.05, pvalue.norm, pvalue.emp) - -list.div <- list( - "diversity" = format(diversity, scientific = TRUE), - "p.value" = ifelse(p.value >= 0.001, p.value, "< 0.001"), - "normal.pvalue" = ifelse(st$p.value >= 0.001, p.value, "< 0.001"), - "norm.text" = ifelse(st$p.value >= 0.05, "", "not"), - "type.test" = ifelse(st$p.value >= 0.05, "", "empirical"), - "boot.reps" = snakemake@params[["bootstrap_reps"]], - "sample.size" = length(study_aln) -) - -json <- toJSON(list.div) -write(json, snakemake@output[["json"]]) - -# PLOT TABLES -data.frame( - pi = divs, - prop.value = diversity -) %>% - write.csv(snakemake@output[["table"]], row.names = FALSE) diff --git a/workflow/scripts/report/dnds_plots.R b/workflow/scripts/report/dnds_plots.R new file mode 100644 index 0000000..62b2e41 --- /dev/null +++ b/workflow/scripts/report/dnds_plots.R @@ -0,0 +1,89 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(tidyr) +library(ggplot2) +library(logger) + +log_threshold(INFO) + +# Import file with plots style +source(snakemake@params[["design"]]) + +# Read inputs +log_info("Reading plot data") +plot.df <- read_delim(snakemake@input$table) %>% + filter(w != Inf) %>% + pivot_longer( + c("dN", "dS", "w"), + values_to = "value", + names_to = "ratio" + ) + +log_info("Plotting dN and dS") +p.dn.ds <- plot.df %>% + filter(ratio != "w") %>% + ggplot() + + aes( + x = day, + y = value, + color = ratio, + shape = ratio + ) + + geom_point(size = 3) + + geom_line() + + scale_color_manual( + name = "", + labels = DNDS_LABELS, + values = DNDS_COLORS + ) + + scale_shape_manual( + name = "", + values = DNDS_SHAPES, + labels = DNDS_LABELS + ) + + labs( + y = "Substitution rate", + x = "Days since the initial sampling", + color = "" + ) + +ggsave( + filename = snakemake@output$plot_dn_ds, + plot = p.dn.ds, + width = snakemake@params$plot_width_mm, + height = snakemake@params$plot_height_mm, + units = "mm", + dpi = 250 +) + +log_info("Plotting w (dN/dS)") +p.omega <- plot.df %>% + filter(ratio == "w") %>% + ggplot() + + aes( + x = day, + y = value, + ) + + geom_point(color = "black") + + geom_line(color = "black") + + labs( + y = "w (dN/dS)", + x = "Days since the initial sampling", + color = "" + ) + +ggsave( + filename = snakemake@output$plot_omega, + plot = p.omega, + width = snakemake@params$plot_width_mm, + height = snakemake@params$plot_height_mm, + units = "mm", + dpi = 250 +) diff --git a/workflow/scripts/report/evo_plots.R b/workflow/scripts/report/evo_plots.R deleted file mode 100644 index e0ed984..0000000 --- a/workflow/scripts/report/evo_plots.R +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(logger) -log_threshold(INFO) - -# Import file with plots style -source(snakemake@params[["design"]]) - -# Read inputs -vcf <- read_delim(snakemake@input[["vcf"]]) -metadata <- read_delim(snakemake@input[["metadata"]]) -N_S_position <- read_delim(snakemake@input[["N_S"]]) - -# DATA PROCESSING -# Create SNP variable and select useful variables -vcf <- vcf %>% - dplyr::select( - variant, - REGION, - ALT_FREQ, - GFF_FEATURE, - synonimous, - POS - ) - -# Create variable for days sins first sample in metadata -metadata <- metadata %>% - mutate( - interval = as.numeric( - 
as.Date(CollectionDate) - min(as.Date(CollectionDate))
-    )
-  ) %>%
-  select(ID, interval) %>%
-  rename(REGION = ID)
-
-vcf <- left_join(vcf, metadata)
-
-# PLOT
-log_info("Ploting dN and dS over time")
-plot_df <- vcf %>%
-  group_by(REGION, synonimous) %>%
-  summarise(
-    Freq = sum(ALT_FREQ, na.rm = TRUE)
-  ) %>%
-  pivot_wider(
-    names_from = synonimous,
-    values_from = Freq,
-    values_fill = 0
-  ) %>%
-  transmute(
-    dn = No / sum(N_S_position$N),
-    ds = Yes / sum(N_S_position$S)
-  ) %>%
-  ungroup() %>%
-  mutate(
-    w = dn / ds,
-  ) %>%
-  filter(w != Inf) %>%
-  pivot_longer(
-    c("dn", "ds", "w"),
-    values_to = "value",
-    names_to = "d"
-  ) %>%
-  left_join(unique(select(vcf, REGION, interval)))
-
-plot <- plot_df %>%
-  filter(d != "w") %>%
-  ggplot() +
-  aes(
-    x = interval,
-    y = value,
-    color = d,
-    shape = d
-  ) +
-  geom_point(size = 3) +
-  geom_line() +
-  scale_color_manual(
-    name = "Parameter",
-    labels = dnds.labels,
-    values = dnds.colors
-  ) +
-  scale_shape_manual(
-    name = "Parameter",
-    values = dnds.shapes,
-    labels = dnds.labels
-  ) +
-  labs(
-    y = "Substitution rate",
-    x = "Days since the initial sampling",
-    color = "Parameter"
-  )
-
-ggsave(
-  filename = snakemake@output[["plot"]],
-  plot = plot,
-  width = 159.2,
-  height = 119.4,
-  units = "mm",
-  dpi = 250
-)
-
-# Plot for omega
-
-plot_omega <- plot_df %>%
-  filter(d == "w") %>%
-  ggplot() +
-  aes(
-    x = interval,
-    y = value,
-  ) +
-  geom_point(color = "black") +
-  geom_line(color = "black") +
-  labs(
-    y = "w (dN/dS)",
-    x = "Days since the initial sampling",
-    color = "Parameter"
-  )
-
-ggsave(
-  filename = snakemake@output[["plot_omega"]],
-  plot = plot_omega,
-  width = 159.2,
-  height = 119.4,
-  units = "mm",
-  dpi = 250
-)
-
-# PLOT TABLES
-log_info("Saving plot table")
-vcf %>%
-  group_by(REGION, synonimous) %>%
-  summarise(
-    Freq = sum(ALT_FREQ, na.rm = TRUE)
-  ) %>%
-  pivot_wider(
-    names_from = synonimous,
-    values_from = Freq,
-    values_fill = 0
-  ) %>%
-  transmute(
-    dn = No / sum(N_S_position$N),
-    ds = Yes / sum(N_S_position$S)
-  ) %>%
-  ungroup() %>%
-  left_join(unique(select(vcf, REGION, interval))) %>%
-  transmute(
-    sample = REGION,
-    DaysSinceFirst = interval,
-    dN = dn,
-    dS = ds,
-    w = dn / ds
-  ) %>%
-  write.csv(snakemake@output[["table"]], row.names = FALSE)
diff --git a/workflow/scripts/report/extract_genbank_regions.py b/workflow/scripts/report/extract_genbank_regions.py
new file mode 100644
index 0000000..61f3a33
--- /dev/null
+++ b/workflow/scripts/report/extract_genbank_regions.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+import logging
+import json
+
+from Bio import SeqIO
+from Bio.SeqFeature import ExactPosition
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"],
+        level=logging.INFO
+    )
+
+    logging.info("Reading GenBank file")
+    gb = SeqIO.read(snakemake.input.gb, format="gb")
+
+    logging.info("Calculating CDS regions")
+    regions = {}
+    for feature in gb.features:
+        identifier = "|".join(feature.qualifiers.get(snakemake.params.gb_qualifier, []))
+        if identifier == "":
+            logging.error(f"Feature at {feature.location} has no qualifier '{snakemake.params.gb_qualifier_display}'")
+        elif identifier in regions:
+            logging.warning(f"Identifier '{identifier}' is already among coding records and will not be replaced by feature at {feature.location}")
+        else:
+            logging.debug(f"Adding feature '{identifier}'")
+            if type(feature.location.start) is not ExactPosition or type(feature.location.end) is not ExactPosition:
+                logging.warning(f"Feature 
'{identifier}' location is not exact but will be treated as such: {feature.location}") + regions[identifier] = ( + int(feature.location.start.real + 1), + int(feature.location.end.real) + ) + + logging.info("Calculating intergenic regions") + cds_names = tuple(regions.keys()) + intergenic_count = 0 + for i in range(1, len(cds_names)): + start1, end1 = regions[cds_names[i-1]] + start2, end2 = regions[cds_names[i]] + if (start2 - end1) > 1: + intergenic_count += 1 + regions[f"Intergenic_{intergenic_count}"] = (end1 + 1, start2 - 1) + + logging.info("Writing regions to JSON file") + with open(snakemake.output.regions, "w") as fw: + json.dump(regions, fw, indent=2) diff --git a/workflow/scripts/report/freyja_plot.R b/workflow/scripts/report/freyja_plot.R deleted file mode 100644 index e698091..0000000 --- a/workflow/scripts/report/freyja_plot.R +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(logger) -log_threshold(INFO) - -# Import file with plots style -source(snakemake@params[["design"]]) - -# Read inputs -demix <- read_csv(snakemake@input[["summary_demixing"]]) - -# DATA PROCESSING -log_info("Obtaining main lineages") -main_lineages <- demix %>% - group_by(sample) %>% - top_n(1, abundances) %>% - ungroup() %>% - pull(lineages) %>% - unique() - -# Obtain sample names ordered by CollectionDate -metadata <- read_csv(snakemake@input[["metadata"]]) -date_order <- metadata %>% - arrange(CollectionDate) %>% - filter(ID %in% demix$sample) %>% - pull(ID) %>% - unique() - -# PLOT -log_info("Plotting summary demixing") -demix_plot <- demix %>% - mutate( - lineages = case_when( - lineages %in% main_lineages ~ lineages - ) - ) %>% - group_by(lineages, sample) %>% - mutate( - abundances = sum(abundances) - ) %>% - unique() %>% - ggplot() + - aes( - x = factor(sample, date_order), - y = as.numeric(abundances), - fill = lineages - ) + - scale_fill_viridis_d( - na.value = "gray50", - labels = function(x) { - ifelse(is.na(x), "Other", x) - } - ) + - geom_col() + - theme( - axis.text.x = element_text(angle = 60, hjust = 1), - legend.position = "bottom" - ) + - labs( - x = "Sample", - y = "Relative abundance", - fill = "Lineage" - ) - -ggsave( - filename = snakemake@output[["fig"]], - plot = demix_plot, - width = 159.2, - height = 119.4, - units = "mm", - dpi = 250 -) - - -# PLOT TABLES -log_info("Saving plot table") -demix %>% - mutate( - lineages = case_when( - lineages %in% main_lineages ~ lineages, - TRUE ~ "Other" - ) - ) %>% - group_by(sample, lineages) %>% - summarise(abundances = sum(abundances)) %>% - ungroup() %>% - left_join( - select( - metadata, - ID, - CollectionDate - ), - by = c("sample" = "ID") - ) %>% - write.csv(snakemake@output[["table"]], row.names = FALSE) diff --git a/workflow/scripts/report/get_annotation.py b/workflow/scripts/report/get_annotation.py deleted file mode 100644 index 552d70d..0000000 --- a/workflow/scripts/report/get_annotation.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -import json -import logging -import pandas as pd -from gb2seq.features import Features -from Bio import SeqIO - - -def main(): - logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO) - - logging.info("Reading features") - ft = Features(snakemake.input.gb) - with open(snakemake.input.features) as f: - feature_key = json.load(f) - - logging.info("Reading 
reference") - reference = str(next(SeqIO.parse(snakemake.input.ref, "fasta")).seq) - positions = [x for x in range(len(reference))] - - logging.info("Building annotation") - genes = [] - for pos in positions: - if len(ft.getFeatureNames(pos)) == 0: - genes.append("Intergenic") - else: - genes.append(feature_key[list(ft.getFeatureNames(pos))[0]]) - - logging.info("Writing table") - df = pd.DataFrame({"POS": positions, "GEN": genes}) - df.to_csv(snakemake.output.df, index= False) - - -if __name__ == "__main__": - main() diff --git a/workflow/scripts/report/heatmap.R b/workflow/scripts/report/heatmap.R deleted file mode 100644 index 08a2d73..0000000 --- a/workflow/scripts/report/heatmap.R +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(logger) -log_threshold(INFO) - -vcf <- read_tsv(snakemake@input[["vcf"]]) - -# Obtain sample names ordered by CollectionDate -date_order <- read_csv(snakemake@input[["metadata"]]) %>% - arrange(CollectionDate) %>% - pull(ID) %>% - unique() - -# Create SNP variable and select useful variables from vcf -vcf <- vcf %>% - dplyr::select(variant, REGION, ALT_FREQ) - -vcf <- vcf %>% - pivot_wider( - names_from = variant, - values_from = ALT_FREQ, - values_fill = 0, - values_fn = sum - ) %>% - arrange(factor(REGION, levels = date_order)) %>% - column_to_rownames(var = "REGION") - -log_info("Saving table to create heatmap") -write.csv(vcf, snakemake@output[["table"]]) diff --git a/workflow/scripts/report/nv_panel_data.R b/workflow/scripts/report/nv_panel_data.R new file mode 100644 index 0000000..5cb6880 --- /dev/null +++ b/workflow/scripts/report/nv_panel_data.R @@ -0,0 +1,108 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(dplyr) +library(readr) +library(stringr) +library(tibble) +library(jsonlite) +library(logger) + +log_threshold(INFO) + +log_info("Reading variants") +variants <- read_delim( + snakemake@input$variants, + col_select = c( + "SAMPLE", + "REGION", + "POS", + "ALT", + "VARIANT_NAME", + "ALT_FREQ", + "EFFECT", + "SYNONYMOUS" + ) +) +log_debug("Read {nrow(variants)} rows") + +log_info("Reading dates from metadata") +dates <- read_delim( + snakemake@input$metadata, + col_select = c("ID", "CollectionDate") +) + +log_info("Dating variants") +variants <- left_join( + variants, dates, + by = c("SAMPLE" = "ID") +) + +log_info("Classifying variants") +variants <- variants %>% + mutate( + NV_class = ifelse( + str_detect(ALT, fixed("-")) | str_detect(ALT, fixed("+")), + "INDEL", + "SNP" + ), + Class = ifelse(EFFECT == "intergenic_region", "Intergenic", SYNONYMOUS), + POS = as.numeric(POS) + ) %>% + # rowwise() %>% + mutate( + indel_len = ifelse(NV_class == "INDEL", str_length(ALT) - 1, NA), + indel_class = case_when( + EFFECT == "intergenic_region" ~ "Intergenic", + (NV_class == "INDEL") & (indel_len %% 3 == 0) ~ "In frame", + (NV_class == "INDEL") & (indel_len %% 3 != 0) ~ "Frameshift" + ) + ) %>% + ungroup() %>% + mutate( + group = case_when( + EFFECT == "intergenic_region" ~ "Intergenic", + NV_class == "SNP" ~ Class, + NV_class == "INDEL" ~ indel_class + ) + ) %>% + filter(ALT_FREQ > 0) +log_debug("Processed {nrow(variants)} rows") + +if (nrow(variants) == 0) { + log_warning("No variants found, using an empty table") + variants <- tibble( + SAMPLE = 
+    SAMPLE = as.character(NA),
+    REGION = as.character(NA),
+    VARIANT_NAME = as.character(NA),
+    ALT_FREQ = as.numeric(NA),
+    EFFECT = as.character(NA),
+    SYNONYMOUS = as.character(NA),
+    POS = as.numeric(NA),
+    ALT = as.character(NA),
+    NV_class = as.character(NA),
+    group = as.character(NA)
+  )
+}
+
+log_info("Writing processed table")
+write_csv(variants, snakemake@output$table)
+
+log_info("Writing NV summary")
+nv_counts <- variants %>%
+  distinct(VARIANT_NAME, NV_class) %>%
+  count(NV_class) %>%
+  deframe()
+list(
+  "INDELS" = nv_counts["INDEL"],
+  "SNV" = nv_counts["SNP"]
+) %>%
+  write_json(
+    snakemake@output$json,
+    auto_unbox = TRUE,
+    digits = NA
+  )
diff --git a/workflow/scripts/report/nv_panel_plot.R b/workflow/scripts/report/nv_panel_plot.R
new file mode 100644
index 0000000..da7eca7
--- /dev/null
+++ b/workflow/scripts/report/nv_panel_plot.R
@@ -0,0 +1,167 @@
+#!/usr/bin/env Rscript
+
+# Write stdout and stderr to log file
+log <- file(snakemake@log[[1]], open = "wt")
+sink(log, type = "message")
+sink(log, type = "output")
+
+library(dplyr)
+library(readr)
+library(ggplot2)
+library(ggpubr)
+library(jsonlite)
+library(tibble)
+library(purrr)
+library(logger)
+
+log_threshold(INFO)
+
+# Import file with plots style
+source(snakemake@params[["design"]])
+
+# Subplot A: windows ============================
+log_info("Reading variant windows")
+window <- read_csv(snakemake@input$window)
+
+log_debug("Calculating X axis limits")
+xlim_values <- c(
+  max(0, min(window$position) - snakemake@params$window_step),
+  max(window$position) + snakemake@params$window_step
+)
+
+log_info("Plotting variant windows")
+p1 <- window %>%
+  ggplot() +
+  aes(
+    x = position,
+    y = fraction,
+    color = feature
+  ) +
+  geom_point() +
+  geom_line(
+    aes(group = 1),
+    colour = "black",
+    alpha = 0.3
+  ) +
+  scale_y_continuous(
+    label = scales::percent,
+    limits = c(0, max(window$fraction) + mean(window$fraction))
+  ) +
+  xlim(xlim_values) +
+  # TODO: change GENE_PALETTE to selection of TRAJECTORY.PANEL.COLORS ?
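+  # Each point above is one window, colored by the genome feature it
+  # falls in; GENE_PALETTE maps features to colors in the design file.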
+  scale_color_manual(values = GENE_PALETTE) +
+  labs(
+    y = "Proportion of\nsites with NV",
+    x = "",
+    color = "Gene"
+  )
+
+log_info("Reading regions to highlight")
+highlight <- read_csv(snakemake@input$highlight_window_regions)
+
+if (nrow(highlight) != 0) {
+  log_info("Highlighting {nrow(highlight)} regions")
+  highlighted_sites <- unique(c(highlight$start, highlight$end))
+  p1 <- p1 +
+    geom_vline(xintercept = highlighted_sites, color = "orange") +
+    geom_text(
+      data = highlight,
+      aes(
+        x = (start + end) / 2,
+        y = max(window$fraction) + mean(window$fraction) / 2,
+        label = region
+      ),
+      inherit.aes = FALSE,
+      size = 3,
+      angle = 45
+    )
} else {
+  log_info("No highlighted regions")
+}
+
+# Subplot B: variants panel ============================
+log_info("Reading panel data")
+panel <- read_csv(snakemake@input$panel)
+
+log_info("Calculating sample chronological order")
+date_order <- panel %>%
+  arrange(CollectionDate) %>%
+  pull(SAMPLE) %>%
+  unique()
+
+log_info("Reading regions")
+regions <- read_json(snakemake@input$regions, simplifyVector = TRUE) %>%
+  enframe(name = "region", value = "coords") %>%
+  mutate(
+    start = map_dbl(coords, 1),
+    end = map_dbl(coords, 2),
+    length = end - start + 1
+  ) %>%
+  select(-coords)
+
+log_info("Plotting variants")
+p2 <- panel %>%
+  ggplot() +
+  aes(
+    x = POS,
+    y = factor(SAMPLE, date_order),
+    shape = factor(NV_class, c("SNP", "INDEL")),
+    color = group,
+    alpha = ALT_FREQ
+  ) +
+  geom_point(size = 3) +
+  geom_col(
+    data = regions,
+    aes(
+      x = length,
+      y = 0.3,
+      # TODO: legend shows as NA those without match in `regions`
+      fill = factor(region, rev(regions$region))
+    ),
+    inherit.aes = FALSE,
+    width = 0.3
+  ) +
+  # TODO: change GENE_PALETTE to selection of TRAJECTORY.PANEL.COLORS ?
+  scale_fill_manual(values = GENE_PALETTE) +
+  xlim(xlim_values) +
+  scale_color_manual(
+    labels = NV_TYPE_NAMES,
+    values = NV_TYPE_PALETTE
+  ) +
+  scale_y_discrete(drop = FALSE) +
+  labs(
+    x = "Genome position",
+    y = "Sample",
+    shape = "Variant class",
+    color = "Classification",
+    alpha = "Frequency",
+    fill = "Region"
+  ) +
+  guides(
+    fill = guide_legend(reverse = TRUE)
+  )
+
+# Plot arrangement ============================
+log_info("Arranging plots")
+p <- ggarrange(
+  p1,
+  p2,
+  nrow = 2,
+  align = "v",
+  legend.grob = get_legend(p2),
+  heights = c(2, 6),
+  legend = "right",
+  labels = c("A", "B")
+)
+
+log_info("Saving plot")
+ggsave(
+  filename = snakemake@output$plot,
+  plot = p,
+  width = snakemake@params$plot_width_mm,
+  height = snakemake@params$plot_height_mm,
+  units = "mm",
+  dpi = 250
+)
diff --git a/workflow/scripts/report/nv_panel_zoom_on_feature_data.py b/workflow/scripts/report/nv_panel_zoom_on_feature_data.py
new file mode 100644
index 0000000..4989e68
--- /dev/null
+++ b/workflow/scripts/report/nv_panel_zoom_on_feature_data.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+import sys
+import logging
+import json
+
+import pandas as pd
+
+
+if __name__ == "__main__":
+
+    logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO)
+
+    logging.info("Reading regions")
+    with open(snakemake.input.regions) as f:
+        regions = json.load(f)
+
+    if snakemake.wildcards.region_name not in regions:
+        logging.error(f"Region {snakemake.wildcards.region_name} is absent in {snakemake.input.regions}")
+        sys.exit(1)
+
+    start, end = regions[snakemake.wildcards.region_name]
+
+    logging.info("Reading input table")
+    df = pd.read_csv(snakemake.input.table)
+
+    logging.info("Filtering sites and writing results")
+    
df[(df.POS >= start) & (df.POS <= end)].to_csv(snakemake.output.table, index=False) diff --git a/workflow/scripts/report/phylo_plots.R b/workflow/scripts/report/phylo_plots.R deleted file mode 100644 index 447d873..0000000 --- a/workflow/scripts/report/phylo_plots.R +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(stringi) -library(ape) -library(ggtree) -library(data.table) -library(ggpubr) -library(pegas) -library(jsonlite) -library(logger) - -log_threshold(INFO) - -# Import file with plots style -source(snakemake@params[["design"]]) - -# legend thresholds for ml tree -legend.names <- c( - tip_label = "Target samples", - boot_alrt_pass = sprintf( - "UFBoot ≥ %s%s & SH-aLRT ≥ %s%s ", - snakemake@params[["boot_th"]], - "%", - snakemake@params[["alrt_th"]], - "%" - ) -) - -matrix <- read_csv(snakemake@input[["dist"]]) -metadata <- read_csv(snakemake@input[["metadata"]]) -tree_ml <- read.tree(snakemake@input[["ml"]]) %>% - root( - snakemake@params[["ref_name"]], - resolve.root = TRUE - ) - -study_names <- read.dna( - snakemake@input[["study_fasta"]], - format = "fasta", - as.matrix = FALSE, -) - -study_names <- study_names[ - !startsWith(names(study_names), snakemake@config[["ALIGNMENT_REFERENCE"]]) -] -study_names <- names(study_names) - -# Obtain sample names ordered by CollectionDate -date_order <- metadata %>% - arrange(CollectionDate) %>% - filter(ID %in% study_names) %>% - pull(ID) %>% - unique() - -tree_tiplab <- data.frame( - ID = date_order, - order = seq(1, length(date_order), 1) -) %>% - rowwise() %>% - mutate( - tip_label = sprintf( - "(%s)-%s", - order, - ID - ) - ) %>% - ungroup() %>% - add_row( - ID = snakemake@params[["ref_name"]], - order = 0, - tip_label = snakemake@params[["ref_name"]] - ) - - -# DATA PROCESSING #### -# Resolve possible negative edge lengths in n-j tree -fix_negative_edge_length <- function(nj.tree) { - edge_infos <- cbind( - nj.tree$edge, - nj.tree$edge.length - ) %>% - as.data.table - colnames(edge_infos) <- c("from", "to", "length") - - nega_froms <- edge_infos[length < 0, sort(unique(from))] - - for (nega_from in nega_froms) { - minus_length <- edge_infos[from == nega_from, ][order(length)][1, length] - edge_infos[from == nega_from, length := length - minus_length] - edge_infos[to == nega_from, length := length + minus_length] - } - nj.tree$edge.length <- edge_infos$length - nj.tree -} - -# Tree construction -if (snakemake@params$use_bionj) { - log_info("Constructing BIONJ tree based on distances") - tree_method = bionj -} else { - log_info("Constructing NJ tree based on distances") - tree_method = nj -} -tree <- matrix %>% - column_to_rownames(var = "...1") %>% - as.matrix() %>% - as.dist() %>% - tree_method() %>% - root(snakemake@params[["ref_name"]], resolve.root = TRUE) - -tree <- fix_negative_edge_length(tree) - -# Get patristic distances to ancestor from n-j tree -log_info("Getting patristic distances to ancestor from n-j tree") -tempest <- adephylo::distRoot( - tree, - "all", - method = "patristic" -) %>% - as.data.frame() %>% - rownames_to_column(var = "ID") %>% - filter(ID != snakemake@params[["ref_name"]]) %>% - rename(distance = ".") %>% - left_join( - select( - metadata, - ID, - CollectionDate - ) - ) %>% - mutate( - date_interval = as.numeric( - as.Date(CollectionDate) - min(as.Date(CollectionDate)) - ) - ) - - -# PLOTS #### -# (BIO)NJ tree -log_info("Plotting 
distance tree") -tree_plot <- ggtree(tree) %<+% - tree_tiplab + - geom_tiplab(aes(label = tip_label)) + - geom_treescale() + - geom_rootedge(0.5) + - xlim(0, max(tempest$distance) + 3.5) - -ggsave( - filename = snakemake@output[["tree"]], - plot = tree_plot, - width = snakemake@params[["plot_width_mm"]], - height = snakemake@params[["plot_height_mm"]], - units = "mm", - dpi = 250 -) - -# TempEst -log_info("Plotting temporal signal analysis") -tempest_fig <- tempest %>% - ggplot() + - aes( - x = date_interval, - y = distance - ) + - geom_smooth( - method = "lm", - fill = "gray95", - alpha = 0.6, - color = "red" - ) + - geom_point() + - labs( - y = "Root-to-tip distance", - x = "Days since the initial sampling" - ) - -ggsave( - filename = snakemake@output[["temest"]], - plot = tempest_fig, - width = snakemake@params[["plot_width_mm"]], - height = snakemake@params[["plot_height_mm"]], - units = "mm", - dpi = 250 -) - -# ML tree with context data - -# Internal nodes color -# Node labels contain SH-aLRT/UFboot values -aLRT.values <- sapply( - strsplit(tree_ml$node.label, "/"), - function(x) { - as.numeric(x[1]) - } -) - -bootstrap.values <- sapply( - strsplit(tree_ml$node.label, "/"), - function(x) { - as.numeric(x[2]) - } -) - -aLRT.mask <- aLRT.values >= snakemake@params[["alrt_th"]] -boot.mask <- bootstrap.values >= snakemake@params[["boot_th"]] - -# MRCA for target samples -log_info("Calculating MRCA of target samples") -study.mrca <- getMRCA(tree_ml, study_names) -study.mrca.clade <- extract.clade(tree_ml, study.mrca) -study.mrca.clade.ntips <- Ntip(study.mrca.clade) - -log_info("Building M-L tree annotation") -tree_ml.max.tip.length <- max(node.depth.edgelength(tree_ml)[1:length(tree$tip.label)]) -tree_ml.ntips <- length(tree_ml$tip.label) -tree_ml.nodes <- tree_ml$Nnode + tree_ml.ntips -tree_ml.labels <- tree_ml$tip.label[1:tree_ml.nodes] -tree_ml.node.pass <- c(rep(FALSE, tree_ml.ntips), aLRT.mask & boot.mask) - -ml.tree.annot <- tibble( - node = 1:tree_ml.nodes, - ) %>% - mutate( - Class = case_when( - tree_ml.labels %in% study_names ~ legend.names["tip_label"], - tree_ml.node.pass ~ legend.names["boot_alrt_pass"], - TRUE ~ NA - ) - ) - -log_info("Plotting M-L tree with context samples") -p <- ggtree(tree_ml, layout = "circular") %<+% ml.tree.annot + - geom_highlight(node = study.mrca, colour = "red", alpha = 0) + - geom_point(aes(color = Class, size = Class)) + - geom_treescale(1.05 * tree_ml.max.tip.length) + - geom_rootedge(0.05 * tree_ml.max.tip.length) + - xlim(-tree_ml.max.tip.length / 3, NA) + - scale_color_manual( - values = setNames(tree_colors[names(legend.names)], legend.names), - na.translate = FALSE - ) + - scale_size_manual( - values = setNames(node.size[names(legend.names)], legend.names), - na.translate = FALSE - ) - -ggsave( - filename = snakemake@output[["tree_ml"]], - plot = p, - width = snakemake@params[["plot_width_mm"]], - height = snakemake@params[["plot_height_mm"]], - units = "mm", - dpi = 250 -) - -# PLOT TABLES -log_info("Saving temporal signal table") -tempest %>% - transmute( - sample = ID, - Distance = distance, - CollectionDate = CollectionDate, - DaysSinceFirst = date_interval - ) %>% - write.csv(snakemake@output[["table"]], row.names = FALSE) - -# TEMPEST STATS -model <- lm(distance ~ date_interval, data = tempest) -p.value <- summary(model)$coefficients[2,4] - -# TREE STATS -study.node <- tree_ml$node.label[study.mrca - length(tree_ml$tip.label)] -monophyletic <- ifelse(is.monophyletic(tree_ml, study_names), "are", "are not") - -list( - "sub_rate" = 
model$coefficients[[2]] * 365, - "r2" = summary(model)$r.squared[[1]], - "pvalue" = ifelse(p.value < 0.001, "< 0.001", p.value), - "boot" = strsplit(study.node, "/")[[1]][2] %>% as.numeric(), - "alrt" = strsplit(study.node, "/")[[1]][1] %>% as.numeric(), - "monophyly" = monophyletic, - "clade_tips" = study.mrca.clade.ntips -) %>% - toJSON() %>% - write(snakemake@output[["json"]]) diff --git a/workflow/scripts/report/polymorphic_sites_over_time_data.R b/workflow/scripts/report/polymorphic_sites_over_time_data.R new file mode 100644 index 0000000..1100587 --- /dev/null +++ b/workflow/scripts/report/polymorphic_sites_over_time_data.R @@ -0,0 +1,76 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(readr) +library(dplyr) +library(jsonlite) +library(logger) + +log_threshold(INFO) + +log_info("Reading variants") +variants <- read_delim(snakemake@input$variants) + +log_info("Reading metadata") +metadata <- read_delim(snakemake@input$metadata) + +log_info("Calculating heterozygous sites") +sites <- variants %>% + filter(ALT_FREQ <= snakemake@params$max_alt_freq) %>% + left_join( + metadata, + by = c("SAMPLE" = "ID") + ) %>% + group_by(SAMPLE) %>% + summarise( + CollectionDate = min(as.Date(CollectionDate)), + n = n_distinct(POS) + ) %>% + ungroup() %>% + arrange(CollectionDate) %>% + mutate( + Day = as.numeric( + difftime(CollectionDate, min(CollectionDate), units = "days") + ) + ) + +if (nrow(sites) == 0) { + log_warn("No heterozygous sites found, using an empty table and no linear regression") + sites <- tibble( + SAMPLE = as.character(NA), + CollectionDate = as.Date(NA), + n = as.numeric(NA), + Day = as.numeric(NA) + ) %>% + filter(!is.na(SAMPLE)) + r_squared <- "none" + p_value_string <- "none" +} else if (nrow(sites) > 2) { + log_info("Calculating linear regression") + model <- lm(n ~ CollectionDate, data = sites) + r_squared <- summary(model)$r.squared[[1]] + p_value <- summary(model)$coefficients[2, 4] + p_value_string <- ifelse(p_value < 0.001, "< 0.001", p_value) +} else { + log_warn("Not enough data points for a linear regression") + r_squared <- "none" + p_value_string <- "none" +} + +log_info("Writing JSON summary") +list( + "r2" = r_squared, + "value" = p_value_string +) %>% + write_json( + snakemake@output$json, + auto_unbox = TRUE, + digits = NA + ) + +log_info("Writing processed table") +write_csv(sites, snakemake@output$table) diff --git a/workflow/scripts/report/polymorphic_sites_over_time_plot.R b/workflow/scripts/report/polymorphic_sites_over_time_plot.R new file mode 100644 index 0000000..2596195 --- /dev/null +++ b/workflow/scripts/report/polymorphic_sites_over_time_plot.R @@ -0,0 +1,45 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(readr) +library(dplyr) +library(ggplot2) +library(logger) + +log_threshold(INFO) + +# Import file with plots style +source(snakemake@params[["design"]]) + +log_info("Reading plot data") +df <- read_csv(snakemake@input$table) + +log_info("Plotting") +p <- df %>% + ggplot() + + aes(x = Day, y = n) + + geom_smooth( + method = "lm", + fill = "gray95", + alpha = 0.6, + colour = "orange" + ) + + geom_point(size = 2, shape = 1) + + labs( + x = "Days since 
the initial sampling", + y = "No. of polimorphic sites" + ) + +log_info("Saving plot") +ggsave( + filename = snakemake@output$plot, + plot = p, + width = snakemake@params$plot_width_mm, + height = snakemake@params$plot_height_mm, + units = "mm", + dpi = 250 +) diff --git a/workflow/scripts/report/snp_plots.R b/workflow/scripts/report/snp_plots.R deleted file mode 100644 index 44e2ebb..0000000 --- a/workflow/scripts/report/snp_plots.R +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(stringi) -library(ggrepel) -library(logger) -log_threshold(INFO) - -# Get colors -color <- grDevices::colors()[grep( - "gr(a|e)y", - grDevices::colors(), - invert = TRUE -)] -color <- color[grep("white", color, invert = TRUE)] -# Import file with plots style -source(snakemake@params[["design"]]) - -# DATA PREPROCESSING ##### -metadata <- read_csv(snakemake@input[["metadata"]]) - -vcf <- read_delim(snakemake@input[["vcf"]]) -data <- metadata %>% - filter( - ID %in% vcf$REGION - ) %>% - dplyr::select( - ID, - CollectionDate - ) - -# Obtain sample names ordered by CollectionDate -date_order <- metadata %>% - arrange(CollectionDate) %>% - pull(ID) %>% - unique() - -# Simplify features names and create SNP variable -vcf <- vcf %>% - dplyr::select( - variant, - REGION, - ALT_FREQ, - POS - ) - -# Fill positions without alt frequency with 0 -vcf <- vcf %>% - complete(nesting(variant, POS), REGION, fill = list(ALT_FREQ = 0)) - -# Join variants file and metadata file -vcf <- left_join(vcf, data, by = c("REGION" = "ID")) - -# Calculate days since first sample -vcf <- arrange( - vcf, - CollectionDate -) %>% - mutate( - interval = as.numeric(CollectionDate - min(CollectionDate)) - ) - -# PLOTS #### -## VOLCANO PLOT #### - -# Get list with all different polymorphisms -SNPs <- pull( - vcf, - variant -) %>% - unique() - -# Create an empty dataframe to be filled -cor.df <- data.frame( - snp = "", - cor = 0, - p.value = 0 -) %>% - filter(p.value != 0) - -cor.df.fill <- lapply( - SNPs, - function(snp) { - df <- filter( - vcf, - variant == snp - ) - - test <- cor.test( - df$ALT_FREQ, - df$interval, - method = snakemake@params$cor_method, - exact = snakemake@params$cor_exact - ) - - pvalue <- p.adjust( - test$p.value, - method = "BH", - n = length(SNPs) - ) - add_row( - cor.df, - snp = snp, - cor = test$estimate, - p.value = pvalue - ) - } -) %>% - bind_rows() - -# Plot a pseudo volcano plot with coorrelation index and p-value -log_info("Plotting pseudovolcano figure") -volcano <- cor.df.fill %>% - mutate( - trans.p = -log10(p.value), - label = case_when(p.value < 0.05 ~ snp) - ) %>% - ggplot() + - aes( - x = cor, - y = trans.p - ) + - geom_point() + - geom_label_repel(aes(label = label)) + - xlim(c(-1, 1)) + - geom_hline( - aes(yintercept = -log10(0.05)), - linetype = 2, - color = "red" - ) + - labs( - x = "Correlation", - y = "-log10(p-value)" - ) - -ggsave( - filename = snakemake@output[["pseudovolcano"]], - plot = volcano, - width = 159.2, - height = 119.4, - units = "mm", - dpi = 250 -) - - -## SNP PANEL #### -# SNPs significantly correlated with time -sign <- filter( - cor.df.fill, - p.value < 0.05 -) %>% - pull(snp) %>% - unique() - -# SNPs which are in positions with more than one alternative allele -dup <- vcf %>% - select( - variant, - POS - ) %>% - unique() %>% - group_by(POS) %>% - filter(n() > 1) %>% - ungroup() %>% - pull(variant) %>% - 
unique() - -subset <- c(sign, dup) %>% - unique() - -# Plot height depending on the number of SNPs assuming 4 columns in the plot -plot.height <- ceiling(length(subset) / 4) * 42 - -log_info("Plotting SNPs trends in time") -panel <- vcf %>% - filter(variant %in% subset) %>% - ggplot() + - aes( - x = interval, - y = ALT_FREQ, - color = variant - ) + - scale_color_manual(values = sample(color, length(subset))) + - geom_point() + - geom_line() + - theme( - legend.position = "bottom", - legend.text = element_text(size = 9) - ) + - labs( - x = "Days since first sample", - y = "Frequency", - color = "NV" - ) + - guides(color = guide_legend(ncol = 3)) - -if (length(subset) > 1) { - panel <- panel + - facet_wrap( - vars(POS), - nrow = ceiling(length(subset) / 4), - ncol = 4 - ) -} - -ggsave( - filename = snakemake@output[["snp_panel"]], - plot = panel, - width = 159.2, - height = max(100, plot.height), - units = "mm", - dpi = 250 -) - - -# PLOT TABLES #### -log_info("Saving coorelation table") -cor.df.fill %>% - transmute( - NV = snp, - correlation_coef = cor, - adj_pvalue = p.value - ) %>% - write.csv( - snakemake@output[["table_1"]], - row.names = FALSE - ) - -log_info("Saving SNPs trends table") -vcf %>% - filter(variant %in% subset) %>% - transmute( - sample = REGION, - POS = POS, - NV = variant, - ALT_FREQ = ALT_FREQ, - DaysSinceFirst = interval - ) %>% - write.csv(snakemake@output[["table_2"]], row.names = FALSE) diff --git a/workflow/scripts/report/summary_table.R b/workflow/scripts/report/summary_table.R index b78a103..0f96f7d 100644 --- a/workflow/scripts/report/summary_table.R +++ b/workflow/scripts/report/summary_table.R @@ -5,8 +5,9 @@ log <- file(snakemake@log[[1]], open = "wt") sink(log, type = "message") sink(log, type = "output") -library(tidyverse) +library(dplyr) library(logger) + log_threshold(INFO) metadata <- read.csv(snakemake@input[["metadata"]]) diff --git a/workflow/scripts/report/time_signal_data.R b/workflow/scripts/report/time_signal_data.R new file mode 100644 index 0000000..d209528 --- /dev/null +++ b/workflow/scripts/report/time_signal_data.R @@ -0,0 +1,70 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(readr) +library(dplyr) +library(tibble) +library(jsonlite) +library(ape) +library(adephylo) +library(logger) + +log_threshold(INFO) + +# Read distance tree and root +log_info("Reading tree") +tree <- read.tree(snakemake@input$tree) %>% + root(snakemake@params$outgroup_id, resolve.root = TRUE) + +# Read metadata +log_info("Reading metadata") +metadata <- read_csv(snakemake@input$metadata) + +# Get patristic distances to ancestor in distance tree +log_info("Calculating patristic distances to ancestor in distance tree") +time.signal <- distRoot( + tree, + "all", + method = "patristic" +) %>% + as.data.frame() %>% + rownames_to_column(var = "ID") %>% + filter(ID != snakemake@params$outgroup_id) %>% + rename(distance = ".") %>% + left_join( + select( + metadata, + ID, + CollectionDate + ), + by = "ID" + ) %>% + mutate( + date_interval = as.numeric( + as.Date(CollectionDate) - min(as.Date(CollectionDate)) + ) + ) + +# Save table +log_info("Saving time signal data") +log_debug("Saving table") +write_csv(time.signal, snakemake@output$table) + +# Time signal stats +log_debug("Building linear model") +model <- lm(distance ~ date_interval, data = time.signal) +p.value <- summary(model)$coefficients[2, 4] + +# Save regression stats as JSON +log_debug("Saving linear 
model") +list( + "sub_rate" = model$coefficients[[2]] * 365, + "r2" = summary(model)$r.squared[[1]], + "pvalue" = ifelse(p.value < 0.001, "< 0.001", p.value) +) %>% + toJSON() %>% + write(snakemake@output$json) diff --git a/workflow/scripts/report/time_signal_plot.R b/workflow/scripts/report/time_signal_plot.R new file mode 100644 index 0000000..d9af70c --- /dev/null +++ b/workflow/scripts/report/time_signal_plot.R @@ -0,0 +1,49 @@ +#!/usr/bin/env Rscript + +# Write stdout and stderr to log file +log <- file(snakemake@log[[1]], open = "wt") +sink(log, type = "message") +sink(log, type = "output") + +library(readr) +library(dplyr) +library(ggplot2) +library(logger) + +log_threshold(INFO) + +# Import file with plots style +source(snakemake@params[["design"]]) + +log_info("Reading time signal data") +time.signal <- read_csv(snakemake@input$table) + +# PLOTS #### +# TempEst +log_info("Plotting time signal data") +p <- time.signal %>% + ggplot() + + aes( + x = date_interval, + y = distance + ) + + geom_smooth( + method = "lm", + fill = "gray95", + alpha = 0.6, + color = "orange" + ) + + geom_point(size = 2, shape = 1) + + labs( + y = "Root-to-tip distance", + x = "Days since the initial sampling" + ) + +ggsave( + filename = snakemake@output$plot, + plot = p, + width = snakemake@params[["plot_width_mm"]], + height = snakemake@params[["plot_height_mm"]], + units = "mm", + dpi = 250 +) diff --git a/workflow/scripts/report/window_data.py b/workflow/scripts/report/window_data.py new file mode 100644 index 0000000..73233f8 --- /dev/null +++ b/workflow/scripts/report/window_data.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 + +import logging +import json +from typing import NewType, Dict, Iterable, List + +import pandas as pd +from Bio import SeqIO +from Bio.SeqFeature import SeqFeature, SimpleLocation, CompoundLocation + + +FeatureIndex = NewType("FeatureIndex", Dict[str, SimpleLocation | CompoundLocation]) + + +def iter_features_filtering(features: Iterable[SeqFeature], included: Dict[str, str], excluded: Dict[str, str]) -> Iterable[SeqFeature]: + # No filters + if len(included) == 0 and len(excluded) == 0: + logging.debug("Selecting all features") + return iter(features) + # Only inclusion filter + elif len(included) == 0 and len(excluded) != 0: + logging.debug(f"Selecting features excluding all of {excluded}") + return ( + feature for feature in features + if all( + (qualifier_value not in excluded.get(qualifier_key, [])) + for qualifier_key in excluded.keys() + for qualifier_value in feature.qualifiers.get(qualifier_key, []) + ) + ) + # Only exclusion filter + elif len(included) != 0 and len(excluded) == 0: + logging.debug(f"Selecting features including any of {included}") + return ( + feature for feature in features + if any( + (qualifier_value in included.get(qualifier_key, [])) + for qualifier_key in included.keys() + for qualifier_value in feature.qualifiers.get(qualifier_key, []) + ) + ) + # Inclusion then exclusion filter + else: + logging.debug(f"Selecting features including any of {included} and then excluding all of {excluded}") + included_features = ( + feature for feature in features + if any( + (qualifier_value in included.get(qualifier_key, [])) + for qualifier_key in included.keys() + for qualifier_value in feature.qualifiers.get(qualifier_key, []) + ) + ) + return ( + feature for feature in included_features + if all( + (qualifier_value not in excluded.get(qualifier_key, [])) + for qualifier_key in excluded.keys() + for qualifier_value in feature.qualifiers.get(qualifier_key, []) + 
) + ) + + +def index_features(features: Iterable[SeqFeature]) -> FeatureIndex: + index = FeatureIndex({}) + for feature in features: + identifier = "|".join(feature.qualifiers.get(snakemake.params.gb_qualifier_display, [])) + if identifier == "": + logging.error(f"Feature at {feature.location} has no qualifier '{snakemake.params.gb_qualifier_display}' and will be skipped") + elif feature.location is None: + logging.warning(f"Feature '{identifier}' has no location and will be skipped") + elif identifier in index: + logging.warning(f"Identifier '{identifier}' is already in feature index and will not be replaced by feature at {feature.location}") + else: + index[identifier] = feature.location + return index + + +def feature_names_at(position: int, feature_index: FeatureIndex) -> List[str]: + return [name for name, location in feature_index.items() if position in location] + + +def window_calculation(sites: set, window: int, step: int, size: int, feature_index: FeatureIndex) -> pd.DataFrame: + positions, fractions, features = [], [], [] + lim_sup = size + 1 + for position in range(1, lim_sup, step): + feature_names = ";".join(feature_names_at(position, feature_index)) + if len(feature_names) == 0: + features.append("Intergenic") + else: + # Include all features overlapping the site + features.append(feature_names) + # Add fraction (zero when the window extends past the sequence start) + if position - window not in range(1, lim_sup): + fractions.append(0.0) + else: + # Calculate number of polymorphisms in the window + num_snp = sum( + 1 for x in sites if x in range(position - window, position + 1) + ) + fractions.append(num_snp / window) + positions.append(position) + return pd.DataFrame({"position": positions, "fraction": fractions, "feature": features}) + + +def main(): + + logging.basicConfig( + filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], + level=logging.INFO + ) + + # Read input files + logging.info("Reading input variants file") + df = pd.read_table(snakemake.input.variants) + sites = set(df.POS) + + logging.info("Reading GenBank file") + gb = SeqIO.read(snakemake.input.gb, format="gb") + + logging.info("Indexing feature locations") + included = snakemake.params.features.get("INCLUDE", {}) + excluded = snakemake.params.features.get("EXCLUDE", {}) + feature_index = index_features( + iter_features_filtering(gb.features, included, excluded) + ) + + logging.info("Calculating polymorphic sites sliding window") + windows = window_calculation( + sites, + snakemake.params.window, + snakemake.params.step, + len(gb), + feature_index + ) + + logging.info("Saving results") + windows.to_csv(snakemake.output.window_df, index=False) + + logging.info("Saving window settings to JSON file") + with open(snakemake.output.json, "w") as fw: + json.dump( + {"window": snakemake.params.window, "step": snakemake.params.step}, + fw + ) + + +if __name__ == "__main__": + main() diff --git a/workflow/scripts/report/window_zoom_on_feature_data.py b/workflow/scripts/report/window_zoom_on_feature_data.py new file mode 100644 index 0000000..36f1e9f --- /dev/null +++ b/workflow/scripts/report/window_zoom_on_feature_data.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import sys +import logging +import json + +import pandas as pd + + +if __name__ == "__main__": + + logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO) + + logging.info("Reading regions") + with open(snakemake.input.regions) as f: + regions = json.load(f) + + if snakemake.wildcards.region_name not in regions: 
logging.error(f"Region {snakemake.wildcards.region_name} is absent in {snakemake.input.regions}") + sys.exit(1) + + start, end = regions[snakemake.wildcards.region_name] + + logging.info("Reading input table") + df = pd.read_csv(snakemake.input.table) + + logging.info("Filtering sites and writing results") + df[(df.position >= start) & (df.position <= end)].to_csv(snakemake.output.table, index=False) diff --git a/workflow/scripts/summary_demixing.R b/workflow/scripts/summarise_demix.R similarity index 74% rename from workflow/scripts/summary_demixing.R rename to workflow/scripts/summarise_demix.R index 3ae246f..9792202 100644 --- a/workflow/scripts/summary_demixing.R +++ b/workflow/scripts/summarise_demix.R @@ -5,14 +5,18 @@ log <- file(snakemake@log[[1]], open = "wt") sink(log, type = "message") sink(log, type = "output") -library(tidyverse) +library(dplyr) +library(readr) +library(tidyr) +library(stringr) library(logger) + log_threshold(INFO) # Empty dataframe to be filled with data demix <- data.frame( - "lineages" = NA, - "abundances" = NA, + "lineage" = NA, + "abundance" = NA, "sample" = NA ) %>% filter(!is.na(sample)) @@ -23,21 +27,25 @@ lapply( function(tsv_file) { read_tsv( tsv_file, - col_names = c("variable", "valor"), + col_names = c("variable", "value"), show_col_types = FALSE ) %>% filter( - row_number() %in% c(3, 4) + variable %in% c("lineages", "abundances") ) %>% pivot_wider( names_from = variable, - values_from = valor + values_from = value ) %>% separate_rows( lineages, abundances, sep = " " ) %>% + rename( + lineage = lineages, + abundance = abundances + ) %>% mutate( sample = str_extract( basename(tsv_file), diff --git a/workflow/scripts/tsv_to_vcf.py b/workflow/scripts/tsv_to_vcf.py index be43d9a..2201582 100644 --- a/workflow/scripts/tsv_to_vcf.py +++ b/workflow/scripts/tsv_to_vcf.py @@ -17,7 +17,7 @@ def tsv_to_vcf(tsv_file, vcf_file): # Process each row in the TSV file for index, row in tsv_df.iterrows(): # Extract fields from the TSV row - chrom = 'NC_045512.2' + chrom = snakemake.params.ref_name pos = row['POS'] ref = row['REF'] alt = row['ALT'] diff --git a/workflow/scripts/vcf_to_tsv.R b/workflow/scripts/vcf_to_tsv.R deleted file mode 100644 index 990f488..0000000 --- a/workflow/scripts/vcf_to_tsv.R +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env Rscript - -# Write stdout and stderr to log file -log <- file(snakemake@log[[1]], open = "wt") -sink(log, type = "message") -sink(log, type = "output") - -library(tidyverse) -library(logger) -log_threshold(INFO) - -# read data -log_info("Reading data") -vcf <- read_tsv(snakemake@input[["ann_vcf"]], comment = "##") -tsv <- read_tsv(snakemake@input[["pre_tsv"]]) - -tsv["variant"] <- str_extract(vcf$INFO, "p\\.([^|]*)", group = 1) -tsv["nuc_variant"] <- str_extract(vcf$INFO, "c\\.([^|]*)", group = 1) - -tsv <- tsv %>% - mutate( - variant = case_when( - is.na(variant) ~ paste(POS, REF, ">", ALT, sep = ""), - TRUE ~ paste(GFF_FEATURE, ":", variant, sep = "") - ) - ) - -log_info("Saving results") -write_tsv( - tsv, - snakemake@output[["tsv"]] -) diff --git a/workflow/scripts/weighted_distances.py b/workflow/scripts/weighted_distances.py deleted file mode 100644 index 2a5c0e7..0000000 --- a/workflow/scripts/weighted_distances.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Adapted script from https://github.com/PathoGenOmics-Lab/genetic-distances - -import logging -from typing import List, Tuple - -import pandas as pd -from Bio import SeqIO -from Bio.SeqRecord import SeqRecord -from 
Bio.Seq import Seq - - -def read_monofasta(path: str) -> SeqRecord: - fasta = SeqIO.parse(path, "fasta") - record = next(fasta) - if next(fasta, None) is not None: - logging.warning(f"There are unread records left in '{path}'") - return record - - -def read_masked_sites(vcf_path: str, mask_classes: List[str]) -> List[int]: - """ - Parse a VCF containing positions for masking. Assumes the VCF file is - formatted as in: - github.com/W-L/ProblematicSites_SARS-CoV2/blob/master/problematic_sites_sarsCov2.vcf - with a "mask" or "caution" recommendation in column 7. - Masked sites are specified with params. - """ - vcf = pd.read_csv( - vcf_path, - sep="\s+", - comment="#", - names=("CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO") - ) - return vcf.loc[vcf.FILTER.isin(mask_classes), "POS"].tolist() - - -def build_ancestor_variant_table(ancestor: Seq, reference: Seq, reference_name: str, masked_positions: List[int]) -> pd.DataFrame: - pos = [] - alt = [] - for i in range(1, len(ancestor) + 1): - if i not in masked_positions and ancestor[i-1] != reference[i-1]: - pos.append(i) - alt.append(reference[i-1]) - df = pd.DataFrame({"POS": pos, "ALT": alt}) - df["ALT_FREQ"] = 1 # As a reference genome, we assume all positions are monomorphic - df["REGION"] = reference_name - return df - - -def get_frequencies_in_position(variant_table: pd.DataFrame, sample_name: str, position: int, reference: Seq) -> Tuple[float]: - frequencies = {alt: 0. for alt in variant_table["ALT"].unique()} - alt_keys = sorted(frequencies.keys()) - sample_variant_table = variant_table[variant_table["REGION"] == sample_name] - # If the position has polimorphisims, allele frequencies are captured - if position in sample_variant_table["POS"].values: - alleles = sample_variant_table[sample_variant_table["POS"] == position] - for base in alt_keys: - if base in alleles["ALT"].values: - frequencies[base] = float(alleles["ALT_FREQ"][alleles["ALT"] == base].iloc[0]) - # Obtain frequency for reference allele - reference_allele = reference[position-1] - reference_freq = 1 - sum(frequencies.values()) - if reference_allele in frequencies: - frequencies[reference[position-1]] += reference_freq - else: - frequencies[reference[position-1]] = reference_freq - return tuple(frequencies[base] for base in alt_keys) - - -def heterozygosity(freqs: Tuple[float]) -> float: - return 1 - sum([f ** 2 for f in freqs]) - - -def calc_fst_weir_cockerham(hs: float, ht: float) -> float: - return (ht - hs) / ht if ht != 0 else 0 - - -def build_cache(variant_table: pd.DataFrame, samples: List[str], reference: Seq): - cache = {"freq": {}, "hz": {}} - for sample_name in set(samples): - for position in variant_table["POS"].astype("Int64").unique(): - if sample_name not in cache["freq"]: - cache["freq"][sample_name] = {} - if sample_name not in cache["hz"]: - cache["hz"][sample_name] = {} - cache["freq"][sample_name][position] = get_frequencies_in_position(variant_table, sample_name, position, reference) - logging.debug(f"Frequencies for '{sample_name}':{position} = {cache['freq'][sample_name][position]}") - cache["hz"][sample_name][position] = heterozygosity(cache["freq"][sample_name][position]) - logging.debug(f"Heterozygosity for '{sample_name}':{position} = {cache['hz'][sample_name][position]}") - return cache - - -def calc_heterozygosities(sample1_name: str, sample2_name: str, pos: int, cache: dict): - logging.debug(f"Calculating heterozygosities at position {pos} for '{sample1_name}' and '{sample2_name}'") - # Retrieve pre-computed values - freqs1 = 
cache["freq"][sample1_name][pos] - freqs2 = cache["freq"][sample2_name][pos] - hs1 = cache["hz"][sample1_name][pos] - hs2 = cache["hz"][sample2_name][pos] - # Calculate pairwise values - total_freqs = [(f1 + f2) / 2 for f1, f2 in zip(freqs1, freqs2)] - ht = heterozygosity(total_freqs) - hs = (hs1 + hs2) / 2 - return hs, ht - - -def calculate_pairwise_distance(positions: List[int], sample1_name: str, sample2_name: str, cache: dict) -> float: - if len(positions) == 0: - return 0 - else: - return sum([calc_fst_weir_cockerham(*calc_heterozygosities(sample1_name, sample2_name, pos, cache)) for pos in positions]) - - -def calculate_sample_distances(positions: List[int], sample_name: str, samples: List[str], cache: dict) -> List[float]: - return [calculate_pairwise_distance(positions, sample_name, other_sample_name, cache) for other_sample_name in samples] - - -def calculate_distance_matrix(variant_table: pd.DataFrame, samples: List[str], reference: Seq) -> pd.DataFrame: - positions = variant_table["POS"].astype("Int64").unique().tolist() - cache = build_cache(variant_table, samples, reference) - distance_matrix = {} - for sample_name in samples: - distance_matrix[sample_name] = calculate_sample_distances(positions, sample_name, samples, cache) - for i in range(len(samples)): - for j in range(i+1, len(samples)): - distance_matrix[samples[j]][i] = distance_matrix[samples[i]][j] - return pd.DataFrame(distance_matrix, index=samples) - - -def main(): - - logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO) - - logging.info("Reading input FASTA files") - ancestor = read_monofasta(snakemake.input.ancestor) - logging.debug(f"Ancestor: '{ancestor.description}', length={len(ancestor.seq)}") - reference = read_monofasta(snakemake.input.reference) - logging.debug(f"Reference: '{reference.description}', length={len(reference.seq)}") - - logging.info("Reading input tables") - masked_positions = read_masked_sites(snakemake.input.vcf, snakemake.params.mask_class) - logging.debug(f"Read {len(masked_positions)} masked positions") - input_table = pd.read_table(snakemake.input.tsv, sep="\t") - logging.debug(f"Read {len(input_table)} rows in input TSV") - ancestor_table = build_ancestor_variant_table(ancestor.seq, reference.seq, reference.id, masked_positions) - logging.debug(f"Ancestor has {len(ancestor_table)} variants") - variant_table = pd.concat([input_table, ancestor_table], ignore_index=True) - logging.debug(f"Combined table has {len(variant_table)} variants") - - logging.info(f"Calculating distance matrix") - sample_names = snakemake.params.samples + [reference.id] - distances = calculate_distance_matrix(variant_table, sample_names, ancestor.seq) - logging.debug(f"Distance matrix has shape: {distances.shape}") - - logging.info("Writing results") - distances.to_csv(snakemake.output.distances) - - -if __name__ == "__main__": - main() diff --git a/workflow/scripts/window.py b/workflow/scripts/window.py deleted file mode 100644 index 5ba6a8c..0000000 --- a/workflow/scripts/window.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import json -import pandas as pd -from gb2seq.features import Features - - -def window_calculation(sites: set, window: int, step: int, coord: str) -> pd.DataFrame: - ft = Features(coord) # Create Features object to obtain annotations - positions = [] - pct = [] - genes = [] - lim_sup = len(ft.reference.sequence) + 1 - for position in range(1, lim_sup, step): - if len(ft.getFeatureNames(position)) == 0: - 
genes.append("Intergenic") - else: - genes.append(list(ft.getFeatureNames(position))[0]) - # Add percent (excluding initial and final positions) - if position - window not in range(1, lim_sup): - pct.append(0) - else: - # Calculate no. of polimorphisms in the window - num_snp = len([x for x in sites if x in range(position - window, position + 1)]) - pct.append(num_snp / window) - positions.append(position) - return pd.DataFrame({"position": positions, "fractions": pct, "gen": genes}) - - -def main(): - - logging.basicConfig(filename=snakemake.log[0], format=snakemake.config["LOG_PY_FMT"], level=logging.INFO) - - logging.info("Reading input VCF") - df = pd.read_table(snakemake.input.vcf) - sites = set(df.POS) - - logging.info("Reading features") - with open(snakemake.input.features) as f: - features_key = json.load(f) - - logging.info("Sliding window calculation of proportion of polimorphic sites") - frame = window_calculation(sites, snakemake.params.window, snakemake.params.step, snakemake.input.gb) - - logging.info("Saving results") - frame.replace(features_key, inplace = True) - frame.to_csv(snakemake.output.window_df, index= False) - - -if __name__ == "__main__": - main()