# Load workflow settings from config.yaml. The rules below read the required
# keys "r1" and "r2" from it, plus optional keys that fall back to defaults
# given in the rule's params section.
configfile: "config.yaml"

rule all:
    """Default target: request every ranked stage table and the run summary."""
    input:
        # Keyword names mirror the output names used by rule aptaselect.
        s1="/output/stage1_joined_ranked.tsv",
        s2="/output/stage2_selection_ranked.tsv",
        s3="/output/stage3_sort1_ranked.tsv",
        s4="/output/stage4_sort2_ranked.tsv",
        summary="/output/summary.txt"

rule aptaselect:
    """Run the full AptaSelect pipeline: Join, Selection, Sort1, Sort2, Aggregate & Rank.

    Inputs:
        r1, r2: read files taken from the required config keys "r1" and "r2".
    Outputs:
        Four ranked per-stage TSV tables (s1-s4) and a summary text file, all
        under /output (the directory passed to the script via --outdir).
    Params:
        Every value except library_mode comes from the config file, with the
        defaults shown below used when the key is absent.
    Log:
        Combined stdout/stderr of the script is copied to /output/aptaselect.log.
    """
    input:
        r1=config["r1"],
        r2=config["r2"]
    output:
        s1="/output/stage1_joined_ranked.tsv",
        s2="/output/stage2_selection_ranked.tsv",
        s3="/output/stage3_sort1_ranked.tsv",
        s4="/output/stage4_sort2_ranked.tsv",
        summary="/output/summary.txt"
    params:
        # Exactly one of the two mode flags is always passed to the script.
        library_mode="--long-mode" if config.get("long_mode", False) else "--short-mode",
        min_overlap=config.get("min_overlap", 6),
        max_mismatch_pct=config.get("max_mismatch_pct", 0.08),
        max_mismatches=config.get("max_mismatches", 1),
        sel_left=config.get("sel_left", "CCACTTCTCCTTCCATCCTAAAC"),
        sel_right=config.get("sel_right", "GAGTAGTTTGGAGGGTTGTCTG"),
        sort1_left=config.get("sort1_left", "TCCTAAAC"),
        sort1_right=config.get("sort1_right", "GAGTAGTT"),
        sort2_left=config.get("sort2_left", "TCTCTCTCTC"),
        sort2_right=config.get("sort2_right", "GAGAGAGAGA"),
        sort2_between=config.get("sort2_between_length", 20),
        chunk_size=config.get("chunk_size", 10000)
    log:
        "/output/aptaselect.log"
    # Values that originate from the config file (paths and parameters) are
    # interpolated with the ":q" specifier so they are shell-quoted; this keeps
    # paths containing spaces intact and stops shell metacharacters in config
    # values from being interpreted. library_mode is a fixed flag chosen above
    # and is left as-is. stderr is merged into stdout and tee'd so the output is
    # both shown on the console and written to the log file.
    shell:
        """
        python /pipeline/scripts/aptaselect.py \
            --r1 {input.r1:q} \
            --r2 {input.r2:q} \
            --outdir /output \
            {params.library_mode} \
            --min-overlap {params.min_overlap:q} \
            --max-mismatch-pct {params.max_mismatch_pct:q} \
            --max-mismatches {params.max_mismatches:q} \
            --sel-left {params.sel_left:q} \
            --sel-right {params.sel_right:q} \
            --sort1-left {params.sort1_left:q} \
            --sort1-right {params.sort1_right:q} \
            --sort2-left {params.sort2_left:q} \
            --sort2-right {params.sort2_right:q} \
            --sort2-between-length {params.sort2_between:q} \
            --chunk-size {params.chunk_size:q} \
            2>&1 | tee {log:q}
        """