configfile: "config.yaml"

SAMPLE = config.get("sample_name", "sample")
THREADS = config.get("threads", 4)
MIN_QUAL = config.get("min_qual", 20)
MIN_DEPTH = config.get("min_depth", 5)

rule all:
    input:
        "/output/fastqc_R1.html",
        "/output/fastqc_R2.html",
        f"/output/{SAMPLE}.filtered.vcf",
        f"/output/{SAMPLE}.stats.txt"

rule fastqc:
    """Quality control of raw FASTQ reads."""
    input:
        r1=config["sample_r1"],
        r2=config["sample_r2"]
    output:
        html_r1="/output/fastqc_R1.html",
        html_r2="/output/fastqc_R2.html"
    threads: 2
    shell:
        """
        fastqc {input.r1} {input.r2} --outdir /output --threads {threads}
        # Rename to predictable names
        R1_BASE=$(basename {input.r1} | sed 's/.fastq.gz//' | sed 's/.fq.gz//')
        R2_BASE=$(basename {input.r2} | sed 's/.fastq.gz//' | sed 's/.fq.gz//')
        mv /output/${{R1_BASE}}_fastqc.html {output.html_r1} 2>/dev/null || true
        mv /output/${{R2_BASE}}_fastqc.html {output.html_r2} 2>/dev/null || true
        """

rule trim:
    """Trim adapters and low-quality bases with fastp."""
    input:
        r1=config["sample_r1"],
        r2=config["sample_r2"]
    output:
        r1=temp("/output/trimmed_R1.fastq.gz"),
        r2=temp("/output/trimmed_R2.fastq.gz"),
        json="/output/fastp.json",
        html="/output/fastp.html"
    threads: THREADS
    shell:
        """
        fastp \
            -i {input.r1} -I {input.r2} \
            -o {output.r1} -O {output.r2} \
            --json {output.json} --html {output.html} \
            --thread {threads} \
            --detect_adapter_for_pe \
            --qualified_quality_phred 20 \
            --length_required 50
        """

rule index_reference:
    """Index reference genome for BWA-MEM2 and samtools."""
    input:
        ref=config["reference"]
    output:
        bwt=config["reference"] + ".bwt.2bit.64",
        fai=config["reference"] + ".fai"
    shell:
        """
        bwa-mem2 index {input.ref}
        samtools faidx {input.ref}
        """

rule align:
    """Align trimmed reads to reference genome with BWA-MEM2."""
    input:
        r1="/output/trimmed_R1.fastq.gz",
        r2="/output/trimmed_R2.fastq.gz",
        ref=config["reference"],
        bwt=config["reference"] + ".bwt.2bit.64"
    output:
        bam=temp(f"/output/{SAMPLE}.sorted.bam"),
        bai=f"/output/{SAMPLE}.sorted.bam.bai"
    threads: THREADS
    shell:
        """
        bwa-mem2 mem -t {threads} \
            -R '@RG\\tID:{SAMPLE}\\tSM:{SAMPLE}\\tPL:ILLUMINA\\tLB:lib1' \
            {input.ref} {input.r1} {input.r2} \
        | samtools sort -@ {threads} -o {output.bam}
        samtools index {output.bam}
        """

rule freebayes:
    """Call variants with freebayes."""
    input:
        bam=f"/output/{SAMPLE}.sorted.bam",
        bai=f"/output/{SAMPLE}.sorted.bam.bai",
        ref=config["reference"],
        fai=config["reference"] + ".fai"
    output:
        vcf=f"/output/{SAMPLE}.raw.vcf"
    shell:
        """
        freebayes -f {input.ref} {input.bam} > {output.vcf}
        """

rule filter_variants:
    """Filter variants by quality and depth."""
    input:
        vcf=f"/output/{SAMPLE}.raw.vcf"
    output:
        vcf=f"/output/{SAMPLE}.filtered.vcf"
    shell:
        """
        bcftools filter \
            -i 'QUAL>{MIN_QUAL} && INFO/DP>{MIN_DEPTH}' \
            {input.vcf} > {output.vcf}
        """

rule variant_stats:
    """Generate variant statistics."""
    input:
        vcf=f"/output/{SAMPLE}.filtered.vcf"
    output:
        stats=f"/output/{SAMPLE}.stats.txt"
    shell:
        """
        bcftools stats {input.vcf} > {output.stats}
        """
