# Snakemake file for carrying out QC analysis and trimming on single-ended Illumina-style libraries

##############################################################################################################
#
# Configurable parameters:
#
# Read trimming
#
# For full details of these parameters, see the trim_galore user guide:
#   https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
#
tg_params = {
    # Setting 'run_trimming' to True will result in quality and adapter trimming being carried out
    # using trim_galore/cutadapt. (True/False)
    'run_trimming': True,
    # Minimum length of trimmed read to retain. Reads shorter than this value after trimming will be
    # discarded (--length: int)
    'trim_length': 50,
    # Quality score threshold for base trimming (-q: int)
    'trim_qual': 20,
    # Quality value encoding: 'phred33' or 'phred64' (--phred33, --phred64)
    'encoding': 'phred33',
    # Adapter to detect: default - automatic identification (-a: string)
    # Valid values: 'auto', 'illumina', 'nextera', 'small_rna' or other adapter sequence
    'adapter': 'auto',
    # Sequence of adapter to detect (overrides 'adapter' above) (-a: string)
    'adapter_seq': '',
    # Overlap with adapter sequence required to trim sequence (--stringency: int)
    'stringency': 1,
    # Maximum error rate allowed (-e: float)
    'error_rate': 0.1,
    # Trim Ns from the ends of reads (--trim-n: True/False)
    'trim_n': True,
    # Maximum number of Ns allowed in a read before the read is removed (--max_n: int)
    'max_n': 5,
    # Clip number of bases from 5' end of read (--clip_R1: int)
    'clip_5prime': 0,
    # Clip number of bases from 3' end of read (--three_prime_clip_R1: int)
    'clip_3prime': 0,
}
#
##############################################################################################################

from dag_core import get_jobname, pickle_params
from dag_qc_se import get_trim_galore_args, get_samples

# Directories
# Directory handed to get_samples(); the fastqc and trim_galore rules also read
# their inputs from "fastq/".
fastqDir = "fastq/"

# Setup workflow with helper functions from dag_core.py and dag_qc_se.py
# SAMPLES supplies the values for the {sample} wildcard in every expand() call.
# NOTE(review): presumably the sample names found in fastqDir - confirm against dag_qc_se.get_samples
SAMPLES=get_samples(fastqDir)
# JOBNAME is used to name the final '{jobname}.report.html' output.
JOBNAME=get_jobname()
# NOTE(review): presumably persists the trimming parameters to '.tgparams' - confirm against dag_core.pickle_params
pickle_params(tg_params, '.tgparams')
# Argument string interpolated into the trim_galore shell command.
trim_galore_args = get_trim_galore_args(tg_params)

def multi_qc_inputs(SAMPLES):
    """Return the files that must exist before the multiqc rule runs.

    Args:
        SAMPLES: values substituted for the ``{sample}`` placeholder.

    Returns:
        When trimming is disabled, the list of raw FastQC HTML reports.
        When trimming is enabled, a tuple of three lists: raw FastQC
        reports, trimmed fastq files and FastQC reports of the trimmed
        reads.
    """
    raw_reports = expand("fastqc/{sample}_fastqc.html", sample=SAMPLES)
    if not tg_params['run_trimming']:
        return raw_reports

    trimmed_reads = expand("trim_galore/{sample}_trimmed.fq.gz", sample=SAMPLES)
    trimmed_reports = expand("fastqc_trimmed/{sample}_trimmed_fastqc.html", sample=SAMPLES)
    return (raw_reports, trimmed_reads, trimmed_reports)

# Outputs requested whether or not trimming is enabled
FASTQCS = expand("fastqc/{sample}_fastqc.html", sample=SAMPLES)
MULTIQC = ["multiqc/report.html"]
REPORT = expand("{jobname}.report.html", jobname=JOBNAME)

# TARGETS:         every file requested by 'rule all'
# MULTIQC_OUTDIRS: space-separated directories handed to multiqc
# MULTIQC_OUTPUTS: files declared as outputs of 'rule multiqc'
if not tg_params['run_trimming']:
    TARGETS = FASTQCS + MULTIQC + REPORT
    MULTIQC_OUTDIRS = 'fastqc'
    MULTIQC_OUTPUTS = (
        "multiqc/report.html",
        "multiqc/report_data/multiqc_data.json",
        "multiqc/report_data/multiqc_fastqc.txt",
        "multiqc/report_data/multiqc_general_stats.txt",
        "multiqc/report_data/multiqc.log",
        "multiqc/report_data/multiqc_sources.txt",
    )
else:
    TRIMMED = expand("trim_galore/{sample}_trimmed.fq.gz", sample=SAMPLES)
    FASTQCS_TRIMMED = expand("fastqc_trimmed/{sample}_trimmed_fastqc.html", sample=SAMPLES)
    TARGETS = FASTQCS + TRIMMED + FASTQCS_TRIMMED + MULTIQC + REPORT
    MULTIQC_OUTDIRS = 'fastqc fastqc_trimmed trim_galore'
    # Same as the untrimmed case plus the cutadapt summary
    MULTIQC_OUTPUTS = (
        "multiqc/report.html",
        "multiqc/report_data/multiqc_data.json",
        "multiqc/report_data/multiqc_cutadapt.txt",
        "multiqc/report_data/multiqc_fastqc.txt",
        "multiqc/report_data/multiqc_general_stats.txt",
        "multiqc/report_data/multiqc.log",
        "multiqc/report_data/multiqc_sources.txt",
    )

# Default target: request every file listed in TARGETS.
rule all:
    input:
        expand("{targets}", targets=TARGETS)

# Run FastQC on one raw fastq file. The input is first copied into
# $TMPDIR/fastq and FastQC reads that staged copy; the reports are written
# straight into fastqc/.
rule fastqc:
    input:
        "fastq/{sample}.fq.gz"
    output:
        "fastqc/{sample}_fastqc.html",
        "fastqc/{sample}_fastqc.zip"
    threads: 1
    conda:
        "environment.yaml"
    version: "1.0"
    message:
        "Running fastqc on fastq file {input}..."
    shell:
        "mkdir -p $TMPDIR/fastq;"
        "cp -v {input} $TMPDIR/fastq;"
        "fastqc -o fastqc --threads {threads} -f fastq $TMPDIR/{input}"
    
# The trimming rules below are only defined when trimming is enabled in tg_params.
if tg_params['run_trimming']:
    # Trim one sample: stage the fastq file under $TMPDIR/fastq, run trim_galore
    # there with the arguments held in trim_galore_args (output is gzipped into
    # $TMPDIR/trim_galore), then copy the staged trim_galore directory back into
    # the working directory.
    # NOTE(review): 'cp -Rv $TMPDIR/trim_galore .' copies the whole staging
    # directory rather than only this sample's files. If several jobs share one
    # $TMPDIR this may also copy other samples' partially written outputs -
    # confirm that $TMPDIR is private to each job.
    rule trim_galore:
        version: "1.0"
        message: "Running trim-galore on fastq file {input}..."
        input: "fastq/{sample}.fq.gz"
        output: ["trim_galore/{sample}_trimmed.fq.gz","trim_galore/{sample}.fq.gz_trimming_report.txt"]
        threads: 1
        conda: "environment.yaml"
        shell:"mkdir -p $TMPDIR/fastq;mkdir -p $TMPDIR/trim_galore; cp -v {input} $TMPDIR/fastq; \
                trim_galore {trim_galore_args} --gzip -o $TMPDIR/trim_galore $TMPDIR/{input};cp -Rv $TMPDIR/trim_galore ."

    # Run FastQC on one trimmed fastq file. The file is copied into
    # $TMPDIR/trim_galore and FastQC reads that staged copy; the reports are
    # written straight into fastqc_trimmed/.
    rule fastqc_trimmed:
        version: "1.0"
        message: "Running fastqc on trimmed fastq file {input}..."
        input: "trim_galore/{sample}_trimmed.fq.gz"
        output:
            ["fastqc_trimmed/{sample}_trimmed_fastqc.html","fastqc_trimmed/{sample}_trimmed_fastqc.zip"]
        threads: 1
        conda: "environment.yaml"
        shell: "mkdir -p $TMPDIR/trim_galore;cp -v {input} $TMPDIR/trim_galore/; fastqc -o fastqc_trimmed --threads {threads} -f fastq $TMPDIR/{input}"

# Aggregate the per-sample results into a single MultiQC report under multiqc/.
# The inputs come from multi_qc_inputs(), so this rule runs only after the
# FastQC (and, when trimming is enabled, trim_galore) outputs exist.
# MULTIQC_OUTDIRS already lists every directory to scan, including 'fastqc',
# so no directory is named separately on the command line; naming 'fastqc'
# again would pass the same directory to multiqc twice.
rule multiqc:
    version: "1.0"
    message: "Running multiqc on job outputs..."
    input: multi_qc_inputs(SAMPLES)
    output: expand("{multiqc_outputs}", multiqc_outputs=MULTIQC_OUTPUTS)
    conda: "environment.yaml"
    shell:
        "multiqc -f --filename report --config $CONDA_PREFIX/etc/dag-wf/qc-se/multiqc.conf "
        "--outdir multiqc {MULTIQC_OUTDIRS}"

# Produce the final '<jobname>.report.html' once the MultiQC report exists.
rule report:
    input:
        "multiqc/report.html"
    output:
        expand("{jobname}.report.html", jobname=JOBNAME)
    conda:
        "environment.yaml"
    message:
        "Generating report..."
    shell:
        "dag-qc-se.report.py {output}"

# Error handler: remove the '.start_date.txt' marker file if it is present.
# NOTE(review): the marker is not created in this file - presumably written by
# the workflow launcher; confirm.
onerror:
    # Imported here so the handler does not depend on Snakemake exposing 'os'
    # in the Snakefile namespace.
    import os

    # Attempt the removal directly instead of checking for existence first, so
    # the file disappearing in between cannot raise.
    try:
        os.remove(".start_date.txt")
    except FileNotFoundError:
        pass
