tg_params={} # trim_galore parameters, filled in by the configurable section below
mqc_params={} # MultiQC parameters, filled in by the configurable section below
##############################################################################################################
#
# Configurable parameters:
#
# Read trimming
#
# For full details of these parameters, see the trim_galore user guide:
#   https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
#
#   Values defined here are initially set to the tool defaults
#   NOTE(review): 'trim_n' and 'max_n' below may not match trim_galore's own defaults - confirm
#   against the user guide.
#
# Setting 'run_trimming' below to 'True' will result in quality and adapter trimming being carried out using
# trim_galore/cutadapt. When 'False', the trimming rules are not defined and only the raw reads are
# assessed. (True/False)
tg_params['run_trimming'] = True
# Minimum length of trimmed read to retain. Reads shorter than this value after trimming will be discarded
# (--length: int)
tg_params['trim_length'] = 20
# Quality score threshold for base trimming (-q: int)
tg_params['trim_qual'] = 20
# Quality value encoding: 'phred33' or 'phred64' (--phred33, --phred64)
tg_params['encoding'] = 'phred33'
# Adapter to detect: default - automatic identification (-a: string)
# Valid values: 'auto', 'illumina', 'nextera', 'small_rna' or an explicit adapter sequence
tg_params['adapter'] =  'auto'
# adapter_seq: Sequence of adapter to detect; when non-empty this overrides 'adapter' above (-a: string)
tg_params['adapter_seq'] = ''
# Overlap with adapter sequence required to trim a read (--stringency: int)
tg_params['stringency'] = 1
# Maximum error rate allowed (-e: float)
tg_params['error_rate'] = 0.1
# Trim Ns from the ends of reads (--trim-n: True/False)
tg_params['trim_n'] = True
# Maximum number of Ns a read may contain before the read is removed (--max_n: int)
tg_params['max_n'] = 5
# Clip number of bases from 5' end of read 1 (--clip_R1: int)
tg_params['R1_clip_5prime'] = 0
# Clip number of bases from 3' end of read 1 (--three_prime_clip_R1: int)
tg_params['R1_clip_3prime'] = 0
# Clip number of bases from 5' end of read 2 (--clip_R2: int)
tg_params['R2_clip_5prime'] = 0
# Clip number of bases from 3' end of read 2 (--three_prime_clip_R2: int)
tg_params['R2_clip_3prime'] = 0
# Retain unpaired reads following trimming in .unpaired_1.fq/.unpaired_2.fq (--retain_unpaired: True/False)
tg_params['retain_unpaired'] = False
# Unpaired read length cutoff for retention of read 1 (--length_1: int)
tg_params['R1_retain']=35
# Unpaired read length cutoff for retention of read 2 (--length_2: int)
tg_params['R2_retain']=35
#
# MultiQC
#
# Interactive plots are produced by default where there are fewer than 100 samples, otherwise flat plots
# are produced
# Valid values: 'default', 'flat' or 'interactive' (--flat/--interactive)
mqc_params['plot_type']='default'
#
##############################################################################################################

from dag_core import get_jobname, pickle_params
from dag_qc_pe import get_trim_galore_args, get_multiqc_args, get_samples 

# Directories
fastqDir = "fastq/"

# Setup workflow with functions from dag_qc_pe.py
SAMPLES=get_samples(fastqDir)

# Record the sample names, one per line, in a hidden file in the working
# directory (its consumer is not visible in this file). The context manager
# guarantees the handle is closed even if a write fails part-way through.
# The explicit for-loop is kept deliberately so that the name `sample`
# remains defined at module level after it finishes, exactly as before.
with open('.sample.list','w') as sample_file:
    for sample in SAMPLES:
        sample_file.write("%s\n" % sample)

JOBNAME=get_jobname()

# Persist both parameter dictionaries to hidden files via the dag_core helper.
pickle_params(tg_params, '.tgparams')
pickle_params(mqc_params, '.mqcparams')

# Argument values derived from the parameter dictionaries; they are
# interpolated into the shell commands of the trim_galore and multiqc rules.
trim_galore_args = get_trim_galore_args(tg_params)
multiqc_args = get_multiqc_args(mqc_params)

def multi_qc_inputs(SAMPLES):
    """Return the files that must exist before the multiqc rule can run.

    The result is a tuple of lists, one list per file pattern, each holding
    one path per sample. The FastQC HTML reports for both mates of the raw
    reads are always included. When tg_params['run_trimming'] is true the
    trimmed FASTQ files and the FastQC HTML reports of the trimmed reads are
    appended as well.
    """
    raw_reports = (
        expand("fastqc/{sample}_1_fastqc.html", sample=SAMPLES),
        expand("fastqc/{sample}_2_fastqc.html", sample=SAMPLES),
    )

    # Without trimming there is nothing beyond the raw-read reports.
    if not tg_params['run_trimming']:
        return raw_reports

    trimming_outputs = (
        expand("trim_galore/{sample}_1_val_1.fq.gz", sample=SAMPLES),
        expand("trim_galore/{sample}_2_val_2.fq.gz", sample=SAMPLES),
        expand("fastqc_trimmed/{sample}_1_val_1_fastqc.html", sample=SAMPLES),
        expand("fastqc_trimmed/{sample}_2_val_2_fastqc.html", sample=SAMPLES),
    )
    return raw_reports + trimming_outputs

# FastQC HTML reports for both mates of the raw reads of every sample.
FASTQCS = [
    *expand("fastqc/{sample}_1_fastqc.html", sample=SAMPLES),
    *expand("fastqc/{sample}_2_fastqc.html", sample=SAMPLES),
]
# Aggregated MultiQC report and the final per-job report.
MULTIQC = ["multiqc/report.html"]
REPORT = expand("{jobname}.report.html", jobname=JOBNAME)

# TARGETS            - everything `rule all` requests
# MULTIQC_OUTDIRS    - space-separated directories handed to the multiqc command
# MULTIQC_OUTPUTS    - files declared as outputs of the multiqc rule
if not tg_params['run_trimming']:
    TARGETS = [*FASTQCS, *MULTIQC, *REPORT]
    MULTIQC_OUTDIRS = 'fastqc'
    MULTIQC_OUTPUTS = (
        "multiqc/report.html",
        "multiqc/report_data/multiqc_data.json",
        "multiqc/report_data/multiqc_fastqc.txt",
        "multiqc/report_data/multiqc_general_stats.txt",
        "multiqc/report_data/multiqc.log",
        "multiqc/report_data/multiqc_sources.txt",
    )
else:
    # Trimming adds the trimmed reads, their FastQC reports and a cutadapt
    # summary table to what the workflow produces.
    TRIMMED = [
        *expand("trim_galore/{sample}_1_val_1.fq.gz", sample=SAMPLES),
        *expand("trim_galore/{sample}_2_val_2.fq.gz", sample=SAMPLES),
    ]
    FASTQCS_TRIMMED = [
        *expand("fastqc_trimmed/{sample}_1_val_1_fastqc.html", sample=SAMPLES),
        *expand("fastqc_trimmed/{sample}_2_val_2_fastqc.html", sample=SAMPLES),
    ]
    TARGETS = [*FASTQCS, *TRIMMED, *FASTQCS_TRIMMED, *MULTIQC, *REPORT]
    MULTIQC_OUTDIRS = 'fastqc fastqc_trimmed trim_galore'
    MULTIQC_OUTPUTS = (
        "multiqc/report.html",
        "multiqc/report_data/multiqc_data.json",
        "multiqc/report_data/multiqc_cutadapt.txt",
        "multiqc/report_data/multiqc_fastqc.txt",
        "multiqc/report_data/multiqc_general_stats.txt",
        "multiqc/report_data/multiqc.log",
        "multiqc/report_data/multiqc_sources.txt",
    )

# Default target rule: it has no outputs of its own and simply requests every
# path in TARGETS, which pulls in all other rules needed to build them.
rule all:
        input: expand("{targets}",targets=TARGETS)

# Run FastQC on the raw paired reads of one sample.
#
# Both FASTQ files are first copied to $TMPDIR/fastq and FastQC reads them
# from there; the HTML and zip reports are written to the fastqc/ directory.
rule fastqc:
    version: "1.0"
    # Use the job's own wildcard: a bare {sample} is looked up in the workflow
    # globals and picks up the leftover loop variable from the sample-list
    # loop, so every job would report the same (last) sample name.
    message: "Running fastqc on sample {wildcards.sample}..."
    input:
        R1 = "fastq/{sample}_1.fq.gz",
        R2 = "fastq/{sample}_2.fq.gz"
    output:
        "fastqc/{sample}_1_fastqc.html",
        "fastqc/{sample}_2_fastqc.html",
        "fastqc/{sample}_1_fastqc.zip",
        "fastqc/{sample}_2_fastqc.zip"
    threads: 2
    # {input.R1}/{input.R2} already start with "fastq/", so prefixing $TMPDIR
    # points at the staged copies made by the preceding cp.
    shell: "mkdir -p $TMPDIR/fastq;cp -v {input} $TMPDIR/fastq;fastqc -o fastqc --threads {threads} -f fastq $TMPDIR/{input.R1} $TMPDIR/{input.R2}"

# The trimming rules are only defined when trimming is enabled in tg_params.
if tg_params['run_trimming']:
    # Quality/adapter trim the paired reads of one sample with trim_galore.
    #
    # The inputs are staged in $TMPDIR/fastq, trim_galore writes to
    # $TMPDIR/trim_galore, and that directory is then copied back into the
    # working directory as trim_galore/.
    rule trim_galore:
        version: "1.0"
        # Use the job's own wildcard: a bare {sample} is looked up in the
        # workflow globals and would name the wrong sample.
        message: "Running trim-galore on sample {wildcards.sample}..."
        input:
            R1 = "fastq/{sample}_1.fq.gz",
            R2 = "fastq/{sample}_2.fq.gz"
        output: "trim_galore/{sample}_1_val_1.fq.gz",
                "trim_galore/{sample}_2_val_2.fq.gz",
                "trim_galore/{sample}_1.fq.gz_trimming_report.txt", 
                "trim_galore/{sample}_2.fq.gz_trimming_report.txt"
        threads: 1
        # Each mate is prefixed with $TMPDIR individually. "$TMPDIR/{input}"
        # expands to "$TMPDIR/<R1> <R2>", which would read R2 from the working
        # directory instead of the staged copy.
        shell:"mkdir -p $TMPDIR/fastq;mkdir -p $TMPDIR/trim_galore;cp -v {input} $TMPDIR/fastq; \
            trim_galore {trim_galore_args} --paired --gzip -o $TMPDIR/trim_galore $TMPDIR/{input.R1} $TMPDIR/{input.R2};cp -Rv $TMPDIR/trim_galore ."

    # Run FastQC on the trimmed reads of one sample.
    #
    # The trimmed FASTQ files are staged in $TMPDIR/trim_galore and the
    # reports are written to the fastqc_trimmed/ directory.
    rule fastqc_trimmed:
        version: "1.0"
        message: "Running fastqc on trimmed fastq file {input}..."
        input: R1="trim_galore/{sample}_1_val_1.fq.gz",
               R2="trim_galore/{sample}_2_val_2.fq.gz"
        output: "fastqc_trimmed/{sample}_1_val_1_fastqc.html",
                "fastqc_trimmed/{sample}_1_val_1_fastqc.zip",
                "fastqc_trimmed/{sample}_2_val_2_fastqc.html",
                "fastqc_trimmed/{sample}_2_val_2_fastqc.zip"
        threads: 2
        conda: "environment.yaml"
        shell: "mkdir -p $TMPDIR/trim_galore;cp -v {input} $TMPDIR/trim_galore/; \
            fastqc -o fastqc_trimmed --threads {threads} -f fastq $TMPDIR/{input.R1} $TMPDIR/{input.R2}"

# Aggregate the outputs of the preceding rules into one MultiQC report.
#
# Inputs come from multi_qc_inputs(), so this rule runs only after every
# per-sample report (and, with trimming enabled, every trimmed file) exists.
# The report is written to multiqc/report.html with its data files under
# multiqc/report_data/, as listed in MULTIQC_OUTPUTS.
rule multiqc:
    version: "1.0"
    message: "Running multiqc on job outputs..."
    input: multi_qc_inputs(SAMPLES)
    output: expand("{multiqc_outputs}",multiqc_outputs=MULTIQC_OUTPUTS) 
    # MULTIQC_OUTDIRS already lists 'fastqc' in both configurations, so the
    # directory is not named a second time here; naming it twice made MultiQC
    # scan the same reports twice.
    shell:
        "multiqc {multiqc_args} -f --filename report --config $CONDA_PREFIX/etc/dag-wf/qc-pe/multiqc.conf --outdir multiqc {MULTIQC_OUTDIRS}"

# Produce the final "<jobname>.report.html" file by calling the external
# dag-qc-pe.report.py script with the output filename as its only argument.
# The MultiQC report is declared as an input so this rule runs after multiqc.
rule report:
        message: "Generating report..."
        input: "multiqc/report.html"
        output: expand("{jobname}.report.html",jobname=JOBNAME)
        shell: "dag-qc-pe.report.py {output}"

# Workflow failure handler: delete the '.start_date.txt' marker file from the
# working directory if it is present (the file is created outside this file).
# NOTE(review): `os` is not imported in this file; presumably it is supplied by
# the namespace Snakemake executes the Snakefile in - confirm.
onerror:
    if os.path.exists('.start_date.txt'):
        os.remove(".start_date.txt")
