#!/usr/bin/env python

import subprocess
import os
import pickle
import re
import sys 
from datetime import datetime
from jinja2 import Environment, FileSystemLoader, select_autoescape
from dag_core import export_environment

def days_hours_minutes(td):
    """Split a time delta into whole days, hours and minutes.

    Args:
        td: An object exposing ``days`` and ``seconds`` attributes, such as
            a ``datetime.timedelta``.

    Returns:
        A ``(days, hours, minutes)`` tuple. Hours and minutes are derived
        from ``td.seconds`` only; any leftover seconds are discarded.
    """
    whole_minutes, _leftover_seconds = divmod(td.seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return td.days, hours, minutes

def _run_version_command(tool):
    """Run ``<tool> --version`` and return its decoded standard output.

    Prints an error to stderr and exits with status 1 if the executable
    cannot be started (for example, it is not on PATH).
    """
    try:
        result = subprocess.run([tool, '--version'], stdout=subprocess.PIPE)
    except OSError as err:
        print("Error running %s: %s" % (tool, err), file=sys.stderr)
        sys.exit(1)
    return result.stdout.decode('utf-8')


def _parse_version(pattern, output, tool):
    """Return the first capture group of ``pattern`` found in ``output``.

    Prints an error to stderr and exits with status 1 when nothing matches.
    """
    match = pattern.search(output)
    if match is None:
        print("Error parsing %s version number" % tool, file=sys.stderr)
        sys.exit(1)
    return match.group(1)


def get_versions():
    """Collect version numbers and citations for the external QC tools.

    Runs ``fastqc``, ``cutadapt``, ``multiqc`` and ``trim_galore`` with
    ``--version`` and extracts the version number from each output.

    Returns:
        dict: Maps the display names ``'FastQC'``, ``'CutAdapt'``,
        ``'MultiQC'`` and ``'Trim Galore'`` to a dict with the keys
        ``'version'`` (str) and ``'citation'`` (str).

    Raises:
        SystemExit: With status 1 if a tool cannot be started or its
            version string cannot be parsed.
    """
    # Run every tool before parsing anything, so a missing executable is
    # reported ahead of any parse error.
    outputs = {
        tool: _run_version_command(tool)
        for tool in ('fastqc', 'cutadapt', 'multiqc', 'trim_galore')
    }

    citations = {
        'FastQC': 'Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. http://www.bioinformatics.babraham.ac.uk/projects/fastqc',
        'CutAdapt': 'Martin, M. (2011). Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, 17(1), pp. 10-12, doi:https://doi.org/10.14806/ej.17.1.200',
        'MultiQC': 'Ewels, P. et al. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19)  pp. 3047-3048, doi:https://doi.org/10.1093/bioinformatics/btw354',
        'Trim Galore': 'Krueger, F. (2016). Trim Galore: A wrapper around Cutadapt and FastQC to consistently apply adapter and quality trimming to FastQ files, with extra functionality for RRBS data, https://github.com/FelixKrueger/TrimGalore/',
    }

    # Raw strings keep the regex escapes from being read as (invalid)
    # string escapes.
    fastqc_re = re.compile(r'FastQC v([\d\.]+)')
    generic_re = re.compile(r'version\s([\d\.]+)')

    fastqc_version = _parse_version(fastqc_re, outputs['fastqc'], 'fastqc')
    multiqc_version = _parse_version(generic_re, outputs['multiqc'], 'multiqc')
    trim_galore_version = _parse_version(
        generic_re, outputs['trim_galore'], 'trim_galore'
    )
    # The cutadapt output is used as-is apart from stripping surrounding
    # whitespace, so the trailing newline does not end up in the report.
    cutadapt_version = outputs['cutadapt'].strip()

    versions = {
        'FastQC': {'version': fastqc_version, 'citation': citations['FastQC']},
        'CutAdapt': {'version': cutadapt_version, 'citation': citations['CutAdapt']},
        'MultiQC': {'version': multiqc_version, 'citation': citations['MultiQC']},
        'Trim Galore': {'version': trim_galore_version, 'citation': citations['Trim Galore']},
    }
    return versions

# Script body: gather run metadata, per-sample read/base counts and tool
# versions, then render the report template to the path given as argv[1].
if len(sys.argv) < 2:
    print("Usage: %s <report_file>" % sys.argv[0], file=sys.stderr)
    sys.exit(1)
report_file = sys.argv[1]

# The start timestamp is read from .start_date.txt; the run time is measured
# from it up to now.
with open(".start_date.txt", 'r') as start_file:
    start_date = start_file.readline().rstrip("\n")
start_datetime = datetime.strptime(start_date, '%a %b %d %Y %X')
end_date = datetime.now()
runtime = end_date - start_datetime

with open(".jobname", 'r') as jobname_file:
    jobname = jobname_file.readline().rstrip("\n")

# NOTE(review): pickle.loads can execute arbitrary code from a crafted
# payload. This presumes .tgparams is only ever written by the workflow
# itself -- confirm, or move to a safer format such as JSON.
with open(".tgparams", "rb") as args_file:
    params = args_file.read()
tg_params = pickle.loads(params)

# One sample name per line; blank lines are skipped so they do not turn into
# bogus sample names.
with open(".sample.list") as f:
    SAMPLES = sorted(name for name in (line.rstrip("\n") for line in f) if name)

versions = get_versions()
export_environment()

cp = os.environ['CONDA_PREFIX']
template_dir = "%s/etc/dag-wf/qc-se/" % cp
# NOTE(review): select_autoescape(['html']) keys on the template file
# extension, so it looks like autoescaping is not active for 'report.tmpl'
# -- confirm this is intended.
env = Environment(
    loader=FileSystemLoader(template_dir), autoescape=select_autoescape(['html'])
)
template = env.get_template('report.tmpl')

count_re = re.compile(r'Num reads:([\d]+)\tNum Bases:\s([\d]+)')


def _read_counts(fastq_path):
    """Return the (reads, bases) strings reported by ``readfq`` for a file.

    Prints an error to stderr and exits with status 1 if the readfq output
    does not contain the expected counts.
    """
    stats = subprocess.run(['readfq', fastq_path], stdout=subprocess.PIPE)
    match = count_re.search(stats.stdout.decode('utf-8'))
    if match is None:
        print("Error parsing readfq output for %s" % fastq_path, file=sys.stderr)
        sys.exit(1)
    return match.group(1), match.group(2)


sample_summary = []
for sample in SAMPLES:
    orig_reads, orig_bases = _read_counts('fastq/%s.fq.gz' % sample)
    trimmed_reads, trimmed_bases = _read_counts(
        'trim_galore/%s_trimmed.fq.gz' % sample
    )

    # Percentage of bases removed by trimming; an input with no bases is
    # reported as 0% rather than dividing by zero.
    orig_base_count = int(orig_bases)
    if orig_base_count:
        trimmed_prop = ((orig_base_count - int(trimmed_bases)) / orig_base_count) * 100
    else:
        trimmed_prop = 0.0

    sample_summary.append({
        'name': sample,
        'orig_reads': orig_reads,
        'orig_bases': orig_bases,
        'trimmed_reads': trimmed_reads,
        'trimmed_bases': trimmed_bases,
        'trimmed_prop': "%.2f %%" % trimmed_prop,
        'orig_fastqc': 'fastqc/%s_fastqc.html' % sample,
        'trimmed_fastqc': 'fastqc_trimmed/%s_trimmed_fastqc.html' % sample,
        'trim_galore': 'trim_galore/%s.fq.gz_trimming_report.txt' % sample,
    })

# Use 'adapter_seq' when it is set (truthy); otherwise fall back to 'adapter'.
adapter = tg_params['adapter_seq'] or tg_params['adapter']

html = template.render(
    jobname=jobname,
    start_date=start_date,
    end_date=end_date.strftime("%a %b %d %Y %X"),
    run_time=runtime,
    sample_data=sample_summary,
    software=versions,
    run_trimming=tg_params['run_trimming'],
    trim_length=tg_params['trim_length'],
    qual=tg_params['trim_qual'],
    encoding=tg_params['encoding'],
    adapter=adapter,
    stringency=tg_params['stringency'],
    error_rate=tg_params['error_rate'],
    max_n=tg_params['max_n'],
    clip_5prime=tg_params['clip_5prime'],
    clip_3prime=tg_params['clip_3prime'],
    trim_n=tg_params['trim_n'],
)

# The report is opened only once rendering has succeeded, so a failure
# earlier in the script does not leave a truncated or empty report behind.
with open(report_file, 'w') as report:
    report.write(html)

