#!/usr/bin/env python

"""Adds rRNA annotations from barnnap to an annotated genome record"""

import argparse
import os
import shlex
import shutil
import tempfile

import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.SeqRecord import SeqRecord

from utils import check_format, to_fasta, drmaa_run

def run_barrnap(tmpdir: str, fasta_file: str):

    '''Generates gff3 rRNA annotations using barrnap

    Writes an executable bash wrapper script (barrnap.sh) into tmpdir that
    runs barrnap on fasta_file and redirects its output to
    <tmpdir>/<name>.gff3, then passes the script to drmaa_run.

    Required parameters:
        tmpdir: path to temporary directory
        fasta_file: path to fasta file to annotate

    Returns: 
        name of generated gff file (relative to tmpdir)
    '''

    # Derive the output stem from the fasta file name. Only a trailing
    # '.fa' suffix is dropped; a blanket str.replace would also eat '.fa'
    # in the middle of the name (e.g. 'genome.fasta' -> 'genomesta').
    name = os.path.basename(fasta_file)
    if name.endswith('.fa'):
        name = name[:-len('.fa')]
    gff_name = '{}.gff3'.format(name)

    # Quote both paths so names containing spaces or shell metacharacters
    # do not break the generated command line.
    command = 'barrnap --threads 8 {} > {}'.format(
        shlex.quote(fasta_file),
        shlex.quote('{}/{}'.format(tmpdir, gff_name)))

    # Write the script straight into tmpdir; the context manager closes the
    # handle even if the write fails.
    scriptname = "{}/barrnap.sh".format(tmpdir)
    with open(scriptname, "w") as script:
        script.write('#!/bin/bash\n{}\n'.format(command))
    os.chmod(scriptname, 0o755)

    # NOTE(review): presumably drmaa_run blocks until the job has finished,
    # since the caller reads the gff file immediately afterwards - confirm.
    drmaa_run(tmpdir, [scriptname], 'barrnap')
    return gff_name

def get_seqfeature(row):

    """Generate a seqfeature from a GFF line

    Reads the 'strand', 'start', 'end' and 'info' entries of the row. The
    'Name' attribute becomes the 'gene' qualifier and the 'product'
    attribute becomes the 'product' qualifier; a qualifier is only set when
    the corresponding attribute is present.

    Required parameters: 
        row: Pandas series representing row of gff file

    Returns:
        SeqFeature of type 'rRNA'
    """

    strand = 1 if row['strand'] == '+' else -1

    # The attribute column is a ';'-separated list of key=value pairs.
    # Keys are looked up by name and case-insensitively instead of by
    # position with a fixed-case prefix, so 'product=' and 'Product=' are
    # both handled and extra attributes do not shift the values.
    attributes = {}
    for field in row['info'].split(';'):
        key, sep, value = field.partition('=')
        if sep:
            attributes[key.strip().lower()] = value.strip()

    # GFF coordinates are 1-based and inclusive, whereas FeatureLocation is
    # 0-based with an exclusive end: shift the start, keep the end.
    location = FeatureLocation(
        int(row['start']) - 1, int(row['end']), strand=strand)

    feature = SeqFeature(location, type='rRNA')
    if 'name' in attributes:
        feature.qualifiers['gene'] = attributes['name']
    if 'product' in attributes:
        feature.qualifiers['product'] = attributes['product']
    feature.qualifiers['inference'] = 'COORDINATES:profile:Barrnap:0.9'

    return feature


def add_annotations(tmpdir: str, gff_file: str, genome: str, record_format: str):

    '''Adds rRNA annotations from gff3 file to genome records and writes 
    updated records

    The output is written to the current working directory as
    <genome basename>_with_rRNA<original suffix>.

    Required parameters:
        tmpdir: path to temporary directory
        gff_file: name of gff file
        genome: path to genome record
        record_format: type of record

    Returns:
        None
    '''

    genome_name, suffix = os.path.splitext(os.path.basename(genome))
    outfile = '{}_with_rRNA{}'.format(genome_name, suffix)

    colnames = ['ref', 'source', 'type', 'start', 'end', 'score', 'strand',
                'phase', 'info']
    gff_path = os.path.join(tmpdir, gff_file)
    try:
        # skiprows=1 drops the leading '##gff-version' line. 'ref' is read
        # as text so that numeric-looking sequence ids (e.g. '001') are not
        # turned into integers and then fail to match the record.
        annotations = pd.read_table(gff_path, sep="\t", header=None,
            skiprows=1, names=colnames, dtype={'ref': str})
    except pd.errors.EmptyDataError:
        # Nothing after the header line: no rRNA was predicted. Carry on
        # with an empty table so the records are written out unchanged.
        annotations = pd.DataFrame(columns=colnames)

    annotated_records = []
    for record in SeqIO.parse(genome, record_format):
        # NOTE(review): the fasta header written by to_fasta is not visible
        # here; accept either the record name or the record id as the GFF
        # sequence id - confirm which one to_fasta actually emits.
        refs = annotations['ref'].astype(str)
        is_match = (refs == record.name) | (refs == record.id)
        record_annots = annotations.loc[is_match]

        # Explicit row loop: DataFrame.apply(axis=1) behaves differently on
        # an empty selection.
        record.features.extend(
            get_seqfeature(row) for _, row in record_annots.iterrows())
        record.features.sort(key=lambda x: x.location.start)
        annotated_records.append(record)

    SeqIO.write(annotated_records, outfile, record_format)
    print('Annotated record written to {}'.format(outfile))

def main():

    '''Command-line entry point

    Parses --subject-genome, converts the record to fasta via to_fasta,
    runs barrnap on it inside a temporary directory created under the
    current directory, and writes the record with the rRNA features added.

    Returns:
        None
    '''

    parser = argparse.ArgumentParser(
        description="Adds rRNA annotations from barrnap output to annotated genome record")
    parser.add_argument('--subject-genome', dest='subj', 
        help='path to EMBL/Genbank file containing subject annotated genome', required=True)

    args = parser.parse_args()
    with tempfile.TemporaryDirectory(dir='.') as tmpdir:
        record_format = check_format(args.subj)
        fasta_file = to_fasta(args.subj, record_format)
        try:
            gff_file = run_barrnap(tmpdir, fasta_file)
            add_annotations(tmpdir, gff_file, args.subj, record_format)
        finally:
            # Remove the intermediate fasta even when barrnap or the
            # annotation step fails, so failed runs leave nothing behind.
            os.remove(fasta_file)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()