#!/usr/bin/env bash

# Script to set up a run directory for a DAG Snakemake workflow

# Shell safety options, enabled together:
#   noclobber - a plain '>' redirection refuses to overwrite an existing file
#   pipefail  - a pipeline reports the status of the last stage that failed,
#               rather than only the status of its final stage
set -o noclobber -o pipefail

# Report a fatal problem and abort the script.
# Arguments: $1 - description of what went wrong
# Outputs:   a two-line report on stderr, so it never mixes with normal output
# Exits:     always terminates the shell with status 1
function error() {
	echo "An error has occurred:" >&2
	# printf rather than echo: a message such as "-n" must be printed verbatim
	printf '%s\n' "$1" >&2
	exit 1
}

# Print the command-line help and the list of available workflows, then
# terminate the script.
# Globals:   workflows (read) - array of workflow names; the entry named
#            exactly 'core' is not advertised
# Outputs:   help text on stdout
# Exits:     always terminates the shell with status 0
function usage {
	local help_text
	help_text=$(cat <<'EOUSAGE'
Usage: dag-wf-setup [-n name] [-d directory] [-w workflow] [-i inputdir] [-r reference] [-c] [-h]

  Options:
	* -n: Name of workflow job
	* -d: Directory to create for containing workflow files
	* -w: Workflow to initiate
	* -i: Directory containing input files for workflow
	* -r: reference genome sequence (fasta format)
	* -c: Copy input files into workflow (default: use symlink)
	* -h: Show this help message and exit
EOUSAGE
)

	# Build the comma-separated list entry by entry so that only the entry
	# named exactly 'core' is dropped (names merely containing "core" stay
	# intact) and no dangling separator is left behind.
	local wf_name
	local wf_list=""
	for wf_name in "${workflows[@]}"; do
		[[ ${wf_name} == core ]] && continue
		wf_list+="${wf_list:+, }${wf_name}"
	done

	echo "$help_text"
	echo
	echo "Available workflows: ${wf_list}"
	echo
	exit 0
}

# Every path below is derived from the active conda environment, so verify it
# is set before anything is sourced or listed from it.
if [[ -z ${CONDA_PREFIX} ]]; then
    error "No conda environment is defined. Please activate your conda environment using 'conda activate'"
fi

# Helper library shipped in the environment; presumably defines the
# parse_yaml function that is called later to read a workflow's meta.yaml.
source "${CONDA_PREFIX}/bin/dag-wf-parse-yaml" || error "Unable to load ${CONDA_PREFIX}/bin/dag-wf-parse-yaml"

ETC_DIR=${CONDA_PREFIX}/etc/dag-wf

# The name of each entry directly under ETC_DIR is treated as a workflow name.
# A glob is used instead of parsing 'ls' so names containing whitespace
# survive intact.
workflows=()
for wf_path in "${ETC_DIR}"/*; do
    # An unmatched glob is left as the literal pattern; skip it
    [[ -e ${wf_path} ]] || continue
    workflows+=("${wf_path##*/}")
done
unset wf_path

# Command used to place input files in the run directory. It is expanded
# unquoted by its users so that the flag is split from the command name.
copy_cmd="ln -s "

# Parse command-line options.
#   -n name, -d directory, -i input directory, -r reference: stored verbatim
#   -w workflow: accepted only if it is one of the entries in 'workflows'
#   -c: copy input files instead of symlinking them
#   -h: show help and exit
while getopts "n:d:w:i:r:ch" opt; do
    case $opt in
        n)
            name=$OPTARG
        ;;
        d)
            dir=$OPTARG
        ;;
        i)
            in_dir=$OPTARG
        ;;
        r)
            reference=$OPTARG
        ;;
        w)
            # Start from a clean slate so an invalid -w is detected even when
            # an earlier -w was valid.
            wf=
            for known_wf in "${workflows[@]}"; do
                # Quoted right-hand side: compare literally, not as a glob
                if [[ ${known_wf} == "$OPTARG" ]]; then
                    wf=$OPTARG
                    break
                fi
            done
            if [[ -z $wf ]]; then
                echo "Invalid workflow: $OPTARG" >&2
                # usage exits by itself; run it in a subshell so this branch
                # can report failure with a non-zero status.
                ( usage ) >&2
                exit 1
            fi
        ;;
        c)
            copy_cmd="cp -n "
            echo
            echo "N.B. Input files will be copied rather than symlinked..."
            echo "Consider whether using '-c' argument is necessary or not..."
            echo
        ;;
        h)
            usage
        ;;
        *)
            # Unknown option or missing option argument; getopts has already
            # printed its own diagnostic.
            ( usage ) >&2
            exit 1
        ;;
    esac
done


# -n, -d, -w and -i are all mandatory. usage exits by itself, so it is run in
# a subshell to let a missing argument end the script with a failure status.
if [[ -z ${name} || -z ${dir} || -z ${wf} || -z ${in_dir} ]]; then
    ( usage ) >&2
    exit 1
fi

# Load the workflow's settings from its meta.yaml.
if [[ -e ${ETC_DIR}/${wf}/meta.yaml ]]; then
	# NOTE(review): this evaluates the output of parse_yaml as shell code, so
	# meta.yaml must come from a trusted installation.
	eval "$(parse_yaml "${ETC_DIR}/${wf}/meta.yaml")"
	# The unquoted echo round-trip collapses runs of whitespace in each value.
	wf_in_dir=$(echo $wf__in_dir)
	wf_in_name=$(echo $wf__in_name)
	wf_convert_in_name=$(echo $wf__convert_in_name)
	wf_reference_required=$(echo $wf__reference_required)
else
	error "Workflow meta.yaml file missing"
fi

# Any non-empty reference_required value makes -r mandatory.
# NOTE(review): a literal "false" would also count as required - confirm the
# convention used in meta.yaml.
if [[ -n ${wf_reference_required} ]]; then
	if [[ -z ${reference} ]]; then
		error "${wf} requires a reference sequence to be provided"
	fi
	if [[ ! -e ${reference} ]]; then
		error "Specified reference (${reference}) does not exist"
	fi
fi

# Refuse to touch an existing run directory.
if [[ -d ${dir} ]]; then
    error "${dir} directory already exists"
fi

# Number of lines of 'conda env list' that mention the workflow name.
# grep -c (rather than -q) reads all of conda's output, so conda is never cut
# short by a closed pipe while pipefail is active. -F/-- make the workflow
# name a literal string instead of a regular expression or an option.
HAVE_ENV=$(conda env list | grep -c -F -- "${wf}")

# Create the environment only when nothing matched. Several matching lines
# (for example environments of similarly named workflows) still mean one is
# present and must not trigger a second creation attempt.
if [[ "${HAVE_ENV}" -lt 1 ]]; then
	echo "Creating dag-wf-${wf} conda environment"
	if ! conda env create -f "${ETC_DIR}/${wf}/environment.yaml"; then
		# Keep going so the run directory is still set up, but make the
		# failure visible.
		echo "Warning: failed to create the dag-wf-${wf} conda environment" >&2
	fi
	echo
fi

# Create the run directory, record the job name and workflow in it, and copy
# in the workflow's configuration and Snakefile.
echo "Setting up ${wf} workflow in ${dir}..."
echo
mkdir -p -- "${dir}" || error "Error creating ${dir}"
mkdir -p -- "${dir}/${wf_in_dir}/" || error "Error creating ${dir}/${wf_in_dir}"

# printf keeps the values exactly as given (echo would collapse whitespace
# and could treat a leading '-' as an option).
printf '%s\n' "${name}" > "${dir}/.jobname" || error "Error writing ${dir}/.jobname"
printf '%s\n' "${wf}" > "${dir}/.workflow" || error "Error writing ${dir}/.workflow"

cp -- "${ETC_DIR}/${wf}/config.json" "${dir}" || error "Error copying ${ETC_DIR}/${wf}/config.json to ${dir}"
cp -- "${ETC_DIR}/${wf}/Snakefile" "${dir}" || error "Error copying ${ETC_DIR}/${wf}/Snakefile to ${dir}"

# Copy source data files over, renaming from $wf_convert_in_name if necessary

# Resolve the input directory to a physical absolute path once, so symlinks
# created below stay valid wherever the run directory lives.
fq_indir=$(cd "${in_dir}" && pwd -P) || error "Unable to access input directory ${in_dir}"

# grep patterns selecting input files; an unset name is left out, because
# 'grep -e' without an argument is an error.
name_patterns=()
if [[ -n ${wf_in_name} ]]; then
	name_patterns+=(-e "${wf_in_name}")
fi
if [[ -n ${wf_convert_in_name} ]]; then
	name_patterns+=(-e "${wf_convert_in_name}")
fi

# Collect matching file names with a glob (not by parsing 'ls') so names
# containing whitespace are handled as single entries.
in_files=()
if (( ${#name_patterns[@]} > 0 )); then
	for in_path in "${fq_indir}"/*; do
		# An unmatched glob is left as the literal pattern; skip it
		[[ -e ${in_path} || -L ${in_path} ]] || continue
		file=${in_path##*/}
		if grep -q "${name_patterns[@]}" <<<"${file}"; then
			in_files+=("${file}")
		fi
	done
fi

echo
echo "Populating ${dir}/${wf_in_dir} directory with data files..."
echo
if (( ${#in_files[@]} == 0 )); then
	echo "Warning: no files in ${in_dir} match the workflow's input file names" >&2
fi

for file in "${in_files[@]}"; do
	outfile=${file}
	# Rename only when a conversion pattern is configured; an empty sed
	# pattern would be a syntax error.
	if [[ -n ${wf_convert_in_name} ]]; then
		outfile=$(sed "s/${wf_convert_in_name}/${wf_in_name}/" <<<"${outfile}")
	fi
	# Workflows whose name contains 'pe' get '.1.fq.gz'/'.2.fq.gz' style
	# suffixes rewritten to '_1.fq.gz'/'_2.fq.gz'.
	if [[ ${wf} == *pe* ]]; then
		outfile=$(sed -r 's/\.([12].fq.gz)/_\1/' <<<"${outfile}")
	fi
	fq_infile="${fq_indir}/${file}"
	printf '\t%s -> %s\n' "${fq_infile}" "${dir}/${wf_in_dir}/${outfile}"
	# $copy_cmd is deliberately unquoted: it holds a command plus its flag
	$copy_cmd "${fq_infile}" "${dir}/${wf_in_dir}/${outfile}" || error "Error running $copy_cmd ${fq_infile} ${dir}/${wf_in_dir}/${outfile}"
done

# Place the reference sequence in ${dir}/reference when the workflow needs one.
if [[ -n ${wf_reference_required} ]]; then
	# convert to fully qualified path...
	reffile=$(basename -- "${reference}")
	refdir=$(dirname -- "${reference}")
	ref_in_dir=$(cd "${refdir}" && pwd -P) || error "Unable to access reference directory ${refdir}"
	# The installed name drops a trailing '.fa', then a trailing '.fasta'
	# (literal extensions only).
	ref_outfile=${reffile%.fa}
	ref_outfile=${ref_outfile%.fasta}
	# Report the destination that is actually created
	printf '\t%s -> %s\n' "${reference}" "${dir}/reference/${ref_outfile}"
	mkdir -- "${dir}/reference" || error "Error creating ${dir}/reference"
	# $copy_cmd is deliberately unquoted: it holds a command plus its flag
	$copy_cmd "${ref_in_dir}/${reffile}" "${dir}/reference/${ref_outfile}" || error "Error running $copy_cmd ${ref_in_dir}/${reffile} ${dir}/reference/${ref_outfile}"
fi

# Closing hint for the user: a blank line followed by the two next steps.
printf '\n%s\n%s\n' \
	"Please inspect the Snakefile in the '${dir}' directory and modify as required." \
	"The workflow can be run by running 'cd ${dir}; dag-wf-run'"

