#!/usr/bin/env bash

# Fail fast: abort on any command error, including a failure in any stage
# of a pipeline.
set -o errexit -o pipefail

# ANSI escape sequences, stored unexpanded (the backslash escapes are only
# interpreted when the value is later printed through printf).
DEF='\033[0m'
RED='\033[0;31m'
BOLD='\033[1m'


#######################################
# Print the help text to stdout, followed by a blank line, then terminate
# the script.
# Arguments:
#   $1 - optional exit status to terminate with (defaults to 0)
#######################################
function usage {
	# Quoted delimiter: the help text is emitted literally, with no expansion.
	cat <<'EOUSAGE'
Usage: dag-wf-rename-files [-i inputdir] [-m mapping_file] [-h]

  Renames batches of files to make them amenable to running through DAG workflows
 
  The required mapping file should be a tab delimited file containing the following fields:

  Original_filename Sample  Read

  where:
     original_filename:  the current name of the file
     sample: the sample name to which the file relates
     read:  either '1' or '2' (not required for single-ended fastq files)

	Any uncompressed fastq files found will be compressed using gzip

  Options:
	* -i: path to input directory containing files to rename
	* -m: mapping file of filename to sample mappings
	* -h: Display help
EOUSAGE

	echo
	exit "${1:-0}"
}

# Option parsing...
# The leading ':' in the optstring puts getopts into silent error mode: a
# missing option argument is reported as ':' and an unknown option as '?',
# with the offending option letter in OPTARG. Both are handled explicitly
# below so that bad command lines are rejected instead of silently ignored.
optspec=":i:m:h"

while getopts "$optspec" optchar; do
    case "${optchar}" in
        i)
            in_dir=$OPTARG
            ;;
        m)
            mapping_file=$OPTARG
            ;;
        h)
            usage
            ;;
        :)
            printf "\n%bError: option -%s requires an argument...%b\n\n" "${RED}" "${OPTARG}" "${DEF}" >&2
            exit 1
            ;;
        \?)
            printf "\n%bError: unknown option -%s...%b\n\n" "${RED}" "${OPTARG}" "${DEF}" >&2
            exit 1
            ;;
    esac
done

# Both -i and -m are mandatory: show the help text and fail if either is missing.
if [[ -z "${in_dir:-}" || -z "${mapping_file:-}" ]]; then
    # usage terminates the shell it runs in, so it is called in a subshell to
    # keep control here and finish with a non-zero status for this error path.
    (usage) >&2
    exit 1
fi

# User-supplied paths are passed as printf arguments (never as part of the
# format string) so that '%' or '\' in a path cannot corrupt the message.
if [[ ! -d "${in_dir}" ]]; then
    printf "\n%bError: input directory '%s' not found...%b\n\n" "${RED}" "${in_dir}" "${DEF}" >&2
    exit 1
fi

if [[ ! -f "${mapping_file}" ]]; then
    printf "\n%bError: mapping file '%s' not found...%b\n\n" "${RED}" "${mapping_file}" "${DEF}" >&2
    exit 1
fi

#######################################
# Rename every file listed in the mapping to <sample>[_<read>].fq[.gz] and
# gzip-compress any file that is not already gzip data.
# Globals read:
#   mappings      array of tab-delimited lines: filename, sample[, read]
#   in_dir        directory containing the files
#   mapping_file  only used in error messages
#   LOGFILE       file the mv/gzip output is appended to (real run only)
#   RED, DEF      colour escape sequences for error messages
# Arguments:
#   $1 - any non-empty value selects a dry run that only prints the planned
#        renames; omit it to actually rename (and compress) the files
# Exits the script with status 1 on a malformed mapping line, a missing
# input file, or a destination that already exists.
#######################################
function rename_files() {

    local dry_run=${1:-}
    local line infile outfile src dest file_desc gzipped
    local -a vals

    for line in "${mappings[@]}"; do
        # Elements may still carry their newline (readarray without -t) and
        # a carriage return (mapping file saved with DOS line endings).
        line=${line%$'\n'}
        line=${line%$'\r'}

        # Blank lines (typically a trailing one) are not mapping entries.
        if [[ -z "${line}" ]]; then
            continue
        fi

        IFS=$'\t' read -r -a vals <<< "${line}"
        infile=${vals[0]:-}

        if [[ -z "${vals[1]:-}" ]]; then
            printf "\n%bError: '%s' is not a valid mapping file.\n" "${RED}" "${mapping_file}" >&2
            printf "Please check this is a tab-delimited file with 2 or more columns%b\n" "${DEF}" >&2
            exit 1
        fi

        src="${in_dir}/${infile}"
        if [[ ! -f "${src}" ]]; then
            printf "\n%bError: File '%s' defined in %s not found in %s...%b\n\n" \
                "${RED}" "${infile}" "${mapping_file}" "${in_dir}" "${DEF}" >&2
            exit 1
        fi

        # The read column is optional (single-ended data has none).
        if [[ -z "${vals[2]:-}" ]]; then
            outfile="${vals[1]}.fq"
        else
            outfile="${vals[1]}_${vals[2]}.fq"
        fi

        # Brief mode (-b) leaves the file name out of the description, so a
        # *name* containing "gzip" is not mistaken for compressed content.
        if ! file_desc=$(file -b -- "${src}"); then
            printf "\n%bError: unable to determine the type of '%s'...%b\n\n" "${RED}" "${src}" "${DEF}" >&2
            exit 1
        fi
        if [[ "${file_desc}" == *gzip* ]]; then
            gzipped=1
            outfile="${outfile}.gz"
        else
            gzipped=0
        fi

        # Never clobber existing data: neither the rename target nor, for an
        # uncompressed input, the .gz file that compression would create.
        dest="${in_dir}/${outfile}"
        if [[ -e "${dest}" ]] || { [[ "${gzipped}" == '0' ]] && [[ -e "${dest}.gz" ]]; }; then
            printf "\n%bError: renaming '%s' would overwrite an existing file in %s...%b\n\n" \
                "${RED}" "${infile}" "${in_dir}" "${DEF}" >&2
            exit 1
        fi

        if [[ -z "${dry_run}" ]]; then
            mv -v -- "${src}" "${dest}" | tee -a "${LOGFILE}"
            if [[ "${gzipped}" == '0' ]]; then
                echo "gzip compressing ${outfile}"
                # gzip -v reports on stderr; merge it so it reaches the log.
                gzip -v -- "${dest}" 2>&1 | tee -a "${LOGFILE}"
            fi
        else
            echo "${infile} -> ${outfile}"
        fi

    done
}

# Timestamped log file, created relative to the current working directory,
# that records the output of the rename/compress commands.
START_DATE=$(date +%d%m%y_%H%M%S)
LOGFILE="fastq_rename.${START_DATE}.log"

# Load the mapping file, one array element per line. The path is quoted so
# that a mapping file whose name contains spaces can be read.
readarray -t mappings < "${mapping_file}"

echo
echo "Files will be renamed as follows:"
echo

# First pass only prints the planned renames so the user can review them.
rename_files 'dry_run'

echo
printf "%bPlease check the renaming above is correct\n" "${RED}"
# read fails at end-of-input (e.g. Ctrl-D); treat that as "no" so errexit
# does not abort the script before the terminal colour is reset below.
read -r -p "Do you wish to proceed? [y/N] " response || response=''
printf "%b" "${DEF}"

case "$response" in
    [yY][eE][sS]|[yY])
        echo
        # Second pass performs the renames for real.
        rename_files
        echo
        echo "A log of changes made has been written to ${LOGFILE}"

        ;;
    *)
        echo "Exited without making any changes.."
        ;;
esac
