#!/bin/bash
###################################################################################################################################
#
# Copyright 2024 IRD
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Intellectual property belongs to IRD
# Version 1 and later written by Francois Sabot
####################################################################################################################################
# Name will be set up at launch
#SBATCH --export=ALL
#SBATCH --mail-user francois.sabot@ird.fr
#SBATCH --mail-type=ALL
#SBATCH -c 24
#SBATCH -J LOchain
# Positional arguments given on the command line
ANNOTATION="${1}"     # annotation handed to liftoff through -g; also the base name of the "_db" file
SEQFOLDER="${2}"      # folder whose content is listed to obtain the target sequences
REFERENCESEQ="${3}"   # reference sequence: skipped when listed, passed last to liftoff
OUTPUT="${4}"         # used to build the names of the result and unmapped files
# Environment module providing liftoff
module load liftoff/1.6.3
#Standard command for liftoff: "liftoff [-h] (-g GFF | -db DB) [-o FILE] [-u FILE] [-exclude_partial] [-dir DIR] [-mm2_options =STR] [-a A] [-s S] [-d D] [-flank F] [-V] [-p P] [-m PATH] [-f TYPES] [-infer_genes] [-infer_transcripts]
# [-chroms TXT] [-unplaced TXT] [-copies] [-sc SC] [-overlap O] [-mismatch M] [-gap_open GO] [-gap_extend GE] [-polish] [-cds]
# target reference"
#Liftoff in GFF mode
# Runs liftoff with the GFF annotation (-g) on every regular file found in
# $SEQFOLDER except the reference. For each target it writes
# $OUTPUT/<file>.gff and $OUTPUT/<file>_unmapped.txt, and appends the liftoff
# output together with the shell timing report to timelog.txt.
# Exit status: 1 when the sequence folder or the output folder is unusable,
# 135 as soon as one liftoff run fails.
echo -e "########################################\n\nRunning in GFF mode\n\n"
if [[ ! -d "$SEQFOLDER" ]]; then
  echo "Sequence folder '$SEQFOLDER' is not a directory" >&2
  exit 1
fi
mkdir -p -- "$OUTPUT" || exit 1
# NOTE(review): every regular file of the folder is treated as a target; if
# index files get written next to the sequences they are picked up too - confirm.
for TARGETSEQ in "$SEQFOLDER"/*; do
  # An empty folder leaves the pattern unexpanded; sub-folders are not sequences
  [[ -f "$TARGETSEQ" ]] || continue
  SEQFILE=${TARGETSEQ##*/}
  # Skip the reference, whether it was given as a path or as a bare file name
  if [[ "$TARGETSEQ" -ef "$REFERENCESEQ" || "$SEQFILE" == "$REFERENCESEQ" ]]; then
    continue
  fi
  OUTFILE="${OUTPUT}/${SEQFILE}.gff"
  UNMAPPED="${OUTPUT}/${SEQFILE}_unmapped.txt"
  echo -e "\nRunning for $TARGETSEQ\n" >> timelog.txt
  # 'time' is a shell keyword that reports on stderr: it must be written
  # literally (not expanded from a variable) and grouped so that its report
  # lands in the log file
  { time liftoff -g "$ANNOTATION" -o "$OUTFILE" -u "$UNMAPPED" -p 1 "$TARGETSEQ" "$REFERENCESEQ" ; } >> timelog.txt 2>&1 || exit 135
done
#Liftoff in DB mode
# Runs liftoff with an existing feature database (-db) on every regular file
# found in $SEQFOLDER except the reference. For each target it writes
# $OUTPUT/<file>.db.gff and $OUTPUT/<file>_unmappeddb.txt, and appends the
# liftoff output together with the shell timing report to timelog.txt.
# Exit status: 1 when the sequence folder, the output folder or the database
# is unusable, 135 as soon as one liftoff run fails.
echo -e "########################################\n\nRunning in DB mode\n\n"
if [[ ! -d "$SEQFOLDER" ]]; then
  echo "Sequence folder '$SEQFOLDER' is not a directory" >&2
  exit 1
fi
mkdir -p -- "$OUTPUT" || exit 1
# Same for every target, so computed once.
# NOTE(review): presumably the database liftoff leaves next to the annotation
# after a -g run - confirm.
DB="${ANNOTATION}_db"
if [[ ! -e "$DB" ]]; then
  echo "Feature database '$DB' not found" >&2
  exit 1
fi
# NOTE(review): every regular file of the folder is treated as a target; if
# index files get written next to the sequences they are picked up too - confirm.
for TARGETSEQ in "$SEQFOLDER"/*; do
  # An empty folder leaves the pattern unexpanded; sub-folders are not sequences
  [[ -f "$TARGETSEQ" ]] || continue
  SEQFILE=${TARGETSEQ##*/}
  # Skip the reference, whether it was given as a path or as a bare file name
  if [[ "$TARGETSEQ" -ef "$REFERENCESEQ" || "$SEQFILE" == "$REFERENCESEQ" ]]; then
    continue
  fi
  OUTFILE="${OUTPUT}/${SEQFILE}.db.gff"
  UNMAPPED="${OUTPUT}/${SEQFILE}_unmappeddb.txt"
  echo -e "\nRunning for $TARGETSEQ\n" >> timelog.txt
  # 'time' is a shell keyword that reports on stderr: it must be written
  # literally (not expanded from a variable) and grouped so that its report
  # lands in the log file
  { time liftoff -db "$DB" -o "$OUTFILE" -u "$UNMAPPED" -p 1 "$TARGETSEQ" "$REFERENCESEQ" ; } >> timelog.txt 2>&1 || exit 135
done
#{ date ; time liftoff -o prout -g locus.gff IR64_GuppyFlyeMedakaRagtag.fasta irgsp5_complete.fasta ; } > time.log 2>&1 && tail -n 10 time.log > time2.log