Skip to content
Snippets Groups Projects
Commit 22c4a0b8 authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

added a check that the sourge and target genome names are found in the walks/paths of the graph

parent a5d749c9
No related branches found
No related tags found
No related merge requests found
......@@ -87,6 +87,23 @@ def read_args(args):
sys.exit(f"grannot: error: one of the following options is required for the option -id/--identity : -ann/--annotation, -aln/--alignment, -var/--variation")
# todo : adapt pav to have the same requirements as cov and id, or no requirement at all. change in grannot_help.md
# convert input files in Path objects
args.outdir=Path(args.outdir).resolve() # resolve() finds the absolute path, and adds the '/' if needed
seg_coord_path=Path(args.segment_coordinates_path).resolve()
args.graph=Path(args.graph.name).resolve()
args.gff=Path(args.gff.name).resolve()
# check if source and target genomes are in the graph
walks_list = os.popen(f"grep ^W {args.graph.as_posix()} | cut -f 2 | grep -v 'MINIGRAPH'").read().split('\n')
paths_list = os.popen(f"grep ^P {args.graph.as_posix()} | cut -f 2").read().split('\n')
if len(paths_list)!=0:
genome_list_path=set([elem.split('#')[0] for elem in paths_list] + walks_list)
if args.source_genome not in genome_list_path:
sys.exit(f"grannot: error: the source genome given was not found in the graph")
for target_genome in args.target[:]:
if target_genome not in genome_list_path:
sys.exit(f"grannot: error: a target genome given was not found in the graph")
# check bedtools installation
bedtools_test=subprocess.run("bedtools --version",shell=True,check=False,stdout=subprocess.DEVNULL,stderr=sys.stderr)
if int(bedtools_test.returncode)>0:
......@@ -102,12 +119,6 @@ def read_args(args):
if args.coverage==None:
args.coverage=80
# convert input files in Path objects
args.outdir=Path(args.outdir).resolve() # resolve() finds the absolute path, and adds the '/' if needed
seg_coord_path=Path(args.segment_coordinates_path).resolve()
args.graph=Path(args.graph.name).resolve()
args.gff=Path(args.gff.name).resolve()
# run getSegCoord on the target genomes + the source genome if needed
if not(args.annotation or args.variation or args.alignment or args.pav_matrix): # if we only need the graph annotation, no need to get SegCoord for all the genomes
path_list=[]
......@@ -147,7 +158,7 @@ def run_intersect(args,seg_coord_path):
first_source_haplo=min(source_haplotypes)
args.source_haplotype=first_source_haplo
haplo=f"#{first_source_haplo}#"
print('\033[35m'+f'Warning : No source haplotype was given with \'-sh\'. Haplotype {first_source_haplo} will be used.''\033[0m')
print('\033[35m'+f'Warning : No source haplotype was given with \'-sh\'. Haplotype {first_source_haplo} will be used.'+'\033[0m')
files=list(seg_coord_path.glob(f"*{args.source_genome}*{haplo}*.bed")) # get all bed files from the source genome haplotype in seg_coord
message=f'No bed files corresponding to the source genome haplotype {first_source_haplo} found in {seg_coord_path}'
else:
......@@ -160,7 +171,7 @@ def run_intersect(args,seg_coord_path):
print(f' Found {len(files)} paths corresponding to the source genome haplotype {args.source_haplotype}')
else:
files=list(seg_coord_path.glob(f"*{args.source_genome}*.bed")) # get all bed files from the source genome in seg_coord
message=f'No bed files corresponding to the source genome found in {seg_coord_path}'
message='\33[31m'+f'Error : No bed files corresponding to the source genome found in {seg_coord_path}. Please check that the source genome name is right.'+'\033[0m'
if len(files)==0:
sys.exit(message)
if args.verbose:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment