Skip to content
Snippets Groups Projects
Commit 02dfc77a authored by nina.marthe_ird.fr's avatar nina.marthe_ird.fr
Browse files

started changing transfer stats

parent c05e3852
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@ from tqdm import tqdm
def transfer_on_target(segments_file,genome_dir,target_genome,target_genome_paths,list_feat_absent,seg_info,args,segments_on_target_genome):
print(f' Generating the {target_genome} output')
stats=False
stats=True
list_feature_to_transfer= Features.keys()
segments_list={} # list of segments useful for the transfer. not all segments info will be loaded.
......@@ -36,10 +36,12 @@ def transfer_on_target(segments_file,genome_dir,target_genome,target_genome_path
with open(out_gff,'w') as file_out_gff:
reason_features_not_transfered=[0,0] # absent_features, low_cov_id
diff_size_transfered_features=[0,0] # [count,sum], to get the average
gene_count=0
for feat in tqdm(list_feature_to_transfer,desc=f' Generating {target_genome} gff',unit=" feature",disable=not args.verbose):
feature=Features[feat]
if feature.parent=="": # usually a gene
gene_count+=1
for match in feature.segments_list_target: # compute cov and id for all matches.
if match.walk_name=='':
feature_target_path=[]
......@@ -142,8 +144,16 @@ def transfer_on_target(segments_file,genome_dir,target_genome,target_genome_path
if stats:
# print nb feat to transfer
absent_features=reason_features_not_transfered[0];low_cov_id=reason_features_not_transfered[1]
print(f'\n{len(Features)-(absent_features+low_cov_id)} out of {len(Features)} features are transferred on {target_genome}.')
print(f'{absent_features} out of {gene_count} main features are not transfered because they are absent in the haplotype {target_genome}.')
print(f'{low_cov_id} out of {gene_count} main features are not transfered because their coverage or sequence identity is below threshold.')
print(f'Average length difference of the transfered genes : {diff_size_transfered_features[1]/diff_size_transfered_features[0]}')
# create objects for stats on how many segments are absent in target genome, their average length, etc
feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
#feature_missing_segments=[[],[],[],[],[],[],[]] # [feature_missing_first,feature_missing_middle,feature_missing_last,feature_missing_all,feature_missing_total,feature_total,feature_ok]
# the first segment of the feature is missing - feature_missing_first
# the last segment of the feature is missing - feature_missing_last
# at least one middle segment of the feature is missing - feature_missing_middle
......@@ -153,19 +163,13 @@ def transfer_on_target(segments_file,genome_dir,target_genome,target_genome_path
# total number of features, with missing segments or not - feature_total
# for each feature, get list of the segments where it is and the first and last segment of the feature on the new genome
list_seg=Features[feat].segments_list_source
match=Features[feat].segments_list_target[0]
for feat in list_feature_to_transfer:
stats_feature_missing_segment(feature_missing_segments,match.first_seg,match.last_seg,list_seg,feat,match.walk_name,segments_on_target_genome)
if args.annotation:
absent_features=reason_features_not_transfered[0];low_cov_id=reason_features_not_transfered[1]
print(len(Features)-(absent_features+low_cov_id),"out of",len(Features),"features are transfered.")
print(absent_features,"out of",len(Features),"features are not transfered because they are absent in the new genome.")
print(low_cov_id,"out of",len(Features),"features are not transfered because their coverage or sequence identity is below threshold.")
print("average length difference of the transfered genes : ",diff_size_transfered_features[1]/diff_size_transfered_features[0])
stats_features(feature_missing_segments)
#list_seg=Features[feat].segments_list_source
#match=Features[feat].segments_list_target[0]
#for feat in list_feature_to_transfer:
# stats_feature_missing_segment(feature_missing_segments,match.first_seg,match.last_seg,list_seg,feat,match.walk_name,segments_on_target_genome)
#stats_features(feature_missing_segments)
#clear segment info for next transfer
segments_on_target_genome.clear() # empty dict for the next genome treated
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment