From 5e702e9ec9cc93db37a0597f2375ffad22f47937 Mon Sep 17 00:00:00 2001
From: Mouhamadou Ba <mandiayba@gmail.com>
Date: Mon, 11 Apr 2022 14:39:58 +0200
Subject: [PATCH 1/2] Update ontology paths: use ancillaries/ for .obo, .tomap and .paths files

---
 preprocess-ontology.snakefile    | 42 ++++++++++++++++----------------
 process_CIRM_corpus.snakefile    | 12 ++++-----
 process_DSMZ_corpus.snakefile    |  4 +--
 process_GenBank_corpus.snakefile |  4 +--
 process_PubMed_corpus.snakefile  | 16 ++++++------
 5 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/preprocess-ontology.snakefile b/preprocess-ontology.snakefile
index 82f53e34..11bce226 100644
--- a/preprocess-ontology.snakefile
+++ b/preprocess-ontology.snakefile
@@ -22,14 +22,14 @@ rule all:
 		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.json', 
 		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.json',
 		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.json',
-		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
-		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap',
+		'ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
+		'ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap',
 		'ancillaries/food-process-lexicon.txt',
 		'ancillaries/NCBI_taxa_ontobiotope.txt',
-		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo',
-		'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo',
-		expand("corpora/florilege/alvisir/{ontoname}.paths", ontoname=ONTONAMES.split(' '))
+		'ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		'ancillaries/BioNLP-OST+EnovFood-Phenotype.obo',
+		'ancillaries/BioNLP-OST+EnovFood-Use.obo',
+		expand("ancillaries/{ontoname}.paths", ontoname=ONTONAMES.split(' '))
 
 
 '''
@@ -51,7 +51,7 @@ rule cut_subtrees_habitat:
 	input:
 		onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo'
 	output:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo'
 	conda: 'softwares/envs/obo-utils-env.yaml'
 	shell: """
 			python softwares/obo-utils/obo-subtree.py \
@@ -67,7 +67,7 @@ rule cut_subtrees_phenotype:
 	input:
 		onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo'
 	output:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo'
 	conda: 'softwares/envs/obo-utils-env.yaml'
 	shell: """
 			python softwares/obo-utils/obo-subtree.py \
@@ -83,7 +83,7 @@ rule cut_subtrees_use:
 	input:
 		onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo'
 	output:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Use.obo'
 	conda: 'softwares/envs/obo-utils-env.yaml'
 	shell: """
 			python softwares/obo-utils/obo-subtree.py \
@@ -98,9 +98,9 @@ Analyze ontologies with tomap
 '''
 rule analyze_onto_Habitat:
 	input:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo'
 	output:
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap'
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap'
 	params:
 		plan='plans/biotope_ontology_analyzer.plan'
 	singularity:config["SINGULARITY_IMG"]
@@ -116,9 +116,9 @@ Analyze ontologies with tomap
 '''
 rule analyze_onto_phenotype:
 	input:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo'
 	output:
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap'
+		tomap='ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap'
 	params:
 		plan='plans/phenotype_ontology_analyzer.plan'
 	singularity:config["SINGULARITY_IMG"]
@@ -135,7 +135,7 @@ Build food process lexicon
 '''
 rule build_food_process_lexicon:
 	input:
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo'
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo'
 	output:
 		lexicon='ancillaries/food-process-lexicon.txt'
 	params:
@@ -163,9 +163,9 @@ build name lexicon
 '''
 rule build_ncbi_common_name_lexicon:
 	input: 
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
 		common_names='ancillaries/NCBI_common_names',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg'
 	output:
@@ -186,7 +186,7 @@ obo to json
 '''
 rule convert_obo2json_habitat:
 	input:
-		obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo'
+		obo='ancillaries/BioNLP-OST+EnovFood-Habitat.obo'
 	output:
 		json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.json'
 	conda: 'softwares/envs/obo-utils-env.yaml'
@@ -197,7 +197,7 @@ convert phenotype results to json
 '''
 rule convert_obo2json_phenotype:
 	input:
-		obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo'
+		obo='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo'
 	output:
 		json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.json'
 	conda: 'softwares/envs/obo-utils-env.yaml'
@@ -209,7 +209,7 @@ convert use results to json
 '''
 rule convert_obo2json_use:
 	input:
-		obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo'
+		obo='ancillaries/BioNLP-OST+EnovFood-Use.obo'
 	output:
 		json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.json'
 	conda: 'softwares/envs/obo-utils-env.yaml'
@@ -222,9 +222,9 @@ generate concept paths
 '''
 rule generate_concept_path:
 	input:
-		onto="corpora/florilege/alvisir/{ontoname}.obo"
+		onto="ancillaries/{ontoname}.obo"
 	output:
-		paths="corpora/florilege/alvisir/{ontoname}.paths"
+		paths="ancillaries/{ontoname}.paths"
 	params:
 		plan="plans/get_onto_paths.plan"
 	singularity:config['SINGULARITY_IMG']
diff --git a/process_CIRM_corpus.snakefile b/process_CIRM_corpus.snakefile
index 902e3b40..fee3a27a 100644
--- a/process_CIRM_corpus.snakefile
+++ b/process_CIRM_corpus.snakefile
@@ -144,8 +144,8 @@ rule map_cirm_habitats:
 		mapped_habitats='corpora/cirm/bia/mapped_bia_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
 		outdir='corpora/cirm/bia',
@@ -170,8 +170,8 @@ rule map_cirm_yeast_habitats:
 		mapped_habitats='corpora/cirm/levures/mapped_yeast_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
 		outdir='corpora/cirm/levures',
@@ -196,8 +196,8 @@ rule map_cirm_cfbp_habitats:
 		mapped_habitats='corpora/cirm/cfbp/mapped_cfbp_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
 		outdir='corpora/cirm/cfbp',
diff --git a/process_DSMZ_corpus.snakefile b/process_DSMZ_corpus.snakefile
index 9db4c34a..7ed58a57 100644
--- a/process_DSMZ_corpus.snakefile
+++ b/process_DSMZ_corpus.snakefile
@@ -32,8 +32,8 @@ rule map_dsmz_habitats:
 		mapped_habitats='corpora/dsmz/mapped_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
 		outdir='corpora/dsmz',
diff --git a/process_GenBank_corpus.snakefile b/process_GenBank_corpus.snakefile
index 89d075f5..c71dc069 100644
--- a/process_GenBank_corpus.snakefile
+++ b/process_GenBank_corpus.snakefile
@@ -75,8 +75,8 @@ rule map_genbank_habitats:
 		mapped_habitats='corpora/genbank/mapped_habitats.txt'
 	params:
 		plan='plans/map_habitats.plan',
-		onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
 		inhibitSyntax='inhibit-syntax',
diff --git a/process_PubMed_corpus.snakefile b/process_PubMed_corpus.snakefile
index cc072812..dff513c7 100644
--- a/process_PubMed_corpus.snakefile
+++ b/process_PubMed_corpus.snakefile
@@ -45,13 +45,13 @@ rule run_pubmed_entities:
 		batch="{B}",
 		corpus='pubmed',
 		inhibitSyntax='inhibit-syntax',
-		onto_habitat='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo',
-		tomap_habitat='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap',
-		onto_pheno='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo',
-		tomap_pheno='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap',
+		onto_habitat='ancillaries/BioNLP-OST+EnovFood-Habitat.obo',
+		tomap_habitat='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap',
+		onto_pheno='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo',
+		tomap_pheno='ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap',
 		graylist='ancillaries/graylist_extended.heads',
 		emptywords='ancillaries/stopwords_EN.ttg',
-		ontobiotopeUse='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo',
+		ontobiotopeUse='ancillaries/BioNLP-OST+EnovFood-Use.obo',
 		plan='plans/entities.plan',
 		dir='corpora/pubmed/batches/{B}/',
 		taxid_microorganisms='ancillaries/extended-microorganisms-taxonomy/taxid_microorganisms.txt',
@@ -130,9 +130,9 @@ rule create_pubmed_expander:
 	input:
 		expander="ancillaries/expander.xml",
 		taxa_id_microorganisms="ancillaries/extended-microorganisms-taxonomy/taxa+id_microorganisms.txt",
-                onto_habitat="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo",
-		onto_phenotype="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo",
-		onto_use="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo"
+		onto_habitat="ancillaries/BioNLP-OST+EnovFood-Habitat.obo",
+		onto_phenotype="ancillaries/BioNLP-OST+EnovFood-Phenotype.obo",
+		onto_use="ancillaries/BioNLP-OST+EnovFood-Use.obo"
 	output:
 		expander_folder=directory("corpora/florilege/alvisir/expander")
 	params:
-- 
GitLab


From e088375326494d4a168dfd16e6daca22fd446b28 Mon Sep 17 00:00:00 2001
From: Mouhamadou Ba <mandiayba@gmail.com>
Date: Mon, 11 Apr 2022 14:44:56 +0200
Subject: [PATCH 2/2] Delete generate_concept_path.snakefile (rule now lives in preprocess-ontology.snakefile)

---
 generate_concept_path.snakefile | 34 ---------------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 generate_concept_path.snakefile

diff --git a/generate_concept_path.snakefile b/generate_concept_path.snakefile
deleted file mode 100644
index ca1ba773..00000000
--- a/generate_concept_path.snakefile
+++ /dev/null
@@ -1,34 +0,0 @@
-## config file
-configfile: "config/config.yaml"
-
-
-
-ONTONAMES = 'BioNLP-OST+EnovFood-Habitat BioNLP-OST+EnovFood-Phenotype BioNLP-OST+EnovFood-Use'
-
-
-
-''' 
-all
-'''
-rule all:
-	input:
-		expand("ancillaries/{ontoname}.paths", ontoname=ONTONAMES.split(' '))
-		
-
-
-'''
-generate concept paths
-'''
-rule generate_concept_path:
-	input:
-		onto="ancillaries/{ontoname}.obo"
-	output:
-		paths="ancillaries/{ontoname}.paths"
-	params:
-		plan="plans/get_onto_paths.plan"
-	singularity:config['SINGULARITY_IMG']
-	shell: """alvisnlp -cleanTmp -verbose \
-		-alias input {input.onto} \
-		-alias output {output.paths} \
-		 {params.plan}
-		"""
-- 
GitLab