From 5e702e9ec9cc93db37a0597f2375ffad22f47937 Mon Sep 17 00:00:00 2001 From: Mouhamadou Ba <mandiayba@gmail.com> Date: Mon, 11 Apr 2022 14:39:58 +0200 Subject: [PATCH 1/2] update path to ontologies --- preprocess-ontology.snakefile | 42 ++++++++++++++++---------------- process_CIRM_corpus.snakefile | 12 ++++----- process_DSMZ_corpus.snakefile | 4 +-- process_GenBank_corpus.snakefile | 4 +-- process_PubMed_corpus.snakefile | 16 ++++++------ 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/preprocess-ontology.snakefile b/preprocess-ontology.snakefile index 82f53e34..11bce226 100644 --- a/preprocess-ontology.snakefile +++ b/preprocess-ontology.snakefile @@ -22,14 +22,14 @@ rule all: 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.json', 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.json', 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.json', - 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', - 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap', + 'ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', + 'ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap', 'ancillaries/food-process-lexicon.txt', 'ancillaries/NCBI_taxa_ontobiotope.txt', - 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo', - 'corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo', - expand("corpora/florilege/alvisir/{ontoname}.paths", ontoname=ONTONAMES.split(' ')) + 'ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + 'ancillaries/BioNLP-OST+EnovFood-Phenotype.obo', + 'ancillaries/BioNLP-OST+EnovFood-Use.obo', + expand("ancillaries/{ontoname}.paths", ontoname=ONTONAMES.split(' ')) ''' @@ -51,7 +51,7 @@ rule cut_subtrees_habitat: input: onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo' output: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo' conda: 'softwares/envs/obo-utils-env.yaml' shell: """ python softwares/obo-utils/obo-subtree.py \ @@ -67,7 +67,7 @@ rule cut_subtrees_phenotype: input: onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo' output: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo' conda: 'softwares/envs/obo-utils-env.yaml' shell: """ python softwares/obo-utils/obo-subtree.py \ @@ -83,7 +83,7 @@ rule cut_subtrees_use: input: onto='ancillaries/BioNLP-OST+EnovFood-no-obsolete.obo' output: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Use.obo' conda: 'softwares/envs/obo-utils-env.yaml' shell: """ python softwares/obo-utils/obo-subtree.py \ @@ -98,9 +98,9 @@ Analyze ontologies with tomap ''' rule analyze_onto_Habitat: input: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo' output: - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap' + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap' params: plan='plans/biotope_ontology_analyzer.plan' singularity:config["SINGULARITY_IMG"] @@ -116,9 +116,9 @@ Analyze ontologies with tomap ''' rule analyze_onto_phenotype: input: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo' output: - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap' + tomap='ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap' params: plan='plans/phenotype_ontology_analyzer.plan' singularity:config["SINGULARITY_IMG"] @@ -135,7 +135,7 @@ Build food process lexicon ''' rule build_food_process_lexicon: input: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo' + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo' output: lexicon='ancillaries/food-process-lexicon.txt' params: @@ -163,9 +163,9 @@ build name lexicon ''' rule build_ncbi_common_name_lexicon: input: - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', common_names='ancillaries/NCBI_common_names', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg' output: @@ -186,7 +186,7 @@ obo to json ''' rule convert_obo2json_habitat: input: - obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo' + obo='ancillaries/BioNLP-OST+EnovFood-Habitat.obo' output: json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.json' conda: 'softwares/envs/obo-utils-env.yaml' @@ -197,7 +197,7 @@ convert phenotype results to json ''' rule convert_obo2json_phenotype: input: - obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo' + obo='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo' output: json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.json' conda: 'softwares/envs/obo-utils-env.yaml' @@ -209,7 +209,7 @@ convert use results to json ''' rule convert_obo2json_use: input: - obo='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo' + obo='ancillaries/BioNLP-OST+EnovFood-Use.obo' output: json='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.json' conda: 'softwares/envs/obo-utils-env.yaml' @@ -222,9 +222,9 @@ generate concept paths ''' rule generate_concept_path: input: - onto="corpora/florilege/alvisir/{ontoname}.obo" + onto="ancillaries/{ontoname}.obo" output: - paths="corpora/florilege/alvisir/{ontoname}.paths" + paths="ancillaries/{ontoname}.paths" params: plan="plans/get_onto_paths.plan" singularity:config['SINGULARITY_IMG'] diff --git a/process_CIRM_corpus.snakefile b/process_CIRM_corpus.snakefile index 902e3b40..fee3a27a 100644 --- a/process_CIRM_corpus.snakefile +++ b/process_CIRM_corpus.snakefile @@ -144,8 +144,8 @@ rule map_cirm_habitats: mapped_habitats='corpora/cirm/bia/mapped_bia_habitats.txt' params: plan='plans/map_habitats.plan', - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', outdir='corpora/cirm/bia', @@ -170,8 +170,8 @@ rule map_cirm_yeast_habitats: mapped_habitats='corpora/cirm/levures/mapped_yeast_habitats.txt' params: plan='plans/map_habitats.plan', - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', outdir='corpora/cirm/levures', @@ -196,8 +196,8 @@ rule map_cirm_cfbp_habitats: mapped_habitats='corpora/cirm/cfbp/mapped_cfbp_habitats.txt' params: plan='plans/map_habitats.plan', - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', outdir='corpora/cirm/cfbp', diff --git a/process_DSMZ_corpus.snakefile b/process_DSMZ_corpus.snakefile index 9db4c34a..7ed58a57 100644 --- a/process_DSMZ_corpus.snakefile +++ b/process_DSMZ_corpus.snakefile @@ -32,8 +32,8 @@ rule map_dsmz_habitats: mapped_habitats='corpora/dsmz/mapped_habitats.txt' params: plan='plans/map_habitats.plan', - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', outdir='corpora/dsmz', diff --git a/process_GenBank_corpus.snakefile b/process_GenBank_corpus.snakefile index 89d075f5..c71dc069 100644 --- a/process_GenBank_corpus.snakefile +++ b/process_GenBank_corpus.snakefile @@ -75,8 +75,8 @@ rule map_genbank_habitats: mapped_habitats='corpora/genbank/mapped_habitats.txt' params: plan='plans/map_habitats.plan', - onto='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', + onto='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', inhibitSyntax='inhibit-syntax', diff --git a/process_PubMed_corpus.snakefile b/process_PubMed_corpus.snakefile index cc072812..dff513c7 100644 --- a/process_PubMed_corpus.snakefile +++ b/process_PubMed_corpus.snakefile @@ -45,13 +45,13 @@ rule run_pubmed_entities: batch="{B}", corpus='pubmed', inhibitSyntax='inhibit-syntax', - onto_habitat='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo', - tomap_habitat='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.tomap', - onto_pheno='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo', - tomap_pheno='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.tomap', + onto_habitat='ancillaries/BioNLP-OST+EnovFood-Habitat.obo', + tomap_habitat='ancillaries/BioNLP-OST+EnovFood-Habitat.tomap', + onto_pheno='ancillaries/BioNLP-OST+EnovFood-Phenotype.obo', + tomap_pheno='ancillaries/BioNLP-OST+EnovFood-Phenotype.tomap', graylist='ancillaries/graylist_extended.heads', emptywords='ancillaries/stopwords_EN.ttg', - ontobiotopeUse='corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo', + ontobiotopeUse='ancillaries/BioNLP-OST+EnovFood-Use.obo', plan='plans/entities.plan', dir='corpora/pubmed/batches/{B}/', taxid_microorganisms='ancillaries/extended-microorganisms-taxonomy/taxid_microorganisms.txt', @@ -130,9 +130,9 @@ rule create_pubmed_expander: input: expander="ancillaries/expander.xml", taxa_id_microorganisms="ancillaries/extended-microorganisms-taxonomy/taxa+id_microorganisms.txt", - onto_habitat="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Habitat.obo", - onto_phenotype="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Phenotype.obo", - onto_use="corpora/florilege/alvisir/BioNLP-OST+EnovFood-Use.obo" + onto_habitat="ancillaries/BioNLP-OST+EnovFood-Habitat.obo", + onto_phenotype="ancillaries/BioNLP-OST+EnovFood-Phenotype.obo", + onto_use="ancillaries/BioNLP-OST+EnovFood-Use.obo" output: expander_folder=directory("corpora/florilege/alvisir/expander") params: -- GitLab From e088375326494d4a168dfd16e6daca22fd446b28 Mon Sep 17 00:00:00 2001 From: Mouhamadou Ba <mandiayba@gmail.com> Date: Mon, 11 Apr 2022 14:44:56 +0200 Subject: [PATCH 2/2] Deleted generate_concept_path.snakefile --- generate_concept_path.snakefile | 34 --------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 generate_concept_path.snakefile diff --git a/generate_concept_path.snakefile b/generate_concept_path.snakefile deleted file mode 100644 index ca1ba773..00000000 --- a/generate_concept_path.snakefile +++ /dev/null @@ -1,34 +0,0 @@ -## config file -configfile: "config/config.yaml" - - - -ONTONAMES = 'BioNLP-OST+EnovFood-Habitat BioNLP-OST+EnovFood-Phenotype BioNLP-OST+EnovFood-Use' - - - -''' -all -''' -rule all: - input: - expand("ancillaries/{ontoname}.paths", ontoname=ONTONAMES.split(' ')) - - - -''' -generate concept paths -''' -rule generate_concept_path: - input: - onto="ancillaries/{ontoname}.obo" - output: - paths="ancillaries/{ontoname}.paths" - params: - plan="plans/get_onto_paths.plan" - singularity:config['SINGULARITY_IMG'] - shell: """alvisnlp -cleanTmp -verbose \ - -alias input {input.onto} \ - -alias output {output.paths} \ - {params.plan} - """ -- GitLab