I have trained a CRF model and stored it locally using joblib. I want to use this model inside spaCy in place of the default NER component. I tried it using spacy_crfsuite, but I get an AssertionError (full traceback below). Any ideas?
import spacy
from spacy.language import Language
from spacy_crfsuite import CRFEntityExtractor, CRFExtractor
@Language.factory("ner-crf-3")
def create_my_component(nlp, name):
    """Build the CRF-backed entity extractor registered as ``ner-crf-3``.

    Args:
        nlp: The pipeline object handed to ``CRFEntityExtractor``.
        name: The component name supplied by the factory call (unused here).

    Returns:
        A ``CRFEntityExtractor`` wrapping the extractor loaded from
        ``spacy_crfsuite_trained_spacy3.bz2``.
    """
    # NOTE(review): adding this component currently fails with a bare
    # AssertionError raised by `assert isinstance(ent_tagger, CRF)`.
    # Presumably that check runs while loading this file, meaning the file
    # must contain the fitted CRF tagger object itself -- confirm how the
    # model was dumped with joblib.
    crf_extractor = CRFExtractor().from_disk("spacy_crfsuite_trained_spacy3.bz2")
    return CRFEntityExtractor(nlp, crf_extractor=crf_extractor)
# Load the stock pipeline with its built-in NER disabled, then append the
# custom CRF component registered under "ner-crf-3".
nlp = spacy.load("en_core_web_md", disable=["ner"])
nlp.add_pipe("ner-crf-3")

# Run the pipeline on a sample sentence and list each entity with its label.
text = (
    "George Walker Bush (born July 6, 1946) is an American politician and businessman "
    "who served as the 43rd president of the United States from 2001 to 2009."
)
doc = nlp(text)
for entity in doc.ents:
    print(entity, "-", entity.label_)
Here is the output:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
/var/folders/v1/t149q84d20s60ktyxvb3bft40000gn/T/ipykernel_16022/3378811215.py in <module>
12
13 nlp = spacy.load("en_core_web_md", disable=["ner"])
---> 14 nlp.add_pipe("ner-crf-3")
15
16 doc = nlp(
~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/spacy/language.py in add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
793 lang_code=self.lang,
794 )
--> 795 pipe_component = self.create_pipe(
796 factory_name,
797 name=name,
~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/spacy/language.py in create_pipe(self, factory_name, name, config, raw_config, validate)
672 # We're calling the internal _fill here to avoid constructing the
673 # registered functions twice
--> 674 resolved = registry.resolve(cfg, validate=validate)
675 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
676 filled = Config(filled)
~/opt/anaconda3/envs/mini_project_spacy3/lib/python3.9/site-packages/confection/__init__.py in resolve(cls, config, schema, overrides, validate)
726 validate: bool = True,
...
--> 101 assert isinstance(ent_tagger, CRF)
102
103 self.ent_tagger = ent_tagger
AssertionError: