Source code for zink.extractor
from gliner import GLiNER
import warnings
warnings.filterwarnings("ignore")
[docs]
class EntityExtractor:
def __init__(
self, model_name="deepanwa/NuNerZero_onnx"
): # previous model - numind/NuNerZero
self.model = GLiNER.from_pretrained(
model_name, load_onnx_model=True, load_tokenizer=True
)
# NuZero requires lower-cased labels.
self.labels = ["person", "date", "location"]
[docs]
def predict(self, text, labels=None):
"""
Predict entities in the given text.
Parameters:
text (str): The input text.
labels (list of str, ): Only entities with these labels will be returned.
If None, all detected entities are returned.
Returns:
list of dict: A list of dictionaries, each containing 'start', 'end', 'label', and 'text'.
"""
if labels is not None:
labels = [label.lower() for label in labels]
else:
labels = self.labels
return self.model.predict_entities(text, labels)
_DEFAULT_EXTRACTOR = EntityExtractor()