From ce22087f2525f826850d222445cec4fd5e476ff0 Mon Sep 17 00:00:00 2001
From: Tom Aarsen <Cubiegamedev@gmail.com>
Date: Tue, 19 Sep 2023 12:03:22 +0200
Subject: [PATCH 1/2] Remove link to now-removed space

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 9abc3fd3..90b8f3b6 100644
--- a/README.md
+++ b/README.md
@@ -132,7 +132,6 @@ These models are further elaborated on in my [thesis](https://raw.githubusercont
 
 ### FewNERD
 * [`tomaarsen/span-marker-bert-base-fewnerd-fine-super`](https://huggingface.co/tomaarsen/span-marker-bert-base-fewnerd-fine-super) is a model that I have trained in 2 hours on the finegrained, supervised [Few-NERD dataset](https://huggingface.co/datasets/DFKI-SLT/few-nerd). It reached a 70.53 Test F1, competitive in the all-time [Few-NERD leaderboard](https://paperswithcode.com/sota/named-entity-recognition-on-few-nerd-sup) using `bert-base`. My training script resembles the one that you can see above.
-  * Try the model out online using this [🤗 Space](https://tomaarsen-span-marker-bert-base-fewnerd-fine-super.hf.space/).
 
 * [`tomaarsen/span-marker-roberta-large-fewnerd-fine-super`](https://huggingface.co/tomaarsen/span-marker-roberta-large-fewnerd-fine-super) was trained in 6 hours on the finegrained, supervised [Few-NERD dataset](https://huggingface.co/datasets/DFKI-SLT/few-nerd) using `roberta-large`. It reached a 71.03 Test F1, reaching a new state of the art in the all-time [Few-NERD leaderboard](https://paperswithcode.com/sota/named-entity-recognition-on-few-nerd-sup).
 * [`tomaarsen/span-marker-xlm-roberta-base-fewnerd-fine-super`](https://huggingface.co/tomaarsen/span-marker-xlm-roberta-base-fewnerd-fine-super) is a multilingual model that I have trained in 1.5 hours on the finegrained, supervised [Few-NERD dataset](https://huggingface.co/datasets/DFKI-SLT/few-nerd). It reached a 68.6 Test F1 on English, and works well on other languages like Spanish, French, German, Russian, Dutch, Polish, Icelandic, Greek and many more.

From 506c25b64eb2a8796fa6ff0a9621a9838f44d690 Mon Sep 17 00:00:00 2001
From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com>
Date: Fri, 29 Sep 2023 18:59:06 +0200
Subject: [PATCH 2/2] Add pipeline support for SpanMarker (#34)

* Add pipeline support for SpanMarker

* Add general-purpose default model

* Mention that pipeline is from transformers

* Add to docs
---
 CHANGELOG.md                                |  6 ++++
 docs/api/span_marker.pipeline_component.rst | 14 ++++++++
 docs/api/span_marker.rst                    |  1 +
 span_marker/__init__.py                     |  9 +++++
 span_marker/pipeline_component.py           | 38 +++++++++++++++++++++
 tests/test_pipeline.py                      | 11 ++++++
 6 files changed, 79 insertions(+)
 create mode 100644 docs/api/span_marker.pipeline_component.rst
 create mode 100644 span_marker/pipeline_component.py
 create mode 100644 tests/test_pipeline.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0532f5cf..67bcef1e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,12 @@ Types of changes
 * "Security" in case of vulnerabilities.
 -->
 
+## [Unreleased]
+
+### Added
+
+- Added `transformers` `pipeline` support (requires `import span_marker` to register the task), e.g. `pipeline(task="span-marker", model="tomaarsen/span-marker-mbert-base-multinerd")`.
+
 ## [1.3.0]
 
 ### Added
diff --git a/docs/api/span_marker.pipeline_component.rst b/docs/api/span_marker.pipeline_component.rst
new file mode 100644
index 00000000..8dc3a0cc
--- /dev/null
+++ b/docs/api/span_marker.pipeline_component.rst
@@ -0,0 +1,14 @@
+
+:autogenerated:
+
+..
+    This file is autogenerated by `sphinx-api`.
+
+span_marker.pipeline_component module
+=====================================
+
+.. currentmodule:: span_marker.pipeline_component
+
+.. automodule:: span_marker.pipeline_component
+    :members:
+    :show-inheritance:
\ No newline at end of file
diff --git a/docs/api/span_marker.rst b/docs/api/span_marker.rst
index b20446b7..a2650031 100644
--- a/docs/api/span_marker.rst
+++ b/docs/api/span_marker.rst
@@ -19,6 +19,7 @@ span_marker package
        span_marker.modeling
        span_marker.trainer
        span_marker.configuration
+       span_marker.pipeline_component
        span_marker.data_collator
        span_marker.tokenizer
        span_marker.evaluation
diff --git a/span_marker/__init__.py b/span_marker/__init__.py
index 2c2cad1d..0586fd92 100644
--- a/span_marker/__init__.py
+++ b/span_marker/__init__.py
@@ -5,14 +5,23 @@
 
 import torch
 from transformers import AutoConfig, AutoModel, TrainingArguments
+from transformers.pipelines import PIPELINE_REGISTRY, pipeline
 
 from span_marker.configuration import SpanMarkerConfig
 from span_marker.modeling import SpanMarkerModel
+from span_marker.pipeline_component import SpanMarkerPipeline
 from span_marker.trainer import Trainer
 
 # Set up for Transformers
 AutoConfig.register("span-marker", SpanMarkerConfig)
 AutoModel.register(SpanMarkerConfig, SpanMarkerModel)
+PIPELINE_REGISTRY.register_pipeline(
+    "span-marker",  # Task name, as used in `pipeline(task="span-marker", ...)`.
+    pipeline_class=SpanMarkerPipeline,
+    pt_model=SpanMarkerModel,
+    type="text",
+    default={"pt": ("tomaarsen/span-marker-bert-base-fewnerd-fine-super", "main")},  # Default (model, revision).
+)
 
 # Set up for spaCy
 try:
diff --git a/span_marker/pipeline_component.py b/span_marker/pipeline_component.py
new file mode 100644
index 00000000..be80a734
--- /dev/null
+++ b/span_marker/pipeline_component.py
@@ -0,0 +1,38 @@
+from typing import Any, Dict, List, Tuple, Union
+
+from transformers import Pipeline
+
+INPUT_TYPES = Union[str, List[str], List[List[str]]]
+OUTPUT_TYPES = Union[List[Dict[str, Union[str, int, float]]], List[List[Dict[str, Union[str, int, float]]]]]
+
+
+class SpanMarkerPipeline(Pipeline):
+    """A :class:`~transformers.Pipeline` for the ``"span-marker"`` task that delegates to the model's ``predict``.
+
+    The ``pipeline`` function is :func:`~transformers.pipeline`, which you can also import with
+    ``from transformers import pipeline``, but you must also import ``span_marker`` to register the
+    ``"span-marker"`` pipeline task.
+
+    Example::
+
+        >>> from span_marker import pipeline
+        >>> pipe = pipeline(task="span-marker", model="tomaarsen/span-marker-mbert-base-multinerd", device_map="auto")
+        >>> pipe("Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.")
+        [{'span': 'Amelia Earhart', 'label': 'PER', 'score': 0.9999709129333496, 'char_start_index': 0, 'char_end_index': 14},
+         {'span': 'Lockheed Vega 5B', 'label': 'VEHI', 'score': 0.9050095677375793, 'char_start_index': 38, 'char_end_index': 54},
+         {'span': 'Atlantic', 'label': 'LOC', 'score': 0.9991973042488098, 'char_start_index': 66, 'char_end_index': 74},
+         {'span': 'Paris', 'label': 'LOC', 'score': 0.9999232292175293, 'char_start_index': 78, 'char_end_index': 83}]
+
+    """
+
+    def _sanitize_parameters(self, **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
+        return {}, {}, {}  # Incoming kwargs are ignored: all three parameter dicts are always empty.
+
+    def preprocess(self, inputs: INPUT_TYPES) -> INPUT_TYPES:
+        return inputs  # Passed through unchanged; no preprocessing happens in this step.
+
+    def _forward(self, inputs: INPUT_TYPES) -> OUTPUT_TYPES:
+        return self.model.predict(inputs)  # Inference is delegated entirely to the model's `predict` method.
+
+    def postprocess(self, outputs: OUTPUT_TYPES) -> OUTPUT_TYPES:
+        return outputs  # The `predict` output is returned as-is.
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 00000000..c85a9d02
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,11 @@
+from transformers import pipeline
+
+import span_marker  # Imported for its side effect: registers the "span-marker" pipeline task.
+
+
+def test_pipeline() -> None:
+    pipe = pipeline(task="span-marker", model="tomaarsen/span-marker-bert-tiny-fewnerd-coarse-super")
+    outputs = pipe("Tom lives in the Netherlands.")
+    assert len(outputs) == 2  # Exactly two entities are expected, in order: "Tom", then "Netherlands".
+    assert outputs[0]["span"] == "Tom"
+    assert outputs[1]["span"] == "Netherlands"