chrisjbryant · FellaBennadji · Aug 13, 2020 · Aug 13, 2020 · Aug 13, 2020 · Aug 13, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 This log describes all the changes made to ERRANT since its release.
 
+## v2.2.2 (14-08-20)
+
+1. Added a copy of the NLTK Lancaster stemmer to `errant.en.lancaster` and removed the NLTK dependency. It was overkill to require the entire NLTK package just for this stemmer so we now bundle it with ERRANT. 
+
+2. Replaced the deprecated `tokens_from_list` function from spaCy v1 with the `Doc` function from spaCy v2 in `Annotator.parse`.
+
 ## v2.2.1 (17-05-20)
 
 Fixed key error in the classifier for rare spaCy 2 POS tags: _SP, BES, HVS.

diff --git a/LICENSE.md b/LICENSE.md
@@ -1,5 +1,6 @@
 # MIT License
 
+Copyright (c) 2020 Omri Abend, Leshem Choshen, Matanel Oren   
 Copyright (c) 2017 Christopher Bryant, Mariano Felice
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -18,4 +19,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
diff --git a/README.md b/README.md
diff --git a/demo/cor.txt b/demo/cor.txt
@@ -0,0 +1,4 @@
+This is a great sentence .
+Can you see the sea from where you live ?
+There is no need to say that we are highly motivated .
+This sentence contains no errors .
diff --git a/demo/cor1.txt b/demo/cor1.txt
diff --git a/demo/cor2.txt b/demo/cor2.txt
diff --git a/demo/orig.txt b/demo/orig.txt
@@ -1,5 +1,4 @@
-This are a great sentences .
+This are the most great sentences .
 Can you seen the sea from where live you .
-Let us discuss about all softwares problems you 've been having recently .
-This sentence contains no errors .
-
+There is no needing to say that they say we are highly motivation .
+This sentence contains no errors .
diff --git a/demo/out.m2 b/demo/out.m2
diff --git a/demo/out_combined.m2 b/demo/out_combined.m2
@@ -0,0 +1,18 @@
+S This are the most great sentences .
+A 1 2|||R:VERB:SVA|||is|||REQUIRED|||-NONE-|||0
+A 2 4|||R:DET:MW|||a|||REQUIRED|||-NONE-|||0
+A 5 6|||R:NOUN:NUM|||sentence|||REQUIRED|||-NONE-|||0
+
+S Can you seen the sea from where live you .
+A 2 3|||R:VERB:WC|||see|||REQUIRED|||-NONE-|||0
+A 7 9|||R:WO|||you live|||REQUIRED|||-NONE-|||0
+A 9 10|||R:PUNCT:WC|||?|||REQUIRED|||-NONE-|||0
+
+S There is no needing to say that they say we are highly motivation .
+A 3 4|||R:VERB->NOUN|||need|||REQUIRED|||-NONE-|||0
+A 7 9|||U:VERB||||||REQUIRED|||-NONE-|||0
+A 12 13|||R:NOUN->ADJ|||motivated|||REQUIRED|||-NONE-|||0
+
+S This sentence contains no errors .
+A -1 -1|||noop|||-NONE-|||REQUIRED|||-NONE-|||0
+
diff --git a/demo/out_errant.m2 b/demo/out_errant.m2
@@ -0,0 +1,18 @@
+S This are the most great sentences .
+A 1 2|||R:VERB:SVA|||is|||REQUIRED|||-NONE-|||0
+A 2 4|||R:OTHER|||a|||REQUIRED|||-NONE-|||0
+A 5 6|||R:NOUN:NUM|||sentence|||REQUIRED|||-NONE-|||0
+
+S Can you seen the sea from where live you .
+A 2 3|||R:VERB|||see|||REQUIRED|||-NONE-|||0
+A 7 9|||R:WO|||you live|||REQUIRED|||-NONE-|||0
+A 9 10|||R:PUNCT|||?|||REQUIRED|||-NONE-|||0
+
+S There is no needing to say that they say we are highly motivation .
+A 3 4|||R:MORPH|||need|||REQUIRED|||-NONE-|||0
+A 7 9|||U:OTHER||||||REQUIRED|||-NONE-|||0
+A 12 13|||R:MORPH|||motivated|||REQUIRED|||-NONE-|||0
+
+S This sentence contains no errors .
+A -1 -1|||noop|||-NONE-|||REQUIRED|||-NONE-|||0
+
diff --git a/demo/out_sercl.m2 b/demo/out_sercl.m2
@@ -0,0 +1,18 @@
+S This are the most great sentences .
+A 1 2|||AUX->AUX|||is|||REQUIRED|||-NONE-|||0
+A 2 4|||DET->DET|||a|||REQUIRED|||-NONE-|||0
+A 5 6|||NOUN->NOUN|||sentence|||REQUIRED|||-NONE-|||0
+
+S Can you seen the sea from where live you .
+A 2 3|||VERB->VERB|||see|||REQUIRED|||-NONE-|||0
+A 7 9|||VERB->VERB|||you live|||REQUIRED|||-NONE-|||0
+A 9 10|||PUNCT->PUNCT|||?|||REQUIRED|||-NONE-|||0
+
+S There is no needing to say that they say we are highly motivation .
+A 3 4|||VERB->NOUN|||need|||REQUIRED|||-NONE-|||0
+A 7 9|||VERB->None||||||REQUIRED|||-NONE-|||0
+A 12 13|||NOUN->ADJ|||motivated|||REQUIRED|||-NONE-|||0
+
+S This sentence contains no errors .
+A -1 -1|||noop|||-NONE-|||REQUIRED|||-NONE-|||0
+
diff --git a/demo/readme.md b/demo/readme.md
@@ -1,7 +1,21 @@
-## ERRANT Demo
+## SERRANT Demo
 
-Assuming you have read the main readme and installed ERRANT successfully, you can try running it on the sample text in this directory to make sure it's running properly:
+Assuming you have read the main readme and installed SERRANT successfully, you can try running it on the sample text in this directory to make sure it's running properly:
 
-`errant_parallel -orig orig.txt -cor cor1.txt cor2.txt -out test.m2`
+#### Annotated by ERRANT:
 
-This should produce a file called `test.m2` which is identical to `out.m2`.
+`serrant_parallel -orig orig.txt -cor cor.txt -out test_errant.m2 -annotator errant`
+
+This should produce a file called `test_errant.m2` which is identical to `out_errant.m2`.
+
+#### Annotated by SerCl:
+
+`serrant_parallel -orig orig.txt -cor cor.txt -out test_sercl.m2 -annotator sercl`
+
+This should produce a file called `test_sercl.m2` which is identical to `out_sercl.m2`.
+
+#### Our combination of both:
+
+`serrant_parallel -orig orig.txt -cor cor.txt -out test_combined.m2 -annotator combined`
+
+This should produce a file called `test_combined.m2` which is identical to `out_combined.m2`.
diff --git a/errant/__init__.py b/errant/__init__.py
diff --git a/serrant/__init__.py b/serrant/__init__.py
@@ -0,0 +1,33 @@
+from importlib import import_module
+import spacy
+from serrant.annotator import Annotator
+
+# SERRANT version
+__version__ = '1.0'
+# Compatible with ERRANT version 2.2.2
+
+# Load a SERRANT Annotator object for a given language
+def load(lang, nlp=None):
+    """Build a SERRANT Annotator for the given language.
+
+    Args:
+        lang: A language id string, e.g. "en".
+        nlp: An optional, already loaded spaCy pipeline. When omitted (or
+            falsy), the default model for `lang` is loaded with the "ner"
+            component disabled.
+
+    Returns:
+        An Annotator configured with the language's merger and ERRANT
+        classifier, the syntactic (SerCl) classifier and the combiner.
+
+    Raises:
+        ValueError: If `lang` is not a supported language.
+    """
+    # Default spaCy model per supported language; the keys double as the
+    # set of supported languages so the two can never drift apart
+    model_per_lang = {"en": "en_core_web_sm"}
+    if lang not in model_per_lang:
+        # ValueError is still an Exception, so existing handlers keep working
+        raise ValueError("%s is an unsupported or unknown language" % lang)
+
+    # Load spacy unless the caller supplied a pipeline
+    nlp = nlp or spacy.load(model_per_lang[lang], disable=["ner"])
+
+    # Load language edit merger
+    merger = import_module("serrant.%s.merger" % lang)
+    # Load language edit classifier
+    classifier = import_module("serrant.%s.classifier" % lang)
+    # Load sercl (syntactic classifier)
+    syntax_classifier = import_module("serrant.syntactic_classifier")
+    # Load combiner
+    combiner = import_module("serrant.%s.sercl_errant_combine" % lang)
+    # The English classifier needs spacy
+    if lang == "en":
+        classifier.nlp = nlp
+
+    # Return a configured SERRANT annotator
+    return Annotator(lang, nlp, merger, classifier, syntax_classifier, combiner)
diff --git a/errant/alignment.py → serrant/alignment.py b/errant/alignment.py → serrant/alignment.py
@@ -1,7 +1,7 @@
 from itertools import groupby
 import Levenshtein
 import spacy.parts_of_speech as POS
-from errant.edit import Edit
+from serrant.edit import Edit
 
 class Alignment:
     # Protected class resource

diff --git a/errant/annotator.py → serrant/annotator.py b/errant/annotator.py → serrant/annotator.py
@@ -1,18 +1,24 @@
-from errant.alignment import Alignment
-from errant.edit import Edit
+from serrant.alignment import Alignment
+from serrant.edit import Edit
+from copy import copy
+from spacy.tokens import Doc
+
 
 # Main ERRANT Annotator class
-class Annotator:
+class Annotator:    
 
     # Input 1: A string language id: e.g. "en"
     # Input 2: A spacy processing object for the language
     # Input 3: A merging module for the language
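     # Input 4: A classifier module for the language
+    # Input 5: A syntactic classifier (SerCl) module
+    # Input 6: A module that combines the ERRANT and SerCl classifications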
-    def __init__(self, lang, nlp=None, merger=None, classifier=None):
+    def __init__(self, lang, nlp=None, merger=None, classifier=None, syntax_classifier=None,
+                 classification_combiner=None):
         self.lang = lang
         self.nlp = nlp
         self.merger = merger
-        self.classifier = classifier
+        self.errant_classifier = classifier
+        self.syntax_classifier = syntax_classifier
+        self.combiner = classification_combiner
 
     # Input 1: A text string
     # Input 2: A flag for word tokenisation
@@ -21,7 +27,7 @@ def parse(self, text, tokenise=False):
         if tokenise:
             text = self.nlp(text)
         else:
-            text = self.nlp.tokenizer.tokens_from_list(text.split())
+            text = Doc(self.nlp.vocab, text.split())
             self.nlp.tagger(text)
             self.nlp.parser(text)
         return text
@@ -56,29 +62,65 @@ def merge(self, alignment, merging="rules"):
         return edits
 
     # Input: An Edit object
-    # Output: The same Edit object with an updated error type
-    def classify(self, edit):
-        return self.classifier.classify(edit)
+    # Output: The same Edit object with an updated error type by errant
+    def classify_by_errant(self, edit):
+        """Run the ERRANT classifier on `edit` and return its result."""
+        errant = self.errant_classifier
+        return errant.classify(edit)
+
+    # Input: An Edit object
+    # Output: The same Edit object with an updated error type by sercl
+    def classify_syntactically(self, edit):
+        """Run the syntactic (SerCl) classifier on `edit` and return its result."""
+        sercl = self.syntax_classifier
+        return sercl.classify(edit)
 
     # Input 1: An original text string parsed by spacy
     # Input 2: A corrected text string parsed by spacy
     # Input 3: A flag for standard Levenshtein alignment
     # Input 4: A flag for merging strategy
-    # Output: A list of automatically extracted, typed Edit objects
-    def annotate(self, orig, cor, lev=False, merging="rules"):
+    # Output: A list of automatically extracted, typed Edit objects by errant
+    def errant_annotate(self, orig, cor, lev=False, merging="rules"):
+        """Align, merge and ERRANT-classify the edits between `orig` and `cor`.
+
+        `lev` and `merging` are passed through to `align` and `merge`.
+        Returns a list holding the classifier's result for every merged edit.
+        """
+        alignment = self.align(orig, cor, lev)
+        edits = self.merge(alignment, merging)
+        # Keep what the classifier returns instead of rebinding a loop
+        # variable, which would silently discard a newly returned Edit
+        return [self.classify_by_errant(edit) for edit in edits]
+
+    # Input 1: An original text string parsed by spacy
+    # Input 2: A corrected text string parsed by spacy
+    # Input 3: A flag for standard Levenshtein alignment
+    # Input 4: A flag for merging strategy
+    # Output: A list of automatically extracted, typed Edit objects by sercl
+    def syntax_annotate(self, orig, cor, lev=False, merging="rules"):
         alignment = self.align(orig, cor, lev)
         edits = self.merge(alignment, merging)
         for edit in edits:
-            edit = self.classify(edit)
+            edit = self.classify_syntactically(edit)
         return edits
 
+    # Input 1: An original text string parsed by spacy
+    # Input 2: A corrected text string parsed by spacy
+    # Input 3: A flag for standard Levenshtein alignment
+    # Input 4: A flag for merging strategy
+    # Input 5: The annotating strategy: 'errant', 'sercl' or 'combined'
+    # Output: A list of automatically extracted, typed Edit objects
+    def annotate(self, orig, cor, lev=False, merging="rules", annotator='combined'):
+        """Extract and type the edits between `orig` and `cor`.
+
+        `annotator` selects the classification: 'errant', 'sercl', or any
+        other value (default 'combined') for the combination of both. When
+        no combiner is configured the ERRANT classification is returned.
+        """
+        # Only run the classifier(s) actually needed, so an ERRANT-only
+        # Annotator never touches the syntactic classifier
+        if self.combiner is None or annotator == 'errant':
+            return self.errant_annotate(orig, cor, lev, merging)
+        if annotator == 'sercl':
+            return self.syntax_annotate(orig, cor, lev, merging)
+
+        errant_edits = self.errant_annotate(orig, cor, lev, merging)
+        sercl_edits = self.syntax_annotate(orig, cor, lev, merging)
+        # Both lists are built by the same align and merge steps on the same
+        # input, so they must pair up one-to-one; raise explicitly so the
+        # check is not stripped when Python runs with -O
+        if len(errant_edits) != len(sercl_edits):
+            raise AssertionError("ERRANT and SerCl produced different numbers of edits")
+        return [self.combiner.classification_combiner(errant_edit, sercl_edit)
+                for errant_edit, sercl_edit in zip(errant_edits, sercl_edits)]
+
     # Input 1: An original text string parsed by spacy
     # Input 2: A corrected text string parsed by spacy
     # Input 3: A token span edit list; [o_start, o_end, c_start, c_end, (cat)]
     # Input 4: A flag for gold edit minimisation; e.g. [a b -> a c] = [b -> c]
     # Input 5: A flag to preserve the old error category (i.e. turn off classifier)
+    # Input 6: The annotating strategy: 'errant', 'sercl' or 'combined' (only used if old_cat is False)
     # Output: An Edit object
-    def import_edit(self, orig, cor, edit, min=True, old_cat=False):
+    def import_edit(self, orig, cor, edit, min=True, old_cat=False, annotator='combined'):
         # Undefined error type
         if len(edit) == 4:
             edit = Edit(orig, cor, edit)
@@ -93,6 +135,13 @@ def import_edit(self, orig, cor, edit, min=True, old_cat=False):
         if min: 
             edit = edit.minimise()
         # Classify edit
-        if not old_cat: 
-            edit = self.classify(edit)
+        if not old_cat:
+            errant_edit = self.classify_by_errant(copy(edit))
+            sercl_edit = self.classify_syntactically(copy(edit))
+            if self.combiner is None or annotator == 'errant':
+                edit = errant_edit
+            elif annotator == 'sercl':
+                edit = sercl_edit
+            else:
+                edit = self.combiner.classification_combiner(errant_edit, sercl_edit)
         return edit
diff --git a/errant/commands/__init__.py → serrant/commands/__init__.py b/errant/commands/__init__.py → serrant/commands/__init__.py
diff --git a/errant/commands/compare_m2.py → serrant/commands/compare_m2.py b/errant/commands/compare_m2.py → serrant/commands/compare_m2.py