From 19eb9e391bb26e52140af52fceb100522c0eea5d Mon Sep 17 00:00:00 2001 From: lopezp Date: Mon, 1 Jul 2024 09:37:42 +0200 Subject: [PATCH 1/3] change for supporting tensorflow > 1.16 and keras 3 --- .../src/main/java/org/grobid/core/jni/JEPThreadPool.java | 6 ++++++ .../java/org/grobid/core/jni/JEPThreadPoolClassifier.java | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java b/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java index 6da9e66491..9ee42a68d9 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java @@ -86,6 +86,12 @@ private void initializeJepInstance(Jep jep, File delftPath) throws JepException // import packages jep.eval("import os"); jep.eval("os.chdir('" + delftPath.getAbsolutePath() + "')"); + + // for using legacy Keras 2, and not Keras 3 installed by default by TensorFlow from version 2.16 + jep.eval("os.environ[\"TF_USE_LEGACY_KERAS\"] = \"1\""); + jep.eval("os.environ[\"KERAS_BACKEND\"] = \"tensorflow\""); + jep.eval("import tf_keras as keras"); + jep.eval("from delft.utilities.Embeddings import Embeddings"); jep.eval("import delft.sequenceLabelling"); jep.eval("from delft.sequenceLabelling import Sequence"); diff --git a/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPoolClassifier.java b/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPoolClassifier.java index 349e00d103..fa45eb2cd0 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPoolClassifier.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPoolClassifier.java @@ -87,6 +87,12 @@ private void initializeJepInstance(Jep jep, File delftPath) throws JepException // import packages jep.eval("import os"); jep.eval("import json"); + + // for using legacy Keras 2, and not Keras 3 installed by default by TensorFlow from version 2.16 + 
jep.eval("os.environ[\"TF_USE_LEGACY_KERAS\"] = \"1\""); + jep.eval("os.environ[\"KERAS_BACKEND\"] = \"tensorflow\""); + jep.eval("import tf_keras as keras"); + jep.eval("os.chdir('" + delftPath.getAbsolutePath() + "')"); jep.eval("from delft.utilities.Embeddings import Embeddings"); //jep.eval("from delft.utilities.Utilities import split_data_and_labels"); From 1316110d9883b5f292235c2ccc38746205762d20 Mon Sep 17 00:00:00 2001 From: lopezp Date: Fri, 9 Aug 2024 18:02:34 +0200 Subject: [PATCH 2/3] update benchmark markdown --- ...RF-FEATURES_citations_header-09.08.2024.md | 282 +++++++++++++++++ ...n_reference_segmenter-header-09.08.2024.md | 277 +++++++++++++++++ ...S_reference_segmenter_header-09.08.2024.md | 294 ++++++++++++++++++ ...itations_reference_segmenter-09.08.2024.md | 278 +++++++++++++++++ 4 files changed, 1131 insertions(+) create mode 100644 grobid-trainer/doc/PLOS_1000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_header-09.08.2024.md create mode 100644 grobid-trainer/doc/PMC_sample_1943.results.grobid-0.8.1-Glutton-BidLSTM-CRF_citation_reference_segmenter-header-09.08.2024.md create mode 100644 grobid-trainer/doc/bioRxiv_test_2000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_CITATIONS_reference_segmenter_header-09.08.2024.md create mode 100644 grobid-trainer/doc/eLife_984.results.grobid-0.8-1--Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_reference_segmenter-09.08.2024.md diff --git a/grobid-trainer/doc/PLOS_1000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_header-09.08.2024.md b/grobid-trainer/doc/PLOS_1000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_header-09.08.2024.md new file mode 100644 index 0000000000..ace5c995a2 --- /dev/null +++ b/grobid-trainer/doc/PLOS_1000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_header-09.08.2024.md @@ -0,0 +1,282 @@ + +## Header metadata + +Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). 
+ +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 13.58 | 13.65 | 13.61 | 960 | +| authors | 98.87 | 98.97 | 98.92 | 969 | +| first_author | 99.18 | 99.28 | 99.23 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 95.75 | 94.6 | 95.17 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **77.01** | **76.91** | **76.96** | 3898 | +| all fields (macro avg.) | 76.84 | 76.62 | 76.73 | 3898 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 50.57 | 50.83 | 50.7 | 960 | +| authors | 98.87 | 98.97 | 98.92 | 969 | +| first_author | 99.18 | 99.28 | 99.23 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 99.39 | 98.2 | 98.79 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **87.11** | **86.99** | **87.05** | 3898 | +| all fields (macro avg.) | 87 | 86.82 | 86.91 | 3898 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 76.68 | 77.08 | 76.88 | 960 | +| authors | 99.28 | 99.38 | 99.33 | 969 | +| first_author | 99.28 | 99.38 | 99.33 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 99.7 | 98.5 | 99.09 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **93.78** | **93.66** | **93.72** | 3898 | +| all fields (macro avg.) 
| 93.73 | 93.59 | 93.66 | 3898 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 66.94 | 67.29 | 67.12 | 960 | +| authors | 99.18 | 99.28 | 99.23 | 969 | +| first_author | 99.18 | 99.28 | 99.23 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 99.49 | 98.3 | 98.89 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **91.27** | **91.15** | **91.21** | 3898 | +| all fields (macro avg.) | 91.2 | 91.04 | 91.12 | 3898 | + + +#### Instance-level results + +``` +Total expected instances: 1000 +Total correct instances: 139 (strict) +Total correct instances: 487 (soft) +Total correct instances: 726 (Levenshtein) +Total correct instances: 642 (ObservedRatcliffObershelp) + +Instance-level recall: 13.9 (strict) +Instance-level recall: 48.7 (soft) +Instance-level recall: 72.6 (Levenshtein) +Instance-level recall: 64.2 (RatcliffObershelp) +``` + + +## Citation metadata + +Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 81.17 | 78.43 | 79.78 | 44770 | +| date | 84.61 | 81.24 | 82.89 | 45457 | +| first_author | 91.47 | 88.34 | 89.88 | 44770 | +| inTitle | 81.67 | 83.58 | 82.61 | 42795 | +| issue | 93.62 | 92.68 | 93.15 | 18983 | +| page | 93.7 | 77.57 | 84.87 | 40844 | +| title | 59.97 | 60.47 | 60.22 | 43101 | +| volume | 95.89 | 96.11 | 96 | 40458 | +| | | | | | +| **all fields (micro avg.)** | **84.23** | **81.45** | **82.81** | 321178 | +| all fields (macro avg.) 
| 85.26 | 82.3 | 83.67 | 321178 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 81.49 | 78.73 | 80.09 | 44770 | +| date | 84.61 | 81.24 | 82.89 | 45457 | +| first_author | 91.69 | 88.55 | 90.09 | 44770 | +| inTitle | 85.51 | 87.5 | 86.49 | 42795 | +| issue | 93.62 | 92.68 | 93.15 | 18983 | +| page | 93.7 | 77.57 | 84.87 | 40844 | +| title | 91.95 | 92.74 | 92.34 | 43101 | +| volume | 95.89 | 96.11 | 96 | 40458 | +| | | | | | +| **all fields (micro avg.)** | **89.32** | **86.37** | **87.82** | 321178 | +| all fields (macro avg.) | 89.81 | 86.89 | 88.24 | 321178 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 90.64 | 87.57 | 89.08 | 44770 | +| date | 84.61 | 81.24 | 82.89 | 45457 | +| first_author | 92.23 | 89.08 | 90.62 | 44770 | +| inTitle | 86.45 | 88.47 | 87.45 | 42795 | +| issue | 93.62 | 92.68 | 93.15 | 18983 | +| page | 93.7 | 77.57 | 84.87 | 40844 | +| title | 94.56 | 95.37 | 94.96 | 43101 | +| volume | 95.89 | 96.11 | 96 | 40458 | +| | | | | | +| **all fields (micro avg.)** | **91.17** | **88.16** | **89.64** | 321178 | +| all fields (macro avg.) 
| 91.46 | 88.51 | 89.88 | 321178 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 84.94 | 82.06 | 83.48 | 44770 | +| date | 84.61 | 81.24 | 82.89 | 45457 | +| first_author | 91.47 | 88.34 | 89.88 | 44770 | +| inTitle | 85.16 | 87.15 | 86.14 | 42795 | +| issue | 93.62 | 92.68 | 93.15 | 18983 | +| page | 93.7 | 77.57 | 84.87 | 40844 | +| title | 93.95 | 94.74 | 94.34 | 43101 | +| volume | 95.89 | 96.11 | 96 | 40458 | +| | | | | | +| **all fields (micro avg.)** | **90** | **87.03** | **88.49** | 321178 | +| all fields (macro avg.) | 90.42 | 87.49 | 88.84 | 321178 | + + +#### Instance-level results + +``` +Total expected instances: 48449 +Total extracted instances: 48250 +Total correct instances: 13496 (strict) +Total correct instances: 22269 (soft) +Total correct instances: 24916 (Levenshtein) +Total correct instances: 23272 (RatcliffObershelp) + +Instance-level precision: 27.97 (strict) +Instance-level precision: 46.15 (soft) +Instance-level precision: 51.64 (Levenshtein) +Instance-level precision: 48.23 (RatcliffObershelp) + +Instance-level recall: 27.86 (strict) +Instance-level recall: 45.96 (soft) +Instance-level recall: 51.43 (Levenshtein) +Instance-level recall: 48.03 (RatcliffObershelp) + +Instance-level f-score: 27.91 (strict) +Instance-level f-score: 46.06 (soft) +Instance-level f-score: 51.53 (Levenshtein) +Instance-level f-score: 48.13 (RatcliffObershelp) + +Matching 1 : 35369 + +Matching 2 : 1260 + +Matching 3 : 3266 + +Matching 4 : 1800 + +Total matches : 41695 +``` + + +#### Citation context resolution +``` + +Total expected references: 48449 - 48.45 references per article +Total predicted references: 48250 - 48.25 references per article + +Total expected citation contexts: 69755 - 69.75 citation contexts per article +Total predicted citation contexts: 73696 - 73.7 citation contexts per article + 
+Total correct predicted citation contexts: 56772 - 56.77 citation contexts per article +Total wrong predicted citation contexts: 16924 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) + +Precision citation contexts: 77.04 +Recall citation contexts: 81.39 +fscore citation contexts: 79.15 +``` + + +## Fulltext structures + +Fulltext structure contents are complicated to capture from JATS NLM files. They are often normalized and different from the actual PDF content and can be inconsistent from one document to another. The scores of the following metrics are thus not very meaningful in absolute terms, in particular for the strict matching (textual content of the structure can be very long). As relative values for comparing different models, they seem however useful. + + +Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 54.06 | 52.12 | 53.07 | 779 | +| figure_title | 2.11 | 0.92 | 1.28 | 8943 | +| funding_stmt | 5.27 | 28.14 | 8.88 | 1507 | +| reference_citation | 86.69 | 94.65 | 90.49 | 69741 | +| reference_figure | 72.06 | 54.06 | 61.77 | 11010 | +| reference_table | 84.28 | 92.07 | 88 | 5159 | +| section_title | 77.18 | 65.8 | 71.03 | 17540 | +| table_title | 1.13 | 0.59 | 0.77 | 6092 | +| | | | | | +| **all fields (micro avg.)** | **73.79** | **73.86** | **73.82** | 120771 | +| all fields (macro avg.) 
| 47.85 | 48.54 | 46.91 | 120771 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 79.36 | 76.51 | 77.91 | 779 | +| figure_title | 81.17 | 35.33 | 49.24 | 8943 | +| funding_stmt | 6.89 | 36.76 | 11.6 | 1507 | +| reference_citation | 86.7 | 94.66 | 90.51 | 69741 | +| reference_figure | 72.52 | 54.41 | 62.17 | 11010 | +| reference_table | 84.46 | 92.27 | 88.19 | 5159 | +| section_title | 78.17 | 66.65 | 71.95 | 17540 | +| table_title | 15.97 | 8.39 | 11 | 6092 | +| | | | | | +| **all fields (micro avg.)** | **77.16** | **77.24** | **77.2** | 120771 | +| all fields (macro avg.) | 63.16 | 58.12 | 57.82 | 120771 | + + +**Document-level ratio results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 99.47 | 96.41 | 97.91 | 779 | +| | | | | | +| **all fields (micro avg.)** | **99.47** | **96.41** | **97.91** | 779 | +| all fields (macro avg.) | 99.47 | 96.41 | 97.91 | 779 | + +Evaluation metrics produced in 396.908 seconds diff --git a/grobid-trainer/doc/PMC_sample_1943.results.grobid-0.8.1-Glutton-BidLSTM-CRF_citation_reference_segmenter-header-09.08.2024.md b/grobid-trainer/doc/PMC_sample_1943.results.grobid-0.8.1-Glutton-BidLSTM-CRF_citation_reference_segmenter-header-09.08.2024.md new file mode 100644 index 0000000000..7bc732a83e --- /dev/null +++ b/grobid-trainer/doc/PMC_sample_1943.results.grobid-0.8.1-Glutton-BidLSTM-CRF_citation_reference_segmenter-header-09.08.2024.md @@ -0,0 +1,277 @@ + +## Header metadata + +Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
+ +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 16.78 | 16.48 | 16.63 | 1911 | +| authors | 92.01 | 91.91 | 91.96 | 1941 | +| first_author | 96.7 | 96.6 | 96.65 | 1941 | +| keywords | 64.99 | 63.62 | 64.3 | 1380 | +| title | 84.67 | 84.41 | 84.54 | 1943 | +| | | | | | +| **all fields (micro avg.)** | **71.79** | **71.22** | **71.5** | 9116 | +| all fields (macro avg.) | 71.03 | 70.6 | 70.81 | 9116 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 63.83 | 62.69 | 63.25 | 1911 | +| authors | 93.91 | 93.82 | 93.87 | 1941 | +| first_author | 97.06 | 96.96 | 97.01 | 1941 | +| keywords | 73.72 | 72.17 | 72.94 | 1380 | +| title | 92.15 | 91.87 | 92.01 | 1943 | +| | | | | | +| **all fields (micro avg.)** | **84.95** | **84.27** | **84.61** | 9116 | +| all fields (macro avg.) | 84.14 | 83.5 | 83.82 | 9116 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 91.05 | 89.43 | 90.23 | 1911 | +| authors | 96.08 | 95.98 | 96.03 | 1941 | +| first_author | 97.32 | 97.22 | 97.27 | 1941 | +| keywords | 84.16 | 82.39 | 83.27 | 1380 | +| title | 98.35 | 98.04 | 98.2 | 1943 | +| | | | | | +| **all fields (micro avg.)** | **94.01** | **93.25** | **93.63** | 9116 | +| all fields (macro avg.) 
| 93.39 | 92.61 | 93 | 9116 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 87.11 | 85.56 | 86.33 | 1911 | +| authors | 94.95 | 94.85 | 94.9 | 1941 | +| first_author | 96.7 | 96.6 | 96.65 | 1941 | +| keywords | 79.5 | 77.83 | 78.65 | 1380 | +| title | 96.33 | 96.04 | 96.19 | 1943 | +| | | | | | +| **all fields (micro avg.)** | **91.68** | **90.95** | **91.32** | 9116 | +| all fields (macro avg.) | 90.92 | 90.17 | 90.54 | 9116 | + + +#### Instance-level results + +``` +Total expected instances: 1943 +Total correct instances: 219 (strict) +Total correct instances: 904 (soft) +Total correct instances: 1434 (Levenshtein) +Total correct instances: 1294 (ObservedRatcliffObershelp) + +Instance-level recall: 11.27 (strict) +Instance-level recall: 46.53 (soft) +Instance-level recall: 73.8 (Levenshtein) +Instance-level recall: 66.6 (RatcliffObershelp) +``` + + +## Citation metadata + +Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 83.03 | 76.31 | 79.53 | 85778 | +| date | 94.6 | 84.25 | 89.13 | 87067 | +| first_author | 89.78 | 82.49 | 85.98 | 85778 | +| inTitle | 73.23 | 71.88 | 72.55 | 81007 | +| issue | 91.09 | 87.74 | 89.38 | 16635 | +| page | 94.57 | 83.7 | 88.81 | 80501 | +| title | 79.67 | 75.3 | 77.42 | 80736 | +| volume | 96.01 | 89.82 | 92.81 | 80067 | +| | | | | | +| **all fields (micro avg.)** | **87.22** | **80.74** | **83.86** | 597569 | +| all fields (macro avg.) 
| 87.75 | 81.44 | 84.45 | 597569 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 83.5 | 76.75 | 79.98 | 85778 | +| date | 94.6 | 84.25 | 89.13 | 87067 | +| first_author | 89.95 | 82.65 | 86.14 | 85778 | +| inTitle | 84.92 | 83.36 | 84.13 | 81007 | +| issue | 91.09 | 87.74 | 89.38 | 16635 | +| page | 94.57 | 83.7 | 88.81 | 80501 | +| title | 91.43 | 86.42 | 88.86 | 80736 | +| volume | 96.01 | 89.82 | 92.81 | 80067 | +| | | | | | +| **all fields (micro avg.)** | **90.61** | **83.89** | **87.12** | 597569 | +| all fields (macro avg.) | 90.76 | 84.34 | 87.41 | 597569 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 89.21 | 81.99 | 85.45 | 85778 | +| date | 94.6 | 84.25 | 89.13 | 87067 | +| first_author | 90.15 | 82.84 | 86.34 | 85778 | +| inTitle | 86.18 | 84.59 | 85.38 | 81007 | +| issue | 91.09 | 87.74 | 89.38 | 16635 | +| page | 94.57 | 83.7 | 88.81 | 80501 | +| title | 93.8 | 88.66 | 91.15 | 80736 | +| volume | 96.01 | 89.82 | 92.81 | 80067 | +| | | | | | +| **all fields (micro avg.)** | **91.96** | **85.14** | **88.42** | 597569 | +| all fields (macro avg.) 
| 91.95 | 85.45 | 88.56 | 597569 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 85.98 | 79.02 | 82.35 | 85778 | +| date | 94.6 | 84.25 | 89.13 | 87067 | +| first_author | 89.8 | 82.51 | 86 | 85778 | +| inTitle | 83.49 | 81.95 | 82.72 | 81007 | +| issue | 91.09 | 87.74 | 89.38 | 16635 | +| page | 94.57 | 83.7 | 88.81 | 80501 | +| title | 93.39 | 88.27 | 90.76 | 80736 | +| volume | 96.01 | 89.82 | 92.81 | 80067 | +| | | | | | +| **all fields (micro avg.)** | **91.01** | **84.25** | **87.5** | 597569 | +| all fields (macro avg.) | 91.12 | 84.66 | 87.74 | 597569 | + + +#### Instance-level results + +``` +Total expected instances: 90125 +Total extracted instances: 85902 +Total correct instances: 38762 (strict) +Total correct instances: 50900 (soft) +Total correct instances: 55783 (Levenshtein) +Total correct instances: 52319 (RatcliffObershelp) + +Instance-level precision: 45.12 (strict) +Instance-level precision: 59.25 (soft) +Instance-level precision: 64.94 (Levenshtein) +Instance-level precision: 60.91 (RatcliffObershelp) + +Instance-level recall: 43.01 (strict) +Instance-level recall: 56.48 (soft) +Instance-level recall: 61.9 (Levenshtein) +Instance-level recall: 58.05 (RatcliffObershelp) + +Instance-level f-score: 44.04 (strict) +Instance-level f-score: 57.83 (soft) +Instance-level f-score: 63.38 (Levenshtein) +Instance-level f-score: 59.44 (RatcliffObershelp) + +Matching 1 : 68328 + +Matching 2 : 4154 + +Matching 3 : 1863 + +Matching 4 : 662 + +Total matches : 75007 +``` + + +#### Citation context resolution +``` + +Total expected references: 90125 - 46.38 references per article +Total predicted references: 85902 - 44.21 references per article + +Total expected citation contexts: 139835 - 71.97 citation contexts per article +Total predicted citation contexts: 115373 - 59.38 citation contexts per article + 
+Total correct predicted citation contexts: 97277 - 50.07 citation contexts per article +Total wrong predicted citation contexts: 18096 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) + +Precision citation contexts: 84.32 +Recall citation contexts: 69.57 +fscore citation contexts: 76.23 +``` + + +## Fulltext structures + +Fulltext structure contents are complicated to capture from JATS NLM files. They are often normalized and different from the actual PDF content and can be inconsistent from one document to another. The scores of the following metrics are thus not very meaningful in absolute terms, in particular for the strict matching (textual content of the structure can be very long). As relative values for comparing different models, they seem however useful. + + +Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| figure_title | 31.44 | 24.61 | 27.61 | 7281 | +| reference_citation | 57.43 | 58.68 | 58.05 | 134196 | +| reference_figure | 61.21 | 65.9 | 63.47 | 19330 | +| reference_table | 83.01 | 88.39 | 85.62 | 7327 | +| section_title | 76.39 | 67.77 | 71.82 | 27619 | +| table_title | 57.3 | 50.29 | 53.57 | 3971 | +| | | | | | +| **all fields (micro avg.)** | **60.41** | **60.32** | **60.36** | 199724 | +| all fields (macro avg.) 
| 61.13 | 59.27 | 60.02 | 199724 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| figure_title | 78.68 | 61.58 | 69.09 | 7281 | +| reference_citation | 61.68 | 63.03 | 62.35 | 134196 | +| reference_figure | 61.69 | 66.41 | 63.97 | 19330 | +| reference_table | 83.19 | 88.58 | 85.8 | 7327 | +| section_title | 81.25 | 72.08 | 76.39 | 27619 | +| table_title | 81.89 | 71.87 | 76.56 | 3971 | +| | | | | | +| **all fields (micro avg.)** | **65.77** | **65.67** | **65.72** | 199724 | +| all fields (macro avg.) | 74.73 | 70.59 | 72.36 | 199724 | + + +**Document-level ratio results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| | | | | | +| **all fields (micro avg.)** | **0** | **0** | **0** | 0 | +| all fields (macro avg.) | 0 | 0 | 0 | 0 | + +Evaluation metrics produced in 625.694 seconds diff --git a/grobid-trainer/doc/bioRxiv_test_2000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_CITATIONS_reference_segmenter_header-09.08.2024.md b/grobid-trainer/doc/bioRxiv_test_2000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_CITATIONS_reference_segmenter_header-09.08.2024.md new file mode 100644 index 0000000000..d25f6456c4 --- /dev/null +++ b/grobid-trainer/doc/bioRxiv_test_2000.results.grobid-0.8-1-Glutton-DeLFT-BidLSTM-CRF-FEATURES_CITATIONS_reference_segmenter_header-09.08.2024.md @@ -0,0 +1,294 @@ + +## Header metadata + +Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). 
+ +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 2.2 | 2.16 | 2.18 | 1990 | +| authors | 83.2 | 82.49 | 82.84 | 1999 | +| first_author | 97.02 | 96.29 | 96.66 | 1997 | +| keywords | 58.71 | 59.83 | 59.27 | 839 | +| title | 77.67 | 76.85 | 77.26 | 2000 | +| | | | | | +| **all fields (micro avg.)** | **64.62** | **64.07** | **64.35** | 8825 | +| all fields (macro avg.) | 63.76 | 63.53 | 63.64 | 8825 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 59.71 | 58.54 | 59.12 | 1990 | +| authors | 83.7 | 82.99 | 83.35 | 1999 | +| first_author | 97.23 | 96.49 | 96.86 | 1997 | +| keywords | 63.86 | 65.08 | 64.46 | 839 | +| title | 79.89 | 79.05 | 79.47 | 2000 | +| | | | | | +| **all fields (micro avg.)** | **78.61** | **77.94** | **78.27** | 8825 | +| all fields (macro avg.) | 76.88 | 76.43 | 76.65 | 8825 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 80.22 | 78.64 | 79.42 | 1990 | +| authors | 92.18 | 91.4 | 91.79 | 1999 | +| first_author | 97.48 | 96.75 | 97.11 | 1997 | +| keywords | 79.42 | 80.93 | 80.17 | 839 | +| title | 92.02 | 91.05 | 91.53 | 2000 | +| | | | | | +| **all fields (micro avg.)** | **89.43** | **88.66** | **89.04** | 8825 | +| all fields (macro avg.) 
| 88.26 | 87.75 | 88 | 8825 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 76.88 | 75.38 | 76.12 | 1990 | +| authors | 87.79 | 87.04 | 87.42 | 1999 | +| first_author | 97.02 | 96.29 | 96.66 | 1997 | +| keywords | 71.35 | 72.71 | 72.02 | 839 | +| title | 87.87 | 86.95 | 87.41 | 2000 | +| | | | | | +| **all fields (micro avg.)** | **85.86** | **85.12** | **85.49** | 8825 | +| all fields (macro avg.) | 84.18 | 83.67 | 83.92 | 8825 | + + +#### Instance-level results + +``` +Total expected instances: 2000 +Total correct instances: 35 (strict) +Total correct instances: 708 (soft) +Total correct instances: 1222 (Levenshtein) +Total correct instances: 1046 (ObservedRatcliffObershelp) + +Instance-level recall: 1.75 (strict) +Instance-level recall: 35.4 (soft) +Instance-level recall: 61.1 (Levenshtein) +Instance-level recall: 52.3 (RatcliffObershelp) +``` + + +## Citation metadata + +Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 88.16 | 83.24 | 85.63 | 97183 | +| date | 91.69 | 86.31 | 88.92 | 97630 | +| doi | 70.84 | 83.79 | 76.78 | 16894 | +| first_author | 95.06 | 89.68 | 92.29 | 97183 | +| inTitle | 82.83 | 79.4 | 81.08 | 96430 | +| issue | 94.34 | 92.04 | 93.18 | 30312 | +| page | 94.97 | 78.34 | 85.86 | 88597 | +| pmcid | 66.38 | 86.12 | 74.97 | 807 | +| pmid | 70.08 | 84.95 | 76.8 | 2093 | +| title | 84.88 | 83.58 | 84.23 | 92463 | +| volume | 96.23 | 95.23 | 95.73 | 87709 | +| | | | | | +| **all fields (micro avg.)** | **89.85** | **85.34** | **87.54** | 707301 | +| all fields (macro avg.) 
| 85.04 | 85.7 | 85.04 | 707301 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 89.31 | 84.33 | 86.75 | 97183 | +| date | 91.69 | 86.31 | 88.92 | 97630 | +| doi | 75.34 | 89.11 | 81.65 | 16894 | +| first_author | 95.48 | 90.08 | 92.7 | 97183 | +| inTitle | 92.32 | 88.51 | 90.38 | 96430 | +| issue | 94.34 | 92.04 | 93.18 | 30312 | +| page | 94.97 | 78.34 | 85.86 | 88597 | +| pmcid | 75.64 | 98.14 | 85.44 | 807 | +| pmid | 74.5 | 90.3 | 81.64 | 2093 | +| title | 93.23 | 91.8 | 92.51 | 92463 | +| volume | 96.23 | 95.23 | 95.73 | 87709 | +| | | | | | +| **all fields (micro avg.)** | **92.66** | **88.02** | **90.28** | 707301 | +| all fields (macro avg.) | 88.46 | 89.47 | 88.61 | 707301 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 94.58 | 89.3 | 91.87 | 97183 | +| date | 91.69 | 86.31 | 88.92 | 97630 | +| doi | 77.6 | 91.79 | 84.1 | 16894 | +| first_author | 95.63 | 90.22 | 92.85 | 97183 | +| inTitle | 93.3 | 89.45 | 91.33 | 96430 | +| issue | 94.34 | 92.04 | 93.18 | 30312 | +| page | 94.97 | 78.34 | 85.86 | 88597 | +| pmcid | 75.64 | 98.14 | 85.44 | 807 | +| pmid | 74.5 | 90.3 | 81.64 | 2093 | +| title | 96.05 | 94.58 | 95.31 | 92463 | +| volume | 96.23 | 95.23 | 95.73 | 87709 | +| | | | | | +| **all fields (micro avg.)** | **93.99** | **89.28** | **91.57** | 707301 | +| all fields (macro avg.) 
| 89.51 | 90.52 | 89.66 | 707301 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 91.54 | 86.43 | 88.91 | 97183 | +| date | 91.69 | 86.31 | 88.92 | 97630 | +| doi | 76.04 | 89.94 | 82.41 | 16894 | +| first_author | 95.1 | 89.72 | 92.33 | 97183 | +| inTitle | 91.06 | 87.29 | 89.13 | 96430 | +| issue | 94.34 | 92.04 | 93.18 | 30312 | +| page | 94.97 | 78.34 | 85.86 | 88597 | +| pmcid | 66.38 | 86.12 | 74.97 | 807 | +| pmid | 70.08 | 84.95 | 76.8 | 2093 | +| title | 95.35 | 93.89 | 94.62 | 92463 | +| volume | 96.23 | 95.23 | 95.73 | 87709 | +| | | | | | +| **all fields (micro avg.)** | **93.02** | **88.36** | **90.63** | 707301 | +| all fields (macro avg.) | 87.53 | 88.21 | 87.53 | 707301 | + + +#### Instance-level results + +``` +Total expected instances: 98799 +Total extracted instances: 98068 +Total correct instances: 43771 (strict) +Total correct instances: 54778 (soft) +Total correct instances: 58972 (Levenshtein) +Total correct instances: 55693 (RatcliffObershelp) + +Instance-level precision: 44.63 (strict) +Instance-level precision: 55.86 (soft) +Instance-level precision: 60.13 (Levenshtein) +Instance-level precision: 56.79 (RatcliffObershelp) + +Instance-level recall: 44.3 (strict) +Instance-level recall: 55.44 (soft) +Instance-level recall: 59.69 (Levenshtein) +Instance-level recall: 56.37 (RatcliffObershelp) + +Instance-level f-score: 44.47 (strict) +Instance-level f-score: 55.65 (soft) +Instance-level f-score: 59.91 (Levenshtein) +Instance-level f-score: 56.58 (RatcliffObershelp) + +Matching 1 : 79296 + +Matching 2 : 4442 + +Matching 3 : 4371 + +Matching 4 : 2084 + +Total matches : 90193 +``` + + +#### Citation context resolution +``` + +Total expected references: 98797 - 49.4 references per article +Total predicted references: 98068 - 49.03 references per article + +Total expected citation contexts: 
142862 - 71.43 citation contexts per article +Total predicted citation contexts: 135692 - 67.85 citation contexts per article + +Total correct predicted citation contexts: 116736 - 58.37 citation contexts per article +Total wrong predicted citation contexts: 18956 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) + +Precision citation contexts: 86.03 +Recall citation contexts: 81.71 +fscore citation contexts: 83.82 +``` + + +## Fulltext structures + +Fulltext structure contents are complicated to capture from JATS NLM files. They are often normalized and different from the actual PDF content and can be inconsistent from one document to another. The scores of the following metrics are thus not very meaningful in absolute terms, in particular for the strict matching (textual content of the structure can be very long). As relative values for comparing different models, they seem however useful. + + +Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 29.95 | 25.78 | 27.71 | 446 | +| figure_title | 4.23 | 2.01 | 2.72 | 22978 | +| funding_stmt | 4.16 | 24.43 | 7.11 | 745 | +| reference_citation | 71.05 | 71.33 | 71.19 | 147470 | +| reference_figure | 70.59 | 67.74 | 69.14 | 47984 | +| reference_table | 48.11 | 83.03 | 60.92 | 5957 | +| section_title | 72.59 | 69.6 | 71.06 | 32398 | +| table_title | 4.31 | 2.85 | 3.43 | 3925 | +| | | | | | +| **all fields (micro avg.)** | **65.46** | **63.41** | **64.42** | 261903 | +| all fields (macro avg.) 
| 38.12 | 43.35 | 39.16 | 261903 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 50.52 | 43.5 | 46.75 | 446 | +| figure_title | 69.47 | 32.91 | 44.67 | 22978 | +| funding_stmt | 4.37 | 25.64 | 7.46 | 745 | +| reference_citation | 83.04 | 83.37 | 83.21 | 147470 | +| reference_figure | 71.22 | 68.34 | 69.75 | 47984 | +| reference_table | 48.56 | 83.8 | 61.49 | 5957 | +| section_title | 76.47 | 73.32 | 74.86 | 32398 | +| table_title | 51.44 | 34.06 | 40.99 | 3925 | +| | | | | | +| **all fields (micro avg.)** | **76.38** | **73.99** | **75.17** | 261903 | +| all fields (macro avg.) | 56.89 | 55.62 | 53.65 | 261903 | + + +**Document-level ratio results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 84.77 | 86.1 | 85.43 | 446 | +| | | | | | +| **all fields (micro avg.)** | **84.77** | **86.1** | **85.43** | 446 | +| all fields (macro avg.) | 84.77 | 86.1 | 85.43 | 446 | + +Evaluation metrics produced in 773.926 seconds diff --git a/grobid-trainer/doc/eLife_984.results.grobid-0.8-1--Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_reference_segmenter-09.08.2024.md b/grobid-trainer/doc/eLife_984.results.grobid-0.8-1--Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_reference_segmenter-09.08.2024.md new file mode 100644 index 0000000000..28b9a89542 --- /dev/null +++ b/grobid-trainer/doc/eLife_984.results.grobid-0.8-1--Glutton-DeLFT-BidLSTM-CRF-FEATURES_citations_reference_segmenter-09.08.2024.md @@ -0,0 +1,278 @@ + +## Header metadata + +Evaluation on 983 random PDF files out of 982 PDF (ratio 1.0). 
+ +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 9.44 | 9.16 | 9.3 | 983 | +| authors | 74.28 | 73.52 | 73.9 | 982 | +| first_author | 92.39 | 91.54 | 91.96 | 981 | +| title | 86.81 | 85.05 | 85.92 | 983 | +| | | | | | +| **all fields (micro avg.)** | **65.96** | **64.8** | **65.37** | 3929 | +| all fields (macro avg.) | 65.73 | 64.82 | 65.27 | 3929 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 22.46 | 21.77 | 22.11 | 983 | +| authors | 74.59 | 73.83 | 74.21 | 982 | +| first_author | 92.39 | 91.54 | 91.96 | 981 | +| title | 94.81 | 92.88 | 93.83 | 983 | +| | | | | | +| **all fields (micro avg.)** | **71.24** | **69.99** | **70.61** | 3929 | +| all fields (macro avg.) | 71.06 | 70 | 70.53 | 3929 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 47.53 | 46.08 | 46.8 | 983 | +| authors | 88.17 | 87.27 | 87.72 | 982 | +| first_author | 92.7 | 91.85 | 92.27 | 981 | +| title | 96.26 | 94.3 | 95.27 | 983 | +| | | | | | +| **all fields (micro avg.)** | **81.3** | **79.87** | **80.58** | 3929 | +| all fields (macro avg.) | 81.16 | 79.88 | 80.51 | 3929 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| abstract | 44.49 | 43.13 | 43.8 | 983 | +| authors | 79.94 | 79.12 | 79.53 | 982 | +| first_author | 92.39 | 91.54 | 91.96 | 981 | +| title | 96.26 | 94.3 | 95.27 | 983 | +| | | | | | +| **all fields (micro avg.)** | **78.39** | **77.02** | **77.7** | 3929 | +| all fields (macro avg.) 
| 78.27 | 77.02 | 77.64 | 3929 | + + +#### Instance-level results + +``` +Total expected instances: 983 +Total correct instances: 73 (strict) +Total correct instances: 198 (soft) +Total correct instances: 377 (Levenshtein) +Total correct instances: 335 (ObservedRatcliffObershelp) + +Instance-level recall: 7.43 (strict) +Instance-level recall: 20.14 (soft) +Instance-level recall: 38.35 (Levenshtein) +Instance-level recall: 34.08 (RatcliffObershelp) +``` + + +## Citation metadata + +Evaluation on 983 random PDF files out of 982 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 79.4 | 78.19 | 78.79 | 63170 | +| date | 95.86 | 93.99 | 94.91 | 63567 | +| first_author | 94.76 | 93.28 | 94.02 | 63170 | +| inTitle | 95.77 | 94.68 | 95.22 | 63118 | +| issue | 1.99 | 75 | 3.88 | 16 | +| page | 96.26 | 95.2 | 95.72 | 53303 | +| title | 90.25 | 90.68 | 90.47 | 61950 | +| volume | 97.85 | 98.17 | 98.01 | 60955 | +| | | | | | +| **all fields (micro avg.)** | **92.66** | **91.93** | **92.29** | 429249 | +| all fields (macro avg.) | 81.52 | 89.9 | 81.38 | 429249 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 79.54 | 78.33 | 78.93 | 63170 | +| date | 95.86 | 93.99 | 94.91 | 63567 | +| first_author | 94.84 | 93.36 | 94.1 | 63170 | +| inTitle | 96.25 | 95.15 | 95.7 | 63118 | +| issue | 1.99 | 75 | 3.88 | 16 | +| page | 96.26 | 95.2 | 95.72 | 53303 | +| title | 95.92 | 96.38 | 96.15 | 61950 | +| volume | 97.85 | 98.17 | 98.01 | 60955 | +| | | | | | +| **all fields (micro avg.)** | **93.59** | **92.85** | **93.22** | 429249 | +| all fields (macro avg.) 
| 82.31 | 90.7 | 82.17 | 429249 | + + + +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 93.29 | 91.87 | 92.58 | 63170 | +| date | 95.86 | 93.99 | 94.91 | 63567 | +| first_author | 95.29 | 93.8 | 94.54 | 63170 | +| inTitle | 96.58 | 95.47 | 96.02 | 63118 | +| issue | 1.99 | 75 | 3.88 | 16 | +| page | 96.26 | 95.2 | 95.72 | 53303 | +| title | 97.66 | 98.12 | 97.89 | 61950 | +| volume | 97.85 | 98.17 | 98.01 | 60955 | +| | | | | | +| **all fields (micro avg.)** | **95.97** | **95.21** | **95.59** | 429249 | +| all fields (macro avg.) | 84.35 | 92.7 | 84.19 | 429249 | + + + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| authors | 86.71 | 85.39 | 86.05 | 63170 | +| date | 95.86 | 93.99 | 94.91 | 63567 | +| first_author | 94.78 | 93.3 | 94.03 | 63170 | +| inTitle | 96.25 | 95.16 | 95.7 | 63118 | +| issue | 1.99 | 75 | 3.88 | 16 | +| page | 96.26 | 95.2 | 95.72 | 53303 | +| title | 97.5 | 97.97 | 97.74 | 61950 | +| volume | 97.85 | 98.17 | 98.01 | 60955 | +| | | | | | +| **all fields (micro avg.)** | **94.87** | **94.11** | **94.49** | 429249 | +| all fields (macro avg.) 
| 83.4 | 91.77 | 83.26 | 429249 | + + +#### Instance-level results + +``` +Total expected instances: 63569 +Total extracted instances: 66388 +Total correct instances: 42246 (strict) +Total correct instances: 45085 (soft) +Total correct instances: 52715 (Levenshtein) +Total correct instances: 49331 (RatcliffObershelp) + +Instance-level precision: 63.63 (strict) +Instance-level precision: 67.91 (soft) +Instance-level precision: 79.4 (Levenshtein) +Instance-level precision: 74.31 (RatcliffObershelp) + +Instance-level recall: 66.46 (strict) +Instance-level recall: 70.92 (soft) +Instance-level recall: 82.93 (Levenshtein) +Instance-level recall: 77.6 (RatcliffObershelp) + +Instance-level f-score: 65.02 (strict) +Instance-level f-score: 69.38 (soft) +Instance-level f-score: 81.13 (Levenshtein) +Instance-level f-score: 75.92 (RatcliffObershelp) + +Matching 1 : 58505 + +Matching 2 : 1012 + +Matching 3 : 1242 + +Matching 4 : 371 + +Total matches : 61130 +``` + + +#### Citation context resolution +``` + +Total expected references: 63569 - 64.67 references per article +Total predicted references: 66388 - 67.54 references per article + +Total expected citation contexts: 108880 - 110.76 citation contexts per article +Total predicted citation contexts: 99284 - 101 citation contexts per article + +Total correct predicted citation contexts: 95494 - 97.15 citation contexts per article +Total wrong predicted citation contexts: 3790 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) + +Precision citation contexts: 96.18 +Recall citation contexts: 87.71 +fscore citation contexts: 91.75 +``` + + +## Fulltext structures + +Fulltext structure contents are complicated to capture from JATS NLM files. They are often normalized and different from the actual PDF content and can be inconsistent from one document to another. 
The scores of the following metrics are thus not very meaningful in absolute terms, in particular for the strict matching (textual content of the structure can be very long). As relative values for comparing different models, however, they still seem useful. + + +Evaluation on 983 random PDF files out of 982 PDF (ratio 1.0). + +#### Strict Matching (exact matches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 29.94 | 26.71 | 28.24 | 584 | +| figure_title | 0.02 | 0.01 | 0.01 | 31671 | +| funding_stmt | 4.77 | 23.8 | 7.95 | 920 | +| reference_citation | 55.46 | 55.67 | 55.56 | 108807 | +| reference_figure | 56.78 | 49.91 | 53.12 | 68786 | +| reference_table | 68.24 | 73.46 | 70.75 | 2381 | +| section_title | 85.17 | 74.17 | 79.29 | 21808 | +| table_title | 0.45 | 0.16 | 0.23 | 1924 | +| | | | | | +| **all fields (micro avg.)** | **54.74** | **47.79** | **51.03** | 236881 | +| all fields (macro avg.) | 37.6 | 37.99 | 36.89 | 236881 | + + + +#### Soft Matching (ignoring punctuation, case and space characters mismatches) + +**Field-level results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 38.96 | 34.76 | 36.74 | 584 | +| figure_title | 48.86 | 15.12 | 23.09 | 31671 | +| funding_stmt | 4.77 | 23.8 | 7.95 | 920 | +| reference_citation | 91.04 | 91.38 | 91.21 | 108807 | +| reference_figure | 57.06 | 50.16 | 53.39 | 68786 | +| reference_table | 68.32 | 73.54 | 70.83 | 2381 | +| section_title | 86.05 | 74.93 | 80.1 | 21808 | +| table_title | 80.63 | 27.91 | 41.47 | 1924 | +| | | | | | +| **all fields (micro avg.)** | **76.29** | **66.6** | **71.12** | 236881 | +| all fields (macro avg.) 
| 59.46 | 48.95 | 50.6 | 236881 | + + +**Document-level ratio results** + +| label | precision | recall | f1 | support | +|--- |--- |--- |--- |--- | +| availability_stmt | 96.3 | 89.21 | 92.62 | 584 | +| | | | | | +| **all fields (micro avg.)** | **96.3** | **89.21** | **92.62** | 584 | +| all fields (macro avg.) | 96.3 | 89.21 | 92.62 | 584 | + +Evaluation metrics produced in 640.707 seconds From e2af354f48bdf982fe01f477a15f4883bfb0084f Mon Sep 17 00:00:00 2001 From: lopezp Date: Fri, 30 Aug 2024 09:59:47 +0200 Subject: [PATCH 3/3] fix jvm compatibility --- build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 8c716c69c0..7e7e403818 100644 --- a/build.gradle +++ b/build.gradle @@ -60,8 +60,8 @@ subprojects { } } -// sourceCompatibility = 1.11 -// targetCompatibility = 1.11 + //sourceCompatibility = 1.11 + //targetCompatibility = 1.11 kotlin { jvmToolchain(11)