From dfd91330b43f9e2589d10821029291eaf5365648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:27:58 +0200 Subject: [PATCH 1/7] Introduce new env var LTP_JAR_DIR_PATH to let the user specify a custom installation of LT --- README.md | 16 +- language_tool_python/__main__.py | 4 +- language_tool_python/download_lt.py | 77 +++++-- language_tool_python/server.py | 109 ++++++--- language_tool_python/utils.py | 78 +++++-- requirements.txt | 2 + requirements_dev.txt | 3 + setup.py | 2 +- tests/test_local.bash | 15 +- tests/test_major_functionality.py | 338 ++++++++++++++++++---------- 10 files changed, 431 insertions(+), 213 deletions(-) create mode 100644 requirements_dev.txt diff --git a/README.md b/README.md index 557ccdf..0ed594d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![Test with PyTest](https://github.com/jxmorris12/language_tool_python/workflows/Test%20with%20PyTest/badge.svg) -Current LanguageTool version: **5.5** +Current LanguageTool version: **6.2** This is a Python wrapper for [LanguageTool](https://languagetool.org). LanguageTool is open-source grammar tool, also known as the spellchecker for OpenOffice. This library allows you to make to detect grammar errors and spelling mistakes through a Python script or through a command-line interface. @@ -151,7 +151,7 @@ You can run LanguageTool on one host and connect to it from another. This is us ## Configuration -LanguageTool offers lots of built-in configuration options. +LanguageTool offers lots of built-in configuration options. ### Example: Enabling caching Here's an example of using the configuration options to enable caching. Some users have reported that this helps performance a lot. @@ -222,13 +222,20 @@ Searching for a specific rule to enable or disable? 
Curious the breadth of rules ### Customizing Download URL or Path +If LanguageTool is already installed on your system, you can define the following environment variable: +```bash +$ export LTP_JAR_DIR_PATH=/path/to/the/language/tool/jar/files +``` + +Otherwise, `language_tool_python` can download LanguageTool for you automatically. + To overwrite the host part of URL that is used to download LanguageTool-{version}.zip: ```bash $ export LTP_DOWNLOAD_HOST = [alternate URL] ``` -This can be used to downgrade to an older version, for example, or to download from a mirror. +This can be used to downgrade to an older version, for example, or to download from a mirror. And to choose the specific folder to download the server to: @@ -252,6 +259,7 @@ into where the ``language_tool_python`` package resides. As of April 2020, `language_tool_python` was forked from `language-check` and no longer supports LanguageTool versions lower than 4.0. -### Acknowledgements +### Acknowledgements + This is a fork of https://github.com/myint/language-check/ that produces more easily parsable results from the command-line. 
diff --git a/language_tool_python/__main__.py b/language_tool_python/__main__.py index 113f5e9..54c3d4d 100644 --- a/language_tool_python/__main__.py +++ b/language_tool_python/__main__.py @@ -8,7 +8,7 @@ from .server import LanguageTool from .utils import LanguageToolError -import pkg_resources +import pkg_resources __version__ = pkg_resources.require("language_tool_python")[0].version @@ -175,4 +175,4 @@ def main(): return status -sys.exit(main()) \ No newline at end of file +sys.exit(main()) diff --git a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py index 801ded2..e128523 100755 --- a/language_tool_python/download_lt.py +++ b/language_tool_python/download_lt.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- """Download latest LanguageTool distribution.""" -import glob import logging import os import re @@ -11,11 +10,16 @@ import sys import tempfile import tqdm +from typing import Optional import zipfile from distutils.spawn import find_executable from urllib.parse import urljoin -from .utils import get_language_tool_download_path +from .utils import ( + find_existing_language_tool_downloads, + get_language_tool_download_path, + LTP_JAR_DIR_PATH_ENV_VAR +) # Create logger for this file. logging.basicConfig(format='%(message)s') @@ -56,7 +60,10 @@ def parse_java_version(version_text): (1, 8) """ - match = re.search(JAVA_VERSION_REGEX, version_text) or re.search(JAVA_VERSION_REGEX_UPDATED, version_text) + match = ( + re.search(JAVA_VERSION_REGEX, version_text) + or re.search(JAVA_VERSION_REGEX_UPDATED, version_text) + ) if not match: raise SystemExit( 'Could not parse Java version from """{}""".'.format(version_text)) @@ -64,19 +71,25 @@ def parse_java_version(version_text): major2 = int(match.group('major2')) if match.group('major2') else 0 return (major1, major2) + def confirm_java_compatibility(): """ Confirms Java major version >= 8. 
""" java_path = find_executable('java') if not java_path: - raise ModuleNotFoundError('No java install detected. Please install java to use language-tool-python.') + raise ModuleNotFoundError( + 'No java install detected. ' + 'Please install java to use language-tool-python.' + ) output = subprocess.check_output([java_path, '-version'], stderr=subprocess.STDOUT, universal_newlines=True) major_version, minor_version = parse_java_version(output) - # Some installs of java show the version number like `14.0.1` and others show `1.14.0.1` - # (with a leading 1). We want to support both, as long as the major version is >= 8. + # Some installs of java show the version number like `14.0.1` + # and others show `1.14.0.1` + # (with a leading 1). We want to support both, + # as long as the major version is >= 8. # (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x) if major_version == 1 and minor_version >= 8: return True @@ -85,6 +98,7 @@ def confirm_java_compatibility(): else: raise SystemError('Detected java {}.{}. LanguageTool requires Java >= 8.'.format(major_version, minor_version)) + def get_common_prefix(z): """Get common directory in a zip file if any.""" name_list = z.namelist() @@ -92,24 +106,29 @@ def get_common_prefix(z): return name_list[0] return None + def http_get(url, out_file, proxies=None): """ Get contents of a URL and save to a file. 
""" req = requests.get(url, stream=True, proxies=proxies) content_length = req.headers.get('Content-Length') total = int(content_length) if content_length is not None else None - if req.status_code == 403: # Not found on AWS + if req.status_code == 403: # Not found on AWS raise Exception('Could not find at URL {}.'.format(url)) - progress = tqdm.tqdm(unit="B", unit_scale=True, total=total, desc=f'Downloading LanguageTool {LATEST_VERSION}') + progress = tqdm.tqdm(unit="B", unit_scale=True, total=total, + desc=f'Downloading LanguageTool {LATEST_VERSION}') for chunk in req.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks + if chunk: # filter out keep-alive new chunks progress.update(len(chunk)) out_file.write(chunk) progress.close() + def unzip_file(temp_file, directory_to_extract_to): """ Unzips a .zip file to folder path. """ - logger.info('Unzipping {} to {}.'.format(temp_file.name, directory_to_extract_to)) + logger.info( + 'Unzipping {} to {}.'.format(temp_file.name, directory_to_extract_to) + ) with zipfile.ZipFile(temp_file.name, 'r') as zip_ref: zip_ref.extractall(directory_to_extract_to) @@ -128,26 +147,34 @@ def download_zip(url, directory): # Tell the user the download path. 
logger.info('Downloaded {} to {}.'.format(url, directory)) -def download_lt(): - download_folder = get_language_tool_download_path() - assert os.path.isdir(download_folder) - old_path_list = [ - path for path in - glob.glob(os.path.join(download_folder, 'LanguageTool*')) - if os.path.isdir(path) - ] +def download_lt(language_tool_version: Optional[str] = LATEST_VERSION): confirm_java_compatibility() - version = LATEST_VERSION - filename = FILENAME.format(version=version) - language_tool_download_url = urljoin(BASE_URL, filename) - dirname, _ = os.path.splitext(filename) - extract_path = os.path.join(download_folder, dirname) - if extract_path in old_path_list: + download_folder = get_language_tool_download_path() + + # Use the env var to the jar directory if it is defined + # otherwise look in the download directory + if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR): return - download_zip(language_tool_download_url, download_folder) + # Make download path, if it doesn't exist. + os.makedirs(download_folder, exist_ok=True) + + assert os.path.isdir(download_folder) + old_path_list = find_existing_language_tool_downloads(download_folder) + + if language_tool_version: + version = language_tool_version + filename = FILENAME.format(version=version) + language_tool_download_url = urljoin(BASE_URL, filename) + dirname, _ = os.path.splitext(filename) + extract_path = os.path.join(download_folder, dirname) + + if extract_path in old_path_list: + return + download_zip(language_tool_download_url, download_folder) + if __name__ == '__main__': sys.exit(download_lt()) diff --git a/language_tool_python/server.py b/language_tool_python/server.py index e9be920..f9e28d6 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -17,9 +17,11 @@ from .match import Match from .utils import ( correct, - parse_url, get_locale_language, get_language_tool_directory, get_server_cmd, + parse_url, get_locale_language, + get_language_tool_directory, get_server_cmd, 
FAILSAFE_LANGUAGE, startupinfo, - LanguageToolError, ServerError, JavaError, PathError) + LanguageToolError, ServerError, PathError +) DEBUG_MODE = False @@ -28,9 +30,11 @@ # we can ensure they're killed on exit. RUNNING_SERVER_PROCESSES: List[subprocess.Popen] = [] + class LanguageTool: - """Main class used for checking text against different rules. - LanguageTool v2 API documentation: https://languagetool.org/http-api/swagger-ui/#!/default/post_check + """Main class used for checking text against different rules. + LanguageTool v2 API documentation: + https://languagetool.org/http-api/swagger-ui/#!/default/post_check """ _MIN_PORT = 8081 _MAX_PORT = 8999 @@ -40,11 +44,11 @@ class LanguageTool: _server: subprocess.Popen = None _consumer_thread: threading.Thread = None _PORT_RE = re.compile(r"(?:https?://.*:|port\s+)(\d+)", re.I) - - def __init__(self, language=None, motherTongue=None, - remote_server=None, newSpellings=None, - new_spellings_persist=True, - host=None, config=None): + + def __init__(self, language=None, motherTongue=None, + remote_server=None, newSpellings=None, + new_spellings_persist=True, + host=None, config=None): self._new_spellings = None self._new_spellings_persist = new_spellings_persist self._host = host or socket.gethostbyname('localhost') @@ -115,15 +119,19 @@ def motherTongue(self): checking bilingual texts. 
""" return self._motherTongue + @motherTongue.setter def motherTongue(self, motherTongue): - self._motherTongue = (None if motherTongue is None - else LanguageTag(motherTongue, self._get_languages())) + self._motherTongue = ( + None if motherTongue is None + else LanguageTag(motherTongue, self._get_languages()) + ) + @property def _spell_checking_categories(self): return {'TYPOS'} - def check(self, text: str) -> [Match]: + def check(self, text: str) -> List[Match]: """Match text against enabled rules.""" url = urllib.parse.urljoin(self._url, 'check') response = self._query_server(url, self._create_params(text)) @@ -151,10 +159,12 @@ def _create_params(self, text: str) -> Dict[str, str]: def correct(self, text: str) -> str: """Automatically apply suggestions to the text.""" return correct(text, self.check(text)) - + def enable_spellchecking(self): """Enable spell-checking rules.""" - self.disabled_categories.difference_update(self._spell_checking_categories) + self.disabled_categories.difference_update( + self._spell_checking_categories + ) def disable_spellchecking(self): """Disable spell-checking rules.""" @@ -163,23 +173,35 @@ def disable_spellchecking(self): @staticmethod def _get_valid_spelling_file_path() -> str: library_path = get_language_tool_directory() - spelling_file_path = os.path.join(library_path, "org/languagetool/resource/en/hunspell/spelling.txt") + spelling_file_path = os.path.join( + library_path, "org/languagetool/resource/en/hunspell/spelling.txt" + ) if not os.path.exists(spelling_file_path): - raise FileNotFoundError("Failed to find the spellings file at {}\n Please file an issue at " - "https://github.com/jxmorris12/language_tool_python/issues" - .format(spelling_file_path)) + raise FileNotFoundError( + "Failed to find the spellings file at {}\n " + "Please file an issue at " + "https://github.com/jxmorris12/language_tool_python/issues" + .format(spelling_file_path)) return spelling_file_path def _register_spellings(self, spellings): 
spelling_file_path = self._get_valid_spelling_file_path() - with open(spelling_file_path, "a+", encoding='utf-8') as spellings_file: - spellings_file.write("\n" + "\n".join([word for word in spellings])) + with ( + open(spelling_file_path, "a+", encoding='utf-8') + as spellings_file + ): + spellings_file.write( + "\n" + "\n".join([word for word in spellings]) + ) if DEBUG_MODE: print("Registered new spellings at {}".format(spelling_file_path)) def _unregister_spellings(self): spelling_file_path = self._get_valid_spelling_file_path() - with open(spelling_file_path, 'r+', encoding='utf-8') as spellings_file: + with ( + open(spelling_file_path, 'r+', encoding='utf-8') + as spellings_file + ): spellings_file.seek(0, os.SEEK_END) for _ in range(len(self._new_spellings)): while spellings_file.read(1) != '\n': @@ -188,7 +210,9 @@ def _unregister_spellings(self): spellings_file.seek(spellings_file.tell() + 1, os.SEEK_SET) spellings_file.truncate() if DEBUG_MODE: - print("Unregistered new spellings at {}".format(spelling_file_path)) + print( + "Unregistered new spellings at {}".format(spelling_file_path) + ) def _get_languages(self) -> set: """Get supported languages (by querying the server).""" @@ -215,12 +239,19 @@ def _query_server(self, url, params=None, num_tries=2): print('_query_server url:', url, 'params:', params) for n in range(num_tries): try: - with requests.get(url, params=params, timeout=self._TIMEOUT) as response: + with ( + requests.get(url, params=params, timeout=self._TIMEOUT) + as response + ): try: return response.json() except json.decoder.JSONDecodeError as e: if DEBUG_MODE: - print('URL {} and params {} returned invalid JSON response:'.format(url, params)) + print( + 'URL {} and params {} ' + 'returned invalid JSON response: {}' + .format(url, params, e) + ) print(response) print(response.content) raise LanguageToolError(response.content.decode()) @@ -250,9 +281,16 @@ def _start_local_server(self): try: if DEBUG_MODE: if self._port: - 
print('language_tool_python initializing with port:', self._port) + print( + 'language_tool_python initializing with port:', + self._port + ) if self.config: - print('language_tool_python initializing with temporary config file:', self.config.path) + print( + 'language_tool_python initializing ' + 'with temporary config file:', + self.config.path + ) server_cmd = get_server_cmd(self._port, self.config) except PathError as e: # Can't find path to LanguageTool. @@ -279,8 +317,10 @@ def _start_local_server(self): if match: port = int(match.group(1)) if port != self._port: - raise LanguageToolError('requested port {}, but got {}'.format( - self._port, port)) + raise LanguageToolError( + 'requested port {}, but got {}' + .format(self._port, port) + ) break if not match: err_msg = self._terminate_server() @@ -298,7 +338,12 @@ def _start_local_server(self): self._consumer_thread.start() else: # Couldn't start the server, so maybe there is already one running. - raise ServerError('Server running; don\'t start a server here.') + if err: + raise Exception(err) + else: + raise ServerError( + 'Server running; don\'t start a server here.' 
+ ) def _server_is_alive(self): return self._server and self._server.poll() is None @@ -328,10 +373,14 @@ def _terminate_server(self): self._server = None return LanguageToolError_message + class LanguageToolPublicAPI(LanguageTool): """Language tool client of the official API.""" def __init__(self, *args, **kwargs): - super().__init__(*args, remote_server='https://languagetool.org/api/', **kwargs) + super().__init__( + *args, remote_server='https://languagetool.org/api/', **kwargs + ) + @atexit.register def terminate_server(): diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index cecf1b9..c9d435d 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -1,12 +1,9 @@ from typing import List, Tuple -import http.client import glob import locale import os -import re import subprocess -import sys import urllib.parse import urllib.request @@ -22,6 +19,11 @@ ] FAILSAFE_LANGUAGE = 'en' +LTP_PATH_ENV_VAR = "LTP_PATH" # LanguageTool download path + +# Directory containing the LanguageTool jar file: +LTP_JAR_DIR_PATH_ENV_VAR = "LTP_JAR_DIR_PATH" + # https://mail.python.org/pipermail/python-dev/2011-July/112551.html if os.name == 'nt': @@ -30,26 +32,32 @@ else: startupinfo = None + class LanguageToolError(Exception): - pass - + pass + + class ServerError(LanguageToolError): pass + class JavaError(LanguageToolError): pass + class PathError(LanguageToolError): pass + def parse_url(url_str): """ Parses a URL string, and adds 'http' if necessary. 
""" if 'http' not in url_str: - url_str = 'http://' + url_str + url_str = 'http://' + url_str return urllib.parse.urlparse(url_str).geturl() -def correct(text: str, matches: [Match]) -> str: + +def correct(text: str, matches: List[Match]) -> str: """Automatically apply suggestions to the text.""" ltext = list(text) matches = [match for match in matches if match.replacements] @@ -66,40 +74,56 @@ def correct(text: str, matches: [Match]) -> str: correct_offset += len(repl) - len(errors[n]) return ''.join(ltext) -def get_language_tool_download_path(): + +def get_language_tool_download_path() -> str: # Get download path from environment or use default. download_path = os.environ.get( - 'LTP_PATH', + LTP_PATH_ENV_VAR, os.path.join(os.path.expanduser("~"), ".cache", "language_tool_python") ) - # Make download path, if it doesn't exist. - os.makedirs(download_path, exist_ok=True) return download_path -def get_language_tool_directory(): - """Get LanguageTool directory.""" - download_folder = get_language_tool_download_path() - assert os.path.isdir(download_folder) + +def find_existing_language_tool_downloads(download_folder: str) -> List[str]: language_tool_path_list = [ path for path in glob.glob(os.path.join(download_folder, 'LanguageTool*')) if os.path.isdir(path) ] + return language_tool_path_list + + +def get_language_tool_directory() -> str: + """Get LanguageTool directory.""" + download_folder = get_language_tool_download_path() + if not os.path.isdir(download_folder): + raise NotADirectoryError( + "LanguageTool directory path is not a valid directory {}." + .format(download_folder) + ) + language_tool_path_list = find_existing_language_tool_downloads( + download_folder + ) if not len(language_tool_path_list): - raise FileNotFoundError('LanguageTool not found in {}.'.format(download_folder)) + raise FileNotFoundError( + 'LanguageTool not found in {}.'.format(download_folder) + ) + # Return the latest version found in the directory. 
return max(language_tool_path_list) -def get_server_cmd(port: int=None, config: LanguageToolConfig=None) -> List[str]: +def get_server_cmd( + port: int = None, config: LanguageToolConfig = None +) -> List[str]: java_path, jar_path = get_jar_info() cmd = [java_path, '-cp', jar_path, - 'org.languagetool.server.HTTPServer'] + 'org.languagetool.server.HTTPServer'] if port is not None: - cmd += ['-p', str(port)] - + cmd += ['-p', str(port)] + if config is not None: cmd += ['--config', config.path] @@ -110,10 +134,16 @@ def get_jar_info() -> Tuple[str, str]: java_path = which('java') if not java_path: raise JavaError("can't find Java") - dir_name = get_language_tool_directory() + + # Use the env var to the jar directory if it is defined + # otherwise look in the download directory + jar_dir_name = os.environ.get( + LTP_JAR_DIR_PATH_ENV_VAR, + get_language_tool_directory() + ) jar_path = None for jar_name in JAR_NAMES: - for jar_path in glob.glob(os.path.join(dir_name, jar_name)): + for jar_path in glob.glob(os.path.join(jar_dir_name, jar_name)): if os.path.isfile(jar_path): break else: @@ -122,10 +152,10 @@ def get_jar_info() -> Tuple[str, str]: break else: raise PathError("can't find languagetool-standalone in {!r}" - .format(dir_name)) + .format(jar_dir_name)) return java_path, jar_path def get_locale_language(): """Get the language code for the current locale setting.""" - return locale.getlocale()[0] or locale.getdefaultlocale()[0] \ No newline at end of file + return locale.getlocale()[0] or locale.getdefaultlocale()[0] diff --git a/requirements.txt b/requirements.txt index 5bb8c66..9dc910a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ +pip requests tqdm +wheel diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..466a229 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,3 @@ +pytest +pytest-cov +pytest-runner diff --git a/setup.py b/setup.py index a08ed42..c734620 100755 --- a/setup.py +++ b/setup.py 
@@ -7,7 +7,7 @@ setup( name='language_tool_python', - version='2.7.3', + version='3.6', description='Checks grammar using LanguageTool.', long_description_content_type='text/markdown', long_description=long_description, diff --git a/tests/test_local.bash b/tests/test_local.bash index b5d5e0d..2231d1b 100755 --- a/tests/test_local.bash +++ b/tests/test_local.bash @@ -4,20 +4,23 @@ set -ex -trap "echo -e '\x1b[01;31mFailed\x1b[0m'" ERR +failed_message='\x1b[01;31mFailed\x1b[0m' +trap "echo -e ${failed_message}" ERR exit_status=0 echo 'This is okay.' | python -m language_tool_python - || exit_status=1 echo 'This is noot okay.' | python -m language_tool_python - && exit_status=1 -echo 'This is okay.' | python -m language_tool_python - || exit_status=1 -echo 'This is noot okay.' | python -m language_tool_python - && exit_status=1 +echo 'This is okay.' | python -m language_tool_python --enabled-only \ + --enable=MORFOLOGIK_RULE_EN_US - || exit_status=1 +echo 'This is noot okay.' | python -m language_tool_python --enabled-only \ + --enable=MORFOLOGIK_RULE_EN_US - && exit_status=1 echo 'These are “smart” quotes.' | python -m language_tool_python - || exit_status=1 -echo 'These are "dumb" quotes.' | python -m language_tool_python - && exit_status=1 +echo 'These are "dumb" quotes.' | python -m language_tool_python - || exit_status=1 echo 'These are "dumb" quotes.' | python -m language_tool_python --enabled-only \ - --enable=EN_QUOTES - && exit_status=1 + --enable=EN_QUOTES - || exit_status=1 echo 'These are "dumb" quotes.' | python -m language_tool_python --enabled-only \ --enable=EN_UNPAIRED_BRACKETS - || exit_status=1 @@ -25,5 +28,7 @@ echo '# These are "dumb".' 
| python -m language_tool_python --ignore-lines='^#' if [[ "$exit_status" == 0 ]]; then echo -e '\x1b[01;32mOkay\x1b[0m' +else + echo -e $failed_message fi exit "$exit_status" diff --git a/tests/test_major_functionality.py b/tests/test_major_functionality.py index e40a87e..20f4a65 100644 --- a/tests/test_major_functionality.py +++ b/tests/test_major_functionality.py @@ -1,4 +1,5 @@ import re +import subprocess import time import pytest @@ -7,159 +8,252 @@ def test_langtool_load(): - import language_tool_python - tool = language_tool_python.LanguageTool("en-US") - matches = tool.check('ain\'t nothin but a thang') - assert str(matches) == """[Match({'ruleId': 'UPPERCASE_SENTENCE_START', 'message': 'This sentence does not start with an uppercase letter.', 'replacements': ['Ai'], 'offsetInContext': 0, 'context': "ain't nothin but a thang", 'offset': 0, 'errorLength': 2, 'category': 'CASING', 'ruleIssueType': 'typographical', 'sentence': "ain't nothin but a thang"}), Match({'ruleId': 'MORFOLOGIK_RULE_EN_US', 'message': 'Possible spelling mistake found.', 'replacements': ['nothing', 'no thin'], 'offsetInContext': 6, 'context': "ain't nothin but a thang", 'offset': 6, 'errorLength': 6, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': "ain't nothin but a thang"}), Match({'ruleId': 'MORFOLOGIK_RULE_EN_US', 'message': 'Possible spelling mistake found.', 'replacements': ['than', 'thing', 'hang', 'thank', 'Chang', 'tang', 'thong', 'twang', 'Thant', 'thane', 'Thanh', 'Jhang', 'Shang', 'Zhang'], 'offsetInContext': 19, 'context': "ain't nothin but a thang", 'offset': 19, 'errorLength': 5, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': "ain't nothin but a thang"})]""" - tool.close() + import language_tool_python + tool = language_tool_python.LanguageTool("en-US") + matches = tool.check('ain\'t nothin but a thang') + + expected_matches = [ + { + 'ruleId': 'UPPERCASE_SENTENCE_START', + 'message': 'This sentence does not start with an uppercase 
letter.', + 'replacements': ['Ai'], + 'offsetInContext': 0, + 'context': "ain't nothin but a thang", + 'offset': 0, 'errorLength': 2, + 'category': 'CASING', 'ruleIssueType': 'typographical', + 'sentence': "ain't nothin but a thang" + }, + { + 'ruleId': 'MORFOLOGIK_RULE_EN_US', + 'message': 'Possible spelling mistake found.', + 'replacements': ['nothing', 'no thin'], + 'offsetInContext': 6, + 'context': "ain't nothin but a thang", + 'offset': 6, 'errorLength': 6, + 'category': 'TYPOS', 'ruleIssueType': 'misspelling', + 'sentence': "ain't nothin but a thang" + }, + { + 'ruleId': 'MORFOLOGIK_RULE_EN_US', + 'message': 'Possible spelling mistake found.', + 'replacements': [ + 'than', 'thing', 'hang', 'thank', 'Chang', 'tang', 'thong', + 'twang', 'Thant', 'thane', 'Thanh', 'Jhang', 'Shang', 'Zhang' + ], + 'offsetInContext': 19, + 'context': "ain't nothin but a thang", + 'offset': 19, 'errorLength': 5, + 'category': 'TYPOS', 'ruleIssueType': 'misspelling', + 'sentence': "ain't nothin but a thang" + } + ] + + assert len(matches) == len(expected_matches) + for match_i, match in enumerate(matches): + assert isinstance(match, language_tool_python.Match) + for key in [ + 'ruleId', 'message', 'offsetInContext', + 'context', 'offset', 'errorLength', 'category', 'ruleIssueType', + 'sentence' + ]: + assert expected_matches[match_i][key] == getattr(match, key) + + # For replacements we allow some flexibility in the order + # of the suggestions depending on the version of LT. + for key in [ + 'replacements', + ]: + assert ( + set(expected_matches[match_i][key]) == set(getattr(match, key)) + ) + + tool.close() def test_process_starts_and_stops_in_context_manager(): - import language_tool_python - with language_tool_python.LanguageTool("en-US") as tool: - proc: subprocess.Popen = tool._server - # Make sure process is running before killing language tool object. 
- assert proc.poll() is None, "tool._server not running after creation" - # Make sure process stopped after close() was called. - assert proc.poll() is not None, "tool._server should stop running after deletion" + import language_tool_python + with language_tool_python.LanguageTool("en-US") as tool: + proc: subprocess.Popen = tool._server + # Make sure process is running before killing language tool object. + assert proc.poll() is None, "tool._server not running after creation" + # Make sure process stopped after close() was called. + assert proc.poll() is not None, "tool._server should stop running after deletion" + def test_process_starts_and_stops_on_close(): - import language_tool_python - tool = language_tool_python.LanguageTool("en-US") - proc: subprocess.Popen = tool._server - # Make sure process is running before killing language tool object. - assert proc.poll() is None, "tool._server not running after creation" - tool.close() # Explicitly close() object so process stops before garbage collection. - del tool - # Make sure process stopped after close() was called. - assert proc.poll() is not None, "tool._server should stop running after deletion" - # remember --> if poll is None: # p.subprocess is alive - + import language_tool_python + tool = language_tool_python.LanguageTool("en-US") + proc: subprocess.Popen = tool._server + # Make sure process is running before killing language tool object. + assert proc.poll() is None, "tool._server not running after creation" + tool.close() # Explicitly close() object so process stops before garbage collection. + del tool + # Make sure process stopped after close() was called. 
+ assert proc.poll() is not None, "tool._server should stop running after deletion" + # remember --> if poll is None: # p.subprocess is alive + + def test_local_client_server_connection(): - import language_tool_python - tool1 = language_tool_python.LanguageTool('en-US', host='0.0.0.0') - url = 'http://{}:{}/'.format(tool1._host, tool1._port) - tool2 = language_tool_python.LanguageTool('en-US', remote_server=url) - assert len(tool2.check('helo darknes my old frend')) - tool1.close() - tool2.close() + import language_tool_python + tool1 = language_tool_python.LanguageTool('en-US', host='0.0.0.0') + url = 'http://{}:{}/'.format(tool1._host, tool1._port) + tool2 = language_tool_python.LanguageTool('en-US', remote_server=url) + assert len(tool2.check('helo darknes my old frend')) + tool1.close() + tool2.close() + def test_config_text_length(): - import language_tool_python - tool = language_tool_python.LanguageTool('en-US', config={ 'maxTextLength': 12 }) - # With this config file, checking text with >12 characters should raise an error. - error_msg = re.escape("Error: Your text exceeds the limit of 12 characters (it's 27 characters). Please submit a shorter text.") - with pytest.raises(LanguageToolError, match=error_msg): - tool.check('Hello darkness my old frend') - # But checking shorter text should work fine. - # (should have 1 match for this one) - assert len(tool.check('Hello darkne')) - tool.close() + import language_tool_python + tool = language_tool_python.LanguageTool('en-US', config={'maxTextLength': 12 }) + # With this config file, checking text with >12 characters should raise an error. + error_msg = re.escape("Error: Your text exceeds the limit of 12 characters (it's 27 characters). Please submit a shorter text.") + with pytest.raises(LanguageToolError, match=error_msg): + tool.check('Hello darkness my old frend') + # But checking shorter text should work fine. 
+ # (should have 1 match for this one) + assert len(tool.check('Hello darkne')) + tool.close() + def test_config_caching(): - import language_tool_python - tool = language_tool_python.LanguageTool('en-US', config={ 'cacheSize': 1000, 'pipelineCaching': True }) - s = 'hello darkness my old frend' - t1 = time.time() - tool.check(s) - t2 = time.time() - tool.check(s) - t3 = time.time() - - print(t3 - t2, t2 - t1) - # This is a silly test that says: caching should speed up a grammary-checking by a factor - # of speed_factor when checking the same sentence twice. It theoretically could be very flaky. - # But in practice I've observed speedup of around 250x (6.76s to 0.028s). - speedup_factor = 10.0 - assert (t2 - t1) / speedup_factor > (t3 - t2) - tool.close() + import language_tool_python + tool = language_tool_python.LanguageTool('en-US', config={'cacheSize': 1000, 'pipelineCaching': True}) + s = 'hello darkness my old frend' + t1 = time.time() + tool.check(s) + t2 = time.time() + tool.check(s) + t3 = time.time() + + print(t3 - t2, t2 - t1) + # This is a silly test that says: caching should speed up a grammary-checking by a factor + # of speed_factor when checking the same sentence twice. It theoretically could be very flaky. + # But in practice I've observed speedup of around 250x (6.76s to 0.028s). 
+ speedup_factor = 10.0 + assert (t2 - t1) / speedup_factor > (t3 - t2) + tool.close() + def test_langtool_languages(): - import language_tool_python - tool = language_tool_python.LanguageTool("en-US") - assert tool._get_languages() == {'es-AR', 'ast-ES', 'fa', 'ar', 'ja', 'pl', 'en-ZA', 'sl', 'be-BY', 'gl', 'de-DE-x-simple-language-DE', 'ga', 'da-DK', 'ca-ES-valencia', 'eo', 'pt-PT', 'ro', 'fr-FR', 'sv-SE', 'br-FR', 'es-ES', 'be', 'de-CH', 'pl-PL', 'it-IT', 'de-DE-x-simple-language', 'en-NZ', 'sv', 'auto', 'km', 'pt', 'da', 'ta-IN', 'de', 'fa-IR', 'ca', 'de-AT', 'de-DE', 'sk', 'ta', 'uk', 'en-US', 'zh', 'uk-UA', 'pt-AO', 'el-GR', 'br', 'ca-ES-balear', 'fr', 'sk-SK', 'pt-BR', 'ro-RO', 'it', 'es', 'ru-RU', 'km-KH', 'en-GB', 'sl-SI', 'gl-ES', 'pt-MZ', 'nl', 'el', 'ca-ES', 'zh-CN', 'de-LU', 'nl-NL', 'ja-JP', 'ast', 'tl', 'ga-IE', 'en-AU', 'en', 'ru', 'nl-BE', 'en-CA', 'tl-PH'} - tool.close() + import language_tool_python + tool = language_tool_python.LanguageTool("en-US") + assert tool._get_languages().issuperset( + { + 'es-AR', 'ast-ES', 'fa', 'ar', 'ja', 'pl', 'en-ZA', 'sl', 'be-BY', + 'gl', 'de-DE-x-simple-language-DE', 'ga', 'da-DK', + 'ca-ES-valencia', 'eo', 'pt-PT', 'ro', 'fr-FR', 'sv-SE', 'br-FR', + 'es-ES', 'be', 'de-CH', 'pl-PL', 'it-IT', + 'de-DE-x-simple-language', 'en-NZ', 'sv', 'auto', 'km', 'pt', + 'da', 'ta-IN', 'de', 'fa-IR', 'ca', 'de-AT', 'de-DE', 'sk', 'ta', + 'uk', 'en-US', 'zh', 'uk-UA', 'pt-AO', 'el-GR', 'br', + 'ca-ES-balear', 'fr', 'sk-SK', 'pt-BR', 'ro-RO', 'it', 'es', + 'ru-RU', 'km-KH', 'en-GB', 'sl-SI', 'gl-ES', 'pt-MZ', 'nl', 'el', + 'ca-ES', 'zh-CN', 'de-LU', 'nl-NL', 'ja-JP', 'ast', 'tl', 'ga-IE', + 'en-AU', 'en', 'ru', 'nl-BE', 'en-CA', 'tl-PH' + } + ) + tool.close() + def test_match(): - import language_tool_python - tool = language_tool_python.LanguageTool('en-US') - text = u'A sentence with a error in the Hitchhiker’s Guide tot he Galaxy' - matches = tool.check(text) - assert len(matches) == 2 - assert str(matches[0]) == 'Offset 16, 
length 1, Rule ID: EN_A_VS_AN\nMessage: Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’.\nSuggestion: an\nA sentence with a error in the Hitchhiker’s Guide tot he ...\n ^' - tool.close() + import language_tool_python + tool = language_tool_python.LanguageTool('en-US') + text = u'A sentence with a error in the Hitchhiker’s Guide tot he Galaxy' + matches = tool.check(text) + assert len(matches) == 2 + assert str(matches[0]) == ( + 'Offset 16, length 1, Rule ID: EN_A_VS_AN\n' + 'Message: Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’.\n' + 'Suggestion: an\n' + 'A sentence with a error in the Hitchhiker’s Guide tot he ...' + '\n ^' + ) + tool.close() + def test_uk_typo(): - import language_tool_python - tool = language_tool_python.LanguageTool("en-UK") + import language_tool_python + tool = language_tool_python.LanguageTool("en-UK") + + sentence1 = "If you think this sentence is fine then, your wrong." + results1 = tool.check(sentence1) + assert len(results1) == 1 + assert language_tool_python.utils.correct(sentence1, results1) == "If you think this sentence is fine then, you're wrong." - sentence1 = "If you think this sentence is fine then, your wrong." - results1 = tool.check(sentence1) - assert len(results1) == 1 - assert language_tool_python.utils.correct(sentence1, results1) == "If you think this sentence is fine then, you're wrong." + results2 = tool.check("You're mum is called Emily, is that right?") + assert len(results2) == 0 + tool.close() - results2 = tool.check("You're mum is called Emily, is that right?") - assert len(results2) == 0 - tool.close() def test_remote_es(): - import language_tool_python - tool = language_tool_python.LanguageToolPublicAPI('es') - es_text = 'Escriba un texto aquí. LanguageTool le ayudará a afrentar algunas dificultades propias de la escritura. 
Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales. También algunos errores de estilo, a grosso modo.' - matches = tool.check(es_text) - assert str(matches) == """[Match({'ruleId': 'AFRENTAR_DIFICULTADES', 'message': 'Confusión entre «afrontar» y «afrentar».', 'replacements': ['afrontar'], 'offsetInContext': 43, 'context': '...n texto aquí. LanguageTool le ayudará a afrentar algunas dificultades propias de la escr...', 'offset': 49, 'errorLength': 8, 'category': 'INCORRECT_EXPRESSIONS', 'ruleIssueType': 'grammar', 'sentence': 'LanguageTool le ayudará a afrentar algunas dificultades propias de la escritura.'}), Match({'ruleId': 'PRON_HABER_PARTICIPIO', 'message': 'El v. ‘haber’ se escribe con hache.', 'replacements': ['ha'], 'offsetInContext': 43, 'context': '...ificultades propias de la escritura. Se a hecho un esfuerzo para detectar errores...', 'offset': 107, 'errorLength': 1, 'category': 'MISSPELLING', 'ruleIssueType': 'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'MORFOLOGIK_RULE_ES', 'message': 'Se ha encontrado un posible error ortográfico.', 'replacements': ['ortográficos', 'ortográficas', 'ortográfico', 'orográficos', 'ortografiaos', 'ortografíeos'], 'offsetInContext': 43, 'context': '...rzo para detectar errores tipográficos, ortograficos y incluso gramaticales. También algunos...', 'offset': 163, 'errorLength': 12, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'Y_E_O_U', 'message': 'Cuando precede a palabras que comienzan por ‘i’, la conjunción ‘y’ se transforma en ‘e’.', 'replacements': ['e'], 'offsetInContext': 43, 'context': '...ctar errores tipográficos, ortograficos y incluso gramaticales. 
También algunos e...', 'offset': 176, 'errorLength': 1, 'category': 'GRAMMAR', 'ruleIssueType': 'grammar', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'GROSSO_MODO', 'message': 'Esta expresión latina se usa sin preposición.', 'replacements': ['grosso modo'], 'offsetInContext': 43, 'context': '...les. También algunos errores de estilo, a grosso modo.', 'offset': 235, 'errorLength': 13, 'category': 'GRAMMAR', 'ruleIssueType': 'grammar', 'sentence': 'También algunos errores de estilo, a grosso modo.'})]""" - tool.close() + import language_tool_python + tool = language_tool_python.LanguageToolPublicAPI('es') + es_text = 'Escriba un texto aquí. LanguageTool le ayudará a afrentar algunas dificultades propias de la escritura. Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales. También algunos errores de estilo, a grosso modo.' + matches = tool.check(es_text) + assert str(matches) == """[Match({'ruleId': 'AFRENTAR_DIFICULTADES', 'message': 'Confusión entre «afrontar» y «afrentar».', 'replacements': ['afrontar'], 'offsetInContext': 43, 'context': '...n texto aquí. LanguageTool le ayudará a afrentar algunas dificultades propias de la escr...', 'offset': 49, 'errorLength': 8, 'category': 'INCORRECT_EXPRESSIONS', 'ruleIssueType': 'grammar', 'sentence': 'LanguageTool le ayudará a afrentar algunas dificultades propias de la escritura.'}), Match({'ruleId': 'PRON_HABER_PARTICIPIO', 'message': 'El v. ‘haber’ se escribe con hache.', 'replacements': ['ha'], 'offsetInContext': 43, 'context': '...ificultades propias de la escritura. 
Se a hecho un esfuerzo para detectar errores...', 'offset': 107, 'errorLength': 1, 'category': 'MISSPELLING', 'ruleIssueType': 'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'MORFOLOGIK_RULE_ES', 'message': 'Se ha encontrado un posible error ortográfico.', 'replacements': ['ortográficos', 'ortográficas', 'ortográfico', 'orográficos', 'ortografiaos', 'ortografíeos'], 'offsetInContext': 43, 'context': '...rzo para detectar errores tipográficos, ortograficos y incluso gramaticales. También algunos...', 'offset': 163, 'errorLength': 12, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'Y_E_O_U', 'message': 'Cuando precede a palabras que comienzan por ‘i’, la conjunción ‘y’ se transforma en ‘e’.', 'replacements': ['e'], 'offsetInContext': 43, 'context': '...ctar errores tipográficos, ortograficos y incluso gramaticales. También algunos e...', 'offset': 176, 'errorLength': 1, 'category': 'GRAMMAR', 'ruleIssueType': 'grammar', 'sentence': 'Se a hecho un esfuerzo para detectar errores tipográficos, ortograficos y incluso gramaticales.'}), Match({'ruleId': 'GROSSO_MODO', 'message': 'Esta expresión latina se usa sin preposición.', 'replacements': ['grosso modo'], 'offsetInContext': 43, 'context': '...les. 
También algunos errores de estilo, a grosso modo.', 'offset': 235, 'errorLength': 13, 'category': 'GRAMMAR', 'ruleIssueType': 'grammar', 'sentence': 'También algunos errores de estilo, a grosso modo.'})]""" + tool.close() + def test_correct_en_us(): - import language_tool_python - tool = language_tool_python.LanguageTool('en-US') + import language_tool_python + tool = language_tool_python.LanguageTool('en-US') + + matches = tool.check('cz of this brand is awsome,,i love this brand very much') + assert len(matches) == 4 - matches = tool.check('cz of this brand is awsome,,i love this brand very much') - assert len(matches) == 4 + assert tool.correct('cz of this brand is awsome,,i love this brand very much') == 'Cz of this brand is awesome,I love this brand very much' + tool.close() - assert tool.correct('cz of this brand is awsome,,i love this brand very much') == 'Cz of this brand is awesome,I love this brand very much' - tool.close() def test_spellcheck_en_gb(): - import language_tool_python + import language_tool_python + + s = 'Wat is wrong with the spll chker' - s = 'Wat is wrong with the spll chker' + # Correct a sentence with spell-checking + tool = language_tool_python.LanguageTool('en-GB') + assert tool.correct(s) == "Was is wrong with the sell cheer" - # Correct a sentence with spell-checking - tool = language_tool_python.LanguageTool('en-GB') - assert tool.correct(s) == "Was is wrong with the sell cheer" + # Correct a sentence without spell-checking + tool.disable_spellchecking() + assert tool.correct(s) == "Wat is wrong with the spll chker" + tool.close() - # Correct a sentence without spell-checking - tool.disable_spellchecking() - assert tool.correct(s) == "Wat is wrong with the spll chker" - tool.close() def test_session_only_new_spellings(): - import os - import hashlib - import language_tool_python - library_path = language_tool_python.utils.get_language_tool_directory() - spelling_file_path = os.path.join(library_path, 
"org/languagetool/resource/en/hunspell/spelling.txt") - with open(spelling_file_path, 'r') as spelling_file: - initial_spelling_file_contents = spelling_file.read() - initial_checksum = hashlib.sha256(initial_spelling_file_contents.encode()) - - new_spellings = ["word1", "word2", "word3"] - with language_tool_python.LanguageTool('en-US', newSpellings=new_spellings, new_spellings_persist=False) as tool: - tool.enabled_rules_only = True - tool.enabled_rules = {'MORFOLOGIK_RULE_EN_US'} - matches = tool.check(" ".join(new_spellings)) - - with open(spelling_file_path, 'r') as spelling_file: - subsequent_spelling_file_contents = spelling_file.read() - subsequent_checksum = hashlib.sha256(subsequent_spelling_file_contents.encode()) - - if initial_checksum != subsequent_checksum: - with open(spelling_file_path, 'w') as spelling_file: - spelling_file.write(initial_spelling_file_contents) - - assert not matches - assert initial_checksum.hexdigest() == subsequent_checksum.hexdigest() + import os + import hashlib + import language_tool_python + library_path = language_tool_python.utils.get_language_tool_directory() + spelling_file_path = os.path.join( + library_path, "org/languagetool/resource/en/hunspell/spelling.txt" + ) + with open(spelling_file_path, 'r') as spelling_file: + initial_spelling_file_contents = spelling_file.read() + initial_checksum = hashlib.sha256(initial_spelling_file_contents.encode()) + + new_spellings = ["word1", "word2", "word3"] + with language_tool_python.LanguageTool( + 'en-US', newSpellings=new_spellings, new_spellings_persist=False + ) as tool: + tool.enabled_rules_only = True + tool.enabled_rules = {'MORFOLOGIK_RULE_EN_US'} + matches = tool.check(" ".join(new_spellings)) + + with open(spelling_file_path, 'r') as spelling_file: + subsequent_spelling_file_contents = spelling_file.read() + subsequent_checksum = hashlib.sha256( + subsequent_spelling_file_contents.encode() + ) + + if initial_checksum != subsequent_checksum: + with 
open(spelling_file_path, 'w') as spelling_file: + spelling_file.write(initial_spelling_file_contents) + + assert not matches + assert initial_checksum.hexdigest() == subsequent_checksum.hexdigest() + def test_debug_mode(): from language_tool_python.server import DEBUG_MODE From a26dc68a269c4f9af6a4f7167b5212438d93c2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:44:10 +0200 Subject: [PATCH 2/7] Allow changing the languagetool download version --- language_tool_python/download_lt.py | 6 +++--- language_tool_python/server.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py index e128523..06b0fc6 100755 --- a/language_tool_python/download_lt.py +++ b/language_tool_python/download_lt.py @@ -31,7 +31,7 @@ BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/') FILENAME = 'LanguageTool-{version}.zip' -LATEST_VERSION = '6.2' +LT_DOWNLOAD_VERSION = '6.2' JAVA_VERSION_REGEX = re.compile( r'^(?:java|openjdk) version "(?P\d+)(|\.(?P\d+)\.[^"]+)"', @@ -116,7 +116,7 @@ def http_get(url, out_file, proxies=None): if req.status_code == 403: # Not found on AWS raise Exception('Could not find at URL {}.'.format(url)) progress = tqdm.tqdm(unit="B", unit_scale=True, total=total, - desc=f'Downloading LanguageTool {LATEST_VERSION}') + desc=f'Downloading LanguageTool {LT_DOWNLOAD_VERSION}') for chunk in req.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks progress.update(len(chunk)) @@ -148,7 +148,7 @@ def download_zip(url, directory): logger.info('Downloaded {} to {}.'.format(url, directory)) -def download_lt(language_tool_version: Optional[str] = LATEST_VERSION): +def download_lt(language_tool_version: Optional[str] = LT_DOWNLOAD_VERSION): confirm_java_compatibility() download_folder = get_language_tool_download_path() 
diff --git a/language_tool_python/server.py b/language_tool_python/server.py index f9e28d6..110a0b4 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -12,7 +12,7 @@ import urllib.parse from .config_file import LanguageToolConfig -from .download_lt import download_lt +from .download_lt import download_lt, LT_DOWNLOAD_VERSION from .language_tag import LanguageTag from .match import Match from .utils import ( @@ -45,10 +45,14 @@ class LanguageTool: _consumer_thread: threading.Thread = None _PORT_RE = re.compile(r"(?:https?://.*:|port\s+)(\d+)", re.I) - def __init__(self, language=None, motherTongue=None, - remote_server=None, newSpellings=None, - new_spellings_persist=True, - host=None, config=None): + def __init__( + self, language=None, motherTongue=None, + remote_server=None, newSpellings=None, + new_spellings_persist=True, + host=None, config=None, + language_tool_download_version: str = LT_DOWNLOAD_VERSION + ): + self.language_tool_download_version = language_tool_download_version self._new_spellings = None self._new_spellings_persist = new_spellings_persist self._host = host or socket.gethostbyname('localhost') @@ -276,7 +280,7 @@ def _start_server_on_free_port(self): def _start_local_server(self): # Before starting local server, download language tool if needed. 
- download_lt() + download_lt(self.language_tool_download_version) err = None try: if DEBUG_MODE: From 4bbd80ab16ab718cfed8acfcf9330a3f7251fb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:48:09 +0200 Subject: [PATCH 3/7] Add download version for LTP --- language_tool_python/download_lt.py | 6 +++--- language_tool_python/server.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py index 06b0fc6..f070109 100755 --- a/language_tool_python/download_lt.py +++ b/language_tool_python/download_lt.py @@ -31,7 +31,7 @@ BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/') FILENAME = 'LanguageTool-{version}.zip' -LT_DOWNLOAD_VERSION = '6.2' +LTP_DOWNLOAD_VERSION = '6.2' JAVA_VERSION_REGEX = re.compile( r'^(?:java|openjdk) version "(?P\d+)(|\.(?P\d+)\.[^"]+)"', @@ -116,7 +116,7 @@ def http_get(url, out_file, proxies=None): if req.status_code == 403: # Not found on AWS raise Exception('Could not find at URL {}.'.format(url)) progress = tqdm.tqdm(unit="B", unit_scale=True, total=total, - desc=f'Downloading LanguageTool {LT_DOWNLOAD_VERSION}') + desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}') for chunk in req.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks progress.update(len(chunk)) @@ -148,7 +148,7 @@ def download_zip(url, directory): logger.info('Downloaded {} to {}.'.format(url, directory)) -def download_lt(language_tool_version: Optional[str] = LT_DOWNLOAD_VERSION): +def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION): confirm_java_compatibility() download_folder = get_language_tool_download_path() diff --git a/language_tool_python/server.py b/language_tool_python/server.py index 110a0b4..bf52abb 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -12,7 
+12,7 @@ import urllib.parse from .config_file import LanguageToolConfig -from .download_lt import download_lt, LT_DOWNLOAD_VERSION +from .download_lt import download_lt, LTP_DOWNLOAD_VERSION from .language_tag import LanguageTag from .match import Match from .utils import ( @@ -50,7 +50,7 @@ def __init__( remote_server=None, newSpellings=None, new_spellings_persist=True, host=None, config=None, - language_tool_download_version: str = LT_DOWNLOAD_VERSION + language_tool_download_version: str = LTP_DOWNLOAD_VERSION ): self.language_tool_download_version = language_tool_download_version self._new_spellings = None From 17f13519e84ff774f861815a678363d22fb69ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:55:46 +0200 Subject: [PATCH 4/7] Update version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c734620..f3feb4a 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='language_tool_python', - version='3.6', + version='2.8', description='Checks grammar using LanguageTool.', long_description_content_type='text/markdown', long_description=long_description, From 65881e52cf79235ec2aa5ed1cc0ee313d5573477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 16:25:35 +0200 Subject: [PATCH 5/7] Update LTP to 6.4 --- README.md | 2 +- language_tool_python/download_lt.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0ed594d..ef992c6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![Test with PyTest](https://github.com/jxmorris12/language_tool_python/workflows/Test%20with%20PyTest/badge.svg) -Current LanguageTool version: **6.2** +Current LanguageTool version: **6.4** This is a Python wrapper for [LanguageTool](https://languagetool.org). 
LanguageTool is open-source grammar tool, also known as the spellchecker for OpenOffice. This library allows you to make to detect grammar errors and spelling mistakes through a Python script or through a command-line interface. diff --git a/language_tool_python/download_lt.py b/language_tool_python/download_lt.py index f070109..4b0e077 100755 --- a/language_tool_python/download_lt.py +++ b/language_tool_python/download_lt.py @@ -31,7 +31,7 @@ BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/') FILENAME = 'LanguageTool-{version}.zip' -LTP_DOWNLOAD_VERSION = '6.2' +LTP_DOWNLOAD_VERSION = '6.4' JAVA_VERSION_REGEX = re.compile( r'^(?:java|openjdk) version "(?P\d+)(|\.(?P\d+)\.[^"]+)"', From d0ad13fb16c19a04ad744e1fcda3aa33a7b48a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 17:41:47 +0200 Subject: [PATCH 6/7] Update server.py --- language_tool_python/server.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/language_tool_python/server.py b/language_tool_python/server.py index bf52abb..77da7cc 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -190,10 +190,7 @@ def _get_valid_spelling_file_path() -> str: def _register_spellings(self, spellings): spelling_file_path = self._get_valid_spelling_file_path() - with ( - open(spelling_file_path, "a+", encoding='utf-8') - as spellings_file - ): + with open(spelling_file_path, "a+", encoding='utf-8') as spellings_file: spellings_file.write( "\n" + "\n".join([word for word in spellings]) ) @@ -204,8 +201,7 @@ def _unregister_spellings(self): spelling_file_path = self._get_valid_spelling_file_path() with ( open(spelling_file_path, 'r+', encoding='utf-8') - as spellings_file - ): + ) as spellings_file:: spellings_file.seek(0, os.SEEK_END) for _ in range(len(self._new_spellings)): while spellings_file.read(1) != '\n': @@ -245,8 +241,7 @@ 
def _query_server(self, url, params=None, num_tries=2): try: with ( requests.get(url, params=params, timeout=self._TIMEOUT) - as response - ): + ) as response: try: return response.json() except json.decoder.JSONDecodeError as e: From c2fcc7fbe22caef4d4c2b229b392ef2ce574038e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20de=20Fr=C3=A9minville?= <6165084+pidefrem@users.noreply.github.com> Date: Thu, 11 Apr 2024 21:47:06 +0200 Subject: [PATCH 7/7] Fix typo in server.py --- language_tool_python/server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/language_tool_python/server.py b/language_tool_python/server.py index 77da7cc..46237e1 100644 --- a/language_tool_python/server.py +++ b/language_tool_python/server.py @@ -190,7 +190,9 @@ def _get_valid_spelling_file_path() -> str: def _register_spellings(self, spellings): spelling_file_path = self._get_valid_spelling_file_path() - with open(spelling_file_path, "a+", encoding='utf-8') as spellings_file: + with ( + open(spelling_file_path, "a+", encoding='utf-8') + ) as spellings_file: spellings_file.write( "\n" + "\n".join([word for word in spellings]) ) @@ -201,7 +203,7 @@ def _unregister_spellings(self): spelling_file_path = self._get_valid_spelling_file_path() with ( open(spelling_file_path, 'r+', encoding='utf-8') - ) as spellings_file:: + ) as spellings_file: spellings_file.seek(0, os.SEEK_END) for _ in range(len(self._new_spellings)): while spellings_file.read(1) != '\n':