Skip to content

Commit

Permalink
Merge pull request #86 from pidefrem/deactivate_download_if_env_var_d…
Browse files Browse the repository at this point in the history
…efined

Introduce new env var `LTP_JAR_DIR_PATH` to let the user specify a custom installation path of LT
  • Loading branch information
jxmorris12 authored Apr 11, 2024
2 parents 6246cdf + c2fcc7f commit 908dcc6
Show file tree
Hide file tree
Showing 10 changed files with 435 additions and 216 deletions.
16 changes: 12 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

![Test with PyTest](https://github.com/jxmorris12/language_tool_python/workflows/Test%20with%20PyTest/badge.svg)

Current LanguageTool version: **5.5**
Current LanguageTool version: **6.4**

This is a Python wrapper for [LanguageTool](https://languagetool.org). LanguageTool is an open-source grammar tool, also known as the spellchecker for OpenOffice. This library allows you to detect grammar errors and spelling mistakes through a Python script or through a command-line interface.

Expand Down Expand Up @@ -151,7 +151,7 @@ You can run LanguageTool on one host and connect to it from another. This is us

## Configuration

LanguageTool offers lots of built-in configuration options.
LanguageTool offers lots of built-in configuration options.

### Example: Enabling caching
Here's an example of using the configuration options to enable caching. Some users have reported that this helps performance a lot.
Expand Down Expand Up @@ -222,13 +222,20 @@ Searching for a specific rule to enable or disable? Curious the breadth of rules

### Customizing Download URL or Path

If LanguageTool is already installed on your system, you can define the following environment variable:
```bash
$ export LTP_JAR_DIR_PATH=/path/to/the/language/tool/jar/files
```

Otherwise, `language_tool_python` can download LanguageTool for you automatically.

To overwrite the host part of URL that is used to download LanguageTool-{version}.zip:

```bash
$ export LTP_DOWNLOAD_HOST="[alternate URL]"
```

This can be used to downgrade to an older version, for example, or to download from a mirror.
This can be used to downgrade to an older version, for example, or to download from a mirror.

And to choose the specific folder to download the server to:

Expand All @@ -252,6 +259,7 @@ into where the ``language_tool_python`` package resides.

As of April 2020, `language_tool_python` was forked from `language-check` and no longer supports LanguageTool versions lower than 4.0.

### Acknowledgements
### Acknowledgements

This is a fork of https://github.com/myint/language-check/ that produces more easily parsable
results from the command-line.
4 changes: 2 additions & 2 deletions language_tool_python/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .server import LanguageTool
from .utils import LanguageToolError

import pkg_resources
import pkg_resources
__version__ = pkg_resources.require("language_tool_python")[0].version


Expand Down Expand Up @@ -175,4 +175,4 @@ def main():
return status


sys.exit(main())
sys.exit(main())
79 changes: 53 additions & 26 deletions language_tool_python/download_lt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
"""Download latest LanguageTool distribution."""

import glob
import logging
import os
import re
Expand All @@ -11,11 +10,16 @@
import sys
import tempfile
import tqdm
from typing import Optional
import zipfile

from distutils.spawn import find_executable
from urllib.parse import urljoin
from .utils import get_language_tool_download_path
from .utils import (
find_existing_language_tool_downloads,
get_language_tool_download_path,
LTP_JAR_DIR_PATH_ENV_VAR
)

# Create logger for this file.
logging.basicConfig(format='%(message)s')
Expand All @@ -27,7 +31,7 @@
BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/')
FILENAME = 'LanguageTool-{version}.zip'

LATEST_VERSION = '6.2'
LTP_DOWNLOAD_VERSION = '6.4'

JAVA_VERSION_REGEX = re.compile(
r'^(?:java|openjdk) version "(?P<major1>\d+)(|\.(?P<major2>\d+)\.[^"]+)"',
Expand Down Expand Up @@ -56,27 +60,36 @@ def parse_java_version(version_text):
(1, 8)
"""
match = re.search(JAVA_VERSION_REGEX, version_text) or re.search(JAVA_VERSION_REGEX_UPDATED, version_text)
match = (
re.search(JAVA_VERSION_REGEX, version_text)
or re.search(JAVA_VERSION_REGEX_UPDATED, version_text)
)
if not match:
raise SystemExit(
'Could not parse Java version from """{}""".'.format(version_text))
major1 = int(match.group('major1'))
major2 = int(match.group('major2')) if match.group('major2') else 0
return (major1, major2)


def confirm_java_compatibility():
""" Confirms Java major version >= 8. """
java_path = find_executable('java')
if not java_path:
raise ModuleNotFoundError('No java install detected. Please install java to use language-tool-python.')
raise ModuleNotFoundError(
'No java install detected. '
'Please install java to use language-tool-python.'
)

output = subprocess.check_output([java_path, '-version'],
stderr=subprocess.STDOUT,
universal_newlines=True)

major_version, minor_version = parse_java_version(output)
# Some installs of java show the version number like `14.0.1` and others show `1.14.0.1`
# (with a leading 1). We want to support both, as long as the major version is >= 8.
# Some installs of java show the version number like `14.0.1`
# and others show `1.14.0.1`
# (with a leading 1). We want to support both,
# as long as the major version is >= 8.
# (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x)
if major_version == 1 and minor_version >= 8:
return True
Expand All @@ -85,31 +98,37 @@ def confirm_java_compatibility():
else:
raise SystemError('Detected java {}.{}. LanguageTool requires Java >= 8.'.format(major_version, minor_version))


def get_common_prefix(z):
    """Return the first archive entry if every other entry starts with it.

    ``z`` must expose ``namelist()``. ``None`` is returned when the archive
    is empty or when some entry does not begin with the first entry's name.
    """
    entries = z.namelist()
    if not entries:
        return None
    candidate = entries[0]
    for entry in entries[1:]:
        if not entry.startswith(candidate):
            return None
    return candidate


def http_get(url, out_file, proxies=None):
    """Get contents of a URL and save them to a file.

    Args:
        url: Address to fetch.
        out_file: Writable binary file object that receives the body.
        proxies: Optional proxies mapping forwarded to ``requests.get``.

    Raises:
        Exception: If the server answers with HTTP 403.
    """
    # Both the response and the progress bar are context-managed so they are
    # released even when the download or the file write fails midway.
    with requests.get(url, stream=True, proxies=proxies) as req:
        # Reject the response before looking at its headers or body.
        if req.status_code == 403:  # Not found on AWS
            raise Exception('Could not find at URL {}.'.format(url))
        content_length = req.headers.get('Content-Length')
        # Without a Content-Length header tqdm shows an open-ended counter.
        total = int(content_length) if content_length is not None else None
        with tqdm.tqdm(
            unit="B", unit_scale=True, total=total,
            desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}',
        ) as progress:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    progress.update(len(chunk))
                    out_file.write(chunk)


def unzip_file(temp_file, directory_to_extract_to):
    """Extract the zip archive at ``temp_file.name`` into a folder.

    The source and destination are logged before extraction starts.
    """
    archive_path = temp_file.name
    message = 'Unzipping {} to {}.'.format(
        archive_path, directory_to_extract_to
    )
    logger.info(message)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(directory_to_extract_to)

Expand All @@ -128,26 +147,34 @@ def download_zip(url, directory):
# Tell the user the download path.
logger.info('Downloaded {} to {}.'.format(url, directory))

def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION):
    """Download and unpack LanguageTool unless it is already available.

    Nothing is downloaded when the environment variable named by
    ``LTP_JAR_DIR_PATH_ENV_VAR`` is set, when ``language_tool_version`` is
    empty or ``None``, or when the target ``LanguageTool-<version>`` folder
    is already among the existing downloads.

    Args:
        language_tool_version: Version to fetch, e.g. ``'6.4'``.
    """
    confirm_java_compatibility()

    download_folder = get_language_tool_download_path()

    # Use the env var to the jar directory if it is defined,
    # otherwise look in the download directory.
    if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR):
        return

    # Make download path, if it doesn't exist. With exist_ok=True this
    # raises if the path exists but is not a directory, so no separate
    # isdir assertion is needed.
    os.makedirs(download_folder, exist_ok=True)

    if not language_tool_version:
        return

    filename = FILENAME.format(version=language_tool_version)
    language_tool_download_url = urljoin(BASE_URL, filename)
    # The archive name without ".zip" is the folder this version lives in.
    dirname, _ = os.path.splitext(filename)
    extract_path = os.path.join(download_folder, dirname)

    if extract_path in find_existing_language_tool_downloads(download_folder):
        return
    download_zip(language_tool_download_url, download_folder)


if __name__ == '__main__':
sys.exit(download_lt())
Loading

0 comments on commit 908dcc6

Please sign in to comment.