Skip to content

Commit

Permalink
remove spacy
Browse files Browse the repository at this point in the history
  • Loading branch information
trivedikaushal committed Aug 19, 2024
1 parent 495b61f commit cff2f91
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 6 deletions.
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
- 2.0.25
+ 2.0.26
6 changes: 2 additions & 4 deletions fast_bert/data_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
Dataset,
)
from torch.utils.data.distributed import DistributedSampler
- import spacy
+
from tqdm import tqdm, trange
from fastprogress.fastprogress import master_bar, progress_bar

Expand Down Expand Up @@ -73,8 +73,6 @@

def create_corpus(text_list, target_path, logger=None):

- # nlp = spacy.load("en_core_web_sm", disable=["tagger", "ner", "textcat"])
-
with open(target_path, "w") as f:
# Split sentences for each document
logger.info("Formatting corpus for {}".format(target_path))
Expand Down Expand Up @@ -317,7 +315,7 @@ def __init__(
# Mask tokens

def mask_tokens(self, inputs, mlm_probability=0.15):
- """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
+ """Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original."""
labels = inputs.clone()
# We sample a few tokens in each sequence for masked-LM training (with probability mlm_probability defaults to 0.15 in Bert/RoBERTa)

Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ pytorch-lamb
tensorboardX
fastprogress
scikit-learn
- spacy
seqeval
transformers==4.22.*
pandas
Expand Down

0 comments on commit cff2f91

Please sign in to comment.