-
Notifications
You must be signed in to change notification settings - Fork 0
/
status_utils.py
38 lines (31 loc) · 1008 Bytes
/
status_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pickle
import re

import numpy as np
def dummy_status(max_words=2000):
    """
    generates a random word-frequency dict

    Args:
        max_words (int, default=2000): max number of word-frequency
            pair to generate; capped at the number of dummy words
            stored in the pickle file

    Returns:
        dict object which contains random words as its key and random
        word count (1 <= count < 2000) as its value. Keys and values are
        the numpy scalars obtained by iterating the sampled arrays.
    """
    # load list of dummy words
    # NOTE(review): the path is resolved against the current working
    # directory, not this module's location -- confirm callers always run
    # from the expected directory.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while the file is a trusted, project-owned asset.
    with open('../static/dummy_words', 'rb') as fh:
        dummy_words = pickle.load(fh)

    # sampling without replacement cannot draw more items than exist, so
    # cap the request instead of letting np.random.choice raise ValueError
    n_words = min(max_words, len(dummy_words))

    # words drawn without replacement, each paired with a random int
    # in [1, 2000) (randint's upper bound is exclusive)
    sample_strs = np.random.choice(dummy_words, size=n_words, replace=False)
    sample_ints = np.random.randint(low=1, high=2000, size=n_words)
    return dict(zip(sample_strs, sample_ints))
def filter_words(words):
    """
    removes non-ascii chars, backslashes and extra white space from
    given string

    Args:
        words (str): string of words to filter

    Returns:
        filtered string with only ascii chars, in which every run of
        whitespace (line breaks included) is collapsed to a single space.
        Leading/trailing whitespace is collapsed but not stripped.
    """
    # line breaks become spaces so words on adjacent lines stay separated;
    # backslashes are dropped outright
    cleaned = words.translate(
        str.maketrans({'\n': ' ', '\r': ' ', '\\': None}))
    # drop everything outside the 7-bit ASCII range
    cleaned = re.sub(r'[^\x00-\x7F]+', '', cleaned)
    # collapse last: both steps above can leave adjacent spaces behind
    # (e.g. '\r\n', or a non-ascii token that sat between two spaces)
    return re.sub(r'\s+', ' ', cleaned)