forked from lorenlugosch/end-to-end-SLU
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Loren Lugosch
committed
Apr 3, 2019
1 parent
8a79ad7
commit 3b15dd1
Showing
24 changed files
with
30,986 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
[experiment] | ||
seed=1234 | ||
folder=experiments/no_pretraining_limited_wordings | ||
|
||
[phoneme_module] | ||
use_sincnet=True | ||
fs=16000 | ||
|
||
cnn_N_filt=80,60,60 | ||
cnn_len_filt=401,5,5 | ||
cnn_stride=80,1,1 | ||
cnn_max_pool_len=2,1,1 | ||
cnn_use_laynorm_inp=True | ||
cnn_use_batchnorm_inp=False | ||
cnn_use_laynorm=True,True,True | ||
cnn_use_batchnorm=False,False,False | ||
cnn_act=leaky_relu,leaky_relu,leaky_relu | ||
cnn_drop=0.0,0.0,0.0 | ||
|
||
phone_rnn_num_hidden=128,128 | ||
phone_downsample_len=2,2 | ||
phone_downsample_type=avg,avg | ||
phone_rnn_drop=0.0,0.0 | ||
phone_rnn_bidirectional=True | ||
|
||
[word_module] | ||
word_rnn_num_hidden=128,128 | ||
word_downsample_len=2,2 | ||
word_downsample_type=avg,avg | ||
word_rnn_drop=0.0,0.0 | ||
word_rnn_bidirectional=True | ||
vocabulary_size=10000 | ||
|
||
[intent_module] | ||
intent_rnn_num_hidden=128 | ||
intent_downsample_len=1 | ||
intent_downsample_type=none | ||
intent_rnn_drop=0.0 | ||
intent_rnn_bidirectional=True | ||
|
||
[pretraining] | ||
asr_path=/scratch/lugosch/librispeech | ||
pretraining_type=0 | ||
; 0 - no pre-training, 1 - phoneme loss, 2 - word loss + phoneme loss | ||
pretraining_lr=0.001 | ||
pretraining_batch_size=64 | ||
pretraining_num_epochs=10 | ||
pretraining_length_mean=2.25 | ||
pretraining_length_var=1 | ||
|
||
[training] | ||
slu_path=/scratch/lugosch/fluent_commands_dataset/ | ||
unfreezing_type=2 | ||
; 0 - no unfreezing, 1 - unfreeze word layers, 2 - unfreeze word layers and phoneme layers | ||
training_lr=0.00001 | ||
training_batch_size=64 | ||
training_num_epochs=100 | ||
dataset_subset_percentage=1 | ||
train_wording_path=experiments/train_wordings.txt | ||
; path to .txt file containing phrases to be included during training; if None, uses all phrases | ||
test_wording_path=experiments/test_wordings.txt |
61 changes: 61 additions & 0 deletions
61
experiments/no_pretraining_limited_wordings/experiment.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
[experiment] | ||
seed=1234 | ||
folder=experiments/no_pretraining_limited_wordings | ||
|
||
[phoneme_module] | ||
use_sincnet=True | ||
fs=16000 | ||
|
||
cnn_N_filt=80,60,60 | ||
cnn_len_filt=401,5,5 | ||
cnn_stride=80,1,1 | ||
cnn_max_pool_len=2,1,1 | ||
cnn_use_laynorm_inp=True | ||
cnn_use_batchnorm_inp=False | ||
cnn_use_laynorm=True,True,True | ||
cnn_use_batchnorm=False,False,False | ||
cnn_act=leaky_relu,leaky_relu,leaky_relu | ||
cnn_drop=0.0,0.0,0.0 | ||
|
||
phone_rnn_num_hidden=128,128 | ||
phone_downsample_len=2,2 | ||
phone_downsample_type=avg,avg | ||
phone_rnn_drop=0.0,0.0 | ||
phone_rnn_bidirectional=True | ||
|
||
[word_module] | ||
word_rnn_num_hidden=128,128 | ||
word_downsample_len=2,2 | ||
word_downsample_type=avg,avg | ||
word_rnn_drop=0.0,0.0 | ||
word_rnn_bidirectional=True | ||
vocabulary_size=10000 | ||
|
||
[intent_module] | ||
intent_rnn_num_hidden=128 | ||
intent_downsample_len=1 | ||
intent_downsample_type=none | ||
intent_rnn_drop=0.0 | ||
intent_rnn_bidirectional=True | ||
|
||
[pretraining] | ||
asr_path=/scratch/lugosch/librispeech | ||
pretraining_type=0 | ||
; 0 - no pre-training, 1 - phoneme loss, 2 - word loss + phoneme loss | ||
pretraining_lr=0.001 | ||
pretraining_batch_size=64 | ||
pretraining_num_epochs=10 | ||
pretraining_length_mean=2.25 | ||
pretraining_length_var=1 | ||
|
||
[training] | ||
slu_path=/scratch/lugosch/fluent_commands_dataset/ | ||
unfreezing_type=2 | ||
; 0 - no unfreezing, 1 - unfreeze word layers, 2 - unfreeze word layers and phoneme layers | ||
training_lr=0.00001 | ||
training_batch_size=64 | ||
training_num_epochs=100 | ||
dataset_subset_percentage=1 | ||
train_wording_path=experiments/train_wordings.txt | ||
; path to .txt file containing phrases to be included during training; if None, uses all phrases | ||
test_wording_path=experiments/test_wordings.txt |
21 changes: 21 additions & 0 deletions
21
experiments/no_pretraining_limited_wordings/pretraining/log.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
,phone_loss,phone_acc,word_loss,word_acc,set | ||
0,1.3447539217893283,0.5927509773338477,3.43951475203999,0.42782973619690545,train | ||
1,1.1893919976369598,0.6220227427792673,2.9667645765845556,0.5041583235513935,valid | ||
2,1.0823629311797929,0.6692771713363596,2.1809397046296746,0.5711428573883969,train | ||
3,1.0961804397234223,0.6519592652120431,2.4755801895008664,0.5692369051268269,valid | ||
4,1.0208398115563229,0.687909120898283,1.9100123722864404,0.6116601173124308,train | ||
5,1.062676408677592,0.6594296051395074,2.278185287711649,0.597411582719194,valid | ||
6,0.9961352141358085,0.6957458009786527,1.7975990468872856,0.6291673503747952,train | ||
7,1.060639298945071,0.6626063720079733,2.187572661927129,0.6095535460185679,valid | ||
8,0.9686492588511139,0.7036590352884712,1.7066683734674624,0.6440317140164772,train | ||
9,1.0239742797946125,0.6723535911643315,2.0549805276544215,0.6254583073728749,valid | ||
10,0.9546383454627353,0.707941908003501,1.6527819172270768,0.6527026828799296,train | ||
11,1.0123725482842343,0.6758742542311386,2.0099265527870833,0.6347253011569357,valid | ||
12,0.9419254876896349,0.7119580948663692,1.6111297064726684,0.6596654465464237,train | ||
13,0.9962982201203637,0.6839274698963751,1.9752632067322153,0.6406449175446299,valid | ||
14,0.9327209101101391,0.7145372336629323,1.5798849746295074,0.6647429590255473,train | ||
15,0.994990048897594,0.6818984996722163,1.9616814263530367,0.645758692668331,valid | ||
16,0.9339337178368284,0.7143349934977328,1.570603801490456,0.666365391170089,train | ||
17,0.9856117718284426,0.6838804736093707,1.911640357174607,0.6538676007208971,valid | ||
18,0.9172263790838209,0.7191225592174278,1.529421610962467,0.6735980742882584,train | ||
19,0.9760309108416696,0.6858952880462453,1.909460269529001,0.6554694634291606,valid |
Binary file added
BIN
+14 MB
experiments/no_pretraining_limited_wordings/pretraining/model_state.pth
Binary file not shown.
42 changes: 42 additions & 0 deletions
42
experiments/no_pretraining_limited_wordings/pretraining/phonemes.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
sil | ||
S | ||
IH | ||
N | ||
Y | ||
UW | ||
AA | ||
R | ||
AH | ||
F | ||
EH | ||
D | ||
V | ||
M | ||
AY | ||
K | ||
Z | ||
HH | ||
P | ||
IY | ||
B | ||
sp | ||
SH | ||
UH | ||
AE | ||
ER | ||
T | ||
OW | ||
DH | ||
CH | ||
L | ||
EY | ||
JH | ||
AO | ||
W | ||
G | ||
AW | ||
TH | ||
NG | ||
OY | ||
ZH | ||
spn |
Oops, something went wrong.