From 1c5dc381d72b6814dda93faba23aa90e6f360418 Mon Sep 17 00:00:00 2001 From: Volodymyr Kyrylov Date: Wed, 26 Apr 2023 11:52:02 +0300 Subject: [PATCH] when measuring BPC, divide by ln(2) to get into the right base --- examples/exp/ppl/BPC | 8 ++++---- examples/scripts/evaluate_nll.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/exp/ppl/BPC b/examples/exp/ppl/BPC index 9bc374c..71395ab 100644 --- a/examples/exp/ppl/BPC +++ b/examples/exp/ppl/BPC @@ -1,12 +1,12 @@ data/flair-uk-forward.ppl.tsv sentences 28643 data/flair-uk-forward.ppl.tsv nll_mean 139.6773665773955 -data/flair-uk-forward.ppl.tsv bpc 0.8214119255908473 +data/flair-uk-forward.ppl.tsv bpc 1.7096613025528762 exp/ppl/small.tsv sentences 28643 exp/ppl/small.tsv nll_mean 118.76219278015107 -exp/ppl/small.tsv bpc 0.7222614857594851 +exp/ppl/small.tsv bpc 1.503292652634816 exp/ppl/medium.tsv sentences 28643 exp/ppl/medium.tsv nll_mean 115.0707072714907 -exp/ppl/medium.tsv bpc 0.6998114303527089 +exp/ppl/medium.tsv bpc 1.4565658036892946 exp/ppl/large.tsv sentences 28643 exp/ppl/large.tsv nll_mean 113.01209246430402 -exp/ppl/large.tsv bpc 0.6872915942213289 +exp/ppl/large.tsv bpc 1.4305074051181672 diff --git a/examples/scripts/evaluate_nll.py b/examples/scripts/evaluate_nll.py index fef1e9d..01ccaf2 100644 --- a/examples/scripts/evaluate_nll.py +++ b/examples/scripts/evaluate_nll.py @@ -24,7 +24,7 @@ df = df.loc[df.index.intersection(idf.index)] nll = np.log(df.ppl.to_numpy()) * df.sentence_len.to_numpy() -nll2 = nll / np.log2(np.e) +nll2 = nll / np.log(2) char_len = df.text.str.len().to_numpy() N = np.sum(char_len)