Skip to content

Commit

Permalink
Performance improvement when reading the file
Browse files (browse the repository at this point in the history)
Average run time for loading [r50k_base, p50k_base, cl100k_base, o200k_base]
is down from 0.47 seconds to 0.39 seconds
  • Loading branch information
thekid committed Oct 13, 2024
1 parent a402fdf commit 5491226
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/main/php/com/openai/TikTokenFilesIn.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ public function tokens($source): iterable {
if ($file->exists()) {
$file->open(File::READ);
try {
- while (false !== ($line= $file->gets(2048))) {
- sscanf($line, '%s %d', $encoded, $rank);
- yield base64_decode($encoded) => $rank;
+ $handle= $file->getHandle();
+ while (false !== ($line= fgets($handle, 2048))) {
+ [$encoded, $rank]= explode(' ', $line);
+ yield base64_decode($encoded) => (int)$rank;
}
} finally {
$file->close();
Expand Down

0 comments on commit 5491226

Please sign in to comment.