Skip to content

Commit

Permalink
Merge pull request #1 from xp-forge/feature/integration-tests
Browse files Browse the repository at this point in the history
Add integration tests for cl100k_base and o200k_base
  • Loading branch information
thekid authored Oct 13, 2024
2 parents 5491226 + 6df17cc commit ef59ccb
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ jobs:
- name: Run test suite
run: sh xp-run xp.test.Runner -r Dots src/test/php

# Downloads the vocabulary files the integration tests load, then runs them.
# -f makes curl exit non-zero on HTTP errors instead of saving the error page
# as the .tiktoken file; -sS hides the progress meter but keeps error output.
- name: Run integration tests
  run: >
    curl -fsSO 'https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken' &&
    curl -fsSO 'https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken' &&
    sh xp-run xp.test.Runner src/it/php --folder=.
1 change: 1 addition & 0 deletions class.pth
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
src/main/php/
src/test/php/
src/it/php/
36 changes: 36 additions & 0 deletions src/it/php/com/openai/unittest/Cl100kBaseTest.class.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php namespace com\openai\unittest;

use com\openai\{Encoding, TikTokenFilesIn};
use test\{Args, Assert, Test, Values};

/**
 * Integration test for the `cl100k_base` encoding: encodes fixed inputs and
 * compares the result against fixed, expected token ids.
 */
#[Args('folder')]
class Cl100kBaseTest {
  private $encoder;

  /**
   * Creates an instance with a given folder containing the `.tiktoken` files
   *
   * @param  string $folder Defaults to the current directory
   */
  public function __construct($folder= '.') {
    $this->encoder= Encoding::named('cl100k_base')->load(new TikTokenFilesIn($folder));
  }

  /**
   * Yields `[text, expected token ids]` pairs consumed by `encode()`.
   *
   * @return iterable
   */
  private function fixtures() {
    yield ['hello world', [15339, 1917]];
    yield ['привет мир', [8164, 2233, 28089, 8341, 11562, 78746]];
    yield [".\n", [627]];
    yield ["today\n ", [31213, 198, 220]];
    yield ["today\n \n", [31213, 27907]];

    // Two spaces between the newlines: must differ from the single-space
    // case above, which expects a different token for the trailing whitespace.
    yield ["today\n  \n", [31213, 14211]];
    yield ['🌶', [9468, 234, 114]];
    yield ["👍", [9468, 239, 235]];
  }

  /** Encoding the empty string produces no tokens */
  #[Test]
  public function empty() {
    Assert::equals([], [...$this->encoder->encode('')]);
  }

  /** Encoding each fixture text produces exactly the expected token ids */
  #[Test, Values(from: 'fixtures')]
  public function encode($text, $expected) {
    Assert::equals($expected, [...$this->encoder->encode($text)]);
  }
}
36 changes: 36 additions & 0 deletions src/it/php/com/openai/unittest/O200kBaseTest.class.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php namespace com\openai\unittest;

use com\openai\{Encoding, TikTokenFilesIn};
use test\{Args, Assert, Test, Values};

/**
 * Integration test for the `o200k_base` encoding: encodes fixed inputs and
 * compares the result against fixed, expected token ids.
 */
#[Args('folder')]
class O200kBaseTest {
  private $encoder;

  /**
   * Creates an instance with a given folder containing the `.tiktoken` files
   *
   * @param  string $folder Defaults to the current directory
   */
  public function __construct($folder= '.') {
    $this->encoder= Encoding::named('o200k_base')->load(new TikTokenFilesIn($folder));
  }

  /**
   * Yields `[text, expected token ids]` pairs consumed by `encode()`.
   *
   * @return iterable
   */
  private function fixtures() {
    yield ['hello world', [24912, 2375]];
    yield ['привет мир', [9501, 131903, 37934]];
    yield [".\n", [558]];
    yield ["today\n ", [58744, 198, 220]];
    yield ["today\n \n", [58744, 47812]];

    // Two spaces between the newlines: must differ from the single-space
    // case above, which expects a different token for the trailing whitespace.
    // NOTE(review): expected ids taken as-is; confirm against the reference encoder.
    yield ["today\n  \n", [58744, 31835]];
    yield ['🌶', [64364, 114]];
    yield ["👍", [82514]];
  }

  /** Encoding the empty string produces no tokens */
  #[Test]
  public function empty() {
    Assert::equals([], [...$this->encoder->encode('')]);
  }

  /** Encoding each fixture text produces exactly the expected token ids */
  #[Test, Values(from: 'fixtures')]
  public function encode($text, $expected) {
    Assert::equals($expected, [...$this->encoder->encode($text)]);
  }
}

0 comments on commit ef59ccb

Please sign in to comment.