Skip to content

Commit de25276

Browse files
authored
Merge pull request #130 from chenqianhe/main
feat: add missing gpt-4o-2024-11-20 models and fix o3-mini on wasm
2 parents 37c993f + 5f92348 commit de25276

File tree

4 files changed

+18
-3
lines changed

4 files changed

+18
-3
lines changed

.changeset/beige-grapes-wink.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
"tiktoken": patch
3+
"js-tiktoken": patch
4+
"@dqbd/tiktoken": patch
5+
---
6+
7+
Add missing historic models, fix incorrect tokenizers for old instruct models

js/src/core.ts

+1
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ export function getEncodingNameForModel(model: TiktokenModel) {
279 279
case "gpt-4o":
280 280
case "gpt-4o-2024-05-13":
281 281
case "gpt-4o-2024-08-06":
282+
case "gpt-4o-2024-11-20":
282 283
case "gpt-4o-mini-2024-07-18":
283 284
case "gpt-4o-mini":
284 285
case "o1":

tiktoken/model_to_encoding.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@
57 57
"gpt-4-vision-preview": "cl100k_base",
58 58
"gpt-4o": "o200k_base",
59 59
"gpt-4o-2024-05-13": "o200k_base",
60-
"gpt-4o-2024-08-06":"o200k_base",
60+
"gpt-4o-2024-08-06": "o200k_base",
61+
"gpt-4o-2024-11-20": "o200k_base",
61 62
"gpt-4o-mini-2024-07-18": "o200k_base",
62 63
"gpt-4o-mini": "o200k_base",
63 64
"o1": "o200k_base",

wasm/src/lib.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ export type TiktokenModel =
423 423
| "gpt-4o"
424 424
| "gpt-4o-2024-05-13"
425 425
| "gpt-4o-2024-08-06"
426+
| "gpt-4o-2024-11-20"
426 427
| "gpt-4o-mini-2024-07-18"
427 428
| "gpt-4o-mini"
428 429
| "o1"
@@ -459,9 +460,10 @@ pub fn encoding_for_model(
459 460
"text-babbage-001" => Ok("r50k_base"),
460 461
"text-ada-001" => Ok("r50k_base"),
461 462
"davinci" => Ok("r50k_base"),
463+
"davinci-002" => Ok("cl100k_base"),
462 464
"curie" => Ok("r50k_base"),
463 465
"babbage" => Ok("r50k_base"),
464-
"babbage-002" => Ok("r50k_base"),
466+
"babbage-002" => Ok("cl100k_base"),
465 467
"ada" => Ok("r50k_base"),
466 468
"code-davinci-002" => Ok("p50k_base"),
467 469
"code-davinci-001" => Ok("p50k_base"),
@@ -490,7 +492,7 @@ pub fn encoding_for_model(
490 492
"gpt-3.5-turbo-0613" => Ok("cl100k_base"),
491 493
"gpt-3.5-turbo-16k" => Ok("cl100k_base"),
492 494
"gpt-3.5-turbo-16k-0613" => Ok("cl100k_base"),
493-
"gpt-3.5-turbo-instruct" => Ok("clk100k_base"),
495+
"gpt-3.5-turbo-instruct" => Ok("cl100k_base"),
494 496
"gpt-3.5-turbo-instruct-0914" => Ok("cl100k_base"),
495 497
"gpt-4" => Ok("cl100k_base"),
496 498
"gpt-4-0314" => Ok("cl100k_base"),
@@ -510,8 +512,10 @@ pub fn encoding_for_model(
510 512
"gpt-4o" => Ok("o200k_base"),
511 513
"gpt-4o-2024-05-13" => Ok("o200k_base"),
512 514
"gpt-4o-2024-08-06" => Ok("o200k_base"),
515+
"gpt-4o-2024-11-20" => Ok("o200k_base"),
513 516
"gpt-4o-mini-2024-07-18" => Ok("o200k_base"),
514 517
"gpt-4o-mini" => Ok("o200k_base"),
518+
"o1" => Ok("o200k_base"),
515 519
"o1-2024-12-17" => Ok("o200k_base"),
516 520
"o1-mini" => Ok("o200k_base"),
517 521
"o1-preview" => Ok("o200k_base"),
@@ -520,6 +524,8 @@ pub fn encoding_for_model(
520 524
"chatgpt-4o-latest" => Ok("o200k_base"),
521 525
"gpt-4o-realtime" => Ok("o200k_base"),
522 526
"gpt-4o-realtime-preview-2024-10-01" => Ok("o200k_base"),
527+
"o3-mini" => Ok("o200k_base"),
528+
"o3-mini-2025-01-31" => Ok("o200k_base"),
523 529
model => Err(JsError::new(
524 530
format!("Invalid model: {}", model.to_string()).as_str(),
525 531
)),

0 commit comments

Comments
 (0)