From 4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Wed, 23 Oct 2024 01:50:24 +0000 Subject: [PATCH 1/2] adding llm configs --- src/agentlab/llm/llm_configs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 30889be3d..13cb69b41 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -77,6 +77,13 @@ max_input_tokens=40_000, max_new_tokens=4_000, ), + "azure/gpt-4o-mini-2024-07-18": AzureModelArgs( + model_name="gpt-4o-mini", + deployment_name="gpt-4o-mini-2024-07-18", + max_total_tokens=128_000, + max_input_tokens=40_000, + max_new_tokens=4_000, + ), # ---------------- OSS LLMs ----------------# "meta-llama/Meta-Llama-3-70B-Instruct": SelfHostedModelArgs( model_name="meta-llama/Meta-Llama-3-70B-Instruct", @@ -152,4 +159,11 @@ max_new_tokens=2_000, temperature=1e-1, ), + "openrouter/openai/o1-mini-2024-09-12": OpenRouterModelArgs( + model_name="openai/o1-mini-2024-09-12", + max_total_tokens=128_000, + max_input_tokens=40_000, + max_new_tokens=4000, + temperature=1e-1, + ), } From 02d4ee97c45115a8e29ec75ae96148fa0ca1475f Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Wed, 23 Oct 2024 01:50:24 +0000 Subject: [PATCH 2/2] new L1 entries --- reproducibility_journal.csv | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index f1d937bc8..b4b1f920a 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -15,3 +15,8 @@ ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,miniwob,0.8.1,2024-10-17_10-50- ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,workarena.l1,0.4.1,2024-10-17_17-30-43,,0.258,0.024,0,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,7bba275c004f1f90dfd83eaaab963ab5066e2baf,,0.8.1,None, ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,workarena.l1,0.4.1,2024-10-17_18-30-28,,0.273,0.025,0,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,8b2b3f39a2bdb9efafad97791536a0b8cff4e708,,0.8.1,None, ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,miniwob_all,0.9.0,2024-10-20_01-54-16,2024-10-20_01-54-02,0.588,0.014,0,1250/1250,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,1770eba87fabfe1e32cdf6078d71032fe00db736,,0.9.0,None, +ThibaultLSDC,GenericAgent-gpt-4o-mini,workarena_l1,0.4.1,2024-10-23_22-30-06,2024-10-23_14-17-40,0.27,0.024,1,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac,,0.9.0,f25bdcd6b946fc4a79cdbee5fbcad53548af8724, +ThibaultLSDC,GenericAgent-gpt-4o,workarena_l1,0.4.1,2024-10-23_22-30-06,2024-10-23_14-17-40,0.455,0.027,1,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac,,0.9.0,f25bdcd6b946fc4a79cdbee5fbcad53548af8724, +ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l1,0.4.1,2024-10-23_22-30-06,2024-10-23_14-17-40,0.564,0.027,1,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac,,0.9.0,f25bdcd6b946fc4a79cdbee5fbcad53548af8724, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,workarena_l1,0.4.1,2024-10-23_22-30-06,2024-10-23_14-17-40,0.279,0.025,0,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac,,0.9.0,f25bdcd6b946fc4a79cdbee5fbcad53548af8724, +ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,workarena_l1,0.4.1,2024-10-23_22-30-06,2024-10-23_14-17-40,0.567,0.027,4,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,4cd1e2d4189ddfbeb94129f7b0c9a00c3400ebac,,0.9.0,f25bdcd6b946fc4a79cdbee5fbcad53548af8724,