From 827d847995f19dc337f3899427340bdddbd81cd5 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Wed, 23 Oct 2024 01:50:24 +0000 Subject: [PATCH 1/6] adding llm configs --- src/agentlab/llm/llm_configs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 30889be3d..13cb69b41 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -77,6 +77,13 @@ max_input_tokens=40_000, max_new_tokens=4_000, ), + "azure/gpt-4o-mini-2024-07-18": AzureModelArgs( + model_name="gpt-4o-mini", + deployment_name="gpt-4o-mini-2024-07-18", + max_total_tokens=128_000, + max_input_tokens=40_000, + max_new_tokens=4_000, + ), # ---------------- OSS LLMs ----------------# "meta-llama/Meta-Llama-3-70B-Instruct": SelfHostedModelArgs( model_name="meta-llama/Meta-Llama-3-70B-Instruct", @@ -152,4 +159,11 @@ max_new_tokens=2_000, temperature=1e-1, ), + "openrouter/openai/o1-mini-2024-09-12": OpenRouterModelArgs( + model_name="openai/o1-mini-2024-09-12", + max_total_tokens=128_000, + max_input_tokens=40_000, + max_new_tokens=4000, + temperature=1e-1, + ), } From e056dfcd877ec8cf1e35d84738134b867a129196 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Thu, 24 Oct 2024 17:59:15 +0000 Subject: [PATCH 2/6] L2 entries --- reproducibility_journal.csv | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index f1d937bc8..409a06183 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -15,3 +15,8 @@ ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,miniwob,0.8.1,2024-10-17_10-50- ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,workarena.l1,0.4.1,2024-10-17_17-30-43,,0.258,0.024,0,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,7bba275c004f1f90dfd83eaaab963ab5066e2baf,,0.8.1,None, ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,workarena.l1,0.4.1,2024-10-17_18-30-28,,0.273,0.025,0,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,8b2b3f39a2bdb9efafad97791536a0b8cff4e708,,0.8.1,None, ThibaultLSDC,GenericAgent-gpt-4o-mini-2024-07-18,miniwob_all,0.9.0,2024-10-20_01-54-16,2024-10-20_01-54-02,0.588,0.014,0,1250/1250,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.2,1770eba87fabfe1e32cdf6078d71032fe00db736,,0.9.0,None, +ThibaultLSDC,GenericAgent-gpt-4o-mini,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.013,0.007,2,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, +ThibaultLSDC,GenericAgent-gpt-4o,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.085,0.018,3,233/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, +ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.391,0.032,3,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.021,0.009,2,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, +ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.149,0.023,1,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, From 59847fd02ea67e7a881944c896a2c2d2126ddae8 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Thu, 24 Oct 2024 23:50:45 +0000 Subject: [PATCH 3/6] claude L3 --- reproducibility_journal.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index 3c5fa7b1a..86789bf9e 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -31,3 +31,4 @@ ThibaultLSDC,GenericAgent-gpt-4o,workarena_l2_agent_curriculum_eval,0.4.1,2024-1 ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.391,0.032,3,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.021,0.009,2,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.149,0.023,1,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, +ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l3_agent_curriculum_eval,0.4.1,2024-10-24_23-03-30,2024-10-24_18-06-57,0.004,0.004,1,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,de67ed8ad4321740ff05cf26ab889978be706460,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, From f12887f776525bcad6a0c42cb49651ff4f65af43 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Fri, 25 Oct 2024 05:56:21 +0000 Subject: [PATCH 4/6] claude vision support --- src/agentlab/llm/llm_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/agentlab/llm/llm_configs.py b/src/agentlab/llm/llm_configs.py index 13cb69b41..8376b5c25 100644 --- a/src/agentlab/llm/llm_configs.py +++ b/src/agentlab/llm/llm_configs.py @@ -151,6 +151,7 @@ max_input_tokens=40_000, max_new_tokens=4000, temperature=1e-1, + vision_support=True, ), "openrouter/qwen/qwen-2-72b-instruct": OpenRouterModelArgs( model_name="qwen/qwen-2-72b-instruct", From 177ba72a7469e5610e6b615adf1bdcde58cb0298 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Fri, 25 Oct 2024 17:17:19 +0000 Subject: [PATCH 5/6] miniwob results --- reproducibility_journal.csv | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index 86789bf9e..bd637ff21 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -32,3 +32,8 @@ ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l2_agent_cu ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.021,0.009,2,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,workarena_l2_agent_curriculum_eval,0.4.1,2024-10-24_17-08-53,2024-10-23_17-10-46,0.149,0.023,1,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,827d847995f19dc337f3899427340bdddbd81cd5,,0.10.0,None, ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,workarena_l3_agent_curriculum_eval,0.4.1,2024-10-24_23-03-30,2024-10-24_18-06-57,0.004,0.004,1,235/235,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,de67ed8ad4321740ff05cf26ab889978be706460,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-gpt-4o-mini,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.566,0.02,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-gpt-4o,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.638,0.019,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.698,0.018,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.576,0.02,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.678,0.019,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, From 14bfc8f42c848fba17cd787e8dd37576228a9bc6 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Fri, 25 Oct 2024 20:34:29 +0000 Subject: [PATCH 6/6] 405b L1 entry --- reproducibility_journal.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index bd637ff21..5403a4d7d 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -37,3 +37,4 @@ ThibaultLSDC,GenericAgent-gpt-4o,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_0 ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.698,0.018,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.576,0.02,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,miniwob,0.10.2,2024-10-25_17-16-23,2024-10-25_06-08-16,0.678,0.019,0,625/625,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,f12887f776525bcad6a0c42cb49651ff4f65af43,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,workarena_l1,0.4.1,2024-10-25_20-32-26,2024-10-25_17-34-45,0.433,0.027,1,330/330,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,177ba72a7469e5610e6b615adf1bdcde58cb0298,,0.10.2,a9e44a88139798543ba53fc8c45d44997665ccca,