<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>PyPI recent updates for llm-benchmark-toolkit</title>
    <link>https://pypi.org/project/llm-benchmark-toolkit/</link>
    <description>Recent updates to the Python Package Index for llm-benchmark-toolkit</description>
    <language>en</language>
    <item>
      <title>2.4.2</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.4.2/</link>
      <description>Benchmark LLMs with 10 benchmarks &amp; 132K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Unified CLI + Web dashboard.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Fri, 05 Dec 2025 23:13:52 GMT</pubDate>
    </item>
    <item>
      <title>2.4.1</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.4.1/</link>
      <description>Benchmark LLMs with 10 benchmarks &amp; 132K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Unified CLI + Web dashboard.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Fri, 05 Dec 2025 22:35:18 GMT</pubDate>
    </item>
    <item>
      <title>2.4.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.4.0/</link>
      <description>Benchmark LLMs with 9 benchmarks &amp; 100K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Web dashboard included.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Fri, 05 Dec 2025 06:16:17 GMT</pubDate>
    </item>
    <item>
      <title>2.3.2</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.3.2/</link>
      <description>Benchmark LLMs with 9 benchmarks &amp; 100K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Web dashboard included.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Thu, 04 Dec 2025 02:15:06 GMT</pubDate>
    </item>
    <item>
      <title>2.3.1</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.3.1/</link>
      <description>Benchmark LLMs with 9 benchmarks &amp; 100K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Web dashboard included.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Thu, 04 Dec 2025 01:13:45 GMT</pubDate>
    </item>
    <item>
      <title>2.3.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.3.0/</link>
      <description>Benchmark LLMs with 9 benchmarks &amp; 100K+ questions. 8 providers: OpenAI, Anthropic, Groq, Together, Fireworks, DeepSeek, Ollama, HuggingFace. Web dashboard included.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Wed, 03 Dec 2025 22:26:05 GMT</pubDate>
    </item>
    <item>
      <title>2.2.1</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.2.1/</link>
      <description>Benchmark LLMs with real academic datasets: MMLU, TruthfulQA, HellaSwag, ARC &amp; more. Web dashboard included.</description>
      <author>nahuelgiudizi@hotmail.com</author>
      <pubDate>Tue, 02 Dec 2025 22:48:15 GMT</pubDate>
    </item>
    <item>
      <title>2.2.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.2.0/</link>
      <description>Production-ready LLM evaluation with 24K+ real questions</description>
      <author>nahuel@example.com</author>
      <pubDate>Tue, 02 Dec 2025 22:42:14 GMT</pubDate>
    </item>
    <item>
      <title>2.1.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.1.0/</link>
      <description>Production-ready LLM evaluation with 24K+ real questions</description>
      <author>nahuel@example.com</author>
      <pubDate>Tue, 02 Dec 2025 02:17:45 GMT</pubDate>
    </item>
    <item>
      <title>2.0.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/2.0.0/</link>
      <description>Comprehensive evaluation framework for Large Language Models with academic statistical rigor</description>
      <author>nahuel@example.com</author>
      <pubDate>Mon, 01 Dec 2025 20:39:03 GMT</pubDate>
    </item>
    <item>
      <title>0.4.1</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/0.4.1/</link>
      <description>Comprehensive evaluation framework for Large Language Models</description>
      <author>nahuel@example.com</author>
      <pubDate>Mon, 01 Dec 2025 06:50:35 GMT</pubDate>
    </item>
    <item>
      <title>0.4.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/0.4.0/</link>
      <description>Comprehensive evaluation framework for Large Language Models</description>
      <author>nahuel@example.com</author>
      <pubDate>Mon, 01 Dec 2025 05:11:20 GMT</pubDate>
    </item>
    <item>
      <title>0.3.2</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/0.3.2/</link>
      <description>Comprehensive evaluation framework for Large Language Models</description>
      <author>nahuel@example.com</author>
      <pubDate>Mon, 01 Dec 2025 00:35:50 GMT</pubDate>
    </item>
    <item>
      <title>0.3.1</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/0.3.1/</link>
      <description>Comprehensive evaluation framework for Large Language Models</description>
      <author>nahuel@example.com</author>
      <pubDate>Sun, 30 Nov 2025 06:42:03 GMT</pubDate>
    </item>
    <item>
      <title>0.3.0</title>
      <link>https://pypi.org/project/llm-benchmark-toolkit/0.3.0/</link>
      <description>Comprehensive evaluation framework for Large Language Models</description>
      <author>nahuel@example.com</author>
      <pubDate>Sun, 30 Nov 2025 06:22:36 GMT</pubDate>
    </item>
  </channel>
</rss>