Spaces:

Eurico149
/

SmallAgent-POC

Sleeping

File size: 2,235 Bytes

7de7078
7e54f4f
6fa006d
 
7de7078
54e6e24
bb8cd70
7e54f4f
7de7078
 
9e65f78
 
7de7078
 
 
 
88ce0e7
 
 
 
 
 
 
 
 
9e65f78
 
 
 
 
88ce0e7
 
 
 
 
9e65f78
 
 
 
 
 
 
 
7e54f4f
9e65f78
 
 
 
 
 
 
 
 
 
7de7078
 
88ce0e7
7de7078
 
 
7e54f4f
9e65f78
7de7078
88ce0e7
 
 
 
 
 
 
 
 
 
 
 
 
7e54f4f
 
9e65f78
 
 
 
 
7e54f4f

import torch
import gradio as gr
from dataclasses import asdict
from smolagents import CodeAgent, TransformersModel, InferenceClientModel, stream_to_gradio
from transformers import BitsAndBytesConfig
from tools import get_weather, CurrencyConverterTool


# Hugging Face Hub id of the model; both the local and the hosted backend use it.
model_path = "Qwen/Qwen3-4B-Instruct-2507"

# Probe for a CUDA device once at import time; the result selects the backend
# used for every chat turn and is announced on stdout.
cuda = torch.cuda.is_available()
print("\nRunning on Local GPU\n" if cuda else "\nRunning on Hugging Face Ecosystem\n")

def interact_with_agent(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Run one chat turn through a smolagents ``CodeAgent`` and stream the output.

    A fresh model and agent are built on every call. When CUDA is available
    (module-level ``cuda``) the model is loaded locally through
    ``TransformersModel`` with an 8-bit bitsandbytes quantization config;
    otherwise requests go through ``InferenceClientModel``.

    Args:
        message: The user's message for this turn; passed to the agent as its task.
        history: Previous chat messages supplied by the chat interface. Not
            used: the agent only sees ``message``.
        system_message: Text from the "System message" box. Only forwarded to
            the local ``TransformersModel``; the hosted path does not use it.
        max_tokens: Value of the "Max new tokens" slider.
        temperature: Value of the "Temperature" slider.
        top_p: Value of the "Top-p" slider.
        hf_token: OAuth token of the logged-in user; its ``token`` attribute
            authenticates the model backend.

    Yields:
        The growing list of chat messages (each a dict built with
        ``dataclasses.asdict``), starting with an empty list.
    """
    if cuda:
        quantization = BitsAndBytesConfig(load_in_8bit=True)
        # NOTE(review): both max_new_tokens and max_tokens are supplied here;
        # which one wins depends on the installed smolagents version -- confirm
        # and drop the one that is not intended.
        model = TransformersModel(
            model_id=model_path,
            max_new_tokens=1024,
            temperature=temperature,
            hf_token=hf_token.token,
            top_p=top_p,
            max_tokens=max_tokens,
            system_message=system_message,
            model_kwargs={
                "quantization_config": quantization
            })
    else:
        # Forward the sampling controls so the sliders also take effect on the
        # hosted path; previously they were silently ignored here.
        # NOTE(review): the temperature slider goes up to 4.0 -- confirm the
        # inference provider accepts that range.
        model = InferenceClientModel(
            token=hf_token.token,
            model_id=model_path,
            max_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
        )

    agent = CodeAgent(
        tools=[
            get_weather,
            CurrencyConverterTool()
        ],
        model=model,
        max_steps=8,
        verbosity_level=2,
        add_base_tools=True
    )

    messages = []
    # Emit the empty list first so the UI updates before the agent produces output.
    yield messages
    for msg in stream_to_gradio(agent, message):
        messages.append(asdict(msg))
        # Re-yield the whole list: each yield carries the full conversation so far.
        yield messages

# Chat UI bound to interact_with_agent. The extra controls are listed in the
# same order as the handler's parameters after (message, history):
# system_message, max_tokens, temperature, top_p.
chatbot = gr.ChatInterface(
    fn=interact_with_agent,
    type="messages",
    additional_inputs=[
        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
)

# Top-level page layout: a sidebar holding the login button, with the chat
# interface rendered in the main area.
with gr.Blocks() as demo:
    with gr.Sidebar():
        # NOTE(review): presumably this login is what provides the
        # gr.OAuthToken argument of interact_with_agent -- confirm.
        gr.LoginButton()
    # Attach the ChatInterface built above to this Blocks context.
    chatbot.render()


# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()