smolagents documentation

📚 에이전트 메모리 관리

Hugging Face's logo
Join the Hugging Face community

and get access to the augmented documentation experience

to get started

📚 에이전트 메모리 관리

결국 에이전트는 도구와 프롬프트로 이루어진 단순한 구성요소로 정의됩니다. 그리고 무엇보다 중요한 것은 에이전트가 과거 단계의 메모리를 가지고 있어 계획, 실행, 오류의 이력을 추적한다는 점입니다.

μ—μ΄μ „νŠΈ λ©”λͺ¨λ¦¬ μž¬μƒ

과거 실행된 에이전트를 확인하기 위한 몇 가지 기능을 제공합니다.

계츑 κ°€μ΄λ“œμ—μ„œ μ–ΈκΈ‰ν•œ 바와 같이, μ—μ΄μ „νŠΈ 싀행을 κ³„μΈ‘ν•˜μ—¬ νŠΉμ • 단계λ₯Ό ν™•λŒ€ν•˜κ±°λ‚˜ μΆ•μ†Œν•  수 μžˆλŠ” μš°μˆ˜ν•œ UI둜 μ‹œκ°ν™”ν•  수 μžˆμŠ΅λ‹ˆλ‹€.

λ˜ν•œ λ‹€μŒκ³Ό 같이 agent.replay()λ₯Ό μ‚¬μš©ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.

μ—μ΄μ „νŠΈλ₯Ό μ‹€ν–‰ν•œ ν›„,

from smolagents import InferenceClientModel, CodeAgent

# Build a CodeAgent with no extra tools, backed by InferenceClientModel.
agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=0)

# Run a single task and keep the value returned by the run.
result = agent.run("What's the 20th Fibonacci number?")

이 마지막 실행을 다시 재생하고 싶다면, 다음 코드를 사용하면 됩니다.

# Replay the agent's last run; call this after agent.run(...) has completed.
agent.replay()

μ—μ΄μ „νŠΈ λ©”λͺ¨λ¦¬ 동적 λ³€κ²½

λ§Žμ€ κ³ κΈ‰ μ‚¬μš© μ‚¬λ‘€μ—μ„œλŠ” μ—μ΄μ „νŠΈμ˜ λ©”λͺ¨λ¦¬λ₯Ό λ™μ μœΌλ‘œ μˆ˜μ •ν•΄μ•Ό ν•©λ‹ˆλ‹€.

μ—μ΄μ „νŠΈμ˜ λ©”λͺ¨λ¦¬λŠ” λ‹€μŒκ³Ό 같이 μ ‘κ·Όν•  수 μžˆμŠ΅λ‹ˆλ‹€.

from smolagents import ActionStep

# The system prompt is exposed on the memory object as its own step.
system_prompt_step = agent.memory.system_prompt
print("The system prompt given to the agent was:")
print(system_prompt_step.system_prompt)

# The first entry of memory.steps is read here as the task step.
task_step = agent.memory.steps[0]
print("\n\nThe first task step was:")
print(task_step.task)

# Report the outcome of every action step: its error if one was recorded,
# otherwise its observations.
for step in agent.memory.steps:
    if not isinstance(step, ActionStep):
        continue
    if step.error is None:
        print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n")
    else:
        print(f"\nStep {step.step_number} got this error:\n{step.error}\n")

agent.memory.get_full_steps()를 사용하여 전체 단계를 딕셔너리 형태로 가져올 수 있습니다.

λ˜ν•œ 단계 μ½œλ°±μ„ μ‚¬μš©ν•˜μ—¬ μ—μ΄μ „νŠΈμ˜ λ©”λͺ¨λ¦¬λ₯Ό λ™μ μœΌλ‘œ λ³€κ²½ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.

단계 μ½œλ°±μ€ 인자둜 agent 객체 μžμ²΄μ— μ ‘κ·Όν•  수 μžˆμœΌλ―€λ‘œ, μœ„μ—μ„œ μ„€λͺ…ν•œ κ²ƒμ²˜λŸΌ λͺ¨λ“  λ©”λͺ¨λ¦¬ 단계에 μ ‘κ·Όν•˜μ—¬ ν•„μš”ν•œ 경우 μˆ˜μ •ν•  수 μžˆμŠ΅λ‹ˆλ‹€. 예λ₯Ό λ“€μ–΄, μ›Ή λΈŒλΌμš°μ € μ—μ΄μ „νŠΈκ°€ μˆ˜ν–‰ν•˜λŠ” 각 λ‹¨κ³„μ˜ μŠ€ν¬λ¦°μƒ·μ„ κ΄€μ°°ν•˜κ³  μžˆλ‹€κ³  κ°€μ •ν•΄ λ³΄κ² μŠ΅λ‹ˆλ‹€. 이 경우 μ΅œμ‹  μŠ€ν¬λ¦°μƒ·μ€ μœ μ§€ν•˜λ©΄μ„œ 토큰 λΉ„μš©μ„ μ ˆμ•½ν•˜κΈ° μœ„ν•΄ 이전 λ‹¨κ³„μ˜ 이미지λ₯Ό λ©”λͺ¨λ¦¬μ—μ„œ μ œκ±°ν•  수 μžˆμŠ΅λ‹ˆλ‹€.

이 경우 λ‹€μŒκ³Ό 같은 μ½”λ“œλ₯Ό μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. 주의: 이 μ½”λ“œλŠ” 간결함을 μœ„ν•΄ 일뢀 μž„ν¬νŠΈ 및 객체 μ •μ˜κ°€ μƒλž΅λœ λΆˆμ™„μ „ν•œ μ˜ˆμ‹œμž…λ‹ˆλ‹€. 전체 μž‘λ™ λ²„μ „μ˜ μ½”λ“œλŠ” 원본 μŠ€ν¬λ¦½νŠΈμ—μ„œ ν™•μΈν•˜μ„Έμš”.

import helium
from PIL import Image
from io import BytesIO
from time import sleep

def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
    """Step callback: prune old screenshots and attach a fresh one.

    Clears ``observations_images`` on every ``ActionStep`` in the agent's
    memory whose step number is at least two behind ``memory_step``, then
    stores a new browser screenshot on ``memory_step``.

    Args:
        memory_step: The step that receives the new screenshot.
        agent: The agent whose ``memory.steps`` are scanned for old images.
    """
    sleep(1.0)  # Let JavaScript animations finish before taking the screenshot.
    browser = helium.get_driver()
    current_number = memory_step.step_number
    # Remove screenshots of older steps from the log to keep processing lean.
    for past_step in agent.memory.steps:
        if not isinstance(past_step, ActionStep):
            continue
        if past_step.step_number <= current_number - 2:
            past_step.observations_images = None
    screenshot = Image.open(BytesIO(browser.get_screenshot_as_png()))
    # Store a copy of the opened image on the step.
    memory_step.observations_images = [screenshot.copy()]

κ·Έ λ‹€μŒ μ—μ΄μ „νŠΈλ₯Ό μ΄ˆκΈ°ν™”ν•  λ•Œ 이 ν•¨μˆ˜λ₯Ό λ‹€μŒκ³Ό 같이 step_callbacks μΈμˆ˜μ— 전달해야 ν•©λ‹ˆλ‹€.

# Construct the agent with update_screenshot registered as a step callback.
# NOTE: WebSearchTool, go_back, close_popups, search_item_ctrl_f and `model`
# are not defined in this snippet; they must come from the full script.
CodeAgent(
    tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f],
    model=model,
    additional_authorized_imports=["helium"],
    step_callbacks=[update_screenshot],
    max_steps=20,
    verbosity_level=2,
)

전체 μž‘λ™ μ˜ˆμ‹œλŠ” λΉ„μ „ μ›Ή λΈŒλΌμš°μ € μ½”λ“œμ—μ„œ 확인할 수 μžˆμŠ΅λ‹ˆλ‹€.

μ—μ΄μ „νŠΈλ₯Ό λ‹¨κ³„λ³„λ‘œ μ‹€ν–‰

이 기능은 도구 호출에 오랜 시간이 걸리는 경우에 유용합니다. 에이전트를 한 단계씩 실행하면서 각 단계에서 메모리를 업데이트할 수 있습니다.

from smolagents import InferenceClientModel, CodeAgent, ActionStep, TaskStep

agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=1)
# Send the agent's tools to its Python executor before stepping manually.
agent.python_executor.send_tools({**agent.tools})
print(agent.memory.system_prompt)

task = "What is the 20th Fibonacci number?"

# If needed, you can modify the memory here by loading another agent's memory.
# agent.memory.steps = previous_agent.memory.steps

# Let's start a new task!
agent.memory.steps.append(TaskStep(task=task, task_images=[]))

final_answer = None
step_number = 1
# Keep stepping until a step returns a non-None answer, or 10 steps have run.
while final_answer is None and step_number <= 10:
    memory_step = ActionStep(
        step_number=step_number,
        observations_images=[],
    )
    # Run one step.
    final_answer = agent.step(memory_step)
    agent.memory.steps.append(memory_step)
    step_number += 1

    # You can also modify the memory here if needed.
    # For example, to update the latest step:
    # agent.memory.steps[-1] = ...

print("The final answer is:", final_answer)
Update on GitHub