ScrapeGraph Tools

Enable Agno agents to use ScrapeGraph tools, which use graph logic and LLMs to understand the semantics of a page, making them resilient to layout changes.

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.tools.scrapegraph import ScrapeGraphTools

# ---------------------------------------------------------------------------
# Create Agent
# ---------------------------------------------------------------------------

# Shared model and toolkit instances used by every example below.
agent_model = OpenAIChat(id="gpt-4.1")
scrapegraph_smartscraper = ScrapeGraphTools(enable_smartscraper=True)

# Agent wired with the smartscraper-enabled toolkit; responses render as
# markdown and stream token-by-token as they are generated.
agent = Agent(
    model=agent_model,
    tools=[scrapegraph_smartscraper],
    markdown=True,
    stream=True,
)

# Example 1: Use smartscraper tool

# ---------------------------------------------------------------------------
# Run Agent
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Example 1: smartscraper — structured extraction from a live page.
    prompt = """
    Use smartscraper to extract the following from https://www.wired.com/category/science/:
    - News articles
    - Headlines
    - Images
    - Links
    - Author
    """
    agent.print_response(prompt)

    # Example 2: markdownify only — disable smartscraper to fall back to
    # plain page-to-markdown conversion.
    # scrapegraph_md = ScrapeGraphTools(enable_smartscraper=False)
    # md_agent = Agent(tools=[scrapegraph_md], model=agent_model, markdown=True)
    # md_agent.print_response(
    #     "Fetch and convert https://www.wired.com/category/science/ to markdown format"
    # )

    # # Example 3: crawl — follow links across a site and extract per a schema.
    # scrapegraph_crawl = ScrapeGraphTools(enable_crawl=True)
    # crawl_agent = Agent(tools=[scrapegraph_crawl], model=agent_model, markdown=True)
    # crawl_agent.print_response(
    #     "Use crawl to extract what the company does and get text content from privacy and terms from https://scrapegraphai.com/ with a suitable schema."
    # )

    # # Example 4: scrape — fetch raw HTML instead of LLM-interpreted content.
    # scrapegraph_scrape = ScrapeGraphTools(enable_scrape=True, enable_smartscraper=False)
    # scrape_agent = Agent(
    #     tools=[scrapegraph_scrape],
    #     model=agent_model,
    #     markdown=True,
    #     stream=True,
    # )
    # scrape_agent.print_response(
    #     "Use the scrape tool to get the complete raw HTML content from https://en.wikipedia.org/wiki/2025_FIFA_Club_World_Cup"
    # )

    # # Example 5: every ScrapeGraph function enabled at once.
    # # render_heavy_js=True executes the page's JavaScript before scraping.
    # scrapegraph_all = Agent(
    #     tools=[ScrapeGraphTools(all=True, render_heavy_js=True)],
    #     model=agent_model,
    #     markdown=True,
    #     stream=True,
    # )
    # scrapegraph_all.print_response("""
    # Use any appropriate scraping method to extract comprehensive information from https://www.wired.com/category/science/:
    # - News articles and headlines
    # - Convert to markdown if needed
    # - Search for specific information
    # """)

Run the Example

# Clone and setup repo
git clone https://github.com/agno-agi/agno.git
cd agno/cookbook/91_tools

# Create and activate virtual environment
./scripts/demo_setup.sh
source .venvs/demo/bin/activate

python scrapegraph_tools.py
For details, see the ScrapeGraph cookbook.