Skip to content

Commit

Permalink
Add web scrapping tool
Browse files Browse the repository at this point in the history
  • Loading branch information
Shulyaka committed Dec 11, 2024
1 parent 2b65273 commit 959aaa2
Show file tree
Hide file tree
Showing 11 changed files with 159 additions and 124 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ This integration provides:
4. Selectively Enable/Disable any tool
5. Extra LLM tools:
* Web, maps, and news search with Duck Duck Go
* Web scraping to access the Internet
* Permanent memory tool
* Python code execution

Expand Down Expand Up @@ -69,11 +70,12 @@ There are two options:

* Extend the `custom_components.powerllm.PowerLLMTool` class to implement the functionality, then call `custom_components.powerllm.async_register_tool` to register an instance of the class. See the [memory tool](https://github.com/Shulyaka/powerllm/blob/master/custom_components/powerllm/tools/memory.py) for an example.

* Use the `custom_components.powerllm.llm_tool` decorator for any python function. The function is recommended to have annotations for all parameters. If a parameter name is "hass", "llm_context", or any of the `homeassistant.helpers.llm.LLMContext` attributes, then the value for that parameter will be provided by the conversation agent 'pytest-style'. All other arguments will be provided by the LLM. Refer to the [python code tool](https://github.com/Shulyaka/powerllm/blob/master/custom_components/powerllm/tools/python_code.py) as an example.
* Use the `custom_components.powerllm.llm_tool` decorator for any Python function. It is recommended that the function have type annotations for all parameters. If a parameter is named "hass", "llm_context", or the same as any of the `homeassistant.helpers.llm.LLMContext` attributes, then its value will be provided by the conversation agent (matched by parameter name, "pytest-style"). All other arguments will be provided by the LLM. Refer to the [python code tool](https://github.com/Shulyaka/powerllm/blob/master/custom_components/powerllm/tools/python_code.py) as an example.

The tools in this repository use various techniques for demonstration.

## TODO

* Weather forecast intent
* Web scraping using trafilatura
* Ability to talk to other conversation agents (e.g. "Ask expert" for a reasoning model, or NLP conversation (Assist) for device control fallback)
* Your suggestions!
2 changes: 2 additions & 0 deletions custom_components/powerllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
deferred_register_tools,
llm_tool as llm_tool,
)
from .tools.web_scrape import setup as setup_web_scrape_tool

_LOGGER = logging.getLogger(__name__)

Expand All @@ -35,5 +36,6 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
hass.http.register_view(LLMToolsListView)
hass.http.register_view(LLMToolView)
deferred_register_tools(hass)
setup_web_scrape_tool(hass)

return True
2 changes: 1 addition & 1 deletion custom_components/powerllm/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
"integration_type": "service",
"iot_class": "local_push",
"issue_tracker": "https://github.com/Shulyaka/powerllm/issues",
"requirements": ["duckduckgo-search==6.2.9", "RestrictedPython>=7.4"],
"requirements": ["duckduckgo-search==6.2.9", "RestrictedPython>=7.4", "trafilatura==2.0.0"],
"version": "0.0.1"
}
35 changes: 35 additions & 0 deletions custom_components/powerllm/tools/web_scrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Web scraper tool."""

import json
import logging

import trafilatura
from homeassistant.core import HomeAssistant

from ..llm_tools import llm_tool

_LOGGER = logging.getLogger(__name__)


def setup(hass: HomeAssistant):
    """Register the web scraping tool on integration startup.

    Defines the ``web_scrape`` function and registers it through the
    ``llm_tool`` decorator bound to ``hass``.
    """
    # Function-scope import keeps this block self-contained; the error type
    # belongs to the already-required ``homeassistant`` package.
    from homeassistant.exceptions import HomeAssistantError

    @llm_tool(hass)
    def web_scrape(url: str):
        """Get latest content of a web page."""
        # Fetch the page that was actually requested (previously a fixed
        # host was downloaded regardless of ``url``).
        downloaded = trafilatura.fetch_url(url)
        if not downloaded:
            # fetch_url yields nothing when the download fails; fail with a
            # clear message instead of a TypeError from json.loads(None).
            raise HomeAssistantError(f"Unable to download {url}")

        parsed = trafilatura.extract(
            downloaded,
            output_format="json",
            include_links=True,
            deduplicate=True,
            favor_precision=True,
        )
        if parsed is None:
            # extract returns None when no content could be extracted.
            raise HomeAssistantError(f"Unable to extract content from {url}")

        result = json.loads(parsed)

        # Drop the "comments" field when it is present but empty.
        if not result.get("comments"):
            result.pop("comments", None)

        return result
1 change: 1 addition & 0 deletions requirements_test_all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ ha-ffmpeg
pymicro-vad
duckduckgo-search
RestrictedPython
trafilatura
35 changes: 34 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from unittest.mock import patch

import pytest
from homeassistant.core import HomeAssistant
from homeassistant.core import Context, HomeAssistant
from homeassistant.helpers import llm
from homeassistant.setup import async_setup_component
from pytest_homeassistant_custom_component.common import MockConfigEntry

Expand Down Expand Up @@ -60,3 +61,35 @@ async def setup_ha(hass: HomeAssistant) -> None:
assert await async_setup_component(hass, "assist_pipeline", {})
assert await async_setup_component(hass, "intent", {})
assert await async_setup_component(hass, "script", {})


@pytest.fixture
def llm_context() -> llm.LLMContext:
    """Build the LLM context passed to tools: test platform, fixed user id."""
    # Every optional field of the context is left unset (None).
    unset_fields = dict.fromkeys(
        ("user_prompt", "language", "assistant", "device_id")
    )
    return llm.LLMContext(
        platform="test_platform",
        context=Context(user_id="12345"),
        **unset_fields,
    )


@pytest.fixture
async def async_call_tool(
    hass: HomeAssistant, llm_context: llm.LLMContext, mock_init_component
):
    """Provide a coroutine that calls a "powerllm" API tool by name.

    The returned callable takes the tool name plus keyword arguments, which
    are forwarded as the tool arguments, and returns the tool's response.
    """
    powerllm_api = await llm.async_get_api(hass, "powerllm", llm_context)

    async def _invoke(name: str, **kwargs):
        return await powerllm_api.async_call_tool(
            llm.ToolInput(tool_name=name, tool_args=kwargs)
        )

    return _invoke
23 changes: 5 additions & 18 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from decimal import Decimal
from unittest.mock import patch

import pytest
import voluptuous as vol
from homeassistant.components.homeassistant.exposed_entities import async_expose_entity
from homeassistant.components.intent import async_register_timer_handler
Expand All @@ -23,19 +22,6 @@
from custom_components.powerllm.const import CONF_PROMPT_ENTITIES


@pytest.fixture
def llm_context() -> llm.LLMContext:
    """Build the LLM context passed to tools: test platform, fixed user id."""
    # Every optional field of the context is left unset (None).
    unset_fields = dict.fromkeys(
        ("user_prompt", "language", "assistant", "device_id")
    )
    return llm.LLMContext(
        platform="test_platform",
        context=Context(user_id="12345"),
        **unset_fields,
    )


def test_test(hass):
    """Workaround for https://github.com/MatthewFlamm/pytest-homeassistant-custom-component/discussions/160.

    Intentionally has no body: the test only requests ``hass``. See the
    linked discussion for why this placeholder is needed.
    """

Expand Down Expand Up @@ -84,19 +70,19 @@ class MyIntentHandler(intent.IntentHandler):

assert len(llm.async_get_apis(hass)) == 2
api = await llm.async_get_api(hass, "powerllm", llm_context)
assert len(api.tools) == 10
assert len(api.tools) == 11

# Match all
intent_handler.platforms = None

api = await llm.async_get_api(hass, "powerllm", llm_context)
assert len(api.tools) == 11
assert len(api.tools) == 12

# Match specific domain
intent_handler.platforms = {"light"}

api = await llm.async_get_api(hass, "powerllm", llm_context)
assert len(api.tools) == 11
assert len(api.tools) == 12
tool = api.tools[4]
assert tool.name == "test_intent"
assert tool.description == "Execute Home Assistant test_intent intent"
Expand Down Expand Up @@ -311,6 +297,7 @@ class MyIntentHandler(intent.IntentHandler):
"maps_search",
"memory",
"python_code_execute",
"web_scrape",
]


Expand All @@ -327,7 +314,7 @@ class MyIntentHandler(intent.IntentHandler):

assert len(llm.async_get_apis(hass)) == 2
api = await llm.async_get_api(hass, "powerllm", llm_context)
assert len(api.tools) == 12
assert len(api.tools) == 13
tool = api.tools[5]
assert tool.name == "test_intent"
assert tool.description == "my intent handler"
Expand Down
14 changes: 0 additions & 14 deletions tests/test_llm_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test powerllm config flow."""

import pytest
import voluptuous as vol
from homeassistant.const import ATTR_FRIENDLY_NAME
from homeassistant.core import Context, HomeAssistant, State
Expand All @@ -14,19 +13,6 @@
from custom_components.powerllm import llm_tools


@pytest.fixture
def llm_context() -> llm.LLMContext:
    """Build the LLM context passed to tools: test platform, default Context."""
    # Every optional field of the context is left unset (None).
    unset_fields = dict.fromkeys(
        ("user_prompt", "language", "assistant", "device_id")
    )
    return llm.LLMContext(
        platform="test_platform",
        context=Context(),
        **unset_fields,
    )


def test_test(hass):
    """Workaround for https://github.com/MatthewFlamm/pytest-homeassistant-custom-component/discussions/160.

    Intentionally has no body: the test only requests ``hass``. See the
    linked discussion for why this placeholder is needed.
    """

Expand Down
88 changes: 0 additions & 88 deletions tests/test_tool_python_code.py

This file was deleted.

47 changes: 47 additions & 0 deletions tests/tools/test_python_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Test python script tool."""


def test_test(hass):
    """Workaround for https://github.com/MatthewFlamm/pytest-homeassistant-custom-component/discussions/160.

    Intentionally has no body: the test only requests ``hass``. See the
    linked discussion for why this placeholder is needed.
    """


async def test_python_script_tool(async_call_tool) -> None:
    """Check that values stored in ``output`` come back in the response."""
    source = """
output["test"] = "passed"
output["test2"] = "passed2"
"""
    expected = {"output": {"test": "passed", "test2": "passed2"}}

    result = await async_call_tool("python_code_execute", source=source)

    assert result == expected


async def test_python_script_tool_import(async_call_tool) -> None:
    """Check that a script can import ``math`` and return a computed value."""
    source = """
import math
output["test"] = math.cos(0)
"""
    expected = {"output": {"test": 1.0}}

    result = await async_call_tool("python_code_execute", source=source)

    assert result == expected


async def test_python_script_tool_print(async_call_tool) -> None:
    """Test print in python script tool.

    The script prints once at module level and once from inside a function;
    both lines are expected under the "printed" key of the response.
    """
    # The body of test2() must be indented inside the script literal,
    # otherwise the script itself is not valid Python.
    source = """
print("test1")
def test2():
    print("test2")
test2()
"""

    response = await async_call_tool("python_code_execute", source=source)

    assert response == {"printed": "test1\ntest2\n"}
30 changes: 30 additions & 0 deletions tests/tools/test_web_scrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Test web scrape tool."""

from unittest.mock import patch


def test_test(hass):
    """Workaround for https://github.com/MatthewFlamm/pytest-homeassistant-custom-component/discussions/160.

    Intentionally has no body: the test only requests ``hass``. See the
    linked discussion for why this placeholder is needed.
    """


async def test_web_scrape_tool(async_call_tool) -> None:
    """Scrape a canned HTML page and compare the extracted result."""
    # trafilatura.fetch_url is patched to return this page, so the test does
    # not download anything.
    page = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Hello World</title>
</head>
<body>
<h1>Hello, World!</h1>
<p>This is a simple HTML page with a greeting message.</p>
</body>
</html>
"""
    expected = {"text": "This is a simple HTML page with a greeting message."}

    with patch("trafilatura.fetch_url", return_value=page):
        scraped = await async_call_tool("web_scrape", url="example.com")

    assert scraped == expected

0 comments on commit 959aaa2

Please sign in to comment.