From 639c366729a7a101e029670c2eed496d055af564 Mon Sep 17 00:00:00 2001
From: Patrick Golden
Date: Tue, 5 Nov 2024 09:16:51 -0500
Subject: [PATCH] Add --target-class option to web_extract command

---
 src/ontogpt/cli.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
index 0700d9719..8735b7e26 100644
--- a/src/ontogpt/cli.py
+++ b/src/ontogpt/cli.py
@@ -974,6 +974,7 @@ def search_and_extract(
 
 @main.command()
 @template_option
+@target_class_option
 @model_option
 @recurse_option
 @output_option_wb
@@ -989,6 +990,7 @@ def search_and_extract(
 def web_extract(
     model,
     template,
+    target_class,
     url,
     output,
     output_format,
@@ -1026,8 +1028,14 @@ def web_extract(
     web_client = SoupClient()
     text = web_client.text(url)
 
+    if target_class:
+        schemaview = template_details[3]
+        target_class_def = schemaview.get_class(target_class)
+    else:
+        target_class_def = None
+
     logging.debug(f"Input text: {text}")
-    results = ke.extract_from_text(text=text, show_prompt=show_prompt)
+    results = ke.extract_from_text(text=text, cls=target_class_def, show_prompt=show_prompt)
     write_extraction(results, output, output_format, ke, template, cut_input_text)
 
 