forked from evidentlyai/evidently
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from pmittaldev/ROUGE-metric-(evidentlyai#1318)
Rouge metric (evidentlyai#1318)
- Loading branch information
Showing 43 changed files with 2,016 additions and 11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
from evidently.experimental.dataset_generators.llm.questions import QADatasetFromSeedGenerator, QADatasetGenerator | ||
from evidently.experimental.dataset_generators.llm.index import DataCollectionProvider | ||
from evidently.options.base import Options | ||
|
||
|
||
def generate_from_file():
    """Generate question/answer rows from a local PDF and print them.

    Builds a data collection from ``../cloud_quickstart_tracing.pdf`` using
    the "simple" splitter (chunk size 50, overlap 20), asks a
    ``QADatasetGenerator`` configured with provider "openai" and model
    "gpt-4o-mini" for 5 questions, and prints every generated row to stdout.

    NOTE(review): presumably requires OpenAI credentials to be configured in
    the environment -- confirm before running.
    """
    collection = DataCollectionProvider.from_files(
        "../cloud_quickstart_tracing.pdf",
        chunk_size=50,
        chunk_overlap=20,
        splitter="simple",
    )
    qa_generator = QADatasetGenerator(
        data_collection=collection,
        provider="openai",
        model="gpt-4o-mini",
        num_questions=5,
        options=Options.from_any_options(None),
    )

    # "questions" is always printed; "answers" and "context" only when the
    # row actually carries those labels.
    for _index, row in qa_generator.generate().iterrows():
        print("Q", row["questions"])
        for tag, column in (("A", "answers"), ("C", "context")):
            if column in row:
                print(tag, row[column])
        print()
|
||
|
||
def _print_qa_rows(generated):
    """Print each generated row as "Q"/"A"/"C" lines followed by a blank line.

    Args:
        generated: Object exposing ``iterrows()`` that yields ``(index, row)``
            pairs (presumably a pandas DataFrame -- confirm against the
            generator's return type). Every row must have a "questions"
            entry; "answers" and "context" are printed only when present.
    """
    for _, row in generated.iterrows():
        print("Q", row["questions"])
        if "answers" in row:
            print("A", row["answers"])
        if "context" in row:
            print("C", row["context"])
        print()


def main():
    """Run two dataset-generation demos and print their results.

    1. ``QADatasetGenerator`` over a data collection built from two
       in-memory text chunks.
    2. ``QADatasetFromSeedGenerator`` starting from a single seed question.

    Both use provider "openai" with model "gpt-4o-mini" and request 5
    questions. NOTE(review): presumably requires OpenAI credentials to be
    configured in the environment -- confirm before running.
    """
    data = DataCollectionProvider.from_chunks(chunks=["I am a banana", "My spoon is too big"])
    chunk_generator = QADatasetGenerator(
        data_collection=data,
        provider="openai",
        model="gpt-4o-mini",
        num_questions=5,
        options=Options.from_any_options(None),
    )
    _print_qa_rows(chunk_generator.generate())

    seed_generator = QADatasetFromSeedGenerator(
        seed_question="What is 'kek'?",
        num_questions=5,
        provider="openai",
        model="gpt-4o-mini",
        options=Options.from_any_options(None),
    )
    _print_qa_rows(seed_generator.generate())
|
||
|
||
# Script entry point: only runs when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    # Default demo: generation from in-memory chunks and from a seed question.
    main()
    # Alternative demo that reads a PDF from disk; uncomment to run it.
    # generate_from_file()
117 changes: 117 additions & 0 deletions
117
examples/how_to_questions/metrics/data_integrity/dataset_rouge_summary_metric.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Evidently Dataset ROUGE Summary Metric" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from evidently.report import Report\n", | ||
"from evidently.metrics import ROUGESummaryMetric" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"current_data = {\n", | ||
" \"summary\": [\"hello there\", \"general kenobi\"],\n", | ||
"}\n", | ||
"\n", | ||
"current_df = pd.DataFrame(current_data)\n", | ||
"\n", | ||
"reference_data = {\n", | ||
" \"summary\": [\"hello there\", \"no de\"]\n", | ||
"}\n", | ||
"\n", | ||
"current_df = pd.DataFrame(current_data)\n", | ||
"reference_df = pd.DataFrame(reference_data)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"report = Report(metrics=[\n", | ||
" ROUGESummaryMetric(column_name=\"summary\", rouge_n=2)\n", | ||
"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"report.run(current_data=current_df, reference_data=reference_df)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"report.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"report.as_dict()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"report.as_dataframe()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.19" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from typing import Dict | ||
|
||
from evidently.features import json_schema_match_feature | ||
from evidently.features.generated_features import FeatureDescriptor | ||
from evidently.features.generated_features import GeneratedFeature | ||
|
||
|
||
class JSONSchemaMatch(FeatureDescriptor):
    """Descriptor that builds a JSON-schema-match feature for a column.

    The fields below are forwarded unchanged to
    ``json_schema_match_feature.JSONSchemaMatch``; their exact semantics are
    defined by that feature, which is not visible from this module.
    """

    class Config:
        type_alias = "evidently:descriptor:JSONSchemaMatch"

    # Mapping of key name to Python type, passed through as ``expected_schema``.
    expected_schema: Dict[str, type]
    # Presumably enables checking value types against the schema -- confirm
    # in the underlying feature.
    validate_types: bool = False
    # Presumably requires the keys to match the schema exactly -- confirm
    # in the underlying feature.
    exact_match: bool = False

    def feature(self, column_name: str) -> GeneratedFeature:
        """Create the generated feature for ``column_name``.

        Args:
            column_name: Name of the column the feature is computed for.

        Returns:
            A ``json_schema_match_feature.JSONSchemaMatch`` configured with
            this descriptor's fields and its ``display_name``.
        """
        feature_kwargs = {
            "column_name": column_name,
            "expected_schema": self.expected_schema,
            "validate_types": self.validate_types,
            "exact_match": self.exact_match,
            # display_name is not declared here; presumably inherited from
            # FeatureDescriptor -- confirm.
            "display_name": self.display_name,
        }
        return json_schema_match_feature.JSONSchemaMatch(**feature_kwargs)
Oops, something went wrong.