From 033e44172be9118c1b71ecfd270501362fea6898 Mon Sep 17 00:00:00 2001
From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 1 Oct 2024 08:54:13 +0000
Subject: [PATCH] Add generative AI model and evaluation script with
 chain-of-thought and multi-prompt capabilities

---
 NeuroFlex/Transformers/evaluate_model.py      |  42 +++++
 NeuroFlex/Transformers/generative_ai_model.py | 151 ++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 NeuroFlex/Transformers/evaluate_model.py
 create mode 100644 NeuroFlex/Transformers/generative_ai_model.py

diff --git a/NeuroFlex/Transformers/evaluate_model.py b/NeuroFlex/Transformers/evaluate_model.py
new file mode 100644
index 0000000..f3663c6
--- /dev/null
+++ b/NeuroFlex/Transformers/evaluate_model.py
@@ -0,0 +1,42 @@
+from generative_ai_model import T5Model, DevInAI, ChainOfThought, MultiPromptHandler
+
+def evaluate_model(devin_ai, chain_of_thought, multi_prompt_handler):
+    print("Evaluating Generative AI Text-to-Text Model")
+    print("===========================================")
+
+    # Test basic text generation
+    prompt = "Translate the following English text to French: 'Hello, how are you?'"
+    response = devin_ai.generate_multiple_texts([prompt])[0]
+    print("\nBasic Text Generation:")
+    print(f"Prompt: {prompt}")
+    print(f"Response: {response}")
+
+    # Test chain-of-thought reasoning (step 0 is the initial prompt itself)
+    initial_prompt = "Explain the process of photosynthesis"
+    reasoning_steps = chain_of_thought.reason_through_steps(initial_prompt, num_steps=3)
+    print("\nChain-of-Thought Reasoning:")
+    for i, step in enumerate(reasoning_steps):
+        print(f"Step {i}: {step}")
+
+    # Test multi-prompt development
+    prompts = [
+        "What is the capital of France?",
+        "Explain quantum physics in simple terms.",
+        "Describe the water cycle."
+    ]
+    multi_prompt_response = multi_prompt_handler.generate_with_multi_prompt(prompts)
+    print("\nMulti-Prompt Development:")
+    print(f"Prompts: {prompts}")
+    print(f"Combined Response: {multi_prompt_response}")
+
+if __name__ == "__main__":
+    # Initialize the model and its wrapper components
+    model = T5Model()
+    devin_ai = DevInAI(model)
+    chain_of_thought = ChainOfThought(devin_ai)
+    multi_prompt_handler = MultiPromptHandler(devin_ai)
+
+    # Evaluate the model
+    evaluate_model(devin_ai, chain_of_thought, multi_prompt_handler)
+
+    print("\nEvaluation complete. Please review the outputs to assess the model's performance.")
diff --git a/NeuroFlex/Transformers/generative_ai_model.py b/NeuroFlex/Transformers/generative_ai_model.py
new file mode 100644
index 0000000..aefd256
--- /dev/null
+++ b/NeuroFlex/Transformers/generative_ai_model.py
@@ -0,0 +1,151 @@
+import torch
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+
+class T5Model:
+    def __init__(self, model_name="t5-base"):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(self.device)
+        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
+
+    def generate_text(self, input_text, max_length=100):
+        input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to(self.device)
+        output_ids = self.model.generate(input_ids, max_length=max_length)
+        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+class DevInAI:
+    def __init__(self, model):
+        self.model = model
+
+    def generate_multiple_texts(self, input_texts):
+        return [self.model.generate_text(input_text) for input_text in input_texts]
+
+class ChainOfThought:
+    def __init__(self, devin_ai):
+        self.devin_ai = devin_ai
+
+    def reason_through_steps(self, initial_prompt, num_steps):
+        # Step 0 is the initial prompt; each later step extends the accumulated context.
+        reasoning_steps = [initial_prompt]
+        context = ""
+        for step in range(num_steps):
+            prompt = (
+                f"Context: {context}\n"
+                f"Given the previous step: '{reasoning_steps[-1]}', provide the next logical step "
+                "in explaining this concept. Be specific, detailed, and ensure continuity with "
+                "previous steps. Focus on explaining one aspect of the concept in depth."
+            )
+            next_step = self.devin_ai.generate_multiple_texts([prompt])[0]
+            reasoning_steps.append(next_step)
+            context += f"Step {step + 1}: {next_step}\n"
+        return reasoning_steps
+
+class MultiPromptHandler:
+    def __init__(self, devin_ai):
+        self.devin_ai = devin_ai
+
+    def generate_with_multi_prompt(self, prompts):
+        # Answer each prompt independently, then ask the model to synthesize the answers.
+        responses = self.devin_ai.generate_multiple_texts(prompts)
+        combined_prompt = (
+            "Synthesize the following information into a coherent and detailed response. "
+            "Ensure logical flow and connections between ideas:\n"
+        )
+        for i, response in enumerate(responses):
+            combined_prompt += f"{i + 1}. {response}\n"
+        combined_prompt += (
+            "Provide a comprehensive answer that addresses all points while maintaining a clear "
+            "narrative structure. Use transitional phrases to connect ideas."
+        )
+        return self.devin_ai.generate_multiple_texts([combined_prompt])[0]
+
+# Helpers for loading a Hugging Face dataset and fine-tuning the model
+
+def prepare_huggingface_dataset(dataset_name, tokenizer, max_length=512):
+    from datasets import load_dataset
+    dataset = load_dataset(dataset_name)
+
+    def tokenize_function(examples):
+        model_inputs = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
+        # T5 needs explicit labels for training; reusing the inputs as targets is a
+        # placeholder until the dataset provides a real target column.
+        model_inputs["labels"] = model_inputs["input_ids"].copy()
+        return model_inputs
+
+    return dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
+
+def train_model(model, dataset, tokenizer, num_epochs=3, batch_size=8):
+    from transformers import Trainer, TrainingArguments
+
+    training_args = TrainingArguments(
+        output_dir="./results",
+        num_train_epochs=num_epochs,
+        per_device_train_batch_size=batch_size,
+        save_steps=10_000,
+        save_total_limit=2,
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=dataset["train"],
+        tokenizer=tokenizer,
+    )
+
+    trainer.train()
+
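+# A hedged sketch, not part of the original patch: train_model above pads every
+# example to max_length during tokenization. DataCollatorForSeq2Seq instead pads
+# each batch on the fly, the more common setup for seq2seq fine-tuning; it
+# assumes the dataset was tokenized without padding="max_length".
+def train_with_dynamic_padding(t5_model, tokenized_dataset, num_epochs=3, batch_size=8):
+    from transformers import DataCollatorForSeq2Seq, Trainer, TrainingArguments
+
+    # The collator pads input_ids, attention_mask, and labels per batch.
+    collator = DataCollatorForSeq2Seq(t5_model.tokenizer, model=t5_model.model)
+    training_args = TrainingArguments(
+        output_dir="./results",
+        num_train_epochs=num_epochs,
+        per_device_train_batch_size=batch_size,
+    )
+    trainer = Trainer(
+        model=t5_model.model,
+        args=training_args,
+        train_dataset=tokenized_dataset["train"],
+        data_collator=collator,
+    )
+    trainer.train()
+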
+def initialize_model():
+    return T5Model()
+
+# Example usage
+if __name__ == "__main__":
+    model = initialize_model()
+    devin_ai = DevInAI(model)
+    chain_of_thought = ChainOfThought(devin_ai)
+    multi_prompt_handler = MultiPromptHandler(devin_ai)
+
+    # Test chain-of-thought reasoning
+    initial_prompt = "Explain the process of photosynthesis"
+    reasoning_steps = chain_of_thought.reason_through_steps(initial_prompt, num_steps=3)
+    print("Chain-of-Thought Reasoning:")
+    for i, step in enumerate(reasoning_steps):
+        print(f"Step {i}: {step}")
+
+    # Test multi-prompt development
+    prompts = [
+        "What is the capital of France?",
+        "Explain quantum physics in simple terms.",
+        "Describe the water cycle."
+    ]
+    multi_prompt_response = multi_prompt_handler.generate_with_multi_prompt(prompts)
+    print("\nMulti-Prompt Response:")
+    print(multi_prompt_response)
+
+    # Training example (replace "your_dataset_name" with a dataset that has a "text" column)
+    # tokenized_dataset = prepare_huggingface_dataset("your_dataset_name", model.tokenizer)
+    # train_model(model.model, tokenized_dataset, model.tokenizer)
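+
+    # Hypothetical usage of the train_with_dynamic_padding sketch above, assuming
+    # the dataset was tokenized with padding disabled so the collator pads per batch:
+    # train_with_dynamic_padding(model, tokenized_dataset)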