Commit

Merge pull request #132 from VishwamAI/feature/generative_ai_updates

Add generative AI model and evaluation scripts

kasinadhsarma authored Oct 1, 2024
2 parents d5ad6d1 + 033e441 commit a687891
Showing 2 changed files with 184 additions and 0 deletions.
43 changes: 43 additions & 0 deletions NeuroFlex/Transformers/evaluate_model.py
@@ -0,0 +1,43 @@
import torch
from generative_ai_model import T5Model, DevInAI, ChainOfThought, MultiPromptHandler

def evaluate_model(devin_ai, chain_of_thought, multi_prompt_handler):
    print("Evaluating Generative AI Text-to-Text Model")
    print("===========================================")

    # Test basic text generation
    prompt = "Translate the following English text to French: 'Hello, how are you?'"
    response = devin_ai.generate_multiple_texts([prompt])[0]
    print("\nBasic Text Generation:")
    print(f"Prompt: {prompt}")
    print(f"Response: {response}")

    # Test chain-of-thought reasoning
    initial_prompt = "Explain the process of photosynthesis"
    reasoning_steps = chain_of_thought.reason_through_steps(initial_prompt, num_steps=3)
    print("\nChain-of-Thought Reasoning:")
    for i, step in enumerate(reasoning_steps):
        print(f"Step {i}: {step}")

    # Test multi-prompt development
    prompts = [
        "What is the capital of France?",
        "Explain quantum physics in simple terms.",
        "Describe the water cycle."
    ]
    multi_prompt_response = multi_prompt_handler.generate_with_multi_prompt(prompts)
    print("\nMulti-Prompt Development:")
    print(f"Prompts: {prompts}")
    print(f"Combined Response: {multi_prompt_response}")

if __name__ == "__main__":
    # Initialize model and components
    model = T5Model()
    devin_ai = DevInAI(model)
    chain_of_thought = ChainOfThought(devin_ai)
    multi_prompt_handler = MultiPromptHandler(devin_ai)

    # Evaluate the model
    evaluate_model(devin_ai, chain_of_thought, multi_prompt_handler)

    print("\nEvaluation complete. Please review the outputs to assess the model's performance.")
141 changes: 141 additions & 0 deletions NeuroFlex/Transformers/generative_ai_model.py
@@ -0,0 +1,141 @@
import torch
from torch import nn
from transformers import T5Tokenizer, T5ForConditionalGeneration

class T5Model:
    def __init__(self, model_name="t5-base"):
        # Use a GPU when available; fall back to CPU otherwise
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(self.device)
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)

    def generate_text(self, input_text, max_length=100):
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to(self.device)
        output_ids = self.model.generate(input_ids, max_length=max_length)
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
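
# Usage sketch (illustrative, not part of the evaluation flow; the t5-base
# weights are downloaded from the Hugging Face Hub on first use):
#   model = T5Model()
#   print(model.generate_text("translate English to German: How old are you?"))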

class DevInAI:
    def __init__(self, model):
        self.model = model

    def generate_multiple_texts(self, input_texts):
        # Generate one completion per input prompt, preserving order
        outputs = []
        for input_text in input_texts:
            outputs.append(self.model.generate_text(input_text))
        return outputs

class ChainOfThought:
    def __init__(self, devin_ai):
        self.devin_ai = devin_ai

    def reason_through_steps(self, initial_prompt, num_steps):
        reasoning_steps = [initial_prompt]
        context = ""
        for step in range(num_steps):
            # Feed the accumulated context plus the latest step back into the model
            prompt = f"Context: {context}\nGiven the previous step: '{reasoning_steps[-1]}', provide the next logical step in explaining this concept. Be specific, detailed, and ensure continuity with previous steps. Focus on explaining one aspect of the concept in depth."
            next_step = self.devin_ai.generate_multiple_texts([prompt])[0]
            reasoning_steps.append(next_step)
            context += f"Step {step + 1}: {next_step}\n"
        return reasoning_steps
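
# Sketch of the flow above (illustrative): with num_steps=2 the model is called
# twice, each call seeing the accumulated context, so the returned list is
# [initial_prompt, step_1, step_2].
#   steps = ChainOfThought(devin_ai).reason_through_steps("Explain photosynthesis", num_steps=2)
#   len(steps)  # 3: the initial prompt plus one entry per step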

class MultiPromptHandler:
    def __init__(self, devin_ai):
        self.devin_ai = devin_ai

    def generate_with_multi_prompt(self, prompts):
        # Answer each prompt independently, then synthesize the answers in one pass
        responses = self.devin_ai.generate_multiple_texts(prompts)
        combined_prompt = "Synthesize the following information into a coherent and detailed response. Ensure logical flow and connections between ideas:\n"
        for i, response in enumerate(responses):
            combined_prompt += f"{i+1}. {response}\n"
        combined_prompt += "Provide a comprehensive answer that addresses all points while maintaining a clear narrative structure. Use transitional phrases to connect ideas."
        final_response = self.devin_ai.generate_multiple_texts([combined_prompt])[0]
        return final_response
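
# Usage sketch (illustrative prompts): each prompt is answered independently,
# then the numbered answers are fed through the model once more for a single
# synthesized reply.
#   handler = MultiPromptHandler(devin_ai)
#   summary = handler.generate_with_multi_prompt(["Define entropy.", "Define enthalpy."])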

def integrate_huggingface_dataset(dataset_name, tokenizer, max_length=512):
    from datasets import load_dataset
    dataset = load_dataset(dataset_name)

    def preprocess_function(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)

    # Drop the raw columns so only the tokenized features remain
    tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
    return tokenized_dataset
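
# Usage sketch (hypothetical dataset name; the helper assumes the dataset
# exposes a "text" column):
#   model = T5Model()
#   tokenized = integrate_huggingface_dataset("ag_news", model.tokenizer)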

# Helpers for preparing Hugging Face datasets for training
def prepare_huggingface_dataset(dataset_name, tokenizer, max_length=512):
    from datasets import load_dataset
    dataset = load_dataset(dataset_name)

    def tokenize_function(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)

    tokenized_dataset = dataset.map(tokenize_function, batched=True)
    return tokenized_dataset

def initialize_model():
    return T5Model()

# Example usage
if __name__ == "__main__":
    model = initialize_model()
    devin_ai = DevInAI(model)
    chain_of_thought = ChainOfThought(devin_ai)
    multi_prompt_handler = MultiPromptHandler(devin_ai)

    # Test chain-of-thought reasoning
    initial_prompt = "Explain the process of photosynthesis"
    reasoning_steps = chain_of_thought.reason_through_steps(initial_prompt, num_steps=3)
    print("Chain-of-Thought Reasoning:")
    for i, step in enumerate(reasoning_steps):
        print(f"Step {i}: {step}")

    # Test multi-prompt development
    prompts = [
        "What is the capital of France?",
        "Explain quantum physics in simple terms.",
        "Describe the water cycle."
    ]
    multi_prompt_response = multi_prompt_handler.generate_with_multi_prompt(prompts)
    print("\nMulti-Prompt Response:")
    print(multi_prompt_response)

# Helpers for integrating Hugging Face datasets and training the model
def load_dataset(dataset_name):
    # The local import shadows this wrapper's name inside the function body,
    # so the call below reaches the datasets library rather than recursing
    from datasets import load_dataset
    return load_dataset(dataset_name)

def prepare_data(dataset, tokenizer, max_length=512):
    def tokenize_function(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=max_length)
    return dataset.map(tokenize_function, batched=True)

def train_model(model, dataset, tokenizer, num_epochs=3, batch_size=8):
    from transformers import Trainer, TrainingArguments

    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=num_epochs,
        per_device_train_batch_size=batch_size,
        save_steps=10_000,
        save_total_limit=2,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        tokenizer=tokenizer,
    )

    trainer.train()

# Example usage for training
if __name__ == "__main__":
    # Note: this second __main__ block runs after the example usage above.
    # "your_dataset_name" is a placeholder; substitute a real dataset before running.
    model = initialize_model()
    dataset = load_dataset("your_dataset_name")
    tokenized_dataset = prepare_data(dataset, model.tokenizer)
    train_model(model.model, tokenized_dataset, model.tokenizer)
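
One caveat in the training path above: tokenize_function produces only input_ids and attention_mask, while T5ForConditionalGeneration also needs labels for Trainer to compute a loss. A minimal sketch of a labeled preprocessing step (the "source" and "target" column names are hypothetical placeholders for whatever the chosen dataset actually provides):

def tokenize_pairs(examples, tokenizer, max_length=512):
    # Column names are placeholders; adapt them to the dataset schema
    model_inputs = tokenizer(examples["source"], truncation=True,
                             padding="max_length", max_length=max_length)
    targets = tokenizer(examples["target"], truncation=True,
                        padding="max_length", max_length=max_length)
    # For better results, pad-token positions in labels are usually set to -100
    model_inputs["labels"] = targets["input_ids"]
    return model_inputs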
