Remove hardcoded values and references to gpt-3.5-turbo models (#458)
This includes updates to the docs, the tests, the CLI, and two engines.
caufieldjh authored Sep 23, 2024
2 parents 8a66db6 + 16fd9dc commit 4d4d71e
Showing 10 changed files with 25 additions and 34 deletions.
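
The common thread across these files is that `gpt-3.5-turbo` is no longer baked into individual engines and examples; the model is supplied by the caller and falls back to a shared default. A minimal sketch of that fallback pattern, assuming a default of `gpt-4o` (the `DEFAULT_MODEL` name appears in the cli.py hunk below; everything else here is illustrative):

```python
# Minimal sketch of the fallback pattern: no per-engine hardcoded model,
# just a shared default applied whenever the caller does not name one.
DEFAULT_MODEL = "gpt-4o"  # assumed value, for illustration only


def resolve_model(model: str | None = None) -> str:
    """Return the requested model name, or the shared default if none was given."""
    if not model:
        model = DEFAULT_MODEL
    return model


print(resolve_model())         # -> gpt-4o
print(resolve_model("gpt-4"))  # -> gpt-4
```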
2 changes: 1 addition & 1 deletion docs/custom.md
@@ -365,7 +365,7 @@ The output of this is then passed through further SPIRES iterations.

#### Text length limit

- LLMs have context sizes limiting the combined length of their inputs and outputs. The `gpt-3.5-turbo` model, for example, has a 4,096 token limit (prompt + completion), while the `gpt-3.5-turbo-16k` model has a larger context of 16,384 tokens.
+ LLMs have context sizes limiting the combined length of their inputs and outputs.

To see the token limit for each model, use `ontogpt list-models`. The Max Tokens value will be in the fourth column.
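
For quick reference, that command can be run on its own (a minimal example; the exact column layout may vary between releases):

```bash
# List registered models; the Max Tokens column reports each model's context limit
ontogpt list-models
```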

8 changes: 4 additions & 4 deletions docs/functions.md
@@ -596,7 +596,7 @@ Options:
Examples:
```bash
- ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review" --get-pmc --model gpt-3.5-turbo-16k --limit 3
+ ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review" --get-pmc --model gpt-4o --limit 3
```
```bash
@@ -770,7 +770,7 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt web-extract -t reaction.Reaction -m gpt-3.5-turbo-16k https://www.scienceofcooking.com/maillard_reaction.htm
+ ontogpt web-extract -t reaction.Reaction -m gpt-4o https://www.scienceofcooking.com/maillard_reaction.htm
```
### wikipedia-extract
@@ -784,7 +784,7 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt wikipedia-extract -t mendelian_disease.MendelianDisease -m gpt-3.5-turbo-16k "Cartilage–hair hypoplasia"
+ ontogpt wikipedia-extract -t mendelian_disease.MendelianDisease -m gpt-4o "Cartilage–hair hypoplasia"
```
### wikipedia-search
@@ -798,5 +798,5 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt wikipedia-search -t biological_process -m gpt-3.5-turbo-16k "digestion"
+ ontogpt wikipedia-search -t biological_process -m gpt-4o "digestion"
```
3 changes: 1 addition & 2 deletions src/ontogpt/cli.py
@@ -714,7 +714,7 @@ def pubmed_annotate(
Example:
ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review"
- --get-pmc --model gpt-3.5-turbo-16k --limit 3
+ --get-pmc --model gpt-4o --limit 3
"""
if not model:
model = DEFAULT_MODEL
@@ -1174,7 +1174,6 @@ def convert(
@model_option
@output_option_txt
@temperature_option
- @cut_input_text_option
@api_base_option
@api_version_option
@model_provider_option
1 change: 0 additions & 1 deletion src/ontogpt/engines/halo_engine.py
@@ -57,7 +57,6 @@ def text(self) -> str:
class HALOEngine(KnowledgeEngine):
"""Engine for Hallucinating Latent Ontologies."""

engine: str = "gpt-3.5-turbo"
ontology: Ontology = None
traverse_slots: List[FIELD] = field(
default_factory=lambda: ["subtypes", "parts", "subclass_of", "part_of"]
4 changes: 0 additions & 4 deletions src/ontogpt/engines/spires_engine.py
@@ -286,10 +286,6 @@ def map_terms(
"""
Map the given terms to the given ontology.
- EXPERIMENTAL
- currently GPT-3 does not do so well with this task.
:param terms:
:param ontology:
:return:
2 changes: 0 additions & 2 deletions src/ontogpt/engines/synonym_engine.py
@@ -9,8 +9,6 @@
class SynonymEngine(KnowledgeEngine):
"""Engine for generating synonyms."""

engine: str = "gpt-3.5-turbo-instruct"

def synonyms(self, named_entity: str, domain: str) -> List[str]:
"""Get synonyms for a given text."""
prompt = f"List the example formal scientific\
1 change: 0 additions & 1 deletion src/ontogpt/webapp/main.py
@@ -31,7 +31,6 @@
"gpt-4o",
"gpt-4",
"gpt-4-turbo",
"gpt-3.5-turbo",
"ollama/llama2",
"ollama/llama3",
"ollama/orca-mini",
30 changes: 15 additions & 15 deletions tests/input/prompts/prompts.yaml
@@ -1,6 +1,6 @@
- default_engine: gpt-3.5-turbo
+ default_engine: gpt-4o
prompts:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -15,7 +15,7 @@ prompts:
expected:
- "predicate: INDUCES"
- "object: .*nystagmus"
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
pre_prompt: |
## Instructions:
## Add an additional element to the YAML below, which is for elements
@@ -99,31 +99,31 @@ prompts:
equivalent_to: Transport and utilizes some Train
- name: CarEngine
context: IndustrialOntology
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; part of; equivalent to.
Question: The relationship between the concepts Car and Vehicle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; supertype of; part of; has part; equivalent to.
Question: The relationship between the concepts Car and Vehicle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; supertype of; part of; has part; equivalent to.
Question: The relationship between the concepts Tire and Bicycle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all the parts of a nucleus as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all the things a nucleus is a part of, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all parent concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all child concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
triples: <A semi-colon separated list of chemical to disease relationships, where the relationship is either INDUCES or TREATS.
@@ -134,7 +134,7 @@ prompts:
In vivo evidences suggesting the role of oxidative stress in pathogenesis of vancomycin-induced nephrotoxicity:
protection by erdosteine
In the context of cell biology, list all child concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -145,7 +145,7 @@ prompts:
===
#expected:
# - "Veralipride INDUCES Parkinsonism"
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -154,7 +154,7 @@ prompts:
Text:
Kaliuretic effect of L-dopa treatment in parkinsonian patients.
===
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -163,7 +163,7 @@ prompts:
Text:
Electrocardiographic evidence of myocardial injury in psychiatrically hospitalized cocaine abusers.
===
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Generate a list of exact synonyms for the chemical concept "heavy metal", as a semi-colon separated list.
Only include concepts with the identical meaning. Do not include more specific concepts or broader concepts.
6 changes: 3 additions & 3 deletions tests/integration/test_clients/test_llmclient.py
@@ -153,7 +153,7 @@ def setUp(self) -> None:
def test_all_prompts(self):
"""Test all prompts."""
prompt_doc = yaml.safe_load(open(PROMPTS_FILE))
- default_engine = prompt_doc.get("default_engine", "gpt-3.5-turbo-instruct")
+ default_engine = prompt_doc.get("default_engine", "gpt-4o")
for prompt in prompt_doc["prompts"]:
prompt_text = prompt["prompt"]
if not isinstance(prompt_text, str):
@@ -199,15 +199,15 @@ def test_drug_mech_db(self):

def test_code_completion_generalization(self):
"""Tests structured responses."""
engine = "gpt-3.5-turbo"
engine = "gpt-4o"
client = LLMClient(model=engine)
print(len(CODE_PROMPT_GENERALIZATION))
ann = client.complete(CODE_PROMPT_GENERALIZATION)
print(ann)

def test_extract_via_code_completion(self):
"""Tests structured responses."""
engine = "gpt-3.5-turbo"
engine = "gpt-4o"
client = LLMClient(model=engine)
ann = client.complete(CODE_PROMPT_EXTRACT)
print(ann)
2 changes: 1 addition & 1 deletion tests/integration/test_evaluation/test_eval_drugmechdb.py
@@ -145,7 +145,7 @@ def test_training_set(self):
evaluator = self.engine
ke = evaluator.extractor
training_set = list(evaluator.create_training_set(100))
t = dict(base_model="gpt-3.5-turbo-instruct", template=ke.template, examples=training_set)
t = dict(base_model="gpt-4o", template=ke.template, examples=training_set)
with open(TRAINING_OUT, "w") as f:
yaml.dump(t, f)
# print(yaml.dump(training_set))
