-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_conversations.py
executable file
·113 lines (95 loc) · 3.47 KB
/
preprocess_conversations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python3
"""
Author : Xinyuan Chen <[email protected]>
Date : 2023-09-12
Purpose: Pre-compute attributes and other data for the conversations
"""
import argparse
import json
from config import (
pre_computed_rows_json,
pre_computed_rows_msgpack,
chatgpt_linear_conversations_json_path,
alfred_title_max_length,
generated_dir,
message_preview_len,
)
from utils import (
model_slug_to_model_name,
chatgpt_conversation_id_to_url,
iso_to_month_day,
get_model_short_subtitle_suffix_update_item3_kwargs,
)
def get_rows() -> list[dict]:
    """Load the linearized conversations and normalize their fields.

    Reads the JSON document at ``chatgpt_linear_conversations_json_path``
    and, for every conversation in it:

    * replaces ``linear_messages`` with ``concatenated_messages``, the
      messages joined by a ``---`` separator line;
    * replaces ``model_slug`` with ``model``, the value returned by
      ``model_slug_to_model_name`` for that slug.

    Returns:
        The parsed list of conversation dicts, each mutated as described.

    Raises:
        KeyError: If a conversation lacks ``linear_messages`` or
            ``model_slug``.
    """
    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's locale-dependent default encoding.
    linear_conversations: list[dict] = json.loads(
        chatgpt_linear_conversations_json_path.read_text(encoding='utf-8')
    )
    for conversation in linear_conversations:
        # pop() removes the raw field so only the derived one is kept.
        conversation['concatenated_messages'] = '\n---\n'.join(
            conversation.pop('linear_messages')
        )
        conversation['model'] = model_slug_to_model_name(
            conversation.pop('model_slug')
        )
    return linear_conversations
def search_key_for_rows(row: dict) -> str:
    """Build the lowercase search string for a single row.

    Joins, with single spaces and in the dict's iteration order, every
    non-empty string value whose key does not start with ``non_key_`` or
    ``_``. Non-string values (numbers, ``None``, containers) are ignored.

    Args:
        row: Mapping of field names to values; keys are expected to be
            strings.

    Returns:
        The joined text, lowercased; an empty string when nothing qualifies.
    """
    # Keys carrying either prefix are excluded from the search text.
    excluded_prefixes = ('non_key_', '_')
    return ' '.join(
        value
        for key, value in row.items()
        if value
        and isinstance(value, str)
        and not key.startswith(excluded_prefixes)
    ).lower()
def get_and_process_rows() -> list[dict]:
    """Return the conversation rows enriched with pre-computed fields.

    Each row obtained from ``get_rows`` gains, in this order, the keys
    ``_title``, ``_quicklookurl``, ``_chatgpt_url``, ``_typingmind_url``,
    ``_item3_kwargs``, ``_search_key`` and ``_message_preview``.

    Returns:
        The same list that ``get_rows`` produced, with every row updated
        in place.
    """
    rows = get_rows()
    for row in rows:
        short_date = iso_to_month_day(row['update_time'])
        model_name = row['model']
        conversation_id = row['id']
        url_chatgpt = chatgpt_conversation_id_to_url(conversation_id, 'chatgpt')
        url_typingmind = chatgpt_conversation_id_to_url(
            conversation_id, 'typingmind'
        )

        # The helper is handed this dict together with the date and model;
        # presumably it fills it in (see its definition in utils).
        extra_kwargs = {}
        # The second returned value (a subtitle prefix) is not used here.
        short_model, _subtitle_prefix = (
            get_model_short_subtitle_suffix_update_item3_kwargs(
                short_date, model_name, extra_kwargs
            )
        )

        # Suffix shown at the right of the title: the date, optionally
        # followed by the short model name in parentheses.
        if short_model:
            suffix = f'{short_date} ({short_model})'
        else:
            suffix = f'{short_date}'

        # Pad between title and suffix so the whole string reaches
        # alfred_title_max_length, but always keep at least two spaces.
        heading = row.get('title') or ''
        gap = max(2, alfred_title_max_length - len(heading) - len(suffix))
        padding = ' ' * gap

        row.update(
            {
                '_title': f'{heading}{padding}{suffix}',
                '_quicklookurl': str(generated_dir / f"{conversation_id}.md"),
                '_chatgpt_url': url_chatgpt,
                '_typingmind_url': url_typingmind,
                '_item3_kwargs': extra_kwargs,
            }
        )
        row['_search_key'] = search_key_for_rows(row)

        # Preview is the start of the whitespace-trimmed message text.
        trimmed_messages = row['concatenated_messages'].strip()
        row['_message_preview'] = trimmed_messages[:message_preview_len]
    return rows
def get_args():
    """Parse the command line and return the resulting namespace.

    The parser defines no options of its own, so the returned namespace is
    empty; argparse still provides ``-h/--help``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Pre-compute attributes and other data for the conversations',
    )
    parsed = cli.parse_args()
    return parsed
def main():
    """Pre-compute the conversation rows and write them to disk.

    Writes the rows as indented JSON to ``pre_computed_rows_json`` and, when
    the optional ``msgpack`` package can be imported, also as MessagePack to
    ``pre_computed_rows_msgpack``. A line is printed for each file written.
    """
    # The result is not used; parsing is done for --help and for rejecting
    # unexpected arguments.
    get_args()

    rows = get_and_process_rows()

    # ensure_ascii=False leaves non-ASCII characters unescaped, so the file
    # must be encoded as UTF-8 explicitly rather than with the locale default.
    pre_computed_rows_json.write_text(
        json.dumps(rows, indent=2, ensure_ascii=False), encoding='utf-8'
    )
    print(f'Wrote pre-computed rows to {pre_computed_rows_json}')

    # msgpack is optional: silently skip the binary copy when it is not
    # installed. Only the import is guarded, so a failure while packing or
    # writing is no longer hidden.
    try:
        import msgpack
    except ImportError:
        return

    pre_computed_rows_msgpack.write_bytes(msgpack.packb(rows))  # type: ignore
    print(f'Wrote pre-computed rows to {pre_computed_rows_msgpack}')
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()