automate.py
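"""Categorize the latest Scholar Inbox digest papers.

Fetches the digest, categorizes each paper, logs a markdown summary grouped by
category, and (unless --dry-run or --no-changes is given) appends the results
to each category's papers.csv.
"""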
import warnings
warnings.simplefilter(action="ignore", category=UserWarning)
import logging
import argparse
from constant import ROOT
from classifier import categorize_paper
from scholar_inbox import get_scholar_inbox_digest
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Automate the process of categorizing papers"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Run without writing any files"
    )
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    parser.add_argument(
        "--no-changes", action="store_true", help="Do not make any changes"
    )
    args = parser.parse_args()
    is_dry_run = args.dry_run
    is_debug = args.debug
    logging.basicConfig(
        level=logging.DEBUG if is_debug else logging.INFO,
        format="[%(asctime)s - L%(lineno)4s : %(funcName)-15s] %(message)s"
        if is_debug
        else "%(message)s",
        handlers=[
            logging.StreamHandler(),
        ]
        + (
            [logging.FileHandler("debug.log", encoding="utf-8")]
            if not is_dry_run
            else []
        ),
    )
    # Silence noisy HTTP client libraries
    logging.getLogger("requests").setLevel(logging.CRITICAL)
    logging.getLogger("urllib3").setLevel(logging.CRITICAL)
    logging.getLogger("httpx").setLevel(logging.CRITICAL)
    logging.getLogger("urllib3").propagate = False
    # Get the latest papers from Scholar Inbox
    papers = get_scholar_inbox_digest()

    # {category_name: {category_name, category_directory, papers: []}}
    categorization_result = {}

    # Categorize each paper (first 25 only) and group them by category name
    for paper in papers[:25]:
        title = paper["title"]
        abstract = paper["abstract"]
        categories = categorize_paper(title, abstract)
        if categories is None:
            continue
        for category in categories:
            cate_name = category["name"]
            cate_dir = category["directory"]
            # Key by category name so repeated categories accumulate papers
            # instead of overwriting the existing entry
            if cate_name not in categorization_result:
                categorization_result[cate_name] = {
                    "category_name": cate_name,
                    "category_directory": cate_dir,
                    "papers": [],
                }
            categorization_result[cate_name]["papers"].append(paper)

    assert categorization_result, "No papers were categorized"
    # Build the pull request body in markdown, grouped by category
    pull_request_body = ""
    for category in categorization_result.values():
        pull_request_body += f"## {category['category_name']}\n\n"
        for paper in category["papers"]:
            authors = paper["authors"]
            pull_request_body += f"\"{paper['title']}\",{paper['date']},[\"{paper['url']}\"]({paper['url']}),{authors}\n\n"
        pull_request_body += "\n"
    logging.info(pull_request_body)

    # Print all digest papers in markdown format with links
    print_all_paper = "\n\n---\n ## All Digest Papers From Scholar Inbox\n\n"
    for paper in papers:
        authors = paper["authors"]
        print_all_paper += f"- \"{paper['title']}\",{paper['date']},[\"{paper['url']}\"]({paper['url']}),{authors}\n\n"
    logging.info(print_all_paper)
    # Append the categorized papers to each category's papers.csv
    if not is_dry_run and not args.no_changes:
        for category in categorization_result.values():
            with open(
                f"{ROOT}/{category['category_directory']}/papers.csv", "a"
            ) as file:
                for paper in category["papers"]:
                    authors = paper["authors"]
                    file.write(
                        f"\n\"{paper['title']}\",{paper['date']},{paper['url']},{authors}\n"
                    )
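
# Example invocations, based on the flags defined above:
#   python automate.py               # categorize, log the summary, and append to papers.csv
#   python automate.py --debug       # verbose logging plus a debug.log file
#   python automate.py --dry-run     # log only; skip debug.log and the CSV writes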