-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobtainrealcomments.py
37 lines (30 loc) · 987 Bytes
/
obtainrealcomments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import praw
import os
from praw.models import MoreComments
from private.details import init
# Dump the top-level comments of each listed subreddit's "hot" submissions
# into humandata/<subreddit>/contents.txt, one file per subreddit.

# Client returned by the project-local init(); it is used below through
# .subreddit(...), so it is presumably an authenticated praw.Reddit instance.
reddit = init()

# Each line of authors2.txt names one account; a trailing "GPT2Bot" is
# dropped and the remainder is used as the subreddit to scrape.
_BOT_SUFFIX = "GPT2Bot"
authors = []
with open("authors2.txt") as f:
    for line in f:
        # strip() first so the suffix is also removed from a final line that
        # has no trailing newline, and so no newline leaks into path names.
        name = line.strip()
        if name.endswith(_BOT_SUFFIX):
            name = name[:-len(_BOT_SUFFIX)]
        # Blank lines (or a bare suffix) carry no subreddit name; skip them.
        if name:
            authors.append(name)

cwd = os.getcwd()
datapath = os.path.join(cwd, "humandata")
# Not used by this script; retained so the module-level name still exists.
originalpath = datapath

for author in authors:
    newpath = os.path.join(datapath, author)
    # An existing directory marks the subreddit as already handled by an
    # earlier run, so it is skipped rather than scraped again.
    # makedirs (unlike mkdir) also creates "humandata" itself on a first run.
    try:
        os.makedirs(newpath)
    except FileExistsError:
        continue

    posts = reddit.subreddit(author).hot(limit=10000)
    print(author)

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for num, submission in enumerate(posts):
        if num % 1000 == 0:
            print(num)  # coarse progress indicator
        for comment in submission.comments:
            # MoreComments placeholders are skipped; only real comments
            # contribute text.
            if isinstance(comment, MoreComments):
                continue
            pieces.append("[SEP]")
            pieces.append(comment.body)

    # Every comment body is prefixed with the "[SEP]" marker.
    allcontent = "".join(pieces)
    savepath = os.path.join(newpath, "contents.txt")
    with open(savepath, 'w', encoding="utf-8") as f:
        f.write(allcontent)