script.py
import csv
from datetime import datetime, timedelta

import requests
import xlsxwriter
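# requests and xlsxwriter are third-party packages: pip install requests xlsxwriter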
# Search for spam in comments on claims from a list of channels; popular channels are the likeliest targets.
CHANNEL_IDS = []
with open('ids', 'r') as r:
    reader = csv.reader(r)
    for row in reader:
        CHANNEL_IDS.append(row[0])
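# The 'ids' file is read as CSV: one channel claim ID in the first column of each row.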
# Flag comments that contain any of these keywords or phrases
KEYWORDS = ['follow me', 'support each other', 'follow you back', 'follow my channel', "i'll follow you",
            'puedes seguirme', 'watch me', 'get free money', 'earn free bitcoin', 'follow for follow']
# How many days back to search
DAYS_BACK = 15
# Create a CSV file
CSV_FILE = True
# Create an XLSX file
XLSX_FILE = True
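# Both functions below talk to a local lbrynet daemon through its JSON-RPC API,
# which listens on http://localhost:5279 by default.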
def get_claim_ids():
    """Collect the IDs of claims published by CHANNEL_IDS in the last DAYS_BACK days."""
    claim_ids = []
    limit = datetime.now() - timedelta(days=DAYS_BACK)  # earliest release time to search
    timestamp_limit = str(int(datetime.timestamp(limit)))
    for page in range(1, 30):  # up to 29 pages of 50 claims each
        items = requests.post("http://localhost:5279", json={"method": "claim_search", "params": {
            "channel_ids": CHANNEL_IDS,
            "release_time": f'>{timestamp_limit}',
            "page_size": 50,
            "page": page,
            "no_totals": True,
        }}).json().get('result', {}).get('items', [])
        if not items:  # no more results; stop paging
            break
        for claim in items:
            claim_ids.append(claim.get('claim_id'))
    print(f'Searching spam on {len(claim_ids)} claims...')
    return claim_ids
def get_spam_comments(claim_ids):
    """Return [timestamp, comment_id, claim_id, channel_name, text] for each spam comment found."""
    spam_list = []
    for claim_id in claim_ids:
        comments = requests.post("http://localhost:5279", json={"method": "comment_list", "params": {
            "claim_id": claim_id,
            "include_replies": False,
        }}).json().get('result', {}).get('items', [])
        for comment in comments:
            content = comment.get('comment', '').lower()
            # record each comment at most once, even if several keywords match
            if any(keyword in content for keyword in KEYWORDS):
                spam_list.append([comment.get('timestamp'), comment.get('comment_id'), comment.get('claim_id'),
                                  comment.get('channel_name'), content])
    print(f'{len(spam_list)} spam comments found!')
    return spam_list
# Start
print('Searching...')
claim_ids = get_claim_ids()
data = get_spam_comments(claim_ids)
# Print result
print(data)
# Timestamp of the run plus the search window, used to name the output files
timestamp = f'{int(datetime.timestamp(datetime.now()))}-{DAYS_BACK}'
# Create XLSX file
if XLSX_FILE:
    workbook = xlsxwriter.Workbook(f'{timestamp}.xlsx')
    worksheet = workbook.add_worksheet()
    for row, item in enumerate(data):
        for col, value in enumerate(item):
            worksheet.write(row, col, value)
    workbook.close()
    print(f'{timestamp}.xlsx created')
# Create CSV file; writing with an explicit UTF-8 encoding avoids the
# encoding issues that led to this block being commented out.
if CSV_FILE:
    with open(f'{timestamp}.csv', 'w', newline='', encoding='utf-8') as c:
        writer = csv.writer(c)
        for item in data:
            writer.writerow(item)  # one field per column, instead of the whole row in one cell
    print(f'{timestamp}.csv created')
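# To run: start the LBRY daemon first (`lbrynet start`), put the channel claim IDs
# in a file named 'ids' next to this script, then run `python script.py`.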