# Scrape_Aylo_v4.ps1
# NOTE: web-page residue from the code-hosting file viewer (navigation text and the
# line-number gutter 1-124) was removed here; it was never part of the script.
function Get-HeaderMG (){
    <#
    .SYNOPSIS
    Builds the header hashtable used for API requests on behalf of one studio.

    .DESCRIPTION
    Sends a HEAD request to the studio's web site, reads the value of the first
    cookie in the Set-Cookie response header and returns it under the "instance"
    key. That value is the API key which controls which studio can be scraped.

    .PARAMETER studio
    Studio name; must be one of the names accepted by the ValidateSet below.

    .OUTPUTS
    Hashtable with the keys "UserAgent" and "instance". If no cookie value could
    be read, an error is written and "instance" is an empty string.
    #>
    param(
        [ValidateSet(
            "bangbros", "realitykings", "twistys", "milehigh", "biempire",
            "babes", "erito", "mofos", "fakehub", "sexyhub", "propertysex", "metrohd",
            "brazzers", "milfed", "gilfed", "dilfed", "men", "whynotbi",
            "seancody", "iconmale", "realitydudes", "spicevids", "sweetheartvideo", "doghousedigital",
            ErrorMessage = "Error: studio argument is not supported"
        )]
        [String]$studio
    )

    # These are used to get the API key which controls which studio you can scrape.
    $useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    [uri]$urlsite = "www." + $studio + ".com"
    $webr = Invoke-WebRequest -UseBasicParsing -Uri $urlsite -Method HEAD

    # The header may arrive as one string or as several; drop empty entries so a
    # missing header is reported clearly instead of failing on a null method call.
    $setCookie = @($webr.Headers.'Set-Cookie') | Where-Object { $_ }

    $apikey = $null
    if ($setCookie) {
        # First "name=value" pair of the first cookie (attributes follow after ';').
        $firstPair = (@($setCookie)[0] -split ';')[0]
        # Split on the first '=' only, so a value that itself contains '=' is kept whole.
        $apikey = ($firstPair -split '=', 2)[1]
    }
    if ([string]::IsNullOrEmpty($apikey)) {
        Write-Error "Get-HeaderMG: no cookie value found in the Set-Cookie header returned by $urlsite"
    }

    # NOTE(review): the key below is "UserAgent", not the standard HTTP header name
    # "User-Agent" - confirm whether the API is meant to receive a real User-Agent.
    $headers = @{
        "UserAgent" = "$useragent"
        "instance"  = "$apikey"
    }
    return $headers
}
function Set-QueryParameters {
    <#
    .SYNOPSIS
    Builds the splattable parameter set (Uri, Body, Headers) for one API request.

    .PARAMETER groupid
    Optional group id. When a non-empty value is given it is sent as "groupID".

    .PARAMETER offset
    Index of the first item to request; defaults to 0.

    .PARAMETER studio
    Studio name, passed to Get-HeaderMG to obtain the request headers.

    .PARAMETER ContentType
    One of actor, movie or scene. Actors use the v1/actors endpoint; movies and
    scenes use the v2/releases endpoint with a "type" filter.

    .OUTPUTS
    Hashtable with the keys "Uri", "Body" and "Headers".
    #>
    param (
        [string]$groupid = $null,
        [Int]$offset = 0,
        [Parameter(Mandatory)]
        [string]$studio,
        # Content types can only be {actor, scene, movie}.
        [Parameter(Mandatory)]
        [ValidateSet('actor','movie','scene')]
        [string]$ContentType
    )

    $Body = @{
        limit  = 100
        offset = $offset
    }

    # A [string] parameter is never $null (it becomes ""), so test for emptiness
    # and add the filter only when a group id was actually supplied.
    if (-not [string]::IsNullOrEmpty($groupid)) {
        $Body.Add("groupID", $groupid)
    }

    $header = Get-HeaderMG -studio $studio

    # The API call for actors is different from movies and releases.
    if ($ContentType -eq "actor") {
        $urlapi = "https://site-api.project1service.com/v1/actors"
    } else {
        $urlapi = "https://site-api.project1service.com/v2/releases"
        $Body.Add("orderBy", "-dateReleased")
        $Body.Add('type', $ContentType)
    }

    $params = @{
        "Uri"     = $urlapi
        "Body"    = $Body
        "Headers" = $header
    }
    return $params
}
# Returns the number of pages needed to fetch $meta.total items when each page
# holds $meta.count items (rounded up). Returns 0 when the page size is missing
# or zero, which would otherwise raise a divide-by-zero error.
function Get-MaxPages ($meta){
    $perPage = $meta.count
    if (-not $perPage -or $perPage -le 0) {
        return 0
    }
    return [Math]::Ceiling($meta.total / $perPage)
}
# Downloads every page of one content type for one studio and returns the
# collected items from each response's "result" property.
#   $groupID     - optional group id, forwarded to Set-QueryParameters
#   $studio      - studio name, forwarded to Set-QueryParameters
#   $ContentType - content type, forwarded to Set-QueryParameters
# Returns nothing when the first response reports no items.
function Get-StudioJson ($groupID = $null, $studio, $ContentType ){
    $scenelist = New-Object -TypeName System.Collections.ArrayList

    # Build the request once; only the offset changes from page to page, so the
    # same Uri/Headers are reused instead of being rebuilt for every page.
    $params = Set-QueryParameters -groupid $groupID -studio $studio -ContentType $ContentType
    $firstPage = Invoke-RestMethod @params

    # The item count of the first page is used as the page size.
    $limit = $firstPage.meta.count
    if (-not $limit) {
        # Nothing to download (or no usable metadata): avoid dividing by zero.
        return $scenelist
    }

    $maxpage = Get-MaxPages -meta $firstPage.meta
    for ($p = 1; $p -le $maxpage; $p++) {
        Write-Host "Downloading: $p of $maxpage"
        if ($p -eq 1) {
            # The first page is already in hand; do not request it a second time.
            $scenes = $firstPage
        } else {
            $params.Body.offset = ($p - 1) * $limit
            $scenes = Invoke-RestMethod @params
        }
        if ($null -ne $scenes.result) {
            $scenelist.AddRange(@($scenes.result))
        }
    }
    return $scenelist
}
# Only three content types are supported, and not every studio offers movies.
# $studios lists every studio name this script can download.
$studios = @(
    'bangbros', 'realitykings', 'twistys', 'milehigh', 'biempire',
    'babes', 'erito', 'mofos', 'fakehub', 'sexyhub', 'propertysex', 'metrohd',
    'brazzers', 'milfed', 'gilfed', 'dilfed', 'men', 'whynotbi',
    'seancody', 'iconmale', 'realitydudes', 'spicevids', 'sweetheartvideo', 'doghousedigital'
)
$ContentTypes = 'actor', 'scene', 'movie'
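# Optional smoke test: uncomment the block below to download a single studio.
# (This note originally referred to BiEmpire and "C:\DB\Mindgeek\json\BiEmpire"; the
# commented-out code currently targets "doghousedigital" and writes under $filedir.)
# Big studios such as BiEmpire and RealityKings may hang when memory is low.
# It is known to work with 32GB of RAM, and possibly with 16GB.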
# $studios = ("doghousedigital")
# foreach ($ContentType in $ContentTypes ) {
# foreach ($studio in $studios) {
# $filedir = "C:\Users\tatooine\OneDrive\DB\Mindgeek\json\$studio"
# $filepath = Join-Path -Path $filedir -ChildPath "$ContentType.json"
# if (!(Test-Path $filedir)) {New-Item -ItemType "directory" -Path $filedir}
# Write-Host "Downloading: $studio : $ContentType"
# $json = Get-StudioJson -studio $studio -ContentType $ContentType
# $json | ConvertTo-Json -Depth 32 | Out-File -FilePath $filepath
# }
# }
# The loop below downloads every content type for every studio in $studios.
# Do not run it unless you want all data from all studios.
# It may not work for studios which do not support movies; such combinations
# are reported with a warning and skipped.
$ContentTypes = @("actor", "scene", "movie")
# Root folder for the output; one sub-folder per studio is created beneath it.
$outputRoot = "C:\Users\tatooine\OneDrive\DB\Mindgeek\json"
foreach ($ContentType in $ContentTypes) {
    foreach ($studio in $studios) {
        $filedir = "$outputRoot\$studio"
        $filepath = Join-Path -Path $filedir -ChildPath "$ContentType.json"
        if (!(Test-Path $filedir)) {
            # Out-Null keeps the created-directory object out of the console output.
            New-Item -ItemType "directory" -Path $filedir | Out-Null
        }
        Write-Host "Downloading: $studio : $ContentType"
        try {
            $json = Get-StudioJson -studio $studio -ContentType $ContentType
        } catch {
            # One failing studio/content type must not stop the remaining downloads.
            Write-Warning "Skipping $studio : $ContentType - $($_.Exception.Message)"
            continue
        }
        if ($null -eq $json) {
            # Leave any existing file alone rather than overwriting it with "null".
            Write-Warning "No data returned for $studio : $ContentType"
            continue
        }
        # Wrap in @() so a single item is still written as a JSON array.
        ConvertTo-Json -InputObject @($json) -Depth 32 | Out-File -FilePath $filepath
    }
}