From e7c79605e1bb66e6de5b7961af7dc1f683ffc6da Mon Sep 17 00:00:00 2001 From: Nikita Bulai Date: Wed, 23 Oct 2019 13:46:43 +0300 Subject: [PATCH 1/4] Big refactoring --- .rubocop.yml | 36 +++++-- Gemfile | 13 ++- Rakefile | 6 +- gemfiles/nokogiri.gemfile | 14 +-- gemfiles/oga.gemfile | 14 +-- lib/proxy_fetcher.rb | 60 ++++++------ lib/proxy_fetcher/client/client.rb | 2 +- lib/proxy_fetcher/client/request.rb | 8 +- lib/proxy_fetcher/configuration.rb | 10 +- lib/proxy_fetcher/document/adapters.rb | 2 +- .../document/adapters/abstract_adapter.rb | 6 +- .../document/adapters/nokogiri_adapter.rb | 2 +- .../document/adapters/oga_adapter.rb | 2 +- lib/proxy_fetcher/document/node.rb | 4 +- lib/proxy_fetcher/exceptions.rb | 12 +-- lib/proxy_fetcher/manager.rb | 4 +- lib/proxy_fetcher/providers/base.rb | 43 ++++---- .../providers/free_proxy_list.rb | 19 ++-- .../providers/free_proxy_list_ssl.rb | 23 ++--- lib/proxy_fetcher/providers/gather_proxy.rb | 26 ++--- lib/proxy_fetcher/providers/http_tunnel.rb | 30 +++--- lib/proxy_fetcher/providers/proxy_list.rb | 24 ++--- lib/proxy_fetcher/providers/xroxy.rb | 26 ++--- lib/proxy_fetcher/proxy.rb | 8 +- lib/proxy_fetcher/utils/http_client.rb | 18 ++-- .../utils/proxy_list_validator.rb | 4 +- lib/proxy_fetcher/utils/proxy_validator.rb | 2 +- lib/proxy_fetcher/version.rb | 6 +- proxy_fetcher.gemspec | 34 ++++--- spec/proxy_fetcher/client/client_spec.rb | 97 ++++++++++--------- spec/proxy_fetcher/configuration_spec.rb | 22 ++--- spec/proxy_fetcher/document/adapters_spec.rb | 16 +-- spec/proxy_fetcher/document/node_spec.rb | 8 +- spec/proxy_fetcher/providers/base_spec.rb | 18 ++-- .../providers/free_proxy_list_spec.rb | 4 +- .../providers/free_proxy_list_ssl_spec.rb | 4 +- .../providers/gather_proxy_spec.rb | 4 +- .../providers/http_tunnel_spec.rb | 4 +- .../providers/multiple_providers_spec.rb | 8 +- .../providers/proxy_list_spec.rb | 4 +- spec/proxy_fetcher/providers/xroxy_spec.rb | 4 +- spec/proxy_fetcher/proxy_spec.rb | 28 +++--- 
spec/proxy_fetcher/version_spec.rb | 2 + spec/spec_helper.rb | 20 ++-- spec/support/manager_examples.rb | 42 ++++---- 45 files changed, 374 insertions(+), 369 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index dc57abc..6501874 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,14 +1,30 @@ -LineLength: - Max: 120 AllCops: - TargetRubyVersion: 2.1 + TargetRubyVersion: 2.3 Exclude: - - 'spec/**/*' - 'bin/*' DisplayCopNames: true -Rails: - Enabled: false -Documentation: - Enabled: false -FrozenStringLiteralComment: - Enabled: false + +Style/ClassAndModuleChildren: + Exclude: + - spec/**/* +Style/FrozenStringLiteralComment: + Enabled: true +Style/StringLiterals: + EnforcedStyle: double_quotes +Style/StringLiteralsInInterpolation: + EnforcedStyle: double_quotes + +Layout/MultilineMethodCallIndentation: + EnforcedStyle: indented +Layout/TrailingBlankLines: + Enabled: true +Layout/DotPosition: + EnforcedStyle: leading + +Metrics/LineLength: + Exclude: + - spec/**/* + Max: 100 +Metrics/BlockLength: + Exclude: + - spec/**/* diff --git a/Gemfile b/Gemfile index 3bb2db8..0b131f1 100644 --- a/Gemfile +++ b/Gemfile @@ -1,11 +1,14 @@ -source 'https://rubygems.org' +# frozen_string_literal: true + +source "https://rubygems.org" gemspec -gem 'nokogiri', '~> 1.8' -gem 'oga', '~> 2.0' +gem "nokogiri", "~> 1.8" +gem "oga", "~> 2.0" +gem "rubocop", "~> 0.74" group :test do - gem 'coveralls', require: false - gem 'evil-proxy', '~> 0.2' + gem "coveralls", require: false + gem "evil-proxy", "~> 0.2" end diff --git a/Rakefile b/Rakefile index 2470119..c6c547f 100644 --- a/Rakefile +++ b/Rakefile @@ -1,6 +1,8 @@ -require 'bundler/gem_tasks' +# frozen_string_literal: true -require 'rspec/core/rake_task' +require "bundler/gem_tasks" + +require "rspec/core/rake_task" RSpec::Core::RakeTask.new(:spec) task default: :spec diff --git a/gemfiles/nokogiri.gemfile b/gemfiles/nokogiri.gemfile index 2d9e8d9..4f1e13b 100644 --- a/gemfiles/nokogiri.gemfile +++ b/gemfiles/nokogiri.gemfile @@ 
-1,11 +1,13 @@ -source 'https://rubygems.org' +# frozen_string_literal: true -gemspec path: '../' +source "https://rubygems.org" -gem 'nokogiri', '~> 1.8' +gemspec path: "../" + +gem "nokogiri", "~> 1.8" group :test do - gem 'coveralls', require: false - gem 'evil-proxy', '~> 0.2' - gem 'rspec', '~> 3.6' + gem "coveralls", require: false + gem "evil-proxy", "~> 0.2" + gem "rspec", "~> 3.6" end diff --git a/gemfiles/oga.gemfile b/gemfiles/oga.gemfile index 6492862..8b52da4 100644 --- a/gemfiles/oga.gemfile +++ b/gemfiles/oga.gemfile @@ -1,11 +1,13 @@ -source 'https://rubygems.org' +# frozen_string_literal: true -gemspec path: '../' +source "https://rubygems.org" -gem 'oga', '~> 2.0' +gemspec path: "../" + +gem "oga", "~> 2.0" group :test do - gem 'coveralls', require: false - gem 'evil-proxy', '~> 0.2' - gem 'rspec', '~> 3.6' + gem "coveralls", require: false + gem "evil-proxy", "~> 0.2" + gem "rspec", "~> 3.6" end diff --git a/lib/proxy_fetcher.rb b/lib/proxy_fetcher.rb index f353e42..2caeca9 100644 --- a/lib/proxy_fetcher.rb +++ b/lib/proxy_fetcher.rb @@ -1,44 +1,44 @@ # frozen_string_literal: true -require 'uri' -require 'http' -require 'logger' +require "uri" +require "http" +require "logger" -require File.dirname(__FILE__) + '/proxy_fetcher/version' +require File.dirname(__FILE__) + "/proxy_fetcher/version" -require File.dirname(__FILE__) + '/proxy_fetcher/exceptions' -require File.dirname(__FILE__) + '/proxy_fetcher/configuration' -require File.dirname(__FILE__) + '/proxy_fetcher/configuration/providers_registry' -require File.dirname(__FILE__) + '/proxy_fetcher/proxy' -require File.dirname(__FILE__) + '/proxy_fetcher/manager' -require File.dirname(__FILE__) + '/proxy_fetcher/null_logger' +require File.dirname(__FILE__) + "/proxy_fetcher/exceptions" +require File.dirname(__FILE__) + "/proxy_fetcher/configuration" +require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry" +require File.dirname(__FILE__) + "/proxy_fetcher/proxy" +require 
File.dirname(__FILE__) + "/proxy_fetcher/manager" +require File.dirname(__FILE__) + "/proxy_fetcher/null_logger" -require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client' -require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator' -require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_list_validator' -require File.dirname(__FILE__) + '/proxy_fetcher/client/client' -require File.dirname(__FILE__) + '/proxy_fetcher/client/request' -require File.dirname(__FILE__) + '/proxy_fetcher/client/proxies_registry' +require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client" +require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator" +require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator" +require File.dirname(__FILE__) + "/proxy_fetcher/client/client" +require File.dirname(__FILE__) + "/proxy_fetcher/client/request" +require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry" -require File.dirname(__FILE__) + '/proxy_fetcher/document' -require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters' -require File.dirname(__FILE__) + '/proxy_fetcher/document/node' -require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/abstract_adapter' -require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/nokogiri_adapter' -require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/oga_adapter' +require File.dirname(__FILE__) + "/proxy_fetcher/document" +require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters" +require File.dirname(__FILE__) + "/proxy_fetcher/document/node" +require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter" +require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter" +require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter" ## # Ruby / JRuby lib for managing proxies module ProxyFetcher # ProxyFetcher providers namespace module Providers - require 
File.dirname(__FILE__) + '/proxy_fetcher/providers/base' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list_ssl' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/gather_proxy' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/http_tunnel' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_list' - require File.dirname(__FILE__) + '/proxy_fetcher/providers/xroxy' + require File.dirname(__FILE__) + "/proxy_fetcher/providers/base" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list" + require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy" end # Main ProxyFetcher module. @@ -75,7 +75,7 @@ def configure # Returns ProxyFetcher logger instance. 
# - # @return [Logger, NullLogger] logger object + # @return [Logger, ProxyFetcher::NullLogger] logger object # def logger return @logger if defined?(@logger) diff --git a/lib/proxy_fetcher/client/client.rb b/lib/proxy_fetcher/client/client.rb index 4e70641..2eb01ca 100644 --- a/lib/proxy_fetcher/client/client.rb +++ b/lib/proxy_fetcher/client/client.rb @@ -152,7 +152,7 @@ def request_without_payload(method, url, headers, options) # def default_headers { - 'User-Agent' => ProxyFetcher.config.user_agent + "User-Agent" => ProxyFetcher.config.user_agent } end diff --git a/lib/proxy_fetcher/client/request.rb b/lib/proxy_fetcher/client/request.rb index 0fe5e8c..e373bb7 100644 --- a/lib/proxy_fetcher/client/request.rb +++ b/lib/proxy_fetcher/client/request.rb @@ -50,7 +50,7 @@ def self.execute(args) # @return [Request] # def initialize(args) - raise ArgumentError, 'args must be a Hash!' unless args.is_a?(Hash) + raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash) @url = args.fetch(:url) @method = args.fetch(:method).to_s.downcase @@ -86,9 +86,9 @@ def execute # def build_http_client HTTP.via(proxy.addr, proxy.port.to_i) - .headers(headers) - .timeout(connect: timeout, read: timeout) - .follow(max_hops: max_redirects) + .headers(headers) + .timeout(connect: timeout, read: timeout) + .follow(max_hops: max_redirects) end # Default SSL options that will be used for connecting to resources diff --git a/lib/proxy_fetcher/configuration.rb b/lib/proxy_fetcher/configuration.rb index 6c1283e..acfe2ae 100644 --- a/lib/proxy_fetcher/configuration.rb +++ b/lib/proxy_fetcher/configuration.rb @@ -35,11 +35,7 @@ class Configuration # @!attribute [r] adapter # @return [Object] HTML parser adapter - attr_accessor :adapter - - # @!attribute [r] adapter_class - # @return [Object] HTML adapter class - attr_reader :adapter_class + attr_reader :adapter # @!attribute [r] http_client # @return [Object] HTTP client class @@ -58,8 +54,8 @@ class Configuration # # Default is Google 
Chrome 60, but can be changed in ProxyFetcher.config. # - DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 ' \ - '(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36'.freeze + DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 " \ + "(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36".freeze # HTML parser adapter name. # diff --git a/lib/proxy_fetcher/document/adapters.rb b/lib/proxy_fetcher/document/adapters.rb index 32a91f9..90c99aa 100644 --- a/lib/proxy_fetcher/document/adapters.rb +++ b/lib/proxy_fetcher/document/adapters.rb @@ -13,7 +13,7 @@ class Document # ProxyFetcher::Document::AbstractAdapter. class Adapters # Adapters class name suffix - ADAPTER = 'Adapter'.freeze + ADAPTER = "Adapter".freeze private_constant :ADAPTER class << self diff --git a/lib/proxy_fetcher/document/adapters/abstract_adapter.rb b/lib/proxy_fetcher/document/adapters/abstract_adapter.rb index 7af1a74..a94efc3 100644 --- a/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +++ b/lib/proxy_fetcher/document/adapters/abstract_adapter.rb @@ -42,7 +42,7 @@ def css(selector) # node # def proxy_node - self.class.const_get('Node') + self.class.const_get("Node") end # Installs adapter requirements. @@ -53,8 +53,8 @@ def proxy_node def self.setup!(*args) install_requirements!(*args) self - rescue LoadError, StandardError => error - raise Exceptions::AdapterSetupError.new(name, error.message) + rescue LoadError, StandardError => e + raise Exceptions::AdapterSetupError.new(name, e.message) end end end diff --git a/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb b/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb index 987de58..a684ea0 100644 --- a/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +++ b/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb @@ -6,7 +6,7 @@ class Document class NokogiriAdapter < AbstractAdapter # Requires Nokogiri gem to the application. def self.install_requirements! 
- require 'nokogiri' + require "nokogiri" end # Parses raw HTML content with specific gem. diff --git a/lib/proxy_fetcher/document/adapters/oga_adapter.rb b/lib/proxy_fetcher/document/adapters/oga_adapter.rb index 3a42a77..51b49db 100644 --- a/lib/proxy_fetcher/document/adapters/oga_adapter.rb +++ b/lib/proxy_fetcher/document/adapters/oga_adapter.rb @@ -6,7 +6,7 @@ class Document class OgaAdapter < AbstractAdapter # Requires Oga gem to the application. def self.install_requirements! - require 'oga' + require "oga" end # Parses raw HTML content with specific gem. diff --git a/lib/proxy_fetcher/document/node.rb b/lib/proxy_fetcher/document/node.rb index acaa7ee..e02e963 100644 --- a/lib/proxy_fetcher/document/node.rb +++ b/lib/proxy_fetcher/document/node.rb @@ -81,9 +81,9 @@ def html # clean text # def clear(text) - return '' if text.nil? || text.empty? + return "" if text.nil? || text.empty? - text.strip.gsub(/[ \t]/i, '') + text.strip.gsub(/[\t]/i, "") end end end diff --git a/lib/proxy_fetcher/exceptions.rb b/lib/proxy_fetcher/exceptions.rb index 31f658a..e82e410 100644 --- a/lib/proxy_fetcher/exceptions.rb +++ b/lib/proxy_fetcher/exceptions.rb @@ -13,7 +13,7 @@ class WrongCustomClass < Error # @return [WrongCustomClass] # def initialize(klass, methods) - required_methods = Array(methods).join(', ') + required_methods = Array(methods).join(", ") super("#{klass} must respond to [#{required_methods}] class methods!") end end @@ -53,7 +53,7 @@ class MaximumRedirectsReached < Error # @return [MaximumRedirectsReached] # def initialize(*) - super('maximum redirects reached') + super("maximum redirects reached") end end @@ -66,7 +66,7 @@ class MaximumRetriesReached < Error # @return [MaximumRetriesReached] # def initialize(*) - super('reached the maximum number of retries') + super("reached the maximum number of retries") end end @@ -95,7 +95,7 @@ def initialize(*) super(<<-MSG.strip.squeeze you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = 
:nokogiri. You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation. - MSG + MSG ) end end @@ -111,7 +111,7 @@ class AdapterSetupError < Error # @return [AdapterSetupError] # def initialize(adapter_name, error) - adapter = demodulize(adapter_name.gsub('Adapter', '')) + adapter = demodulize(adapter_name.gsub("Adapter", "")) super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'") end @@ -127,7 +127,7 @@ def initialize(adapter_name, error) # def demodulize(path) path = path.to_s - index = path.rindex('::') + index = path.rindex("::") index ? path[(index + 2)..-1] : path end diff --git a/lib/proxy_fetcher/manager.rb b/lib/proxy_fetcher/manager.rb index d413f81..92b5f61 100644 --- a/lib/proxy_fetcher/manager.rb +++ b/lib/proxy_fetcher/manager.rb @@ -55,7 +55,7 @@ def refresh_list!(filters = nil) # Pop just first proxy (and back it to the end of the proxy list). # - # @return [Proxy] + # @return [ProxyFetcher::Proxy, NilClass] # proxy object from the list # def get @@ -72,7 +72,7 @@ def get # Pop first valid proxy (and back it to the end of the proxy list) # Invalid proxies will be removed from the list # - # @return [Proxy] + # @return [ProxyFetcher::Proxy, NilClass] # proxy object from the list # def get! diff --git a/lib/proxy_fetcher/providers/base.rb b/lib/proxy_fetcher/providers/base.rb index 216ca5e..c0d1863 100644 --- a/lib/proxy_fetcher/providers/base.rb +++ b/lib/proxy_fetcher/providers/base.rb @@ -6,12 +6,15 @@ module Providers class Base # Loads proxy provider page content, extract proxy list from it # and convert every entry to proxy object. - def fetch_proxies!(filters = {}) + def fetch_proxies(filters = {}) raw_proxies = load_proxy_list(filters) proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact proxies.reject { |proxy| proxy.addr.nil? } end + # For retro-compatibility + alias fetch_proxies! 
fetch_proxies + def provider_url raise NotImplementedError, "#{__method__} must be implemented in a descendant class!" end @@ -24,10 +27,17 @@ def provider_params {} end + # @return [Hash] + # Provider headers required to fetch the proxy list + # def provider_headers {} end + def xpath + raise NotImplementedError, "#{__method__} must be implemented in a descendant class!" + end + # Just synthetic sugar to make it easier to call #fetch_proxies! method. def self.fetch_proxies!(*args) new.fetch_proxies!(*args) @@ -41,7 +51,7 @@ def self.fetch_proxies!(*args) # HTML body # def load_html(url, filters = {}) - raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash) + raise ArgumentError, "filters must be a Hash" if filters && !filters.is_a?(Hash) uri = URI.parse(url) # TODO: query for post request? @@ -71,29 +81,28 @@ def load_document(url, filters = {}) ProxyFetcher::Document.parse(html) end + # Fetches HTML content by sending HTTP request to the provider URL and + # parses the document (built as abstract ProxyFetcher::Document) + # to return all the proxy entries (HTML nodes). + # + # @return [Array] + # Collection of extracted HTML nodes with full proxy info + # + def load_proxy_list(filters = {}) + doc = load_document(provider_url, filters) + doc.xpath(xpath) + end + def build_proxy(*args) to_proxy(*args) - rescue StandardError => error + rescue StandardError => e ProxyFetcher.logger.warn( - "Failed to build Proxy object for #{self.class.name} due to error: #{error.message}" + "Failed to build Proxy object for #{self.class.name} due to error: #{e.message}" ) nil end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). - # - # Abstract method. 
Must be implemented in a descendant class - # - # @return [Array] - # list of proxy elements from the providers HTML content - # - def load_proxy_list(*) - raise NotImplementedError, "#{__method__} must be implemented in a descendant class!" - end - # Convert HTML element with proxy info to ProxyFetcher::Proxy instance. # # Abstract method. Must be implemented in a descendant class diff --git a/lib/proxy_fetcher/providers/free_proxy_list.rb b/lib/proxy_fetcher/providers/free_proxy_list.rb index 9baa7f5..6e2dc30 100644 --- a/lib/proxy_fetcher/providers/free_proxy_list.rb +++ b/lib/proxy_fetcher/providers/free_proxy_list.rb @@ -6,13 +6,12 @@ module Providers class FreeProxyList < Base # Provider URL to fetch proxy list def provider_url - 'https://free-proxy-list.net/' + "https://free-proxy-list.net/" end # [NOTE] Doesn't support filtering - def load_proxy_list(_filters = {}) - doc = load_document(provider_url, {}) - doc.xpath('//table[@id="proxylisttable"]/tbody/tr') + def xpath + '//table[@id="proxylisttable"]/tbody/tr' end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -26,10 +25,10 @@ def load_proxy_list(_filters = {}) # def to_proxy(html_node) ProxyFetcher::Proxy.new.tap do |proxy| - proxy.addr = html_node.content_at('td[1]') - proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, '')) - proxy.country = html_node.content_at('td[4]') - proxy.anonymity = html_node.content_at('td[5]') + proxy.addr = html_node.content_at("td[1]") + proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, "")) + proxy.country = html_node.content_at("td[4]") + proxy.anonymity = html_node.content_at("td[5]") proxy.type = parse_type(html_node) end end @@ -45,8 +44,8 @@ def to_proxy(html_node) # Proxy type # def parse_type(html_node) - https = html_node.content_at('td[6]') - https && https.casecmp('yes').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP + https = html_node.content_at("td[6]") + https && https.casecmp("yes").zero? ? 
ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP end end diff --git a/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb b/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb index 337686a..00a90bc 100644 --- a/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +++ b/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb @@ -6,20 +6,11 @@ module Providers class FreeProxyListSSL < Base # Provider URL to fetch proxy list def provider_url - 'https://www.sslproxies.org/' + "https://www.sslproxies.org/" end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). - # - # @return [Array] - # Collection of extracted HTML nodes with full proxy info - # - # [NOTE] Doesn't support filtering - def load_proxy_list(_filters = {}) - doc = load_document(provider_url, {}) - doc.xpath('//table[@id="proxylisttable"]/tbody/tr') + def xpath + '//table[@id="proxylisttable"]/tbody/tr' end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -33,10 +24,10 @@ def load_proxy_list(_filters = {}) # def to_proxy(html_node) ProxyFetcher::Proxy.new.tap do |proxy| - proxy.addr = html_node.content_at('td[1]') - proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, '')) - proxy.country = html_node.content_at('td[4]') - proxy.anonymity = html_node.content_at('td[5]') + proxy.addr = html_node.content_at("td[1]") + proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, "")) + proxy.country = html_node.content_at("td[4]") + proxy.anonymity = html_node.content_at("td[5]") proxy.type = ProxyFetcher::Proxy::HTTPS end end diff --git a/lib/proxy_fetcher/providers/gather_proxy.rb b/lib/proxy_fetcher/providers/gather_proxy.rb index b8ea355..bf35968 100644 --- a/lib/proxy_fetcher/providers/gather_proxy.rb +++ b/lib/proxy_fetcher/providers/gather_proxy.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'json' +require "json" module 
ProxyFetcher module Providers @@ -8,19 +8,11 @@ module Providers class GatherProxy < Base # Provider URL to fetch proxy list def provider_url - 'http://www.gatherproxy.com/' + "http://www.gatherproxy.com/" end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). - # - # @return [Array] - # Collection of extracted HTML nodes with full proxy info - # - def load_proxy_list(*) - doc = load_document(provider_url) - doc.xpath('//div[@class="proxy-list"]/table/script') + def xpath + '//div[@class="proxy-list"]/table/script' end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -36,11 +28,11 @@ def to_proxy(html_node) json = parse_json(html_node) ProxyFetcher::Proxy.new.tap do |proxy| - proxy.addr = json['PROXY_IP'] - proxy.port = json['PROXY_PORT'].to_i(16) - proxy.anonymity = json['PROXY_TYPE'] - proxy.country = json['PROXY_COUNTRY'] - proxy.response_time = json['PROXY_TIME'].to_i + proxy.addr = json["PROXY_IP"] + proxy.port = json["PROXY_PORT"].to_i(16) + proxy.anonymity = json["PROXY_TYPE"] + proxy.country = json["PROXY_COUNTRY"] + proxy.response_time = json["PROXY_TIME"].to_i proxy.type = ProxyFetcher::Proxy::HTTP end end diff --git a/lib/proxy_fetcher/providers/http_tunnel.rb b/lib/proxy_fetcher/providers/http_tunnel.rb index 71e1efa..eeac54c 100644 --- a/lib/proxy_fetcher/providers/http_tunnel.rb +++ b/lib/proxy_fetcher/providers/http_tunnel.rb @@ -6,19 +6,11 @@ module Providers class HTTPTunnel < Base # Provider URL to fetch proxy list def provider_url - 'http://www.httptunnel.ge/ProxyListForFree.aspx' + "http://www.httptunnel.ge/ProxyListForFree.aspx" end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). 
- # - # @return [Array] - # Collection of extracted HTML nodes with full proxy info - # - def load_proxy_list(_filters = {}) - doc = load_document(provider_url) - doc.xpath('//table[contains(@id, "GridView")]/tr[(count(td)>2)]') + def xpath + '//table[contains(@id, "GridView")]/tr[(count(td)>2)]' end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -53,7 +45,7 @@ def to_proxy(html_node) # URI object # def parse_proxy_uri(html_node) - full_addr = html_node.content_at('td[1]') + full_addr = html_node.content_at("td[1]") URI.parse("http://#{full_addr}") end @@ -66,7 +58,7 @@ def parse_proxy_uri(html_node) # Country code # def parse_country(html_node) - html_node.find('.//img').attr('title') + html_node.find(".//img").attr("title") end # Parses HTML node to extract proxy anonymity level. @@ -78,14 +70,14 @@ def parse_country(html_node) # Anonymity level # def parse_anonymity(html_node) - transparency = html_node.content_at('td[5]').to_sym + transparency = html_node.content_at("td[5]").to_sym { - A: 'Anonymous', - E: 'Elite', - T: 'Transparent', - U: 'Unknown' - }.fetch(transparency, 'Unknown') + A: "Anonymous", + E: "Elite", + T: "Transparent", + U: "Unknown" + }.fetch(transparency, "Unknown") end end diff --git a/lib/proxy_fetcher/providers/proxy_list.rb b/lib/proxy_fetcher/providers/proxy_list.rb index a762334..1033e1b 100644 --- a/lib/proxy_fetcher/providers/proxy_list.rb +++ b/lib/proxy_fetcher/providers/proxy_list.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'base64' +require "base64" module ProxyFetcher module Providers @@ -8,19 +8,11 @@ module Providers class ProxyList < Base # Provider URL to fetch proxy list def provider_url - 'https://proxy-list.org/english/index.php' + "https://proxy-list.org/english/index.php" end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). 
- # - # @return [Array] - # Collection of extracted HTML nodes with full proxy info - # - def load_proxy_list(filters = {}) - doc = load_document(provider_url, filters) - doc.css('.table-wrap .table ul') + def xpath + '//div[@class="table-wrap"]/div[@class="table"]/ul' end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -38,9 +30,9 @@ def to_proxy(html_node) proxy.addr = uri.host proxy.port = uri.port - proxy.type = html_node.content_at('li[2]') - proxy.anonymity = html_node.content_at('li[4]') - proxy.country = html_node.find("li[5]//span[@class='country']").attr('title') + proxy.type = html_node.content_at("li[2]") + proxy.anonymity = html_node.content_at("li[4]") + proxy.country = html_node.find("li[5]//span[@class='country']").attr("title") end end @@ -55,7 +47,7 @@ def to_proxy(html_node) # URI object # def parse_proxy_uri(html_node) - full_addr = ::Base64.decode64(html_node.at_css('li script').html.match(/'(.+)'/)[1]) + full_addr = ::Base64.decode64(html_node.at_css("li script").html.match(/'(.+)'/)[1]) URI.parse("http://#{full_addr}") end end diff --git a/lib/proxy_fetcher/providers/xroxy.rb b/lib/proxy_fetcher/providers/xroxy.rb index c9ffdc3..fd04748 100644 --- a/lib/proxy_fetcher/providers/xroxy.rb +++ b/lib/proxy_fetcher/providers/xroxy.rb @@ -6,19 +6,11 @@ module Providers class XRoxy < Base # Provider URL to fetch proxy list def provider_url - 'https://www.xroxy.com/free-proxy-lists/' + "https://www.xroxy.com/free-proxy-lists/" end - # Fetches HTML content by sending HTTP request to the provider URL and - # parses the document (built as abstract ProxyFetcher::Document) - # to return all the proxy entries (HTML nodes). 
- # - # @return [Array] - # Collection of extracted HTML nodes with full proxy info - # - def load_proxy_list(filters = { type: 'All_http' }) - doc = load_document(provider_url, filters) - doc.xpath('//div/table/tbody/tr') + def xpath + "//div/table/tbody/tr" end # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy @@ -32,12 +24,12 @@ def load_proxy_list(filters = { type: 'All_http' }) # def to_proxy(html_node) ProxyFetcher::Proxy.new.tap do |proxy| - proxy.addr = html_node.content_at('td[1]') - proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, '')) - proxy.anonymity = html_node.content_at('td[3]') - proxy.country = html_node.content_at('td[5]') - proxy.response_time = Integer(html_node.content_at('td[6]')) - proxy.type = html_node.content_at('td[3]') + proxy.addr = html_node.content_at("td[1]") + proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, "")) + proxy.anonymity = html_node.content_at("td[3]") + proxy.country = html_node.content_at("td[5]") + proxy.response_time = Integer(html_node.content_at("td[6]")) + proxy.type = html_node.content_at("td[3]") end end end diff --git a/lib/proxy_fetcher/proxy.rb b/lib/proxy_fetcher/proxy.rb index baf03c6..3f3ee8b 100644 --- a/lib/proxy_fetcher/proxy.rb +++ b/lib/proxy_fetcher/proxy.rb @@ -29,10 +29,10 @@ class Proxy # Proxy types TYPES = [ - HTTP = 'HTTP'.freeze, - HTTPS = 'HTTPS'.freeze, - SOCKS4 = 'SOCKS4'.freeze, - SOCKS5 = 'SOCKS5'.freeze + HTTP = "HTTP".freeze, + HTTPS = "HTTPS".freeze, + SOCKS4 = "SOCKS4".freeze, + SOCKS5 = "SOCKS5".freeze ].freeze # Proxy type predicates (#socks4?, #https?) 
diff --git a/lib/proxy_fetcher/utils/http_client.rb b/lib/proxy_fetcher/utils/http_client.rb index 712e98a..fec29f3 100644 --- a/lib/proxy_fetcher/utils/http_client.rb +++ b/lib/proxy_fetcher/utils/http_client.rb @@ -70,22 +70,24 @@ def initialize(url, method: :get, params: {}, headers: {}) def fetch response = process_http_request response.body.to_s - rescue StandardError => error - ProxyFetcher.logger.warn("Failed to process request to #{url} (#{error.message})") - '' + rescue StandardError => e + ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})") + "" end def fetch_with_headers process_http_request - rescue StandardError => error - ProxyFetcher.logger.warn("Failed to process request to #{url} (#{error.message})") - HTTP::Response.new(version: '1.1', status: 500, body: '') + rescue StandardError => e + ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})") + HTTP::Response.new(version: "1.1", status: 500, body: "") end protected def process_http_request(http_method: method, http_params: params) - raise ArgumentError, 'wrong http method name!' unless HTTP::Request::METHODS.include?(http_method) + unless HTTP::Request::METHODS.include?(http_method) + raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!" 
+ end http.public_send( http_method.to_sym, url, @@ -101,7 +103,7 @@ def process_http_request(http_method: method, http_params: params) # def default_headers { - 'User-Agent' => ProxyFetcher.config.user_agent + "User-Agent" => ProxyFetcher.config.user_agent } end end diff --git a/lib/proxy_fetcher/utils/proxy_list_validator.rb b/lib/proxy_fetcher/utils/proxy_list_validator.rb index f2036f2..fe5d6a5 100644 --- a/lib/proxy_fetcher/utils/proxy_list_validator.rb +++ b/lib/proxy_fetcher/utils/proxy_list_validator.rb @@ -34,7 +34,9 @@ def validate proxy = target_proxies_lock.synchronize { target_proxies.shift } break unless proxy - connectable_proxies_lock.synchronize { connectable_proxies << proxy } if proxy.connectable? + if proxy.connectable? + connectable_proxies_lock.synchronize { connectable_proxies << proxy } + end end end end diff --git a/lib/proxy_fetcher/utils/proxy_validator.rb b/lib/proxy_fetcher/utils/proxy_validator.rb index f2f9d9a..fb20905 100644 --- a/lib/proxy_fetcher/utils/proxy_validator.rb +++ b/lib/proxy_fetcher/utils/proxy_validator.rb @@ -6,7 +6,7 @@ module ProxyFetcher # URL to check if proxy can be used (aka connectable?). class ProxyValidator # Default URL that will be used to check if proxy can be used. - URL_TO_CHECK = 'https://google.com'.freeze + URL_TO_CHECK = "https://google.com".freeze # Short variant to validate proxy. 
# diff --git a/lib/proxy_fetcher/version.rb b/lib/proxy_fetcher/version.rb index 7d91195..39d3446 100644 --- a/lib/proxy_fetcher/version.rb +++ b/lib/proxy_fetcher/version.rb @@ -13,11 +13,11 @@ module VERSION # Major version number MAJOR = 0 # Minor version number - MINOR = 10 + MINOR = 11 # Smallest version number - TINY = 2 + TINY = 0 # Full version number - STRING = [MAJOR, MINOR, TINY].compact.join('.') + STRING = [MAJOR, MINOR, TINY].compact.join(".") end end diff --git a/proxy_fetcher.gemspec b/proxy_fetcher.gemspec index e51b04d..7acc736 100644 --- a/proxy_fetcher.gemspec +++ b/proxy_fetcher.gemspec @@ -1,25 +1,27 @@ -$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib')) +# frozen_string_literal: true -require 'proxy_fetcher/version' +$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib")) + +require "proxy_fetcher/version" Gem::Specification.new do |gem| - gem.name = 'proxy_fetcher' + gem.name = "proxy_fetcher" gem.version = ProxyFetcher.gem_version - gem.summary = 'Ruby gem for dealing with proxy lists from different providers' - gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \ - 'using proxies by fetching and validating proxy lists from the different providers.' - gem.authors = ['Nikita Bulai'] - gem.email = 'bulajnikita@gmail.com' - gem.require_paths = ['lib'] - gem.bindir = 'bin' + gem.summary = "Ruby gem for dealing with proxy lists from different providers" + gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \ + "using proxies by fetching and validating proxy lists from the different providers." 
+ gem.authors = ["Nikita Bulai"] + gem.email = "bulajnikita@gmail.com" + gem.require_paths = ["lib"] + gem.bindir = "bin" gem.files = `git ls-files`.split($RS) - %w[README.md .travis.yml .rubocop.yml] gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) } - gem.homepage = 'http://github.com/nbulaj/proxy_fetcher' - gem.license = 'MIT' - gem.required_ruby_version = '>= 2.0.0' + gem.homepage = "http://github.com/nbulaj/proxy_fetcher" + gem.license = "MIT" + gem.required_ruby_version = ">= 2.0.0" - gem.add_runtime_dependency 'http', '>= 3', '< 5' + gem.add_runtime_dependency "http", ">= 3", "< 5" - gem.add_development_dependency 'rake', '>= 12.0' - gem.add_development_dependency 'rspec', '~> 3.5' + gem.add_development_dependency "rake", ">= 12.0" + gem.add_development_dependency "rspec", "~> 3.5" end diff --git a/spec/proxy_fetcher/client/client_spec.rb b/spec/proxy_fetcher/client/client_spec.rb index 941b30a..03c43ef 100644 --- a/spec/proxy_fetcher/client/client_spec.rb +++ b/spec/proxy_fetcher/client/client_spec.rb @@ -1,10 +1,10 @@ # frozen_string_literal: true -require 'spec_helper' -require 'json' +require "spec_helper" +require "json" -require 'evil-proxy' -require 'evil-proxy/async' +require "evil-proxy" +require "evil-proxy/async" xdescribe ProxyFetcher::Client do before :all do @@ -23,119 +23,128 @@ # Use local proxy server in order to avoid side effects, non-working proxies, etc before :each do - proxy = ProxyFetcher::Proxy.new(addr: '127.0.0.1', port: 3128, type: 'HTTP, HTTPS') + proxy = ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [proxy]) allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies!).and_return([proxy]) end - context 'GET request with the valid proxy' do - it 'successfully returns page content for HTTP' do - content = ProxyFetcher::Client.get('http://httpbin.org') + context "GET 
request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.get("http://httpbin.org") expect(content).not_to be_empty end - it 'successfully returns page content for HTTPS' do - content = ProxyFetcher::Client.get('https://httpbin.org') + it "successfully returns page content for HTTPS" do + content = ProxyFetcher::Client.get("https://httpbin.org") expect(content).not_to be_empty end - it 'successfully returns page content using custom proxy' do + it "successfully returns page content using custom proxy" do manager = ProxyFetcher::Manager.new proxy = manager.get! until proxy - content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy }) + content = ProxyFetcher::Client.get("http://httpbin.org", options: { proxy: proxy }) expect(content).not_to be_empty end end - context 'POST request with the valid proxy' do - it 'successfully returns page content for HTTP' do + context "POST request with the valid proxy" do + it "successfully returns page content for HTTP" do headers = { - 'X-Proxy-Fetcher-Version' => ProxyFetcher::VERSION::STRING + "X-Proxy-Fetcher-Version" => ProxyFetcher::VERSION::STRING } - content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value' } , headers: headers) + + content = ProxyFetcher::Client.post( + "http://httpbin.org/post", + { param: "value" }, + headers: headers + ) expect(content).not_to be_empty json = JSON.parse(content) - expect(json['headers']['X-Proxy-Fetcher-Version']).to eq(ProxyFetcher::VERSION::STRING) - expect(json['headers']['User-Agent']).to eq(ProxyFetcher.config.user_agent) + expect(json["headers"]["X-Proxy-Fetcher-Version"]).to eq(ProxyFetcher::VERSION::STRING) + expect(json["headers"]["User-Agent"]).to eq(ProxyFetcher.config.user_agent) end end - context 'PUT request with the valid proxy' do - it 'successfully returns page content for HTTP' do - content = ProxyFetcher::Client.put('http://httpbin.org/put', 'param=PutValue') + 
context "PUT request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue") expect(content).not_to be_empty json = JSON.parse(content) - expect(json['data']).to eq('param=PutValue') + expect(json["data"]).to eq("param=PutValue") end end - context 'PATCH request with the valid proxy' do - it 'successfully returns page content for HTTP' do - content = ProxyFetcher::Client.patch('http://httpbin.org/patch', param: 'value') + context "PATCH request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value") expect(content).not_to be_empty json = JSON.parse(content) - expect(json['form']['param']).to eq('value') + expect(json["form"]["param"]).to eq("value") end end - context 'DELETE request with the valid proxy' do - it 'successfully returns page content for HTTP' do - content = ProxyFetcher::Client.delete('http://httpbin.org/delete') + context "DELETE request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.delete("http://httpbin.org/delete") expect(content).not_to be_empty end end - context 'HEAD request with the valid proxy' do - it 'successfully works' do - content = ProxyFetcher::Client.head('http://httpbin.org') + context "HEAD request with the valid proxy" do + it "successfully works" do + content = ProxyFetcher::Client.head("http://httpbin.org") expect(content).to be_empty end end - context 'retries' do - it 'raises an error when reaches max retries limit' do + context "retries" do + it "raises an error when reaches max retries limit" do allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError) - expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached) + expect { ProxyFetcher::Client.get("http://httpbin.org") } 
+ .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached) end - it 'raises an error when http request returns an error' do + it "raises an error when http request returns an error" do allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new) - expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached) + expect { ProxyFetcher::Client.get("http://httpbin.org") } + .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached) end - it 'refreshes proxy lists if no proxy found' do + it "refreshes proxy lists if no proxy found" do ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', []) - expect { ProxyFetcher::Client.get('http://httpbin.org') }.not_to raise_error + expect { ProxyFetcher::Client.get("http://httpbin.org") } + .not_to raise_error end end - context 'redirects' do - it 'follows redirect when present' do - content = ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/2') + context "redirects" do + it "follows redirect when present" do + content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2") expect(content).not_to be_empty end - it 'raises an error when reaches max redirects limit' do - expect { ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/11') }.to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached) + it "raises an error when reaches max redirects limit" do + expect { ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/11") } + .to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached) end end end diff --git a/spec/proxy_fetcher/configuration_spec.rb b/spec/proxy_fetcher/configuration_spec.rb index a6ccbcf..0478dd1 100644 --- a/spec/proxy_fetcher/configuration_spec.rb +++ b/spec/proxy_fetcher/configuration_spec.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Configuration do before { 
ProxyFetcher.config.reset! } after { ProxyFetcher.config.reset! } - context 'custom HTTP client' do - it 'successfully setups if class has all the required methods' do + context "custom HTTP client" do + it "successfully setups if class has all the required methods" do class MyHTTPClient def self.fetch(url) url @@ -17,7 +17,7 @@ def self.fetch(url) expect { ProxyFetcher.config.http_client = MyHTTPClient }.not_to raise_error end - it 'failed on setup if required methods are missing' do + it "failed on setup if required methods are missing" do MyWrongHTTPClient = Class.new expect { ProxyFetcher.config.http_client = MyWrongHTTPClient } @@ -25,8 +25,8 @@ def self.fetch(url) end end - context 'custom proxy validator' do - it 'successfully setups if class has all the required methods' do + context "custom proxy validator" do + it "successfully setups if class has all the required methods" do class MyProxyValidator def self.connectable?(*) true @@ -36,7 +36,7 @@ def self.connectable?(*) expect { ProxyFetcher.config.proxy_validator = MyProxyValidator }.not_to raise_error end - it 'failed on setup if required methods are missing' do + it "failed on setup if required methods are missing" do MyWrongProxyValidator = Class.new expect { ProxyFetcher.config.proxy_validator = MyWrongProxyValidator } @@ -44,8 +44,8 @@ def self.connectable?(*) end end - context 'custom provider' do - it 'fails on registration if provider class already registered' do + context "custom provider" do + it "fails on registration if provider class already registered" do expect { ProxyFetcher::Configuration.register_provider(:xroxy, Class.new) } .to raise_error(ProxyFetcher::Exceptions::RegisteredProvider) end @@ -58,13 +58,13 @@ def self.connectable?(*) end end - context 'custom HTML parsing adapter' do + context "custom HTML parsing adapter" do it "fails if adapter can't be installed" do old_adapter = ProxyFetcher.config.adapter.dup class CustomAdapter < ProxyFetcher::Document::AbstractAdapter def 
self.install_requirements! - require 'not_existing_gem' + require "not_existing_gem" end end diff --git a/spec/proxy_fetcher/document/adapters_spec.rb b/spec/proxy_fetcher/document/adapters_spec.rb index 6a2401c..03ea439 100644 --- a/spec/proxy_fetcher/document/adapters_spec.rb +++ b/spec/proxy_fetcher/document/adapters_spec.rb @@ -1,26 +1,26 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Document::Adapters do - describe '#lookup' do - it 'returns predefined adapters if symbol or string passed' do - expect(described_class.lookup('nokogiri')).to eq(ProxyFetcher::Document::NokogiriAdapter) + describe "#lookup" do + it "returns predefined adapters if symbol or string passed" do + expect(described_class.lookup("nokogiri")).to eq(ProxyFetcher::Document::NokogiriAdapter) expect(described_class.lookup(:oga)).to eq(ProxyFetcher::Document::OgaAdapter) end - it 'returns self if class passed' do + it "returns self if class passed" do expect(described_class.lookup(Struct)).to eq(Struct) end - it 'raises an exception if passed value is blank' do + it "raises an exception if passed value is blank" do expect { described_class.lookup(nil) }.to raise_error(ProxyFetcher::Exceptions::BlankAdapter) - expect { described_class.lookup('') }.to raise_error(ProxyFetcher::Exceptions::BlankAdapter) + expect { described_class.lookup("") }.to raise_error(ProxyFetcher::Exceptions::BlankAdapter) end it "raises an exception if adapter doesn't exist" do - expect { described_class.lookup('wrong') }.to raise_error(ProxyFetcher::Exceptions::UnknownAdapter) + expect { described_class.lookup("wrong") }.to raise_error(ProxyFetcher::Exceptions::UnknownAdapter) end end end diff --git a/spec/proxy_fetcher/document/node_spec.rb b/spec/proxy_fetcher/document/node_spec.rb index 90d6922..ba2a5f6 100644 --- a/spec/proxy_fetcher/document/node_spec.rb +++ b/spec/proxy_fetcher/document/node_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 
'spec_helper' +require "spec_helper" describe ProxyFetcher::Document::Node do - context 'overridable methods' do - it 'raises an error' do - node = ProxyFetcher::Document::Node.new('') + context "overridable methods" do + it "raises an error" do + node = ProxyFetcher::Document::Node.new("") %w[content html].each do |method| expect { node.public_send(method) }.to raise_error do |error| diff --git a/spec/proxy_fetcher/providers/base_spec.rb b/spec/proxy_fetcher/providers/base_spec.rb index 6d7554b..f761a69 100644 --- a/spec/proxy_fetcher/providers/base_spec.rb +++ b/spec/proxy_fetcher/providers/base_spec.rb @@ -1,37 +1,37 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::Base do before { ProxyFetcher.config.reset! } after { ProxyFetcher.config.reset! } - it 'does not allows to use not implemented methods' do + it "does not allows to use not implemented methods" do NotImplementedCustomProvider = Class.new(ProxyFetcher::Providers::Base) ProxyFetcher::Configuration.register_provider(:provider_without_methods, NotImplementedCustomProvider) ProxyFetcher.config.provider = :provider_without_methods expect { ProxyFetcher::Manager.new }.to raise_error(NotImplementedError) do |error| - expect(error.message).to include('load_proxy_list') + expect(error.message).to include("provider_url") end # implement one of the methods NotImplementedCustomProvider.class_eval do - def load_proxy_list(*) - [1, 2, 3] + def provider_url + "http://provider.com" end end expect { ProxyFetcher::Manager.new }.to raise_error(NotImplementedError) do |error| - expect(error.message).to include('to_proxy') + expect(error.message).to include("xpath") end end - it 'logs failed to load proxy providers' do + it "logs failed to load proxy providers" do CustomProvider = Class.new(ProxyFetcher::Providers::Base) do def load_proxy_list(*) - doc = load_document('https://google.com', {}) + doc = load_document("https://google.com", {}) 
doc.xpath('//table[contains(@class, "table")]/tr[(not(@id="proxy-table-header")) and (count(td)>2)]') end end @@ -44,7 +44,7 @@ def load_proxy_list(*) allow_any_instance_of(HTTP::Client).to receive(:get).and_raise(StandardError) - expect(logger).to receive(:warn).with(/Failed to process request to http[s:\/]/) + expect(logger).to receive(:warn).with(%r{Failed to process request to http[s:/]}) ProxyFetcher::Manager.new end diff --git a/spec/proxy_fetcher/providers/free_proxy_list_spec.rb b/spec/proxy_fetcher/providers/free_proxy_list_spec.rb index ab7df77..72f3b59 100644 --- a/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +++ b/spec/proxy_fetcher/providers/free_proxy_list_spec.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::FreeProxyList do before :all do @@ -9,5 +9,5 @@ end end - it_behaves_like 'a manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb b/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb index ff632e0..2f05bbc 100644 --- a/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +++ b/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::FreeProxyListSSL do before :all do ProxyFetcher.config.provider = :free_proxy_list_ssl end - it_behaves_like 'a manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/providers/gather_proxy_spec.rb b/spec/proxy_fetcher/providers/gather_proxy_spec.rb index afad3e4..a2e1bc0 100644 --- a/spec/proxy_fetcher/providers/gather_proxy_spec.rb +++ b/spec/proxy_fetcher/providers/gather_proxy_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::GatherProxy do before :all do ProxyFetcher.config.provider = :gather_proxy end - it_behaves_like 'a 
manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/providers/http_tunnel_spec.rb b/spec/proxy_fetcher/providers/http_tunnel_spec.rb index 02377b7..7a79ba8 100644 --- a/spec/proxy_fetcher/providers/http_tunnel_spec.rb +++ b/spec/proxy_fetcher/providers/http_tunnel_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::HTTPTunnel do before :all do ProxyFetcher.config.provider = :http_tunnel end - it_behaves_like 'a manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/providers/multiple_providers_spec.rb b/spec/proxy_fetcher/providers/multiple_providers_spec.rb index 06a585f..2b65cca 100644 --- a/spec/proxy_fetcher/providers/multiple_providers_spec.rb +++ b/spec/proxy_fetcher/providers/multiple_providers_spec.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" -describe 'Multiple proxy providers' do +describe "Multiple proxy providers" do before { ProxyFetcher.config.reset! } after { ProxyFetcher.config.reset! 
} - it 'combine proxies from multiple providers' do - proxy_stub = ProxyFetcher::Proxy.new(addr: '192.168.1.1', port: 8080) + it "combine proxies from multiple providers" do + proxy_stub = ProxyFetcher::Proxy.new(addr: "192.168.1.1", port: 8080) # Each proxy provider will return 2 proxies ProxyFetcher::Configuration.providers_registry.providers.each do |_name, klass| diff --git a/spec/proxy_fetcher/providers/proxy_list_spec.rb b/spec/proxy_fetcher/providers/proxy_list_spec.rb index 833ec27..36a3a96 100644 --- a/spec/proxy_fetcher/providers/proxy_list_spec.rb +++ b/spec/proxy_fetcher/providers/proxy_list_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::ProxyList do before :all do ProxyFetcher.config.provider = :proxy_list end - it_behaves_like 'a manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/providers/xroxy_spec.rb b/spec/proxy_fetcher/providers/xroxy_spec.rb index 6692ed9..d28e899 100644 --- a/spec/proxy_fetcher/providers/xroxy_spec.rb +++ b/spec/proxy_fetcher/providers/xroxy_spec.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Providers::XRoxy do before :all do ProxyFetcher.config.provider = :xroxy end - it_behaves_like 'a manager' + it_behaves_like "a manager" end diff --git a/spec/proxy_fetcher/proxy_spec.rb b/spec/proxy_fetcher/proxy_spec.rb index 2ffd80e..092e433 100644 --- a/spec/proxy_fetcher/proxy_spec.rb +++ b/spec/proxy_fetcher/proxy_spec.rb @@ -1,20 +1,20 @@ # frozen_string_literal: true -require 'spec_helper' +require "spec_helper" describe ProxyFetcher::Proxy do - let(:proxy) { described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP') } + let(:proxy) { described_class.new(addr: "192.169.1.1", port: 8080, type: "HTTP") } - it 'can initialize a new proxy object' do - proxy = described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP') + it "can initialize 
a new proxy object" do + proxy = described_class.new(addr: "192.169.1.1", port: 8080, type: "HTTP") expect(proxy).not_to be_nil - expect(proxy.addr).to eq('192.169.1.1') + expect(proxy.addr).to eq("192.169.1.1") expect(proxy.port).to eq(8080) - expect(proxy.type).to eq('HTTP') + expect(proxy.type).to eq("HTTP") end - it 'checks schema' do + it "checks schema" do proxy.type = ProxyFetcher::Proxy::HTTP expect(proxy.http?).to be_truthy expect(proxy.https?).to be_falsey @@ -34,28 +34,28 @@ expect(proxy.ssl?).to be_truthy end - it 'not connectable if IP addr is wrong' do - proxy.addr = '192.168.1.0' + it "not connectable if IP addr is wrong" do + proxy.addr = "192.168.1.0" expect(proxy.connectable?).to be_falsey end - it 'not connectable if there are some error during connection request' do + it "not connectable if there are some error during connection request" do allow_any_instance_of(HTTP::Client).to receive(:head).and_raise(HTTP::TimeoutError) expect(proxy.connectable?).to be_falsey end - it 'returns URI::Generic' do + it "returns URI::Generic" do expect(proxy.uri).to be_a(URI::Generic) expect(proxy.uri.host).not_to be_empty expect(proxy.uri.port).not_to be_nil end - it 'returns URL' do + it "returns URL" do expect(proxy.url).to be_a(String) end - it 'returns URL with scheme' do - expect(proxy.url(scheme: true)).to include('://') + it "returns URL with scheme" do + expect(proxy.url(scheme: true)).to include("://") end end diff --git a/spec/proxy_fetcher/version_spec.rb b/spec/proxy_fetcher/version_spec.rb index 6c336ef..c7e3fd9 100644 --- a/spec/proxy_fetcher/version_spec.rb +++ b/spec/proxy_fetcher/version_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + RSpec.describe ProxyFetcher::VERSION do it { expect(ProxyFetcher::VERSION::STRING).to match(/^\d+\.\d+\.\d+(\.\w+)?$/) } end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 747863f..b1298c1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,24 +1,24 @@ # frozen_string_literal: 
true -require 'simplecov' -SimpleCov.add_filter 'spec' -SimpleCov.add_filter 'version' +require "simplecov" +SimpleCov.add_filter "spec" +SimpleCov.add_filter "version" -if ENV['CI'] || ENV['TRAVIS'] || ENV['COVERALLS'] || ENV['JENKINS_URL'] - require 'coveralls' +if ENV["CI"] || ENV["TRAVIS"] || ENV["COVERALLS"] || ENV["JENKINS_URL"] + require "coveralls" Coveralls.wear! else SimpleCov.start end -require 'bundler/setup' +require "bundler/setup" Bundler.setup -require 'proxy_fetcher' +require "proxy_fetcher" -Dir['./spec/support/**/*.rb'].sort.each { |f| require f } +Dir["./spec/support/**/*.rb"].sort.each { |f| require f } -adapter = ENV['BUNDLE_GEMFILE'][/.+\/(.+)\.gemfile/i, 1] || :nokogiri +adapter = ENV["BUNDLE_GEMFILE"][%r{.+/(.+)\.gemfile}i, 1] || :nokogiri puts "Configured adapter: '#{adapter}'" ProxyFetcher.configure do |config| @@ -26,5 +26,5 @@ end RSpec.configure do |config| - config.order = 'random' + config.order = "random" end diff --git a/spec/support/manager_examples.rb b/spec/support/manager_examples.rb index fc1ad89..7175c1f 100644 --- a/spec/support/manager_examples.rb +++ b/spec/support/manager_examples.rb @@ -1,9 +1,12 @@ # frozen_string_literal: true -RSpec.shared_examples 'a manager' do - it 'loads proxy list on initialization by default' do - manager = ProxyFetcher::Manager.new - expect(manager.proxies).not_to be_empty +RSpec.shared_examples "a manager" do + before :all do + @cached_manager = ProxyFetcher::Manager.new + end + + it "loads proxy list on initialization by default" do + expect(@cached_manager.proxies).not_to be_empty end it "doesn't load proxy list on initialization if `refresh` argument was set to false" do @@ -11,11 +14,10 @@ expect(manager.proxies).to be_empty end - it 'returns valid Proxy objects' do - manager = ProxyFetcher::Manager.new - expect(manager.proxies).to all(be_a(ProxyFetcher::Proxy)) + it "returns valid Proxy objects" do + expect(@cached_manager.proxies).to all(be_a(ProxyFetcher::Proxy)) - manager.proxies.each 
do |proxy| + @cached_manager.proxies.each do |proxy| expect(proxy.addr).to match(/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/i) expect(proxy.port).to be_a_kind_of(Numeric) expect(proxy.type).not_to be_empty @@ -25,12 +27,11 @@ end end - it 'returns raw proxies (HOST:PORT)' do - manager = ProxyFetcher::Manager.new - expect(manager.raw_proxies).to all(be_a(String)) + it "returns raw proxies (HOST:PORT)" do + expect(@cached_manager.raw_proxies).to all(be_a(String)) end - it 'cleanup proxy list from dead servers' do + it "cleanup proxy list from dead servers" do allow_any_instance_of(ProxyFetcher::Proxy).to receive(:connectable?).and_return(false) manager = ProxyFetcher::Manager.new @@ -45,7 +46,7 @@ expect(manager.inspect).to eq(manager.to_s) end - it 'returns first proxy' do + it "returns first proxy" do manager = ProxyFetcher::Manager.new first_proxy = manager.proxies.first @@ -54,16 +55,16 @@ expect(manager.proxies.first).not_to eq(first_proxy) end - it 'returns first valid proxy' do + it "returns first valid proxy" do manager = ProxyFetcher::Manager.new(refresh: false) - proxies = Array.new(5) { instance_double('ProxyFetcher::Proxy', connectable?: false) } + proxies = Array.new(5) { instance_double("ProxyFetcher::Proxy", connectable?: false) } manager.instance_variable_set(:@proxies, proxies) - connectable_proxy = instance_double('ProxyFetcher::Proxy') + connectable_proxy = instance_double("ProxyFetcher::Proxy") allow(connectable_proxy).to receive(:connectable?).and_return(true) - manager.proxies[0..2].each { |proxy| proxy.instance_variable_set(:@addr, '192.168.1.1') } + manager.proxies[0..2].each { |proxy| proxy.instance_variable_set(:@addr, "192.168.1.1") } manager.proxies[2] = connectable_proxy expect(manager.get!).to eq(connectable_proxy) @@ -73,15 +74,14 @@ expect(manager.proxies.size).to be(1) end - it 'returns nothing if proxy list is empty' do + it "returns nothing if proxy list is empty" do manager = ProxyFetcher::Manager.new(refresh: false) 
expect(manager.get).to be_nil expect(manager.get!).to be_nil end - it 'returns random proxy' do - manager = ProxyFetcher::Manager.new - expect(manager.random).to be_an_instance_of(ProxyFetcher::Proxy) + it "returns random proxy" do + expect(@cached_manager.random).to be_an_instance_of(ProxyFetcher::Proxy) end end From 980249fd9c7b02f22d2406bb5be6222786ba1f42 Mon Sep 17 00:00:00 2001 From: Nikita Bulai Date: Wed, 23 Oct 2019 15:18:33 +0300 Subject: [PATCH 2/4] Refactor Client specs, clear code --- .travis.yml | 4 +-- lib/proxy_fetcher/client/client.rb | 11 ++++++-- lib/proxy_fetcher/configuration.rb | 2 +- lib/proxy_fetcher/document.rb | 9 ------ .../document/adapters/abstract_adapter.rb | 9 ------ spec/proxy_fetcher/client/client_spec.rb | 28 +++++++++---------- 6 files changed, 26 insertions(+), 37 deletions(-) diff --git a/.travis.yml b/.travis.yml index dc5f644..8990678 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,12 +22,12 @@ rvm: - 2.5 - 2.6 - ruby-head - - jruby-9.2.1 + - jruby-9.2.8 matrix: allow_failures: - rvm: ruby-head - - rvm: jruby-9.2.1 + - rvm: jruby-9.2.8 - rvm: truffleruby exclude: - rvm: 2.0 diff --git a/lib/proxy_fetcher/client/client.rb b/lib/proxy_fetcher/client/client.rb index 2eb01ca..50d387c 100644 --- a/lib/proxy_fetcher/client/client.rb +++ b/lib/proxy_fetcher/client/client.rb @@ -128,7 +128,11 @@ def patch(url, payload, headers: {}, options: {}) # def request_with_payload(method, url, payload, headers, options) with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy| - opts = options.merge(payload: payload, proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers)) + opts = options.merge( + payload: payload, + proxy: options.fetch(:proxy, proxy), + headers: default_headers.merge(headers) + ) Request.execute(url: url, method: method, **opts) end @@ -138,7 +142,10 @@ def request_with_payload(method, url, payload, headers, options) # def request_without_payload(method, url, headers, options) 
with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy| - opts = options.merge(proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers)) + opts = options.merge( + proxy: options.fetch(:proxy, proxy), + headers: default_headers.merge(headers) + ) Request.execute(url: url, method: method, **opts) end diff --git a/lib/proxy_fetcher/configuration.rb b/lib/proxy_fetcher/configuration.rb index acfe2ae..fe14d73 100644 --- a/lib/proxy_fetcher/configuration.rb +++ b/lib/proxy_fetcher/configuration.rb @@ -30,7 +30,7 @@ class Configuration attr_accessor :user_agent # @!attribute [r] logger - # @return [Object] Logger object + # @return [Logger] Logger object attr_accessor :logger # @!attribute [r] adapter diff --git a/lib/proxy_fetcher/document.rb b/lib/proxy_fetcher/document.rb index f957f50..c5faf07 100644 --- a/lib/proxy_fetcher/document.rb +++ b/lib/proxy_fetcher/document.rb @@ -36,14 +36,5 @@ def initialize(backend) def xpath(*args) backend.xpath(*args).map { |node| backend.proxy_node.new(node) } end - - # Searches elements by CSS selector. - # - # @return [Array] - # collection of nodes - # - def css(*args) - backend.css(*args).map { |node| backend.proxy_node.new(node) } - end end end diff --git a/lib/proxy_fetcher/document/adapters/abstract_adapter.rb b/lib/proxy_fetcher/document/adapters/abstract_adapter.rb index a94efc3..3fe9ae3 100644 --- a/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +++ b/lib/proxy_fetcher/document/adapters/abstract_adapter.rb @@ -26,15 +26,6 @@ def xpath(selector) document.xpath(selector) end - # You can override this method in your own adapter class - # - # @param selector [String] - # CSS selector - # - def css(selector) - document.css(selector) - end - # Returns Node class that will handle HTML # nodes for particular adapter. 
# diff --git a/spec/proxy_fetcher/client/client_spec.rb b/spec/proxy_fetcher/client/client_spec.rb index 03c43ef..61a76a3 100644 --- a/spec/proxy_fetcher/client/client_spec.rb +++ b/spec/proxy_fetcher/client/client_spec.rb @@ -6,11 +6,12 @@ require "evil-proxy" require "evil-proxy/async" -xdescribe ProxyFetcher::Client do +describe ProxyFetcher::Client do before :all do ProxyFetcher.configure do |config| config.provider = :xroxy config.client_timeout = 5 + config.logger = ProxyFetcher::NullLogger.new end @server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: true @@ -21,31 +22,30 @@ @server.shutdown end + let(:local_proxy) { ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") } + # Use local proxy server in order to avoid side effects, non-working proxies, etc before :each do - proxy = ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") - ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [proxy]) - allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies!).and_return([proxy]) + ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [local_proxy]) + allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies).and_return([local_proxy]) end context "GET request with the valid proxy" do it "successfully returns page content for HTTP" do - content = ProxyFetcher::Client.get("http://httpbin.org") + content = ProxyFetcher::Client.get("http://httpbin.org/get") expect(content).not_to be_empty end - it "successfully returns page content for HTTPS" do - content = ProxyFetcher::Client.get("https://httpbin.org") + # TODO: oh this SSL / MITM proxies .... + xit "successfully returns page content for HTTPS" do + content = ProxyFetcher::Client.get("https://httpbin.org/get") expect(content).not_to be_empty end it "successfully returns page content using custom proxy" do - manager = ProxyFetcher::Manager.new - - proxy = manager.get! 
until proxy - content = ProxyFetcher::Client.get("http://httpbin.org", options: { proxy: proxy }) + content = ProxyFetcher::Client.get("http://httpbin.org/get", options: { proxy: local_proxy }) expect(content).not_to be_empty end @@ -80,7 +80,7 @@ json = JSON.parse(content) - expect(json["data"]).to eq("param=PutValue") + expect(json["form"]["param"]).to eq("PutValue") end end @@ -112,7 +112,7 @@ end end - context "retries" do + xcontext "retries" do it "raises an error when reaches max retries limit" do allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError) @@ -128,7 +128,7 @@ end it "refreshes proxy lists if no proxy found" do - ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', []) + ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", []) expect { ProxyFetcher::Client.get("http://httpbin.org") } .not_to raise_error From 5b5ec2ac760d7844e73e219d53a1fff15ad45b4f Mon Sep 17 00:00:00 2001 From: Nikita Bulai Date: Wed, 23 Oct 2019 15:43:57 +0300 Subject: [PATCH 3/4] Drop old rubies (no longer supported) --- .travis.yml | 10 ++------ lib/proxy_fetcher/configuration.rb | 13 +++++++---- lib/proxy_fetcher/document/adapters.rb | 2 +- lib/proxy_fetcher/providers/base.rb | 23 ++++++++++++++----- .../providers/free_proxy_list.rb | 23 ++++++++++++++++++- lib/proxy_fetcher/proxy.rb | 8 +++---- lib/proxy_fetcher/utils/proxy_validator.rb | 2 +- proxy_fetcher.gemspec | 5 ++-- 8 files changed, 59 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8990678..dd95336 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,21 +14,15 @@ gemfile: - gemfiles/nokogiri.gemfile rvm: - - 2.0 - - 2.1 - - 2.2 - 2.3 - 2.4 - 2.5 - 2.6 - ruby-head - - jruby-9.2.8 + - jruby-9.2.8.0 matrix: allow_failures: - rvm: ruby-head - - rvm: jruby-9.2.8 + - rvm: jruby-9.2.8.0 - rvm: truffleruby - exclude: - - rvm: 2.0 - gemfile: gemfiles/nokogiri.gemfile # Nokogiri doesn't support Ruby 2.0 diff --git
a/lib/proxy_fetcher/configuration.rb b/lib/proxy_fetcher/configuration.rb index fe14d73..7e5d235 100644 --- a/lib/proxy_fetcher/configuration.rb +++ b/lib/proxy_fetcher/configuration.rb @@ -6,15 +6,20 @@ module ProxyFetcher # class Configuration # @!attribute client_timeout - # @return [Integer] HTTP request timeout (connect / open) for [ProxyFetcher::Client] + # @return [Integer] + # HTTP request timeout (connect / open) for [ProxyFetcher::Client] attr_accessor :client_timeout # @!attribute provider_proxies_load_timeout - # @return [Integer] HTTP request timeout (connect / open) for loading of proxies list by provider + # @return [Integer] + # HTTP request timeout (connect / open) for loading + # of proxies list by provider attr_accessor :provider_proxies_load_timeout # @!attribute proxy_validation_timeout - # @return [Integer] HTTP request timeout (connect / open) for proxy validation with [ProxyFetcher::ProxyValidator] + # @return [Integer] + # HTTP request timeout (connect / open) for proxy + # validation with [ProxyFetcher::ProxyValidator] attr_accessor :proxy_validation_timeout # to save compatibility @@ -55,7 +60,7 @@ class Configuration # Default is Google Chrome 60, but can be changed in ProxyFetcher.config. # DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 " \ - "(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36".freeze + "(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36" # HTML parser adapter name. # diff --git a/lib/proxy_fetcher/document/adapters.rb b/lib/proxy_fetcher/document/adapters.rb index 90c99aa..acb02bd 100644 --- a/lib/proxy_fetcher/document/adapters.rb +++ b/lib/proxy_fetcher/document/adapters.rb @@ -13,7 +13,7 @@ class Document # ProxyFetcher::Document::AbstractAdapter. 
class Adapters # Adapters class name suffix - ADAPTER = "Adapter".freeze + ADAPTER = "Adapter" private_constant :ADAPTER class << self diff --git a/lib/proxy_fetcher/providers/base.rb b/lib/proxy_fetcher/providers/base.rb index c0d1863..baf2ba4 100644 --- a/lib/proxy_fetcher/providers/base.rb +++ b/lib/proxy_fetcher/providers/base.rb @@ -47,18 +47,29 @@ def self.fetch_proxies!(*args) # Loads raw provider HTML with proxies. # + # @param url [String] + # Provider URL + # + # @param filters [#to_h] + # Provider filters (Hash-like object) + # # @return [String] - # HTML body + # HTML body from the response # def load_html(url, filters = {}) - raise ArgumentError, "filters must be a Hash" if filters && !filters.is_a?(Hash) + unless filters.respond_to?(:to_h) + raise ArgumentError, "filters must be a Hash or respond to #to_h" + end - uri = URI.parse(url) - # TODO: query for post request? - uri.query = URI.encode_www_form(provider_params.merge(filters)) if filters && filters.any? + if filters&.any? + # TODO: query for post request? + uri = URI.parse(url) + uri.query = URI.encode_www_form(provider_params.merge(filters.to_h)) + url = uri.to_s + end ProxyFetcher.config.http_client.fetch( - uri.to_s, + url, method: provider_method, headers: provider_headers, params: provider_params diff --git a/lib/proxy_fetcher/providers/free_proxy_list.rb b/lib/proxy_fetcher/providers/free_proxy_list.rb index 6e2dc30..0672b9c 100644 --- a/lib/proxy_fetcher/providers/free_proxy_list.rb +++ b/lib/proxy_fetcher/providers/free_proxy_list.rb @@ -45,7 +45,28 @@ def to_proxy(html_node) # def parse_type(html_node) https = html_node.content_at("td[6]") - https && https.casecmp("yes").zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP + # frozen_string_literal: true + # FreeProxyList provider class. + # Provider URL to fetch proxy list + # [NOTE] Doesn't support filtering + # Converts HTML node (entry of N tags) to ProxyFetcher::Proxy + # object. 
+ # + # @param html_node [Object] + # HTML node from the ProxyFetcher::Document DOM model. + # + # @return [ProxyFetcher::Proxy] + # Proxy object + # + # Parses HTML node to extract proxy type. + # + # @param html_node [Object] + # HTML node from the ProxyFetcher::Document DOM model. + # + # @return [String] + # Proxy type + # + https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP end end diff --git a/lib/proxy_fetcher/proxy.rb b/lib/proxy_fetcher/proxy.rb index 3f3ee8b..4660e8f 100644 --- a/lib/proxy_fetcher/proxy.rb +++ b/lib/proxy_fetcher/proxy.rb @@ -29,10 +29,10 @@ class Proxy # Proxy types TYPES = [ - HTTP = "HTTP".freeze, - HTTPS = "HTTPS".freeze, - SOCKS4 = "SOCKS4".freeze, - SOCKS5 = "SOCKS5".freeze + HTTP = "HTTP", + HTTPS = "HTTPS", + SOCKS4 = "SOCKS4", + SOCKS5 = "SOCKS5" ].freeze # Proxy type predicates (#socks4?, #https?) diff --git a/lib/proxy_fetcher/utils/proxy_validator.rb b/lib/proxy_fetcher/utils/proxy_validator.rb index fb20905..e448d29 100644 --- a/lib/proxy_fetcher/utils/proxy_validator.rb +++ b/lib/proxy_fetcher/utils/proxy_validator.rb @@ -6,7 +6,7 @@ module ProxyFetcher # URL to check if proxy can be used (aka connectable?). class ProxyValidator # Default URL that will be used to check if proxy can be used. - URL_TO_CHECK = "https://google.com".freeze + URL_TO_CHECK = "https://google.com" # Short variant to validate proxy. # diff --git a/proxy_fetcher.gemspec b/proxy_fetcher.gemspec index 7acc736..33bb70a 100644 --- a/proxy_fetcher.gemspec +++ b/proxy_fetcher.gemspec @@ -9,7 +9,8 @@ Gem::Specification.new do |gem| gem.version = ProxyFetcher.gem_version gem.summary = "Ruby gem for dealing with proxy lists from different providers" gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \ - "using proxies by fetching and validating proxy lists from the different providers." + "using proxies by fetching and validating proxy lists from " \ + "the different providers." 
gem.authors = ["Nikita Bulai"] gem.email = "bulajnikita@gmail.com" gem.require_paths = ["lib"] @@ -18,7 +19,7 @@ Gem::Specification.new do |gem| gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) } gem.homepage = "http://github.com/nbulaj/proxy_fetcher" gem.license = "MIT" - gem.required_ruby_version = ">= 2.0.0" + gem.required_ruby_version = ">= 2.3.0" gem.add_runtime_dependency "http", ">= 3", "< 5" From 29ad097426118a1a5d04f1d36470c6885ef5ca5f Mon Sep 17 00:00:00 2001 From: Nikita Bulai Date: Wed, 23 Oct 2019 17:31:16 +0300 Subject: [PATCH 4/4] Hack :( --- spec/proxy_fetcher/client/client_spec.rb | 34 ++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/spec/proxy_fetcher/client/client_spec.rb b/spec/proxy_fetcher/client/client_spec.rb index 61a76a3..3089e76 100644 --- a/spec/proxy_fetcher/client/client_spec.rb +++ b/spec/proxy_fetcher/client/client_spec.rb @@ -14,7 +14,9 @@ config.logger = ProxyFetcher::NullLogger.new end - @server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: true + quiet = ENV.key?("LOG_MITM") ? 
ENV["LOG_MITM"] == "false" : true + + @server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: quiet @server.start end @@ -72,27 +74,31 @@ end end - context "PUT request with the valid proxy" do - it "successfully returns page content for HTTP" do - content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue") + # TODO: EvilProxy incompatible with latest Ruby/Webrick + # @see https://github.com/bbtfr/evil-proxy/issues/10 + if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.6") + context "PUT request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue") - expect(content).not_to be_empty + expect(content).not_to be_empty - json = JSON.parse(content) + json = JSON.parse(content) - expect(json["form"]["param"]).to eq("PutValue") + expect(json["form"]["param"]).to eq("PutValue") + end end - end - context "PATCH request with the valid proxy" do - it "successfully returns page content for HTTP" do - content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value") + context "PATCH request with the valid proxy" do + it "successfully returns page content for HTTP" do + content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value") - expect(content).not_to be_empty + expect(content).not_to be_empty - json = JSON.parse(content) + json = JSON.parse(content) - expect(json["form"]["param"]).to eq("value") + expect(json["form"]["param"]).to eq("value") + end end end