Skip to content

Commit

Permalink
DomainExpirationScanner: more logging, timeouts accounting for the fa…
Browse files Browse the repository at this point in the history
…ct that we wait for quota, minor message fixes, etc. (#604)
  • Loading branch information
kazet authored Nov 23, 2023
1 parent 8cf4ef6 commit d886a9b
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 30 deletions.
4 changes: 2 additions & 2 deletions artemis/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class Locking:
Locks are not permanent, because a service that has acquired a lock may get restarted or killed.
This is the lock default expiry time.
""",
] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=3600, cast=int)
] = get_config("DEFAULT_LOCK_EXPIRY_SECONDS", default=48 * 60 * 60, cast=int)

class PublicSuffixes:
ALLOW_SCANNING_PUBLIC_SUFFIXES: Annotated[
Expand Down Expand Up @@ -473,7 +473,7 @@ class WordPressBruter:
class DomainExpirationScanner:
DOMAIN_EXPIRATION_TIMEFRAME_DAYS: Annotated[
int, "The scanner warns if the domain's expiration date falls within this time frame from now."
] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=5, cast=int)
] = get_config("DOMAIN_EXPIRATION_TIMEFRAME_DAYS", default=14, cast=int)

@staticmethod
def verify_each_variable_is_annotated() -> None:
Expand Down
31 changes: 29 additions & 2 deletions artemis/module_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,15 @@ class ArtemisBase(Karton):
# their IPs are already scanned, the actual batch size may be lower.
task_max_batch_size = 1

timeout_seconds = Config.Limits.TASK_TIMEOUT_SECONDS

lock_target = Config.Locking.LOCK_SCANNED_TARGETS

# Sometimes there are multiple modules that make use of a resource, e.g. whois database.
# This is the name of the resource - if a module locks it, no other module using this
# resource can use it.
resource_name_to_lock_before_scanning: Optional[str] = None

def __init__(self, db: Optional[DB] = None, *args, **kwargs) -> None: # type: ignore[no-untyped-def]
super().__init__(*args, **kwargs)
self.cache = RedisCache(REDIS, self.identity)
Expand Down Expand Up @@ -234,6 +241,23 @@ def run_multiple(self, tasks: List[Task]) -> None:
raise NotImplementedError()

def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None:
if self.resource_name_to_lock_before_scanning:
resource_lock = ResourceLock(
REDIS,
f"resource-lock-{self.resource_name_to_lock_before_scanning}",
max_tries=Config.Locking.SCAN_DESTINATION_LOCK_MAX_TRIES,
)
try:
resource_lock.acquire()
self.log.info("Succeeded to lock resource %s", self.resource_name_to_lock_before_scanning)
except FailedToAcquireLockException:
self.log.info("Failed to lock resource %s", self.resource_name_to_lock_before_scanning)
for task in tasks:
self.reschedule_task(task)
return
else:
resource_lock = None

if self.lock_target:
locks_acquired = []
tasks_to_reschedule = []
Expand Down Expand Up @@ -288,6 +312,9 @@ def lock_and_internal_process_multiple(self, tasks: List[Task]) -> None:
self._log_tasks(tasks)
self.internal_process_multiple(tasks)

if resource_lock:
resource_lock.release()

def internal_process_multiple(self, tasks: List[Task]) -> None:
tasks_filtered = []
for task in tasks:
Expand Down Expand Up @@ -350,10 +377,10 @@ def process_multiple(self, tasks: List[Task]) -> None:

try:
if self.batch_tasks:
timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run_multiple(tasks))()
timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run_multiple(tasks))()
else:
(task,) = tasks
timeout_decorator.timeout(Config.Limits.TASK_TIMEOUT_SECONDS)(lambda: self.run(task))()
timeout_decorator.timeout(self.timeout_seconds)(lambda: self.run(task))()
except Exception:
for task in tasks:
self.db.save_task_result(task=task, status=TaskStatus.ERROR, data=traceback.format_exc())
Expand Down
37 changes: 17 additions & 20 deletions artemis/modules/domain_expiration_scanner.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#!/usr/bin/env python3
import datetime
import time
from typing import Any, Dict, Optional

from karton.core import Task
from whois import Domain, WhoisQuotaExceeded, query # type: ignore

from artemis.binds import TaskStatus, TaskType
from artemis.config import Config
from artemis.domains import is_main_domain
from artemis.module_base import ArtemisBase
from artemis.utils import perform_whois_or_sleep


class DomainExpirationScanner(ArtemisBase):
Expand All @@ -19,34 +18,32 @@ class DomainExpirationScanner(ArtemisBase):

identity = "domain_expiration_scanner"
filters = [{"type": TaskType.DOMAIN.value}]
resource_name_to_lock_before_scanning = "whois"

# As the logic sometimes requires waiting 24 hours for the quota to be renewed, let's
# set the timeout for 24 hours + 1 hour.
timeout_seconds = (24 + 1) * 3600

def run(self, current_task: Task) -> None:
domain = current_task.get_payload(TaskType.DOMAIN)
result: Dict[str, Any] = {}
status = TaskStatus.OK
status_reason = None
if is_main_domain(domain):
try:
domain_data = self._query_whois(domain=domain)
except WhoisQuotaExceeded:
time.sleep(24 * 60 * 60)
domain_data = self._query_whois(domain=domain)
domain_data = perform_whois_or_sleep(domain=domain, logger=self.log)

expiry_date = domain_data.expiration_date
result = self._prepare_expiration_data(expiration_date=expiry_date, result=result)
if domain_data:
expiry_date = domain_data.expiration_date
result = self._prepare_expiration_data(expiration_date=expiry_date, result=result)

if "close_expiration_date" in result:
status = TaskStatus.INTERESTING
status_reason = self._prepare_expiration_status_reason(
days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"]
)
if "close_expiration_date" in result:
status = TaskStatus.INTERESTING
status_reason = self._prepare_expiration_status_reason(
days_to_expire=result["days_to_expire"], expiration_date=result["expiration_date"]
)

self.db.save_task_result(task=current_task, status=status, status_reason=status_reason, data=result)

@staticmethod
def _query_whois(domain: str) -> Domain:
return query(domain)

@staticmethod
def _prepare_expiration_data(
expiration_date: Optional[datetime.datetime], result: Dict[str, Any]
Expand All @@ -64,9 +61,9 @@ def _prepare_expiration_data(
@staticmethod
def _prepare_expiration_status_reason(days_to_expire: int, expiration_date: datetime.datetime) -> str:
return (
f"Scanned domain will expire in {days_to_expire} days - (on {expiration_date})."
f"Scanned domain will expire in {days_to_expire} days - on {expiration_date}."
if days_to_expire != 1
else f"Scanned domain will expire in {days_to_expire} day - (on {expiration_date})."
else f"Scanned domain will expire in {days_to_expire} day - on {expiration_date}."
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
{% for report in data.reports %}
{% if report.report_type == "close_domain_expiration_date" %}
<li>
<p> {{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }} </p>
<p> {{ report.target }} - {% trans %}will expire on{% endtrans %} {{ report.additional_data["expiration_date"] }}.</p>
</li>
{% endif %}
{% endfor %}
Expand Down
1 change: 1 addition & 0 deletions artemis/reporting/severity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Severity(str, Enum):
ReportType("subdomain_takeover_possible"): Severity.HIGH,
ReportType("sql_injection"): Severity.HIGH,
ReportType("closed_wordpress_plugin"): Severity.HIGH,
ReportType("close_domain_expiration_date"): Severity.HIGH,
ReportType("exposed_database_with_easy_password"): Severity.HIGH,
ReportType("exposed_version_control_folder"): Severity.HIGH,
ReportType("exposed_version_control_folder_with_credentials"): Severity.HIGH,
Expand Down
23 changes: 22 additions & 1 deletion artemis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import time
import urllib.parse
from ipaddress import ip_address
from typing import Any, Callable, List
from typing import Any, Callable, List, Optional

from whoisdomain import Domain, WhoisQuotaExceeded # type: ignore
from whoisdomain import query as whois_query

from artemis.config import Config

Expand All @@ -21,6 +24,24 @@ def check_output_log_on_error(command: List[str], logger: logging.Logger, **kwar
raise


def perform_whois_or_sleep(domain: str, logger: logging.Logger) -> Optional[Domain]:
    """Run a whois query for ``domain``, waiting out an exhausted quota once.

    The query is attempted immediately. If it raises ``WhoisQuotaExceeded``,
    the function sleeps for 24 hours and then queries exactly one more time.

    Args:
        domain: The domain name passed to the whois query.
        logger: Logger that receives the progress messages.

    Returns:
        Whatever the whois query returned. This may be a falsy value, in which
        case the logged expiry is ``None``.

    Raises:
        WhoisQuotaExceeded: If the quota is still exceeded on the retry; the
            second query is not guarded.
    """
    quota_renewal_wait_seconds = 24 * 60 * 60

    try:
        whois_result = whois_query(domain=domain)
        expiry = whois_result.expiration_date if whois_result else None
        logger.info("Successful whois query for %s expiry=%s", domain, expiry)
        return whois_result
    except WhoisQuotaExceeded:
        logger.info("Quota exceeded for whois query for %s, sleeping 24 hours", domain)

    # Only reached after a quota error: wait for the quota to renew, then retry once.
    time.sleep(quota_renewal_wait_seconds)
    whois_result = whois_query(domain=domain)
    expiry = whois_result.expiration_date if whois_result else None
    logger.info(
        "Successful whois query for %s after retry expiry=%s",
        domain,
        expiry,
    )
    return whois_result


def build_logger(name: str) -> logging.Logger:
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ services:
depends_on: [ karton-logger ]
env_file: .env
restart: always
volumes: [ "./docker/karton.ini:/etc/karton/karton.ini" ]
volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"]

volumes:
data-mongodb:
Expand Down
6 changes: 4 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ FROM python:3.11-alpine3.18
COPY docker/wait-for-it.sh /wait-for-it.sh

ARG ADDITIONAL_REQUIREMENTS
RUN apk add --no-cache --virtual .build-deps go gcc git libc-dev make libffi-dev libpcap-dev postgresql-dev whois && \
apk add --no-cache bash libpcap libpq git subversion
RUN apk add --no-cache --virtual .build-deps go gcc git libc-dev make libffi-dev libpcap-dev postgresql-dev && \
# Whois here is important - if we didn't install it, we would default to busybox whois,
# which has different output, making https://pypi.org/project/whoisdomain/ regexes fail.
apk add --no-cache bash libpcap libpq git subversion whois
RUN GOBIN=/usr/local/bin/ go install github.com/projectdiscovery/naabu/v2/cmd/[email protected] && \
GOBIN=/usr/local/bin/ go install github.com/praetorian-inc/fingerprintx/cmd/[email protected] && \
GOBIN=/usr/local/bin/ go install github.com/lc/gau/v2/cmd/[email protected]
Expand Down
5 changes: 5 additions & 0 deletions docker/karton.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
[system]
# 2 days
task_dispatched_timeout=172800
task_started_timeout=172800

[minio]
address=minio:9000
bucket=artemis
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ typing-extensions==4.8.0
urllib3==1.26.18
uvicorn==0.24.0.post1
validators==0.22.0
whois==0.9.27
whoisdomain==1.20231102.1

0 comments on commit d886a9b

Please sign in to comment.