diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index a9a84b7f6..15fe67ce4 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -1,39 +1,59 @@ -#Github Workflow to run test documentation built -# -#SPDX-FileCopyrightText: 2020 IntelMQ Team -#SPDX-License-Identifier: AGPL-3.0-or-later -# -name: "Build the documentation with sphinx" +name: "Build and publish documentation" + on: push: - branches: [develop, maintenance, master] - paths-ignore: - - '.github/**' + branches: + - develop + - maintenance + - mkdocs + pull_request: - branches: [develop, maintenance] - paths-ignore: - - '.github/**' + branches: + - develop + - maintenance + - mkdocs + + release: + types: + - published + +permissions: + contents: write jobs: - documentationbuild: + build: runs-on: ubuntu-latest - name: Build the documentation - strategy: - fail-fast: false - matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] - steps: - - name: Checkout repository - uses: actions/checkout@v3 + - name: "Checkout repository" + uses: actions/checkout@v3 + + - name: "Setup python" + uses: actions/setup-python@v4 + with: + python-version: 3.x + + - name: "Install build dependencies" + run: | + pip install mkdocs-material mike lunr pygments mkdocstrings[python] mkdocs-material mkdocs-glightbox mkdocs-redirects mkdocs-minify-plugin + + - name: "Prepare git" + run: | + git fetch origin gh-pages --depth=1 + git config user.name intelmq-bot + git config user.email intelmq-bot - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} + - name: "Build docs without publishing" + if: github.event_name == 'pull_request' + run: | + mkdocs build - - name: Install documentation dependencies - run: pip install -r docs/requirements.txt + - name: "Build docs with version tag and publish" + if: github.event_name == 'release' + run: | + mike deploy --push --force 
--update-aliases ${{ github.ref_name }} latest +# mike set-default --push latest --force - - name: Build documentation - run: make -C docs html + - name: "Build docs with branch tag and publish" + if: github.event_name == 'push' + run: | + mike deploy --push --force ${{ github.ref_name }} diff --git a/.gitignore b/.gitignore index bac7657c5..d552caf86 100644 --- a/.gitignore +++ b/.gitignore @@ -29,11 +29,10 @@ src/ .venv/ .env -# sphinx -docs/source -docs/_build -docs/user/feeds.rst -docs/dev/harmonization-fields.rst +# mkdocs +#docs/user/feeds.md +#docs/dev/harmonization-fields.md +docs_build # Debian build filed debian/files diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 6f47314dd..000000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Read the Docs configuration file for Sphinx projects -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.11" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/conf.py - -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -python: - install: - - requirements: docs/requirements.txt diff --git a/.readthedocs.yaml.license b/.readthedocs.yaml.license deleted file mode 100644 index 04f1c8379..000000000 --- a/.readthedocs.yaml.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2023 Institute for Common Good Technology -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/CHANGELOG.md b/CHANGELOG.md index 72d950193..a0d0e3fca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,10 @@ -CHANGELOG -========== +# CHANGELOG + 3.2.2 (unreleased) diff --git a/README.md b/README.md new file mode 120000 index 000000000..e89233038 --- /dev/null +++ 
b/README.md @@ -0,0 +1 @@ +docs/index.md \ No newline at end of file diff --git a/README.rst b/README.rst deleted file mode 100644 index fe3058635..000000000 --- a/README.rst +++ /dev/null @@ -1,105 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Birger Schacht - SPDX-License-Identifier: AGPL-3.0-or-later - -=================== -Welcome to IntelMQ! -=================== - -.. image:: docs/_static/Logo_Intel_MQ.svg - :alt: IntelMQ - -|Build Status| |CII Badge| - -**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs, abuse -departments, etc.) for collecting and processing security feeds (such as -log files) using a message queuing protocol. It's a community driven -initiative called **IHAP** (Incident Handling Automation Project) which -was conceptually designed by European CERTs/CSIRTs during several -InfoSec events. Its main goal is to give to incident responders an easy -way to collect & process threat intelligence thus improving the incident -handling processes of CERTs. - -IntelMQ can be used for -- automated incident handling -- situational awareness -- automated notifications -- as data collector for other tools -- etc. 
- -IntelMQ's design was influenced by `AbuseHelper `__, -however it was re-written from scratch and aims at: - -- Reducing the complexity of system administration -- Reducing the complexity of writing new bots for new data feeds -- Reducing the probability of events lost in all process with - persistence functionality (even system crash) -- Use and improve the existing Data Harmonization Ontology -- Use JSON format for all messages -- Provide easy way to store data into Log Collectors like - ElasticSearch, Splunk, databases (such as PostgreSQL) -- Provide easy way to create your own black-lists -- Provide easy communication with other systems via HTTP RESTful API - -It follows the following basic meta-guidelines: - -- Don't break simplicity - KISS -- Keep it open source - forever -- Strive for perfection while keeping a deadline -- Reduce complexity/avoid feature bloat -- Embrace unit testing -- Code readability: test with unexperienced programmers -- Communicate clearly - -============ -Getting Help -============ - -IntelMQ's documentation is available at `intelmq.readthedocs.io `_. - -For support questions please reach out on the `the intelmq-users mailing list `_ - -============================== -IntelMQ Manager and more tools -============================== - -Several pieces of software evolved around IntelMQ. For example, check out `IntelMQ Manager `_ which is a web based interface to easily manage an IntelMQ system. - -More tools can be found in the `IntelMQ Universe chapter in the documentation `_. - -================== -How to participate -================== - -IntelMQ is a community project depending on your contributions. Please consider sharing your work. - -- Have a look at our `Developers Guide `_ for documentation. -- Subscribe to the `Intelmq-dev Mailing list `_ to get answers to your development questions: -- The `Github issues `_ lists all the open feature requests, bug reports and ideas. 
- -==================================== -Incident Handling Automation Project -==================================== - -- **URL:** http://www.enisa.europa.eu/activities/cert/support/incident-handling-automation -- **Mailing-list:** ihap@lists.trusted-introducer.org - -======= -Licence -======= - -This software is licensed under GNU Affero General Public License version 3 - -======= -Funding -======= - -This project was partially funded by the CEF framework - -.. figure:: https://ec.europa.eu/inea/sites/default/files/ceflogos/en_horizontal_cef_logo_2.png - :alt: Co-financed by the Connecting Europe Facility of the European Union - -.. |Build Status| image:: https://github.com/certtools/intelmq/workflows/Nosetest%20test%20suite/badge.svg - :target: https://github.com/certtools/intelmq/actions -.. |CII Badge| image:: https://bestpractices.coreinfrastructure.org/projects/4186/badge - :target: https://bestpractices.coreinfrastructure.org/projects/4186/ diff --git a/SECURITY.md b/SECURITY.md index f4276bb89..ddd441d18 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,14 +1,10 @@ -IntelMQ Security Notes -====================== +# Found a security issue? -Found a security issue? ------------------------ - -In case you find security-relevant bugs in IntelMQ, please contact team@cert.at. +In case you find security-relevant bugs in IntelMQ, please contact [team@cert.at](mailto:team@cert.at). More information including the PGP key can be found on [CERT.at's website](https://www.cert.at/about/contact/contact_en.html). diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 38173eab3..000000000 --- a/docs/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-FileCopyrightText: 2020 Birger Schacht -# -# SPDX-License-Identifier: AGPL-3.0-or-later -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . 
-BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -clean: - rm -rf "$(BUILDDIR)" diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 9bbb68d65..000000000 --- a/docs/README.md +++ /dev/null @@ -1,10 +0,0 @@ - - -# Documentation at readthedocs! - -To read the documentation, please head over to [intelmq.readthedocs.io](https://intelmq.readthedocs.io/)! - -Instructions to build the documentation locally for development purposes can be found in the Developers Guide (section *Documentation*). diff --git a/docs/admin/beta-features.md b/docs/admin/beta-features.md new file mode 100644 index 000000000..56983cfd6 --- /dev/null +++ b/docs/admin/beta-features.md @@ -0,0 +1,180 @@ + + + +# Beta Features + +## Using Supervisor as a Process Manager + +!!! warning + Do not use it in production environments yet! It has not been tested thoroughly yet. + +[Supervisor](http://supervisord.org) is a process manager written in Python. The main advantage is that it takes care of +processes, so if a bot process exits with failure (exit code different than 0), supervisor tries to run it again. Another +advantage is that it does not require writing PID files. + +This was tested on Ubuntu 18.04. + +Install supervisor. `supervisor_twiddler` is an extension for supervisor that makes it possible to create processes +dynamically. (Ubuntu `supervisor` +package is currently based on Python 2, so `supervisor_twiddler` must be installed with Python 2 `pip`.) 
+ +```bash +apt install supervisor python-pip +pip install supervisor_twiddler +``` + +Create default config `/etc/supervisor/conf.d/intelmq.conf` and restart +`supervisor` service: + +```ini +[rpcinterface:twiddler] +supervisor.rpcinterface_factory=supervisor_twiddler.rpcinterface:make_twiddler_rpcinterface + +[group:intelmq] +``` + +Change IntelMQ process manager in the *global* configuration: + +```yaml +process_manager: supervisor +``` + +After this it is possible to manage bots like before with `intelmqctl` +command. + + +## Using AMQP Message Broker + +Starting with IntelMQ 1.2 the AMQP protocol is supported as message queue. To use it, install a broker, for example +RabbitMQ. The configuration and the differences are outlined here. Keep in mind that it is slower, but has better +monitoring capabilities and is more stable. The AMQP support is considered beta, so small problems might occur. So far, +only RabbitMQ as broker has been tested. + +You can change the broker for single bots (set the parameters in the runtime configuration per bot) or for the whole +botnet (using the global configuration). + +You need to set the parameter +`source_pipeline_broker`/`destination_pipeline_broker` to `amqp`. There are more parameters available: + + +**`destination_pipeline_broker`** + +(required, string) `"amqp"` + + +**`destination_pipeline_host`** + +() (default: `'127.0.0.1'`) + +**`destination_pipeline_port`** + +() (default: 5672) + +**`destination_pipeline_username`** + +() + +**`destination_pipeline_password`** + +() + +**`destination_pipeline_socket_timeout`** + +() (default: no timeout) + +**`destination_pipeline_amqp_exchange`** + +() Only change/set this if you know what you do. If set, the destination queues are not declared as queues, but used as routing key. (default: `''`). 
+ +**`destination_pipeline_amqp_virtual_host`** + +() (default: `'/'`) + +**`source_pipeline_host`** + +() (default: `'127.0.0.1'`) + + +**`source_pipeline_port`** + +() (default: 5672) + +**`source_pipeline_username`** + +() + +**`source_pipeline_password`** + +() + +**`source_pipeline_socket_timeout`** + +() (default: no timeout) + +**`source_pipeline_amqp_exchange`** + +() Only change/set this if you know what you do. If set, the destination queues are not declared as queues, but used as routing key. (default: `''`). + + +**`source_pipeline_amqp_virtual_host`** + +() (default: `'/'`) + +**`intelmqctl_rabbitmq_monitoring_url`** + +() string, see below (default: `"http://{host}:15672"`) + +For getting the queue sizes, `intelmqctl` needs to connect to the monitoring interface of RabbitMQ. If the monitoring +interface is not available under `http://{host}:15672` you can manually set it using the +parameter `intelmqctl_rabbitmq_monitoring_url`. In a RabbitMQ's default configuration you might not provide a user +account, as by default the administrator (`guest`:`guest`) allows full access from localhost. If you create a separate +user account, make sure to add the tag "monitoring" to it, otherwise IntelMQ can't fetch the queue sizes. + +![](../static/images/rabbitmq-user-monitoring.png) + +Setting the statistics (and cache) parameters is necessary when the local redis is running under a non-default +host/port. If this is the case, you can set them explicitly: + + +**`statistics_database`** + +() `3` + +**`statistics_host`** + +() `"127.0.0.1"` + +**`statistics_password`** + +() `null` + + +**`statistics_port`** + +() `6379` + +## Multithreading + +First of all: Do not use it in production environments yet! There are a few bugs, see below. + +Since IntelMQ 2.0 it is possible to provide the following runtime parameter: + +**`instances_threads`** + +Set it to a non-zero integer, then this number of worker threads will be spawned.
This is useful if bots often wait for +system resources or if network-based lookups are a bottleneck. + +However, there are currently a few caveats: + +- This is not possible for all bots, there are some exceptions (collectors and some outputs), see the `FAQ` for some reasons. +- Only use it with the AMQP pipeline, as with Redis, messages may get duplicated because there's only one internal + queue +- In the logs, you can see the main thread initializing first, then all of the threads which log with the + name `[bot-id].[thread-id]`. + + + + diff --git a/docs/admin/common-problems.md b/docs/admin/common-problems.md new file mode 100644 index 000000000..11463007d --- /dev/null +++ b/docs/admin/common-problems.md @@ -0,0 +1,121 @@ + + + +# Common Problems + +## IntelMQ + +### Permission denied when using Redis Unix socket + +If you get an error like this: + +``` +intelmq.lib.exceptions.PipelineError: pipeline failed - ConnectionError('Error 13 connecting to unix socket: /var/run/redis/redis.sock. Permission denied.',) +``` + +Make sure the intelmq user has sufficient permissions for the socket. + +In `/etc/redis/redis.conf` (or wherever your configuration is), check +the permissions and set it for example to group-writeable: + +``` +unixsocketperm 770 +``` + +And add the user intelmq to the redis-group: + +```bash +usermod -aG redis intelmq +``` + +### My bot(s) died on startup with no errors logged + +Rather than starting your bot(s) with `intelmqctl start`, try +`intelmqctl run [bot]`. This will provide valuable debug output you +might not otherwise see, pointing to issues like system configuration +errors. + +### Orphaned Queues + +This section has been moved to the [Management Guide](management/intelmq.md#orphaned-queues). + + +### Multithreading is not available for this bot + +Multithreading is not available for some bots and an AMQP broker is +necessary.
Possible reasons why a certain bot or a setup does not +support Multithreading include: + + - Multithreading is only available when using the AMQP broker. + - For most collectors, Multithreading is disabled. Otherwise this + would lead to duplicated data, as the data retrieval is not + atomic. + - Some bots use libraries which are not thread safe. Look at the + bot's documentation for more information. + - Some bots' operations are not thread safe. Look at the bot's + documentation for more information. + +If you think this mapping is wrong, please report a bug. + + +## IntelMQ API + + +### IntelMQCtlError + +If the command is not configured correctly, you will see exceptions on +startup like this: + +```bash +intelmq_manager.runctl.IntelMQCtlError: <ERROR_MESSAGE> +``` + +This means the intelmqctl command could not be executed as a subprocess. +The `<ERROR_MESSAGE>` should indicate why. + +### Access Denied / Authentication Required "Please provide valid Token verification credentials" + +If you see the IntelMQ Manager interface and menu, but the API calls to +the back-end querying configuration and status of IntelMQ fail with +"Access Denied" or "Authentication Required: Please provide valid +Token verification credentials" errors, you are maybe not logged in +while the API requires authentication. + +By default, the API requires authentication. Create user accounts and +login with them or - if you have other protection means in place - +deactivate the authentication requirement by removing or renaming the +`session_store` parameter in the configuration. + +### Internal Server Error + +There can be various reasons for internal server errors. You need to +look at the error log of your web server, for example +`/var/log/apache2/error.log` or `/var/log/httpd/error_log` for Apache 2. +It could be that the sudo-setup is not functional, the configuration +file or session database file can not be read or written or other errors +in regards to the execution of the API program.
+ +### Can I just install it from the deb/rpm packages while installing IntelMQ from a different source? + +Yes, you can install the API and the Manager from the deb/rpm +repositories, and install your IntelMQ from somewhere else, e.g. a +local repository. However, knowledge about Python and system +administration experience is recommended if you do so. + +The packages install IntelMQ to +`/usr/lib/python3*/site-packages/intelmq/`. Installing with `pip` +results in `/usr/local/lib/python3*/site-packages/intelmq/` (and some +other accompanying resources) which overrides the installation in +`/usr/lib/`. You probably need to adapt the configuration parameter +`intelmq_ctl_cmd` to the `/usr/local/bin/intelmqctl` executable and some +other tweaks. + +### sqlite3.OperationalError: attempt to write a readonly database + +SQLite does not only need write access to the database itself, but also +to the folder the database file is located in. Please check that the +webserver has write permissions to the folder the session file is +located in. \ No newline at end of file diff --git a/docs/admin/configuration/intelmq-api.md b/docs/admin/configuration/intelmq-api.md new file mode 100644 index 000000000..bc39c0863 --- /dev/null +++ b/docs/admin/configuration/intelmq-api.md @@ -0,0 +1,101 @@ + + + +# Configuring IntelMQ API + +Depending on your setup you might have to install `sudo` to make it +possible for the `intelmq-api` to run the `intelmq` command as the +user-account usually used to run `intelmq` (which is also often called +`intelmq`). + +`intelmq-api` is configured using a configuration file in `json` format. +`intelmq-api` tries to load the configuration file from +`/etc/intelmq/api-config.json` and +`${PREFIX}/etc/intelmq/api-config.json`, but you can override the path +by setting the environment variable `INTELMQ_API_CONFIG`.
(When using +Apache, you can do this by modifying the Apache configuration file +shipped with `intelmq-api`, the file contains an example.) + +When running the API using `hug`, you can set the environment variable +like this: + +```bash +INTELMQ_API_CONFIG=/etc/intelmq/api-config.json hug -m intelmq_api.serve +``` + +The default configuration which is shipped with the packages is also +listed here for reference: + +```json +{ + "intelmq_ctl_cmd": ["sudo", "-u", "intelmq", "intelmqctl"], + "allowed_path": "/opt/intelmq/var/lib/bots/", + "session_store": "/etc/intelmq/api-session.sqlite", + "session_duration": 86400, + "allow_origins": ["*"] +} +``` + +On Debian based systems, the default path for the `session_store` is +`/var/lib/dbconfig-common/sqlite3/intelmq-api/intelmqapi`, because the +Debian package uses the Debian packaging tools to manage the database +file. + +The following configuration options are available: + +- `intelmq_ctl_cmd`: Your `intelmqctl` command. If this is not set in + a configuration file the default is used, which is + `["sudo", "-u", "intelmq", "/usr/local/bin/intelmqctl"]`. The option + `"intelmq_ctl_cmd"` is a list of strings so that we can avoid + shell-injection vulnerabilities because no shell is involved when + running the command. This means that if the command you want to use + needs parameters, they have to be separate strings. +- `allowed_path`: intelmq-api can grant **read-only** access to + specific files - this setting defines the path those files can + reside in. +- `session_store`: this is an optional path to a sqlite database, + which is used for session storage and authentication. If it is not + set (which is the default), no authentication is used! +- `session_duration`: the maximal duration of a session, it's 86400 + seconds by default. +- `allow_origins`: a list of origins the responses of the API can be + shared with. Allows every origin by default.
+ +### Permissions + +`intelmq-api` tries to write a couple of configuration files in the +`${PREFIX}/etc/intelmq` directory - this is only possible if you set the +permissions accordingly, given that `intelmq-api` runs under a different +user. The user the API runs as also needs write access to the folder the +`session_store` is located in, otherwise there will be an error +accessing the session data. If you're using the default Apache 2 setup, +you might want to set the group of the files to `www-data` and give it +write permissions (`chmod -R g+w <directoryname>`). In addition to that, +the `intelmq-manager` tries to store the bot positions via the API into +the file `${PREFIX}/etc/intelmq/manager/positions.conf`. You should +therefore create the folder `${PREFIX}/etc/intelmq/manager` and the file +`positions.conf` in it. + +## Adding a user + +If you enable the `session_store` you will have to create user accounts +to be able to access the API functionality. You can do this using +`intelmq-api-adduser`: + +```bash +intelmq-api-adduser --user <username> --password <password> +``` + +## A note on SELinux + +On systems with SELinux enabled, the API will fail to call `intelmqctl`. +Therefore, SELinux needs to be disabled: + +```bash +setenforce 0 +``` + +We welcome contributions to provide SELinux policies. \ No newline at end of file diff --git a/docs/admin/configuration/intelmq-manager.md b/docs/admin/configuration/intelmq-manager.md new file mode 100644 index 000000000..f48f3876d --- /dev/null +++ b/docs/admin/configuration/intelmq-manager.md @@ -0,0 +1,68 @@ + + + +# Configuring IntelMQ Manager + +In the file `/usr/share/intelmq-manager/html/js/vars.js` set `ROOT` to the URL of your `intelmq-api` installation - by +default that's on the same host as `intelmq-manager`. + +## Configuration Paths + +The IntelMQ Manager queries the configuration file paths and directory names from `intelmqctl` and therefore any global +environment variables +(if set) are effective in the Manager too.
The interface for this query is `intelmqctl debug --get-paths`, the result is +also shown in the +`/about.html` page of your IntelMQ Manager installation. + +## CSP Headers + +It is recommended to set these two headers for all requests: + +``` +Content-Security-Policy: script-src 'self' +X-Content-Security-Policy: script-src 'self' +``` + +## Security considerations + +Never ever run intelmq-manager on a public webserver without SSL and proper authentication! + +The way the current version is written, anyone can send a POST request and change intelmq's configuration files via +sending HTTP POST requests. Intelmq-manager will reject non JSON data but nevertheless, we don't want anyone to be able +to reconfigure an intelmq installation. + +Therefore you will need authentication and SSL. Authentication can be handled by the `intelmq-api`. Please refer to its +documentation on how to enable authentication and setup accounts. + +Never ever allow unencrypted, unauthenticated access to IntelMQ Manager! + +### Docker: Security headers + +If you run our docker image in production, we recommend you to set security headers. You can do this by creating a new +file called +`example_config/nginx/security.conf` in the cloned `intelmq-docker` +repository. + +Write the following inside the configuration file, and change the +`http(s)://` to your domain name. 
+ +```bash +server_tokens off; # turn off server_token, instead of nginx/13.2 now it will only show nginx +add_header X-Frame-Options SAMEORIGIN; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options +add_header X-Content-Type-Options nosniff; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options +add_header X-XSS-Protection "1; mode=block"; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection +add_header Content-Security-Policy "script-src 'self' 'unsafe-inline' http(s)://; frame-src 'self' http(s)://; object-src 'self' http(s)://"; # https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP +``` + +After you created the file, edit the `docker-compose.yml` and mount it to the `nginx` with + +```yaml +volumes: + - ./example_config/nginx/security.conf:/etc/nginx/conf.d/security.conf +``` + +**IMPORTANT** Mount the exact name & not the directory, because otherwise you would overwrite the whole directory and +the other files would be gone inside the container. diff --git a/docs/admin/configuration/intelmq.md b/docs/admin/configuration/intelmq.md new file mode 100644 index 000000000..7c57d5575 --- /dev/null +++ b/docs/admin/configuration/intelmq.md @@ -0,0 +1,392 @@ + + + +# Configuring IntelMQ + +## Directories + +### LSB + +If you installed the packages, standard Linux paths (LSB paths) are used: + +- `/etc/intelmq/` (configurations) +- `/var/log/intelmq/` (logs) +- `/var/lib/intelmq/` (local states) +- `/var/run/intelmq/` (PID files) + +Otherwise, the configuration directory is `/opt/intelmq/etc/`. Using the environment variable `INTELMQ_ROOT_DIR` allows setting any arbitrary root directory. + +You can switch this by setting the environment variables `INTELMQ_PATHS_NO_OPT` and `INTELMQ_PATHS_OPT`, respectively. + +- When installing the Python packages, you can set `INTELMQ_PATHS_NO_OPT` to something non-empty to use LSB-paths. 
+- When installing the deb/rpm packages, you can set `INTELMQ_PATHS_OPT` to something non-empty to use `/opt/intelmq/` paths, or a path set with `INTELMQ_ROOT_DIR`. + +The environment variable `ROOT_DIR` is meant to set an alternative root directory instead of `/`. This is primarily meant for package build environments and is analogous to setuptools' `--root` parameter. Thus it is only used in LSB-mode. + +## Environment Variables + +| Name | Type | Description | +| ---- | ---- | ------------| +| `INTELMQ_PATHS_OPT` | | | +| `INTELMQ_PATHS_NO_OPT` | | | +| `INTELMQ_ROOT_DIR` | | | +| `ROOT_DIR` | | | + +## Configuration Files + +### `runtime.yaml` + +This is the main configuration file. It uses YAML format since IntelMQ 3.0. It consists of two parts: + +* Global Configuration +* Individual Bot Configuration + +!!! warning + Comments in YAML are currently not preserved by IntelMQ (known bug [#2003](https://github.com/certtools/intelmq/issues/2003)). + +An example `runtime.yaml` configuration file is installed by the tool `intelmqsetup`. If this is not the case, make sure the program was run. It is shipped preconfigured with 4 collectors and parsers, 6 common experts and one output bot. The default collector and the parser handle data from malware domain list, the file output bot writes all data to one of these files (based on your installation): + +- `/opt/intelmq/var/lib/bots/file-output/events.txt` + +- `/var/lib/intelmq/bots/file-output/events.txt` + +The `runtime.yaml` configuration is divided into two sections: + +- Global configuration which is applied to each bot. +- Individual bot configuration which overloads the global configuration and contains bot specific options. + +Example configuration snippet: + +```yaml +global: # global configuration section + # ... 
+ http_timeout_max_tries: 3 + http_timeout_sec: 30 + http_user_agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 + http_verify_cert: true + +blocklistde-apache-collector: # individual bot configuration section + group: Collector + name: Blocklist.de Apache List + module: intelmq.bots.collectors.http.collector_http + description: Blocklist.de Apache Collector fetches all IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. + parameters: + http_url: https://lists.blocklist.de/lists/apache.txt + name: Blocklist.de Apache + rate_limit: 3600 + http_verify_cert: false # overriding the global configuration for this particular bot +``` + +#### Global Configuration + +The global configuration parameters apply to all bots, however they can be overridden in the individual bot configuration. + +##### Logging + +The logging can be configured with the following parameters: + +**`logging_handler`** + +(required, string) Allowed values are `file` or `syslog`. + +**`logging_level`** + +(required, string) Allowed values are `CRITICAL`, `ERROR`, `WARNING`, `INFO` or `DEBUG`. Defines the system-wide log level that will be used by all bots and the intelmqctl tool. We recommend `logging_level` `WARNING` for production environments and +`INFO` if you want more details. In any case, watch your free disk space! + +**`logging_path`** + +(required, string) When the `logging_handler` is `file` this parameter is used to set the logging directory for all the bots as well as the `intelmqctl` tool. Defaults to `/opt/intelmq/var/log/` or `/var/log/intelmq/` respectively. + +**`logging_syslog`** + +(required, string) When the `logging_handler` is `syslog`. Either a list with hostname and UDP port of syslog service, e.g. `["localhost", 514]` or a device name/path. Defaults to `/var/log`. 
+ +##### Log Rotation + +To rotate the logs, you can use the standard Linux-tool logrotate. An example logrotate configuration is given +in `contrib/logrotate/` and delivered with all deb/rpm-packages. When not using logrotate, IntelMQ can rotate the logs +itself, which is not enabled by default! You need to set both values. + +**`logging_max_size`** + +(optional, integer) Maximum number of bytes to be stored in one logfile before the file is rotated. Defaults to 0 (log rotation disabled). + +**`logging_max_copies`** + +(optional, integer) Maximum number of logfiles to keep. Compression is not supported. Default is unset. + +Some information can as well be found in Python's documentation on the used +[RotatingFileHandler](https://docs.python.org/3/library/logging.handlers.html#logging.handlers.RotatingFileHandler). + +##### Error Handling + +**`error_log_message`** + +(required, boolean) Whether to write the message (Event/Report) to the log file in case of an error. + +**`error_log_exception`** + +(required, boolean) Whether to write an error exception to the log file in case of an error. + +**`error_procedure`** + +(required, string) Allowed values are `stop` or `pass`. In case of an error, this option defines the procedure that the bot will adopt. Use the following values: + + - `stop` - stop bot after retrying X times (as defined in `error_max_retries`) with a delay between retries (as defined in `error_retry_delay`). If the bot reaches the `error_max_retries` value, it will remove the message from the pipeline and stop. If the option `error_dump_message` is also enabled, the bot will dump the removed message to its dump file (to be found in var/log). + + - `pass` - will skip this message and will process the next message after retrying X times, removing the current message from pipeline. If the option `error_dump_message` is also enabled, then the bot will dump the removed message to its dump file. After max retries are reached, the rate limit is applied (e.g. 
a collector bot fetching an unavailable resource does not try forever). + +**`error_max_retries`** + +(required, integer) In case of an error, the bot will try to re-start processing the current message X times as + defined by this option. + +**`error_retry_delay`** + +(required, integer) Defines the number of seconds to wait between subsequent re-tries in case of an error. + +**`error_dump_message`** + +(required, boolean) Specifies if the bot will write queued up messages to its dump file (use intelmqdump to + re-insert the message). + +If the path `_on_error` exists for a bot, the message is also sent to this queue, instead of (only) dumping the file if +configured to do so. + +##### Miscellaneous + +**`load_balance`** + +(required, boolean) this option allows you to choose the behavior of the queue. Use the following values: + + - **true** - splits the messages into several queues without duplication + - **false** - duplicates the messages into each queue - When using AMQP as message broker, take a look at the `multithreading` section and the `instances_threads` parameter. + +**`rate_limit`** + +(required, integer) time interval (in seconds) between the processing of messages. + +**`ssl_ca_certificate`** + +(optional, string) trusted CA certificate for IMAP connections (supported by some bots). + +**`source_pipeline_broker`** + +(optional, string) Allowed values are `redis` and `amqp`. Selects the message broker IntelMQ should use. As this parameter can be overridden by each bot, this allows usage of different broker systems and hosts, as well as switching between them on the same IntelMQ instance. Defaults to `redis`. + + - **redis** - Please note that persistence has to be [manually activated](http://redis.io/topics/persistence). + - **amqp** - Using the AMQP broker is currently beta but there are no known issues. A popular AMQP broker is [RabbitMQ](https://www.rabbitmq.com/).
+ +**`destination_pipeline_broker`** + +(required, string) See `source_pipeline_broker`. + +**`source_pipeline_host`** + +(required, string) Hostname or path to Unix socket that the bot will use to connect and receive messages. + +**`source_pipeline_port`** + +(optional, integer) Broker port that the bot will use to connect and receive messages. Can be empty for Unix socket. + +**`source_pipeline_password`** + +(optional, string) Broker password that the bot will use to connect and receive messages. Can be null for unprotected broker. + +**`source_pipeline_db`** + +(required, integer) broker database that the bot will use to connect and receive messages (requirement from + redis broker). + +**`destination_pipeline_host`** + +(optional, string) broker IP, FQDN or Unix socket that the bot will use to connect and send messages. + +**`destination_pipeline_port`** + +(optional, integer) broker port that the bot will use to connect and send messages. Can be empty for + Unix socket. + +**`destination_pipeline_password`** + +(optional, string) broker password that the bot will use to connect and send messages. Can be null + for unprotected broker. + +**`destination_pipeline_db`** + +(required, integer) broker database that the bot will use to connect and send messages (requirement from + redis broker). + +**`http_proxy`** + +(optional, string) Proxy to use for HTTP. + +**`https_proxy`** + +(optional, string) Proxy to use for HTTPS. + +**`http_user_agent`** + +(optional, string) User-Agent to be used for HTTP requests. + +**`http_verify_cert`** + +(optional, boolean) Verify the TLS certificate of the server. Defaults to true. + +#### Individual Bot Configuration + +!!! info + For the individual bot configuration please see the [Bots](../../user/bots.md) document in the User Guide. + +##### Run Mode + +This section provides a more detailed explanation of the two run modes of the bots.
+ +###### Continuous + +Most of the cases, bots will need to be configured as `continuous` run mode (the default) in order to have them always +running and processing events. Usually, the types of bots that will require the continuous mode will be Parsers, Experts +and Outputs. To do this, set `run_mode` to +`continuous` in the `runtime.yaml` for the bot. Check the following example: + +```yaml +blocklistde-apache-parser: + name: Blocklist.de Parser + group: Parser + module: intelmq.bots.parsers.blocklistde.parser + description: Blocklist.DE Parser is the bot responsible to parse the report and sanitize the information. + enabled: false + run_mode: continuous + parameters: ... +``` + +You can now start the bot using the following command: + +```bash +intelmqctl start blocklistde-apache-parser +``` + +Bots configured as `continuous` will never exit except if there is an error and the error handling configuration +requires the bot to exit. See the Error Handling section for more details. + +###### Scheduled + +In many cases, it is useful to schedule a bot at a specific time (i.e. via cron(1)), for example to collect information +from a website every day at midnight. To do this, set `run_mode` to `scheduled` in the +`runtime.yaml` for the bot. Check out the following example: + +```yaml +blocklistde-apache-collector: + name: Generic URL Fetcher + group: Collector + module: intelmq.bots.collectors.http.collector_http + description: All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. + enabled: false + run_mode: scheduled + parameters: + feed: Blocklist.de Apache + provider: Blocklist.de + http_url: https://lists.blocklist.de/lists/apache.txt + ssl_client_certificate: null +``` + +You can schedule the bot with a crontab-entry like this: + +``` +0 0 * * * intelmqctl start blocklistde-apache-collector +``` + +Bots configured as `scheduled` will exit after the first successful run. 
Setting `enabled` to `false` will cause the bot +to not start with +`intelmqctl start`, but only with an explicit start, in this example +`intelmqctl start blocklistde-apache-collector`. + +##### Additional Runtime Parameters + +Some of the parameters are deliberately skipped from the User Guide because they are configured via the graphical user interface provided by the IntelMQ Manager. These parameters have to do with configuring the pipeline: defining how the data is exchanged between the bots. Using the IntelMQ Manager for this has many benefits as it guarantees that the configuration is correct upon saving. + +However, as an administrator you should also be familiar with the manual (and somewhat tedious) configuration. For each bot there are two parameters that need to be set: + +**`source_queue`** + +(optional, string) The name of the source queue from which the bot is going to process data. Each bot has at most one source queue (collector bots don't have any source queue as they fetch data from elsewhere). Defaults to the bot id appended with the string `-queue`. + +Example: a bot with id `example-bot` will have a default source queue named `example-bot-queue`. + + +**`destination_queues`** + +(optional, object) Bots can have multiple destination queues. Destination queues can also be grouped into **named paths**. There are two special path names `_default` and `_on_error`. The path `_default` is used if the path is not specified by the bot itself (which is the most common case). In case of an error during the processing, the message will be sent to the `_on_error` path if specified (optional). + +Only a few of the bots (mostly expert bots with filtering capabilities) can take advantage of arbitrarily named paths. Some expert bots are capable of sending messages to paths; this feature is explained in their documentation, e.g.
the [Filter](../../user/bots.md#intelmq.bots.experts.filter.expert) expert and the [Sieve](../../user/bots.md#intelmq.bots.experts.sieve.expert) expert. + +Example: + +```yaml +blocklistde-apache-collector: + # ... + parameters: + # ... + destination_queues: + _default: + - + - + _on_error: + - + - + other-path: + - + - +``` + +### `harmonization.conf` + +This configuration is used to specify the fields for all message types. The harmonization library will load this configuration to check, during the message processing, if the values are compliant to the configured harmonization format. +Usually, this configuration doesn't need any change. It is mostly maintained by the IntelMQ maintainers. + +**Template:** + +```json +{ + "": { + "": { + "description": "", + "type": "" + }, + "": { + "description": "", + "type": "" + } + } +} +``` + +**Example:** + +```json +{ + "event": { + "destination.asn": { + "description": "The autonomous system number from which originated the connection.", + "type": "Integer" + }, + "destination.geolocation.cc": { + "description": "Country-Code according to ISO3166-1 alpha-2 for the destination IP.", + "regex": "^[a-zA-Z0-9]{2}$", + "type": "String" + } + } +} +``` + + + + + + + + diff --git a/docs/admin/database/elasticsearch.md b/docs/admin/database/elasticsearch.md new file mode 100644 index 000000000..330222eee --- /dev/null +++ b/docs/admin/database/elasticsearch.md @@ -0,0 +1,141 @@ + + + +# Using Elasticsearch as a database for IntelMQ + +If you wish to run IntelMQ with Elasticsearch or full ELK stack (Elasticsearch, Logstash, Kibana) it is entirely +possible. This guide assumes the reader is familiar with basic configuration of ELK and does not aim to cover using ELK +in general. It is based on the version 6.8.0 (ELK is a fast moving train therefore things might change). Assuming you +have IntelMQ (and Redis) +installation in place, lets dive in. + +## Configuration without Logstash + +This case involves two steps: + +1. 
Configure IntelMQ to output data directly into Elasticsearch. + +2. Configure Elasticsearch for ingesting the inserted data. + +## Configuration with Logstash + +This case involves three steps: + +1. Configuring IntelMQ to output data to Redis. + +2. Configure Logstash to collect data from Redis and insert them into Elasticsearch. + +3. Configure Elasticsearch for ingesting the inserted data. + +Each step is described in detail in the following sections. + +### Configuring IntelMQ + +In order to pass IntelMQ events to Logstash we will utilize already installed Redis. Add a new Redis Output Bot to your +pipeline. As the minimum fill in the following parameters: `bot-id`, `redis_server_ip` (can be hostname) +, `redis_server_port`, `redis_password` (if required, else set to empty!), `redis_queue` (name for the queue). It is +recommended to use a different `redis_db` parameter than used by the IntelMQ (specified as `source_pipeline_db` +, `destination_pipeline_db` and `statistics_database`). + +Example values: + +```yaml +bot-id: redis-output +redis_server_ip: 10.10.10.10 +redis_server_port: 6379 +redis_db: 4 +redis_queue: logstash-queue +``` + +!!! warning + You will not be able to monitor this redis queue via IntelMQ Manager. + +### Configuring Logstash + +Logstash defines pipelines as well. In the pipeline configuration of Logstash you need to specify where it should look +for IntelMQ events, what to do with them and where to pass them. + +#### Input + +This part describes how to receive data from Redis queue. See the example configuration and comments below: + +``` +input { + redis { + host => "10.10.10.10" + port => 6379 + db => 4 + data_type => "list" + key => "logstash-queue" + } +} +``` + +- `host` - same as redis_server_ip from the Redis Output Bot +- `port` - the redis_server_port from the Redis Output Bot +- `db` - the redis_db parameter from the Redis Output Bot +- `data_type` - set to `list` +- `key` - same as redis_queue from the Redis Output Bot + +!!! 
tip + You can use environment variables for the Logstash configuration, for example `host => "${REDIS_HOST:10.10.10.10}"`. The value will be taken from the environment variable `$REDIS_HOST`. If the environment variable is not set then the default value of `10.10.10.10` will be used instead. + +#### Filter (optional) + +Before passing the data to the database you can apply certain changes. This is done with filters. See an example: + +``` +filter { + mutate { + lowercase => ["source.geolocation.city", "classification.identifier"] + remove_field => ["__type", "@version"] + } + date { + match => ["time.observation", "ISO8601"] + } +} +``` + +!!! tip + It is recommended to use the `date` filter: generally we have two timestamp fields - `time.source` (provided by the feed source this can be understood as when the event happened; however it is not always present) and `time.observation` (when IntelMQ collected this event). Logstash also adds another field `@timestamp` with time of processing by Logstash. While it can be useful for debugging, I recommend to set the `@timestamp` to the same value as `time.observation`. + +!!! warning + It is not recommended to apply any modifications to the data (within the `mutate` key) outside of the IntelMQ. All necessary modifications should be done only by appropriate IntelMQ bots. This example only demonstrates the possibility. + +#### Output + +The pipeline also needs output, where we define our database +(Elasticsearch). The simplest way of doing so is defining an output like this: + +``` +output { + elasticsearch { + hosts => ["http://10.10.10.11:9200", "http://10.10.10.12:9200"] + index => "intelmq-%{+YYYY.MM}" + } +} +``` + +- `hosts` - Elasticsearch host (or more) with the correct port (9200 by default) +- `index` - name of the index where to insert data + +!!! tip + Authors experience, hardware equipment and the amount of events collected led to having a separate index for each month. 
This might not necessarily suit your needs, but it is a suggested option. + +!!! warning + By default the ELK stack uses insecure HTTP. It is possible to set up Security for secure connections and basic user management. This is possible with the Basic (free) licence since versions 6.8.0 and 7.1.0. + +### Configuring Elasticsearch + +Configuring Elasticsearch is entirely up to you and should be done in consultation with +the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/index.html). What you will most +likely need is something +called [index template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) +mappings. IntelMQ provides a tool for generating such mappings. See +[ElasticMapper Tool](https://github.com/certtools/intelmq/tree/develop/contrib/elasticsearch/README.md). + +!!! danger + Default installation of Elasticsearch database allows anyone with cURL and connection capability to have administrative access to the database. Make sure you secure your toys! diff --git a/docs/admin/database/postgresql.md b/docs/admin/database/postgresql.md new file mode 100644 index 000000000..6a478d7af --- /dev/null +++ b/docs/admin/database/postgresql.md @@ -0,0 +1,182 @@ + + +# Using PostgreSQL as a database for IntelMQ + +The EventDB is a database (usually [PostgreSQL](https://postgresql.org/)) that +gets filled with data from IntelMQ using the SQL Output +Bot. + +## intelmq_psql_initdb + +IntelMQ comes with the `intelmq_psql_initdb` command line tool designed to help with creating the +EventDB. It primarily creates: + +- A `CREATE TABLE events` statement with all valid IntelMQ fields as columns and correct types +- Several indexes as examples for a good read & search performance + +Having an `events` table as outlined in the SQL file, IntelMQ's SQL Output Bot can write all received events into this database table.
+ +In addition, the script supports some additional features supporting use cases described later in this document: + +- `--partition-key` - for generating schema aligned with TimescaleDB or partitioned tables, +- `--separate-raws` - for generating views and triggers needed to `eventdb_raws_table` (works also together with adjustments for partitioning). + +For a full list of supported parameters, call the script help using `-h` parameter. + +All elements of the generated SQL file can be adapted and extended before running the SQL file against a database, especially the indexes. Please review the generated script before applying. + +Be aware that if you create tables using another DB user that is used later by the output bot, you may need to adjust ownership or privileges in the database. If you have problems with database permissions, +refer to `PostgreSQL documentation `. + +## EventDB Utilities + +Some scripts related to the EventDB are located in the +[contrib/eventdb](https://github.com/certtools/intelmq/tree/develop/contrib/eventdb) +folder in the IntelMQ git repository. + +### Apply Malware Name Mapping + +The `apply_mapping_eventdb.py` script applies the malware +name mapping to the EventDB. Source and destination columns can be +given, also a local file. If no local file is present, the mapping can +be downloaded on demand. It queries the database for all distinct +malware names with the taxonomy "malicious-code" and sets another +column to the malware family name. + +### Apply Domain Suffix + +The `apply_domain_suffix.py` script writes the public +domain suffix to the `source.domain_suffix` / +`destination.domain_suffix` columns, extracted from +`source.fqdn` / `destination.fqdn`. + +#### Usage + +The Python scripts can connect to a PostgreSQL server with an +`eventdb` database and an `events` table. The +command line arguments interface for both scripts are the same. 
See +`--help` for more information: + +```bash +apply_mapping_eventdb.py -h +apply_domain_suffix.py -h +``` + +### PostgreSQL trigger + +PostgreSQL trigger is a trigger keeping track of the oldest +inserted/updated "time.source" data. This can be useful to +(re-)generate statistics or aggregation data. + +The SQL script can be executed in the database directly. + +## EventDB Statistics + +The EventDB provides a great base for statistical analysis of the data. + +The [eventdb-stats +repository](https://github.com/wagner-certat/eventdb-stats) contains a +Python script that generates an HTML file and includes the [Plotly +JavaScript Open Source Graphing +Library](https://plotly.com/javascript/). By modifying the configuration +file it is possible to configure various queries that are then displayed +using graphs: + +![EventDB Statistics Example](../../static/images/eventdb_stats.png) + +## Using EventDB with Timescale DB + +[Timescale DB](https://www.timescale.com/) is a PostgreSQL extension to +add time-series support, which is quite handy as you don't have to learn +other syntaxes as you already know. You can use the SQL Queries as +before, the extension will handle the rest. To see all limitations, +please check the [Timescale DB +Documentation](https://docs.timescale.com/timescaledb/latest/). + +### What is time-series? + +Time-series has been invented as traditional database design like +relational or nosql are not made for time-based data. A big benefit of +time-series instead of other database designs over a time-based search +pattern is the performance. As IntelMQ uses data based upon time, this +design is awesome & will give you a performance boost. + +### How to choose the time column? + +To utilize the time-series, choose a column containing the right time. This is then +used by you for manual queries and graphs, and also by the database itself for organizing the data. 
+ +An Event has two fields that can be used for this: +`time.source` or `time.observation`. Depending on your needs (tracking when the event occurred or when it was detected, if different), choose one of them. + +You can use the `intelmq_psql_initdb` tool to generate SQL schema valid for TimescaleDB by passing the partitioning key: + +``` +intelmq_psql_initdb --partition-key "time.source" +``` + +### How to setup + +Thanks to TimescaleDB it's very easy to set up. + +1. Choose your preferred +[Timescale +DB](https://docs.timescale.com/timescaledb/latest/how-to-guides/install-timescaledb/self-hosted/) +environment & follow the installation instructions. 2. Now let's create a +[hypertable](https://docs.timescale.com/api/latest/hypertable/create_hypertable/), +which is the timescale DB time-series structure. +`SELECT create_hypertable('', 'time.source');`. 3. Now our hypertable is +set up & timescaleDB takes care of the rest. You can perform queries as +usual, for further information please check [Timescale DB +Documentation](https://docs.timescale.com/timescaledb/latest/). + +### How to upgrade from my existing database? + +To update your existing database to use this awesome time-series +feature, just follow the `How to setup` instruction. You can perform the +`hypertable` command even on already existing databases. **BUT** there +are [some limitations](https://docs.timescale.com/timescaledb/latest/overview/limitations/) +from timescaleDB. + +## Separating raw values in PostgreSQL using view and trigger + +In order to reduce the row size in the events table, the +`raw` column's data can be separated from the other +columns. While the raw-data is about 30-50% of the data row's size, it +is not used in most database queries, as it serves only a backup +functionality. Other possibilities to reduce or get rid of this +field are described in the FAQ, section +`faq-remove-raw-data`.
+ +The steps described here are best performed before the +`events` table is filled with data, but can as well be done +with existing data. + +The approach requires five steps: + +1. An existing `events` table, see the first section of + this document. +2. Deleting or renaming the `raw` column of the + `events` table. +3. Creating a table `raws` which holds only the + `raw` field of the events and linking both tables using + the `event_id`. +4. Creating the view `v_events` which joins the tables + `events` and `raws`. +5. Creating the function `process_v_events_insert` and + `INSERT` trigger `tr_events`. + +The last steps bring us several advantages: + +- All `INSERT` statements can contain all data, including + the `raw` field. +- No code changes are needed in the IntelMQ output bot or your own + scripts. A migration is seamless. +- PostgreSQL itself ensures that the data of both tables is consistent + and linked correctly. + +The complete SQL script can be generated using the `intelmq_psql_initdb` tool. It does *not* cover step 2 to avoid accidental +data loss - you need to do this step manually. diff --git a/docs/admin/database/splunk.md b/docs/admin/database/splunk.md new file mode 100644 index 000000000..b3342af2d --- /dev/null +++ b/docs/admin/database/splunk.md @@ -0,0 +1,12 @@ + + + +# Sending IntelMQ events to Splunk + +1. Go to Splunk and configure it to receive + logs (IntelMQ events) on a TCP port. +2. Use the TCP output bot and configure it according to the Splunk + configuration that you applied. diff --git a/docs/admin/database/todo_sort.md b/docs/admin/database/todo_sort.md new file mode 100644 index 000000000..55e9cc3a1 --- /dev/null +++ b/docs/admin/database/todo_sort.md @@ -0,0 +1,82 @@ + + + +### PostgreSQL + +You have two basic choices to run PostgreSQL: + +1. on the same machine as intelmq, then you could use Unix sockets if available on your platform +2. on a different machine.
In which case you would need to use a TCP connection and make sure you give the right + connection parameters to each psql or client call. + +Make sure to consult your PostgreSQL documentation about how to allow network connections and authentication in case 2. + +**PostgreSQL Version** + +Any supported version of PostgreSQL should work (v>=9.2 as of Oct 2016) [[1]](https://www.postgresql.org/support/versioning/). + +If you use PostgreSQL server v >= 9.4, it gives you the possibility to use the time-zone [formatting string](https://www.postgresql.org/docs/9.4/static/functions-formatting.html) "OF" for date-times and the [GiST index for the CIDR type](https://www.postgresql.org/docs/9.4/static/release-9-4.html#AEN120769). This may be useful depending on how you plan to use the events that this bot writes into the database. + +**How to install** + +Use `intelmq_psql_initdb` to create initial SQL statements from `harmonization.conf`. The script will create the +required table layout and save it as `/tmp/initdb.sql`. + +You need a PostgreSQL database-user to own the result database. The recommendation is to use the name `intelmq`. +There may already be such a user for the PostgreSQL database-cluster to be used by other bots. (For example from +setting up the expert/certbund_contact bot.) + +Therefore if still necessary: create the database-user as postgresql superuser, which usually is done via the system +user `postgres`: + +```bash +createuser --no-superuser --no-createrole --no-createdb --encrypted --pwprompt intelmq +``` + +Create the new database: + +```bash +createdb --encoding='utf-8' --owner=intelmq intelmq-events +``` + +(The encoding parameter should ensure the right encoding on platforms where this is not the default.)
+ +Now initialize it as database-user `intelmq` (in this example a network connection to localhost is used, so you would +get to test if the user `intelmq` can authenticate): + +```bash +psql -h localhost intelmq-events intelmq .read /tmp/initdb.sql +``` + +Then, set the [database] parameter to the [your-db.db] file path. + +::: {#stomp output bot} +::: {#intelmq.bots.outputs.stomp.output} +**MSSQL** +::: +::: + +For MSSQL support, the library [pymssql>=2.2] is required. \ No newline at end of file diff --git a/docs/admin/faq.md b/docs/admin/faq.md new file mode 100644 index 000000000..92dce512e --- /dev/null +++ b/docs/admin/faq.md @@ -0,0 +1,103 @@ + + + +# Frequently asked questions + +## How can I improve the speed? + +In most cases the bottlenecks are look-up experts. In these cases you +can easily use the integrated load balancing features. + +### Multithreading + +When using the AMQP broker, you can make use of Multi-threading. See the +`multithreading` section. + +### "Classic" load-balancing (Multiprocessing) + +Before Multithreading was available in IntelMQ, and in case you use +Redis as broker, the only way to do load balancing involves more work. +Create multiple instances of the same bot and connect them all to the +same source and destination bots. Then set the parameter `load_balance` +to `true` for the bot which sends the messages to the duplicated bot. +Then, the bot sends messages to only one of the destination queues and +not to all of them. + +True Multi*processing* is not available in IntelMQ. See also this +`discussion on a possible enhanced load balancing <186>`. + +### Other options + +For any bottleneck based on (online) lookups, optimize the lookup itself +and if possible use local databases. + +It is also possible to use multiple servers to spread the workload. 
To +get the messages from one system to the other you can either directly +connect to the other's pipeline or use a fast exchange mechanism such +as the TCP Collector/Output (make sure to secure the network by other +means). + +### Removing raw data for higher performance and less space usage
+ +If you do not need the raw data, you can safely remove it. For events +(after parsers), it keeps the original data, e.g. a line of a CSV file. +In reports it keeps the actual data to be parsed, so don't delete the +raw field in Reports - between collectors and parsers. + +The raw data consumes about 30% - 50% of the messages' size. The size +of course depends on how much additional data you add to it and how much +data the report includes. Dropping it will improve the speed as less +data needs to be transferred and processed at each step. + +**In a bot** + +You can do this for example by using the *Field Reducer Expert*. The +configuration could be: + +- `type`: `blacklist` +- `keys`: `raw` + +Other solutions are the *Modify* bot and the *Sieve* bot. The last one +is a good choice if you already use it and you only need to add the +command: + +``` +remove raw +``` + +**In the database** + +In case you store data in the database and you want to keep its size +small, you can (periodically) delete the raw data there. + +To remove the raw data for an events table of a PostgreSQL database, you +can use something like: + +```sql +UPDATE events SET raw = NULL WHERE "time.source" < '2018-07-01'; +``` + +If the database is big, make sure to only update small parts of the +database by using an appropriate `WHERE` clause. If you do not see any +negative performance impact, you can increase the size of the chunks, +otherwise the events in the output bot may queue up. The `id` column can +also be used instead of the source's time. + +Another way of reducing the `raw`-data from the database is described in +the EventDB documentation: `eventdb_raws_table`. + +### How to Uninstall + +If you installed intelmq with native packages: Use the package management tool to remove the package `intelmq`. These +tools do not remove configuration by default.
+ +If you installed manually via pip (note that this also deletes all configuration and possibly data): + +```bash +pip3 uninstall intelmq +rm -r /opt/intelmq +``` + diff --git a/docs/admin/hardware-requirements.md b/docs/admin/hardware-requirements.md new file mode 100644 index 000000000..064bff8d5 --- /dev/null +++ b/docs/admin/hardware-requirements.md @@ -0,0 +1,109 @@ + + + +# Hardware Requirements + +Are you asking yourself how much RAM you need to give your new IntelMQ +virtual machine? + +The honest answer is simple and pointless: It depends ;) + + +## IntelMQ and the messaging queue (broker) + +IntelMQ uses a messaging queue to move the messages between the bots. +All bot instances can only process one message at a time, therefore all +other messages need to wait in the queue. As not all bots are equally +fast, the messages will naturally "queue up" before the slower ones. +Further, parsers produce many events with just one message (the report) +as input. + +The following estimations assume Redis as messaging broker which is the +default for IntelMQ. When RabbitMQ is used, the required resources will +differ, and RabbitMQ can handle system overload and therefore a shortage +of memory. + +As Redis stores all data in memory, the data which is processed at any +point in time must fit there, including overheads. Please note that +IntelMQ does *neither store nor cache* any input data. These estimates +therefore only relate to the processing step, not the storage. + +For a minimal system, these requirements suffice: + +- 4 GB of RAM +- 2 CPUs +- 10 GB disk size + +**Depending on your data input, you will need twenty times the +input data size as memory for processing.** + +When using [Redis persistence](http://redis.io/topics/persistence), you +will additionally need twice as much memory for Redis.
+ +### Disk space + +Disk space is only relevant if you save your data to a file, which is +not recommended for production setups, and only useful for testing and +evaluation. + +Do not forget to rotate your logs or use syslog, especially if you use +the logging level "DEBUG". logrotate is in use by default for all +installation with deb/rpm packages. When other means of installation are +used (pip, manual), configure log rotation manually. See +[logging configuration](configuration/intelmq.md#logging). + +### Background on memory + +For experimentation, we used multiple Shadowserver Poodle reports for +demonstration purpose, totaling in 120 MB of data. All numbers are +estimates and are rounded. In memory, the report data requires 160 MB. +After parsing, the memory usage increases to 850 MB in total, as every +data line is stored as JSON, with additional information plus the +original data encoded in Base 64. The further processing steps depend on +the configuration, but you can estimate that caches (for lookups and +deduplication) and other added information cause an additional size +increase of about 2x. Once a dataset finished processing in IntelMQ, it +is no longer stored in memory. Therefore, the memory is only needed to +catch high load. + +The above numbers result in a factor of 14 for input data size vs. +memory required by Redis. Assuming some overhead and memory for the +bots' processes, a factor of 20 seems sensible. + +To reduce the amount of required memory and disk size, you can +optionally remove the `raw` data field, see this +[section](faq.md#faq-remove-raw) in the FAQ. + +## Additional components + +If some of the optional components are in use, they can add +additional hardware requirements. + +Those components do not add relevant requirements: + +- IntelMQ API: It is just an API for `intelmqctl`. +- IntelMQ Manager: Only contains static files served by the webserver. +- IntelMQ Webinput CSV: Just a webinterface to insert data. 
Requires + the amount of processed data to fit in memory, see above. +- Stats Portal: The aggregation step and Grafana require some + resources, but no exact numbers are known. +- Malware Name Mapping +- Docker: The docker layer adds only minimal hardware requirements. + +### Database + +When storing data in databases (such as MongoDB, PostgreSQL, +ElasticSearch), it is recommended to do this on separate machines for +operational reasons. Using a different machine results in a separation +of stream processing from data storage and allows for a specialized system +optimization for both use-cases. + +### IntelMQ cb mailgen + +While the Fody backend and frontend do not have significant +requirements, the [RIPE import tool of the +certbund-contact](https://github.com/Intevation/intelmq-certbund-contact/blob/master/README-ripe-import.md) +requires about 8 GB of memory as of March 2021. diff --git a/docs/admin/installation/dockerhub.md b/docs/admin/installation/dockerhub.md new file mode 100644 index 000000000..9db54343f --- /dev/null +++ b/docs/admin/installation/dockerhub.md @@ -0,0 +1,118 @@ + + + +# Installation from DockerHub + +This guide provides instructions on how to install IntelMQ and its components using Docker. + +!!! warning + Docker installation is currently in Beta state and things might break. Consider this if you plan to use IntelMQ as a production level system. + +!!! warning + Currently you can't manage your botnet via `intelmqctl` command line tool. You need to use [IntelMQ-Manager](https://github.com/certtools/intelmq-manager) currently! + +The latest IntelMQ image is hosted on [Docker Hub](https://hub.docker.com/r/certat/intelmq-full) and the image build instructions are in our [intelmq-docker repository](https://github.com/certat/intelmq-docker). + +Follow [Docker Install](https://docs.docker.com/engine/install/) and +[Docker-Compose Install](https://docs.docker.com/compose/install/) +instructions. 
+ +Before you start using docker-compose or any docker related tools, make +sure docker is running: + +```bash +# To start the docker daemon +systemctl start docker.service +# To enable the docker daemon for the future +systemctl enable docker.service +``` + +## Docker with docker-compose + +Now we can download IntelMQ and start the containers. Navigate to your +preferred installation directory and run the following commands: + +```bash +git clone https://github.com/certat/intelmq-docker.git --recursive +cd intelmq-docker +sudo docker-compose pull +sudo docker-compose up +``` + +Your installation should be successful now. You're now able to visit +`http://127.0.0.1:1337/` to access the intelmq-manager. You have to +log in with the username `intelmq` and the password `intelmq`. If you +want to change the username or password, you can do this by adding the +environment variables `INTELMQ_API_USER` for the username and +`INTELMQ_API_PASS` for the password. + + +!!! note + If you get a **Permission denied** error, you should run `chown -R $USER:$USER example_config` + + + +## Docker without docker-compose + +If not already installed, please install +[Docker](https://docs.docker.com/get-docker/). + +Navigate to your preferred installation directory and run +`git clone https://github.com/certat/intelmq-docker.git --recursive`. + +You need to prepare some volumes & configs. Edit the left-side after -v, +to change paths. + +Change `redis_host` to a running redis-instance. Docker will resolve it +automatically. All containers are connected using [Docker +Networks](https://docs.docker.com/engine/tutorials/networkingcontainers/). 
+ +In order to work with your current infrastructure, you need to specify +some environment variables: + +```bash +sudo docker pull redis:latest + +sudo docker pull certat/intelmq-full:latest + +sudo docker pull certat/intelmq-nginx:latest + +sudo docker network create intelmq-internal + +sudo docker run -v ~/intelmq/example_config/redis/redis.conf:/redis.conf \ + --network intelmq-internal \ + --name redis \ + redis:latest + +sudo docker run --network intelmq-internal \ + --name nginx \ + certat/intelmq-nginx:latest + +sudo docker run -e INTELMQ_IS_DOCKER="true" \ + -e INTELMQ_SOURCE_PIPELINE_BROKER="redis" \ + -e INTELMQ_PIPELINE_BROKER="redis" \ + -e INTELMQ_DESTINATION_PIPELINE_BROKER="redis" \ + -e INTELMQ_PIPELINE_HOST=redis \ + -e INTELMQ_SOURCE_PIPELINE_HOST=redis \ + -e INTELMQ_DESTINATION_PIPELINE_HOST=redis \ + -e INTELMQ_REDIS_CACHE_HOST=redis \ + -v $(pwd)/example_config/intelmq/etc/:/etc/intelmq/etc/ \ + -v $(pwd)/example_config/intelmq-api/config.json:/etc/intelmq/api-config.json \ + -v $(pwd)/intelmq_logs:/etc/intelmq/var/log \ + -v $(pwd)/intelmq_output:/etc/intelmq/var/lib/bots \ + -v ~/intelmq/lib:/etc/intelmq/var/lib \ + --network intelmq-internal \ + --name intelmq \ + certat/intelmq-full:latest +``` + +If you want to use another username and password for the intelmq-manager +/ api login, additionally add two new environment variables. + +```bash +-e INTELMQ_API_USER="your username" +-e INTELMQ_API_PASS="your password" +``` diff --git a/docs/admin/installation/linux-packages.md b/docs/admin/installation/linux-packages.md new file mode 100644 index 000000000..9973c7fc9 --- /dev/null +++ b/docs/admin/installation/linux-packages.md @@ -0,0 +1,106 @@ + + + +# Installation as Linux package + +This guide provides instructions on how to install IntelMQ and its components from a Linux distribution's package repository. + +!!! note + Some bots may have additional dependencies which are mentioned in their own documentation. 
+ +## Supported OS + +Native packages are currently provided for the following Linux distributions: + +- **Debian 11** (bullseye) +- **Debian 12** (bookworm) +- **openSUSE Tumbleweed** +- **openSUSE Leap 15.5** +- **Ubuntu 20.04** (focal fossa) +- **Ubuntu 22.04** (jammy jellyfish) + +### Debian 11 and 12 + +Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): + +```bash +echo "deb http://download.opensuse.org/repositories/home:/sebix:/intelmq/Debian_$(lsb_release -rs)/ /" | sudo tee /etc/apt/sources.list.d/intelmq.list +curl -fsSL "https://download.opensuse.org/repositories/home:sebix:intelmq/Debian_$(lsb_release -rs)/Release.key" | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/intelmq.gpg > /dev/null +sudo apt update +sudo apt install intelmq intelmq-api intelmq-manager +``` + +### openSUSE Tumbleweed + +Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): + +```bash +zypper addrepo https://download.opensuse.org/repositories/home:sebix:intelmq/openSUSE_Tumbleweed/home:sebix:intelmq.repo +zypper refresh +zypper install intelmq intelmq-api intelmq-manager +``` + +### Ubuntu 20.04 + +For Ubuntu you must enable the Universe repository which provides community-maintained free and open-source software. + +Add the repository to the package manager and install IntelMQ (packages `intelmq-api` and `intelmq-manager` are optional): + +1. Open the file `/etc/apt/sources.list` in an editor of your choice. Use `sudo` or the `root` user. + +2. Append `universe` to this line: +``` +deb http://[...].archive.ubuntu.com/ubuntu/ focal main universe +``` + +3. 
Update the list of available packages and install IntelMQ: +```bash +sudo apt update +sudo apt install intelmq intelmq-api intelmq-manager +``` + + + +[comment]: <> (## Installing IntelMQ API (optional)) + +[comment]: <> ([intelmq-api] requires the IntelMQ package to be installed on the system (it uses `intelmqctl` to control the botnet).) + +[comment]: <> (You can install the `intelmq-api` package using your preferred system package installation mechanism or using the `pip`) + +[comment]: <> (Python package installer. We provide packages for the [intelmq-api] for the same operating systems as we do for) + +[comment]: <> (the [intelmq] package itself. For the list of supported distributions, please see the intelmq) + +[comment]: <> (`installation`{.interpreted-text role="doc"} page.) + +[comment]: <> (Our repository page) + +[comment]: <> (gives [installation instructions for various operating systems](https://software.opensuse.org/download.html?project=home:sebix:intelmq&package=intelmq-api)) + +[comment]: <> (. No additional set-up steps are needed if you use these packages.) + +[comment]: <> (The [intelmq-api] provides the route `/api` for managing the IntelMQ installation.) + +[comment]: <> (## Installing IntelMQ Manager (optional)) + +[comment]: <> (To use the IntelMQ Manager web interface, it is required to have a working IntelMQ and IntelMQ API installation.) + +[comment]: <> (As the repositories are already set-up on your system, you can simply install the package `intelmq-manager`.) + +[comment]: <> (Our repository page) + +[comment]: <> (gives [installation instructions for various operating systems](https://software.opensuse.org/download.html?project=home:sebix:intelmq&package=intelmq-manager)) + +[comment]: <> (. No additional set-up steps are needed.) 
+ +[comment]: <> (The webserver configuration (which is also shown below) for Apache will be automatically installed and the HTML files) + +[comment]: <> (are stored under) + +[comment]: <> (`/usr/share/intelmq-manager/html`.) + +[comment]: <> (The webinterface is now available at `http://localhost/intelmq-manager`.) + diff --git a/docs/admin/installation/pypi.md b/docs/admin/installation/pypi.md new file mode 100644 index 000000000..ea840c886 --- /dev/null +++ b/docs/admin/installation/pypi.md @@ -0,0 +1,168 @@ + + + +# Installation from PyPI + +This guide provides instruction on how to install IntelMQ and it's components using the Python Package Index (PyPI) +repository. + +!!! note + Some bots may have additional dependencies which are mentioned in their own documentation. + +## Installing IntelMQ + +### Requirements + +#### Ubuntu / Debian + +```bash +apt install python3-pip python3-dnspython python3-psutil python3-redis python3-requests python3-termstyle python3-tz python3-dateutil redis-server bash-completion jq +# optional dependencies +apt install python3-pymongo python3-psycopg2 +``` + +#### openSUSE: + +```bash +zypper install python3-dateutil python3-dnspython python3-psutil python3-redis python3-requests python3-python-termstyle redis bash-completion jq +# optional dependencies +zypper in python3-psycopg2 python3-pymongo +``` + +#### CentOS 8: + +```bash +dnf install epel-release +dnf install python3-dateutil python3-dns python3-pip python3-psutil python3-redis python3-requests redis bash-completion jq +# optional dependencies +dnf install python3-psycopg2 python3-pymongo +``` + +#### CentOS 7 / RHEL 7: + +!!! warning + We no longer support already end-of-life Python 3.6, which is the last Python version officially packaged for CentOS 7. You can either use alternative Python source, or stay on the IntelMQ 3.0.2. 
+ +```bash +yum install epel-release +yum install python36 python36-dns python36-requests python3-setuptools redis bash-completion jq +yum install gcc gcc-c++ python36-devel +# optional dependencies +yum install python3-psycopg2 +``` + +### Installation + +The default installation directory is `/opt/intelmq/`. + +If you prefer to use Linux Standard Base (LSB) paths, set the following environment variable: + +```bash +export INTELMQ_PATHS_NO_OPT=1 +``` + +If you want to use a custom installation directory, set the following environment variable: + +```bash +export INTELMQ_ROOT_DIR=/my-installation-directory-path +``` + +Run the following commands to install IntelMQ. The provided tool `intelmqsetup` will create all the necessary directories and install a default configuration for new setups. +If you are using the LSB paths installation, change the `--home-dir` parameter to `/var/lib/intelmq`. + +```bash +sudo --preserve-env=INTELMQ_PATHS_NO_OPT,INTELMQ_ROOT_DIR -i +pip3 install intelmq +[[ ! 
-z "$INTELMQ_PATHS_NO_OPT" ]] && export HOME_DIR=/var/lib/intelmq || export HOME_DIR=${INTELMQ_ROOT_DIR:-/opt/intelmq} +useradd --system --user-group --home-dir $HOME_DIR --shell /bin/bash intelmq +intelmqsetup +``` + +### Installation to Python virtual environment + +```bash +sudo mkdir -m 755 /opt/intelmq +sudo useradd --system --user-group --home-dir /opt/intelmq --shell /bin/bash intelmq +sudo chown intelmq:intelmq /opt/intelmq/ +sudo -u intelmq python3 -m venv /opt/intelmq/venv +sudo -u intelmq /opt/intelmq/venv/bin/pip install intelmq intelmq-api intelmq-manager +sudo /opt/intelmq/venv/bin/intelmqsetup +``` + + +## Installing IntelMQ API (optional) + +The `intelmq-api` package ships: + +- **api configuration** file in `${PREFIX}/etc/intelmq/api-config.json` +- **positions configuration** for the intelmq-manager in `${PREFIX}/etc/intelmq/manager/positions.conf` +- **virtualhost configuration** file for Apache 2 in `${PREFIX}/etc/intelmq/api-apache.conf` +- **sudoers configuration** file in `${PREFIX}/etc/intelmq/api-sudoers.conf` + +The value of `${PREFIX}` depends on your environment and is something like `/usr/local/lib/pythonX.Y/dist-packages/` (where `X.Y` is your Python version). + +The **virtualhost configuration** file needs to be placed in the correct directory for your Apache 2 installation. + +- On Debian or Ubuntu, move the file to the `/etc/apache2/conf-available.d/` directory and then execute +`a2enconf api-apache`. +- On CentOS, RHEL or Fedora, move the file to the `/etc/httpd/conf.d/` directory. +- On openSUSE, move the file to the `/etc/apache2/conf.d/` directory. + +Don't forget to reload your webserver afterwards. 
+ +The **api configuration** file and the **positions configuration** file need to be placed in one of the following directories (based on your IntelMQ installation directory): + +- `/etc/intelmq/` +- `/opt/intelmq/etc/` +- `[my-installation-directory-path]/etc/` + +The **sudoers configuration** file should be placed in the `/etc/sudoers.d/` directory. Adapt the webserver username in this file. Set the file permissions to `0o440`. + +Afterwards continue with the section Permissions below. + +IntelMQ 2.3.1 comes with a tool `intelmqsetup` which performs these set-up steps automatically. Please note that the +tool is very new and may not detect all situations correctly. Please report any bugs you observe to us. The tool is +idempotent, so you can execute it multiple times. + +## Installing IntelMQ Manager (optional) + +To use the IntelMQ Manager web interface, it is required to have a working IntelMQ and IntelMQ API installation. + +For installation via pip, the situation is more complex. The intelmq-manager package does not contain ready-to-use +files, they need to be built locally. First, let's install the Manager itself: + +```bash +pip3 install intelmq-manager +``` + +If your system uses wheel-packages, not the source distribution, you can use the `intelmqsetup` tool. `intelmqsetup` +performs these set-up steps automatically, but it may not detect all situations correctly. If it +finds `intelmq-manager` installed, its build routine is called. The files are placed in +`/usr/share/intelmq_manager/html`, where the default Apache configuration expects them. + +If your system used the dist-package or if you are using a local source, the tool may not do all required steps. To call +the build routine manually, use +`intelmq-manager-build --output-dir your/preferred/output/directory/`. 
+ +`intelmq-manager` ships with a default configuration for the Apache webserver (`manager-apache.conf`): + +``` +Alias /intelmq-manager /usr/share/intelmq_manager/html/ + + + + Header set Content-Security-Policy "script-src 'self'" + Header set X-Content-Security-Policy "script-src 'self'" + + +``` + +This file needs to be placed in the correct place for your Apache 2 installation. + +- On Debian and Ubuntu, the file needs to be placed at `/etc/apache2/conf-available.d/manager-apache.conf` and then execute + `a2enconf manager-apache`. +- On CentOS, RHEL and Fedora, the file needs to be placed at `/etc/httpd/conf.d/` and reload the webserver. +- On openSUSE, the file needs to be placed at `/etc/apache2/conf.d/` and reload the webserver. \ No newline at end of file diff --git a/docs/admin/installation/source-repository.md b/docs/admin/installation/source-repository.md new file mode 100644 index 000000000..acca0d17a --- /dev/null +++ b/docs/admin/installation/source-repository.md @@ -0,0 +1,9 @@ + + +# Installation from source repository + +This guide provides instruction on how to install IntelMQ and it's components from the source repository. + diff --git a/docs/admin/integrations/cifv3.md b/docs/admin/integrations/cifv3.md new file mode 100644 index 000000000..1fcfa7bdc --- /dev/null +++ b/docs/admin/integrations/cifv3.md @@ -0,0 +1,16 @@ + + +# CIFv3 integrations in IntelMQ + +CIF creates an accessible indicator store. A REST API is exposed to interact with the store and quickly process/share +indicators. CIFv3 can correlate indicators via the UUID attribute. + +## CIF3 API Output + +Can be used to submit indicators to a CIFv3 instance by using +the [CIFv3 API](https://github.com/csirtgadgets/bearded-avenger-deploymentkit/wiki/REST-API). + +Look at the CIFv3 API Output Bot for more information. 
\ No newline at end of file diff --git a/docs/admin/integrations/misp.md b/docs/admin/integrations/misp.md new file mode 100644 index 000000000..4c955d3f8 --- /dev/null +++ b/docs/admin/integrations/misp.md @@ -0,0 +1,51 @@ + + +# MISP integrations in IntelMQ + +While MISP and IntelMQ seem to solve similar problems at first glance, their intentions and strengths differ +significantly. + +In a nutshell, MISP *stores* manually curated indicators (called +*attributes*) grouped in *events*. An event can have an arbitrary number of attributes. MISP correlates these indicators +with each other and can synchronize the data between multiple MISP instances. + +On the other hand, IntelMQ in its essence (not considering the +`EventDB`) has no state or database, but is stream-oriented. IntelMQ acts as a toolbox which can be configured +as needed to automate processes of mass data with little or no human interaction. At the end of the processing, the data +may land in some database or be sent to other systems. + +Neither system intends to replace or compete with the other. They integrate seamlessly and complement each other, enabling +more use-cases. + +## MISP API Collector + +The MISP API Collector fetches data from MISP via the [MISP API](https://misp.gitbooks.io/misp-book/content/automation/) +. + +Look at the Bots documentation page for more information. + +## MISP Expert + +The MISP Expert searches MISP by using the [MISP API](https://misp.gitbooks.io/misp-book/content/automation/) for +attributes/events matching the `source.ip` of the event. The MISP Attribute UUID and MISP Event ID of the newest +attribute are added to the event. + +Look at the Bots documentation page for more information. + +## MISP Feed Output + +This bot creates a complete [MISP feed](https://misp.gitbooks.io/misp-book/content/managing-feeds/) ready to be +configured in MISP as an incoming data source. + +Look at the Bots documentation page for more information. 
+ +## MISP API Output + +Can be used to directly create MISP events in a MISP instance by using +the [MISP API](https://misp.gitbooks.io/misp-book/content/automation/). + +Look at the Bots documentation page for more information. + diff --git a/docs/admin/integrations/n6.md b/docs/admin/integrations/n6.md new file mode 100644 index 000000000..ebc7c1c07 --- /dev/null +++ b/docs/admin/integrations/n6.md @@ -0,0 +1,65 @@ + + +# IntelMQ - n6 Integration + +n6 is an Open Source Tool with very similar aims as IntelMQ: processing and distributing IoC data. The use-cases, +architecture and features differ and both tools have non-overlapping strengths. n6 is maintained and developed +by [CERT.pl](https://www.cert.pl/). + +Information about n6 can be found here: + +- Website: [cert.pl/en/n6](https://cert.pl/en/n6/) +- Source Code: + [github.com/CERT-Polska/n6](https://github.com/CERT-Polska/n6/) +- n6 documentation: [n6.readthedocs.io](https://n6.readthedocs.io/) + +![n6 schema](../../static/images/n6/n6-schemat2.png) + +![n6 data flow](../../static/images/n6/data-flow.png) + +## Data format + +The internal data representation differs between IntelMQ and n6, so any data exchange between the systems requires a +format conversion. For example, in n6 one message can contain multiple IP addresses, but IntelMQ is intentionally +restricted to one IP address per message. Therefore, one n6 event results in *one or more* IntelMQ events. Because of +this, and some other naming differences and ambiguities, the format conversion is *not* bidirectional. + +## Data exchange interface + +n6 offers a STOMP interface via the RabbitMQ broker, which can be used for both sending and receiving data. IntelMQ +offers both a STOMP collector bot for receiving data from n6, as well as a STOMP output bot for sending data to n6 +instances. 
+ +- Stomp Collector Bot +- N6 Parser Bot +- Stomp Output Bot + +## Data conversion + +IntelMQ can parse n6 data using the n6 parser and n6 can parse IntelMQ data using the Intelmq n6 parser. + +- N6 Parser Bot + +## Complete example + +### Data flow n6 to IntelMQ + +![dataflow from n6 to IntelMQ](../../static/images/n6/n6-to-intelmq.png) + +### Data flow IntelMQ to n6 + +![dataflow from IntelMQ to n6](../../static/images/n6/intelmq-to-n6.png) + +### CERT.pl Data feed + +CERT.pl offers data feed available to their partners through the STOMP interface. Our feeds documentation contains +details how it can be enabled in IntelMQ: [CERT.pl n6 STOMP stream](../../user/feeds.md#n6-stomp-stream) + +### Webinput CSV + +The IntelMQ Webinput CSV software can also be used together with n6. The documentation on this component can be found in +the software's repository: + diff --git a/docs/admin/integrations/other.md b/docs/admin/integrations/other.md new file mode 100644 index 000000000..7ff52fce8 --- /dev/null +++ b/docs/admin/integrations/other.md @@ -0,0 +1,30 @@ + + + +# Integration with ticketing systems, etc. + +First of all, IntelMQ is a message (event) processing system: it collects feeds, processes them, enriches them, filters +them and then stores them somewhere or sends them to another system. It does this in a composable, data flow oriented +fashion, based on single events. There are no aggregation or grouping features. Now, if you want to integrate IntelMQ +with your ticket system or some other system, you need to send its output to somewhere where your ticket system or other +services can pick up IntelMQ's data. This could be a database, splunk, or you could send your events directly via email +to a ticket system. + +Different users came up with different solutions for this, each of them fitting their own organisation. Hence these +solutions are not part of the core IntelMQ repository. 
+ +- CERT.at uses a postgresql DB (sql output bot) and has a small tool `intelmqcli` which fetches the events in the +  postgresql DB which are marked as "new" and will group them and send them out via the RT ticket system. +- Others, including BSI, use a tool called `intelmq-mailgen`. It sends E-Mails to the recipients, optionally PGP-signed with +  defined text-templates, CSV formatted attachments with grouped events and generated ticket numbers. + +The following lists external github repositories which you might consult for examples on how to integrate IntelMQ into +your workflow: + +> - [certat repository](https://github.com/certat/intelmq) +> - [Intevation's Mailgen](https://github.com/Intevation/intelmq-mailgen) + +If you came up with another solution for integration, we'd like to hear from you! Please reach out to us on the . \ No newline at end of file diff --git a/docs/admin/intro.md b/docs/admin/intro.md new file mode 100644 index 000000000..b5df76ef0 --- /dev/null +++ b/docs/admin/intro.md @@ -0,0 +1,31 @@ + + + +# Intro + +This guide provides instructions on how to install, configure and manage IntelMQ and its components. + +IntelMQ uses a message broker such as Redis. This is required for IntelMQ to run. + +IntelMQ doesn't handle long term storage of processed Events beyond writing to a file. However it provides connectors (output bots) for writing events to various database systems and log collectors. It is recommended to configure such a +system to preserve processed events. + +## Base Requirements + +The following instructions assume the following requirements. Python versions >= 3.7 are supported. + +Supported and recommended operating systems are: + +- Debian 10 Buster and 11 Bullseye +- openSUSE Tumbleweed +- Ubuntu: 20.04 focal +- For the Docker-installation: Docker Engine: 18.x and higher + +Other distributions which are (most probably) supported include AlmaLinux, CentOS, Fedora, FreeBSD 12, RHEL and +RockyLinux. 
+ +A short guide on hardware requirements can be found on the page +[Hardware Requirements](hardware-requirements.md). diff --git a/docs/admin/management/intelmq-api.md b/docs/admin/management/intelmq-api.md new file mode 100644 index 000000000..335b75e2f --- /dev/null +++ b/docs/admin/management/intelmq-api.md @@ -0,0 +1,17 @@ + + + +# Managing IntelMQ API + + +## Running + + +For development purposes and testing you can run the API directly using `hug`: + +```bash +hug -m intelmq_api.serve +``` \ No newline at end of file diff --git a/docs/admin/management/intelmq.md b/docs/admin/management/intelmq.md new file mode 100644 index 000000000..8bde09641 --- /dev/null +++ b/docs/admin/management/intelmq.md @@ -0,0 +1,583 @@ + + +# Managing IntelMQ + +## Required services + +You need to enable and start Redis if not already done. Using systemd it +can be done with: + +```bash +systemctl enable redis.service +systemctl start redis.service +``` + +## Introduction + +`intelmqctl` is the main tool to handle an IntelMQ installation. It handles +the bots themselves and has some tools to handle the installation. + +Should you get lost at any time, just use **--help** after any +argument for further explanation. + +```bash +> intelmqctl run file-output --help +``` + +## Manage the botnet + +In IntelMQ, the botnet is the set of all currently configured and +enabled bots. All configured bots have their configuration in +`runtime.yaml`. By default, all bots are enabled. + +If no bot id is given, the command applies to all bots / the botnet. +All commands except the start action are applied to all bots. But only +enabled bots are started. + +In the examples below, a very minimal botnet is used. + +### start + +The start action applies to all bots which are enabled. + +```bash +> intelmqctl start +Starting abusech-domain-parser... +abusech-domain-parser is running. +Starting abusech-feodo-domains-collector... +abusech-feodo-domains-collector is running. +Starting deduplicator-expert... 
+deduplicator-expert is running. +file-output is disabled. +Botnet is running. +``` + +As we can see, file-output is disabled and thus has not been started. You can +always explicitly start disabled bots. + +### stop + +The stop action applies to all bots. Assume that all bots have been +running: + +```bash +> intelmqctl stop +Stopping Botnet... +Stopping abusech-domain-parser... +abusech-domain-parser is stopped. +Stopping abusech-feodo-domains-collector... +abusech-feodo-domains-collector is stopped. +Stopping deduplicator-expert... +deduplicator-expert is stopped. +Stopping file-output... +file-output is stopped. +Botnet is stopped. +``` + +### status + +With this command we can see the status of all configured bots. Here, +the botnet was started beforehand: + +```bash +> intelmqctl status +abusech-domain-parser is running. +abusech-feodo-domains-collector is running. +deduplicator-expert is running. +file-output is disabled. +``` + +And if the disabled bot has also been started: + +```bash +> intelmqctl status +abusech-domain-parser is running. +abusech-feodo-domains-collector is running. +deduplicator-expert is running. +file-output is running. +``` + +If the botnet is stopped, the output looks like this: + +```bash +> intelmqctl status +abusech-domain-parser is stopped. +abusech-feodo-domains-collector is stopped. +deduplicator-expert is stopped. +file-output is disabled. +``` + +### restart + +The same as stop and start consecutively. + +### reload + +The same as reload of every bot. + +### enable / disable + +The sub commands `enable` and `disable` set the corresponding flags in `runtime.yaml`. + +```bash +> intelmqctl status +file-output is stopped. +malware-domain-list-collector is stopped. +malware-domain-list-parser is stopped. +> intelmqctl disable file-output +> intelmqctl status +file-output is disabled. +malware-domain-list-collector is stopped. +malware-domain-list-parser is stopped. 
+> intelmqctl enable file-output +> intelmqctl status +file-output is stopped. +malware-domain-list-collector is stopped. +malware-domain-list-parser is stopped. +``` + +## Manage individual bots + +Like all init systems, intelmqctl has the methods start, stop, restart, +reload and status. + +### start + +This will start the bot with the ID `file-output`. A file +with its PID will be created in `/opt/intelmq/var/run/[bot-id].pid`. + +```bash +> intelmqctl start file-output +Starting file-output... +file-output is running. +``` + +If the bot is already running, it won't be started again: + +```bash +> intelmqctl start file-output +file-output is running. +``` + +### stop + +If the PID file does exist, a SIGINT will be sent to the process. After +0.25s we check if the process is running. If not, the PID file will be +removed. + +```bash +> intelmqctl stop file-output +Stopping file-output... +file-output is stopped. +``` + +If there's no running bot, there's nothing to do. + +```bash +> intelmqctl stop file-output +file-output was NOT RUNNING. +``` + +If the bot did not stop in 0.25s, intelmqctl will say it's still +running: + +```bash +> intelmqctl stop file-output +file-output is still running +``` + +### status + +Checks for the PID file and if the process with the given PID is alive. +If the PID file exists, but the process does not exist, the PID file will be +removed. + +```bash +> intelmqctl status file-output +file-output is stopped. +> intelmqctl start file-output +Starting file-output... +file-output is running. +> intelmqctl status file-output +file-output is running. +``` + +### restart + +The same as stop and start consecutively. + +```bash +> intelmqctl restart file-output +Stopping file-output... +file-output is stopped. +Starting file-output... +file-output is running. +``` + +### reload + +Sends a SIGHUP to the bot, which will then reload the configuration. + +```bash +> intelmqctl reload file-output +Reloading file-output ... +file-output is running. 
+``` + +If the bot is not running, we can't reload it: + +```bash +> intelmqctl reload file-output +file-output was NOT RUNNING. +``` + +### run + +This command is used for **debugging** purposes. + +If launched with no arguments, the bot will call its init method and +start processing messages as usual -- but you see everything that happens. + +```bash +> intelmqctl run file-output +file-output: RestAPIOutputBot initialized with id file-output and version 3.5.2 as process 12345. +file-output: Bot is starting. +file-output: Loading source pipeline and queue 'file-output-queue'. +file-output: Connected to source queue. +file-output: No destination queues to load. +file-output: Bot initialization completed. +file-output: Waiting for incoming message. +``` + +Note that if another instance of the bot is running, only a warning will +be displayed. + +```bash +> intelmqctl run file-output +Main instance of the bot is running in the background. You may want to launch: intelmqctl stop file-output +``` + +You can set the log level with the `-l` flag, e.g. `-l DEBUG`. For the 'console' subcommand, 'DEBUG' is the default. + +#### console + +This command is used for **debugging** purposes. + +If launched with the **console** argument, you get a `pdb` live +console; or `ipdb` or `pudb` consoles if they were previously installed (e.g. +`pip3 install ipdb --user`). + +```bash +> intelmqctl run file-output console +*** Using console ipdb. Please use 'self' to access to the bot instance properties. *** +ipdb> self. ... +``` + +You may specify the desired console in the next argument. + +```bash +> intelmqctl run file-output console pudb +``` + +#### message + +Operate directly with the input / output pipelines. + +If **get** is the parameter, you see the message that waits in the input +(source or internal) queue. If the argument is **pop**, the message gets +popped as well. + +```bash +> intelmqctl run file-output message get +file-output: Waiting for a message to get... 
+{ + "classification.type": "c&c", + "feed.url": "https://example.com", + "raw": "1233", + "source.ip": "1.2.3.4", + "time.observation": "2017-05-17T22:00:33+00:00", + "time.source": "2017-05-17T22:00:32+00:00" +} +``` + +To send directly to the bot's output queue, just as it was sent by `self.send_message()` in bot's `process()` method, use the **send** argument. In our case of `file-output`, it has no destination queue so that nothing happens. + +```bash +> intelmqctl run file-output message send '{"time.observation": "2017-05-17T22:00:33+00:00", "time.source": "2017-05-17T22:00:32+00:00"}' +file-output: Bot has no destination queues. +``` + +Note, if you would like to know possible parameters of the message, put +a wrong one -- you will be prompted if you want to list all the current +bot harmonization. + +#### process + +With no other arguments, bot's `process()` method will be run one time. + +```bash +> intelmqctl run file-output process +file-output: Bot is starting. +file-output: Bot initialization completed. +file-output: Processing... +file-output: Waiting for incoming message. +file-output: Received message {'raw': '1234'}. +``` + +If run with **--dryrun|-d** flag, the message gets never really popped +out from the source or internal pipeline, nor sent to the output +pipeline. Plus, you receive a note about the exact moment the message +would get sent, or acknowledged. If the message would be sent to a +non-default path, the name of this path is printed on the console. + +```bash +> intelmqctl run file-output process -d +file-output: * Dryrun only, no message will be really sent through. +... +file-output: DRYRUN: Message would be acknowledged now! +``` + +You may trick the bot to process a JSON instead of the Message in its +pipeline with **--msg|-m** flag. + +```bash +> intelmqctl run file-output process -m '{"source.ip":"1.2.3.4"}' +file-output: * Message from cli will be used when processing. +... 
+``` + +If you wish to display the processed message as well, use the +**--show-sent|-s** flag. Then, if sent through (either with +`--dryrun` or without), the message gets displayed as well. + +### disable + +Sets the `enabled` flag in the runtime configuration of the +bot to `false`. By default, all bots are enabled. + +Example output: + +```bash +> intelmqctl status file-output +file-output is stopped. +> intelmqctl disable file-output +> intelmqctl status file-output +file-output is disabled. +``` + +### enable + +Sets the `enabled` flag in the runtime configuration of the +bot to `true`. + +Example output: + +```bash +> intelmqctl status file-output +file-output is disabled. +> intelmqctl enable file-output +> intelmqctl status file-output +file-output is stopped. +``` + +## List bots + +`intelmqctl list bots` lists all configured bots and +their descriptions. + +## List queues + +`intelmqctl list queues` shows all queues which are +currently in use according to the configuration and how many events are +in each: + +```bash +> intelmqctl list queues +abusech-domain-parser-queue - 0 +abusech-domain-parser-queue-internal - 0 +deduplicator-expert-queue - 0 +deduplicator-expert-queue-internal - 0 +file-output-queue - 234 +file-output-queue-internal - 0 +``` + +Use the `-q` or `--quiet` flag to only show +non-empty queues: + +```bash +> intelmqctl list queues -q +file-output-queue - 234 +``` + +The `--sum` or `--count` flag will show the +sum of events on all queues: + +```bash +> intelmqctl list queues --sum +42 +``` + +## Logging + +intelmqctl can show the last log lines for a bot, filtered by the log +level. + +Logs are stored in the `/opt/intelmq/var/log/` or `/var/log/intelmq/` directory. In case of failures, messages are dumped to the same directory with the file extension `.dump`. + +See the help page for more information. + +## Check + +This command will do various sanity checks on the installation and +especially the configuration.
+ +### Orphaned Queues + +The `intelmqctl check` tool can search for orphaned queues. +"Orphaned queues" are queues that have been used in the past and are +no longer in use. For example you had a bot which you removed or renamed +afterwards, but there were still messages in its source queue. The +source queue won't be renamed automatically and is now disconnected. As +this queue is no longer configured, it won't show up in the list of +IntelMQ's queues either. In case you are using redis as message broker, +you can use the `redis-cli` tool to examine or remove these +queues: + +```bash +redis-cli -n 2 +keys * # lists all existing non-empty queues +llen [queue-name] # shows the length of the queue [queue-name] +lindex [queue-name] [index] # show the [index]'s message of the queue [queue-name] +del [queue-name] # remove the queue [queue-name] +``` + +To ignore certain queues in this check, you can set the parameter +`intelmqctl_check_orphaned_queues_ignore` in the +*defaults* configuration file. For example: + +```yaml +"intelmqctl_check_orphaned_queues_ignore": ["Taichung-Parser"] +``` + +## Configuration upgrade + +The `intelmqctl upgrade-config` function upgrades +the configuration from previous versions to the current one. It keeps +track of previously installed versions and the result of all "upgrade +functions" in the "state file", located at `$var_state_path/state.json` +(`/opt/intelmq/var/lib/state.json` or `/var/lib/intelmq/state.json`). + +This function was introduced in version 2.0.1. + +It makes backups itself for all changed files before every run. Backups +are overwritten if they already exist. So make sure to always have a +backup of your configuration just in case. + +## Output type + +intelmqctl can be used as command line tool, as library and as tool by +other programs. If called directly, it will print all output to the +console (stderr). If used as python library, the python types themselves +are returned.
The third option is to use machine-readable JSON as output +(used by other managing tools). + +## Exit code + +In case of errors or unsuccessful operations, the exit code is higher than +0. For example, when running `intelmqctl start` and one +enabled bot is not running, the exit code is 1. The same is valid for +e.g. `intelmqctl status`, which can be used for monitoring, +and all other operations. + +## Error Handling + +When bots are failing due to bad input data or programming errors, they can dump the problematic message to a file along +with a traceback, if configured accordingly. These dumps are saved in the logging directory as `[botid].dump` JSON +files. IntelMQ comes with an inspection and reinjection tool, called `intelmqdump`. It is an interactive tool to show +all dumped files and the number of dumps per file. Choose a file by bot-id or listed numeric id. You can then choose to +delete single entries from the file with `e 1,3,4`, show a message in more readable format with `s 1` (prints the +raw-message, can be long!), recover some messages and put them back in the pipeline for the bot by `a` or `r 0,4,5`. Or delete the file with all dumped messages using `d`. + +```bash +intelmqdump -h +usage: + intelmqdump [botid] + intelmqdump [-h|--help] + +intelmqdump can inspect dumped messages, show, delete or reinject them into +the pipeline. It's an interactive tool, directly start it to get a list of +available dumps or call it with a known bot id as parameter. + +positional arguments: + botid botid to inspect dumps of + +optional arguments: + -h, --help show this help message and exit + --truncate TRUNCATE, -t TRUNCATE + Truncate raw-data with more characters than given. 0 for no truncating. Default: 1000.
+ +Interactive actions after a file has been selected: +- r, Recover by IDs + > r id{,id} [queue name] + > r 3,4,6 + > r 3,7,90 modify-expert-queue + The messages identified by a consecutive numbering will be stored in the + original queue or the given one and removed from the file. +- a, Recover all + > a [queue name] + > a + > a modify-expert-queue + All messages in the opened file will be recovered to the stored or given + queue and removed from the file. +- d, Delete entries by IDs + > d id{,id} + > d 3,5 + The entries will be deleted from the dump file. +- d, Delete file + > d + Delete the opened file as a whole. +- s, Show by IDs + > s id{,id} + > s 0,4,5 + Show the selected IP in a readable format. It's still a raw format from + repr, but with newlines for message and traceback. +- e, Edit by ID + > e id + > e 0 + > e 1,2 + Opens an editor (by calling `sensible-editor`) on the message. The modified message is then saved in the dump. +- q, Quit + > q + +$ intelmqdump + id: name (bot id) content + 0: alienvault-otx-parser 1 dumps + 1: cymru-whois-expert 8 dumps + 2: deduplicator-expert 2 dumps + 3: dragon-research-group-ssh-parser 2 dumps + 4: file-output2 1 dumps + 5: fraunhofer-dga-parser 1 dumps + 6: spamhaus-cert-parser 4 dumps + 7: test-bot 2 dumps +Which dump file to process (id or name)? 3 +Processing dragon-research-group-ssh-parser: 2 dumps + 0: 2015-09-03T13:13:22.159014 InvalidValue: invalid value u'NA' () for key u'source.asn' + 1: 2015-09-01T14:40:20.973743 InvalidValue: invalid value u'NA' () for key u'source.asn' +(r)ecover by ids, recover (a)ll, delete (e)ntries, (d)elete file, (s)how by ids, (q)uit, edit id (v)? d +Deleted file /opt/intelmq/var/log/dragon-research-group-ssh-parser.dump +``` + +Bots and the intelmqdump tool use file locks to prevent writing to already opened files. Bots are trying to lock the +file for up to 60 seconds if the dump file is locked already by another process +(intelmqdump) and then give up. 
Intelmqdump does not wait and instead only shows an error message. + +By default, the `show` command truncates the `raw` field of messages at 1000 characters. To change this limit or disable +truncating entirely (value 0), use the `--truncate` parameter. + +## Known issues + +The currently implemented process management using PID files is very +error-prone. diff --git a/docs/admin/upgrade.md b/docs/admin/upgrade.md new file mode 100644 index 000000000..20092220f --- /dev/null +++ b/docs/admin/upgrade.md @@ -0,0 +1,132 @@ + + + +# Upgrade instructions + +In order to upgrade your IntelMQ installation it is recommended to follow these five steps: + +## 1. Read NEWS.md + +Read the +[NEWS.md](https://github.com/certtools/intelmq/blob/develop/NEWS.md) +file to look for things you need to have a look at. + +## 2. Stop IntelMQ and create a backup + +- Make sure that your IntelMQ system is completely stopped: `intelmqctl stop` +- Create a backup of the IntelMQ home directory, which includes all configurations. They are not overwritten, but backups are always nice to have! + +```bash +sudo cp -R /opt/intelmq /opt/intelmq-backup +``` + +## 3. Upgrade IntelMQ + +Before upgrading, check that your setup is clean and there are no events +in the queues: + +```bash +intelmqctl check +intelmqctl list queues -q +``` + +The upgrade depends on how you installed IntelMQ. + +### Linux Packages + +Use your system's package manager. + +### PyPi + +```bash +pip install -U --no-deps intelmq +sudo intelmqsetup +``` + +With `--no-deps`, dependencies are not upgraded; upgrading them would probably overwrite the system's libraries. Remove this option to +also upgrade dependencies. + +### Docker + +You can check out all current versions on our [DockerHub](https://hub.docker.com/r/certat/intelmq-full).
+ +```bash +docker pull certat/intelmq-full:latest + +docker pull certat/intelmq-nginx:latest +``` + +Alternatively you can use `docker-compose`: + +```bash +docker-compose pull +``` + +You can check the current versions from intelmq & intelmq-manager & intelmq-api via git commit ref. + +The version format for each included item is `key=value` and they are separated via `,`. E.g. `IntelMQ=ab12cd34f,IntelMQ-API=xy65z23`. + +```bash +docker inspect --format '{{ index .Config.Labels "org.opencontainers.image.version" }}' intelmq-full:latest +``` + +Now restart your container. If you're using docker-compose, simply run: + +```bash +docker-compose down +``` + +If you don't use `docker-compose`, you can restart a single container using: + +```bash +docker ps | grep certat + +docker restart CONTAINER_ID +``` + +### Source repository + +If you have an editable installation, refer to the instructions in the +`/dev/guide`. + +Update the repository depending on your setup (e.g. +`git pull origin master`). + +And run the installation again: + +```bash +pip install . +sudo intelmqsetup +``` + +For editable installations (development only), run +`pip install -e .` instead. + +## 4. Upgrade configuration and check the installation + +Go through +[NEWS.md](https://github.com/certtools/intelmq/blob/develop/NEWS.md) and +apply necessary adaptations to your setup. If you have adapted IntelMQ's +code, also read the +[CHANGELOG.md](https://github.com/certtools/intelmq/blob/develop/CHANGELOG.md). + +Check your installation and configuration to detect any problems: + +```bash +intelmqctl upgrade-config +intelmqctl check +``` + +`intelmqctl upgrade-config` supports upgrades from one IntelMQ version +to the succeeding one. If you skip one or more IntelMQ versions, some +automatic upgrades *may not* work and manual intervention *may* be +necessary. + +## 5.
Start IntelMQ + +```bash +intelmqctl start +``` diff --git a/docs/admin/utilities/bash-completion.md b/docs/admin/utilities/bash-completion.md new file mode 100644 index 000000000..1a5a71fa4 --- /dev/null +++ b/docs/admin/utilities/bash-completion.md @@ -0,0 +1,11 @@ + + +# Bash Completion + +To enable bash completion on `intelmqctl` and `intelmqdump` in order to help you run the commands in an easy manner, +follow the installation process +[here](https://github.com/certtools/intelmq/blob/develop/contrib/bash-completion/README.md). + diff --git a/docs/autogen.py b/docs/autogen.py deleted file mode 100644 index 5cc4ae136..000000000 --- a/docs/autogen.py +++ /dev/null @@ -1,150 +0,0 @@ -# SPDX-FileCopyrightText: 2020 Sebastian Wagner -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# This script generates the files -# `guides/Harmonization-fields.md` -# and `guides/Feeds.md` - -import codecs -import json -import os.path -import textwrap - -from ruamel.yaml import YAML - -import intelmq.lib.harmonization - - -yaml = YAML(typ="safe", pure=True) - - -HEADER = """######################### -Harmonization field names -######################### - -=========================== =================================== ============================= =========== -Section Name Type Description -=========================== =================================== ============================= =========== -""" -HEADER_1 = """ -=========================== =================================== ============================= =========== - -Harmonization types -------------------- - -""" -BASEDIR = os.path.join(os.path.dirname(__file__), '../') - - -def harm_docs(): - output = HEADER - - with codecs.open(os.path.join(BASEDIR, 'intelmq/etc/harmonization.conf'), encoding='utf-8') as fhandle: - HARM = json.load(fhandle)['event'] - - for key, value in sorted(HARM.items()): - section = ' '.join([sec.title() for sec in key.split('.')[:-1]]) - output += '{:27} {:35} {:29} {}\n'.format('|' if not section 
else section, # needed for GitHub - key, - ':ref:`'+value['type'].lower()+'`', - value['description']) - - output += HEADER_1 - - for value in sorted(dir(intelmq.lib.harmonization)): - if value == 'GenericType' or value.startswith('__'): - continue - obj = getattr(intelmq.lib.harmonization, value) - try: - if issubclass(obj, intelmq.lib.harmonization.GenericType): - doc = getattr(obj, '__doc__', '') - if doc is None: - doc = '' - else: - doc = textwrap.dedent(doc) - output += ".. _{}:\n\n{}\n{}\n{}\n\n".format(value.lower(),value,'-'*len(value),doc) - except TypeError: - pass - - return output - - -def info(key, value=""): - return (f"* **{key.title()}:** {value}\n").strip() + '\n' - - -def feeds_docs(): - with codecs.open(os.path.join(BASEDIR, 'intelmq/etc/feeds.yaml'), encoding='utf-8') as fhandle: - config = yaml.load(fhandle.read()) - - output = """Data Feeds -========== - -The available feeds are grouped by the provider of the feeds. -For each feed the collector and parser that can be used is documented as well as any feed-specific parameters. -To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then rebuild the documentation. - -.. 
contents :: - -""" - - for provider, feeds in sorted(config['providers'].items(), key=lambda x: x[0]): - - output += f"{provider}\n" - output += "-"*len(provider) + "\n" - - for feed, feed_info in sorted(feeds.items(), key=lambda x: x[0]): - - output += f"{feed}\n" - output += "^"*len(feed) + "\n\n" - - output += info("public", "yes") if feed_info.get('public') else info("public", "no") - - output += info("revision", feed_info['revision']) - - if feed_info.get('documentation') is not None: - output += info("documentation", feed_info['documentation']) - - output += info("description", feed_info['description']) - - if feed_info.get('additional_information') is not None: - output += info("additional information", feed_info['additional_information']) - - output += '\n' - - for bot, bot_info in sorted(feed_info['bots'].items(), key=lambda x: x[0]): - - output += "**%s**\n\n" % bot.title() - - output += info("Module", bot_info['module']) - output += info("Configuration Parameters") - - if bot_info.get('parameters'): - for key, value in sorted(bot_info['parameters'].items(), key=lambda x: x[0]): - - if value == "__FEED__": - value = feed - - if value == "__PROVIDER__": - value = provider - - # format non-empty lists with double-quotes - # single quotes are not conform JSON and not correctly detected/transformed by the manager - if isinstance(value, (list, tuple)) and value: - value = json.dumps(value) - - output += f" * ``{key}``: ``{value}``\n" - - output += '\n' - - output += '\n' - - return output - - -if __name__ == '__main__': # pragma: no cover - with codecs.open('dev/harmonization-fields.rst', 'w', encoding='utf-8') as handle: - handle.write(harm_docs()) - with codecs.open('user/feeds.rst', 'w', encoding='utf-8') as handle: - handle.write(feeds_docs()) diff --git a/docs/changelog.md b/docs/changelog.md new file mode 120000 index 000000000..04c99a55c --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git 
a/docs/community.md b/docs/community.md new file mode 100644 index 000000000..1e2750191 --- /dev/null +++ b/docs/community.md @@ -0,0 +1,43 @@ + + +# IntelMQ Organizational Structure + +The central IntelMQ components are maintained by multiple people and organizations in the IntelMQ community. +Please note that some components of the [IntelMQ Universe](overview.md) can have a different project governance, but all are part of the IntelMQ universe and community. + +## IntelMQ Enhancement Proposals (IEP) + +Major changes, including architecture, strategy and the internal data format, require so-called IEPs, IntelMQ Enhancement Proposals. Their name is based on the famous ["PEPs" of Python](https://peps.python.org/). + +IEPs are collected in the separate [IEP Repository](https://github.com/certtools/ieps/). + +## Code-Reviews and Merging + +Every line of code checked in for the IntelMQ Core is checked by at least one trusted developer (excluding the author of the changes) of the IntelMQ community. Afterwards, the code can be merged. Currently, these three contributors have permission to push and merge code to IntelMQ Core, Manager and API: + +- Aaron Kaplan ([aaronkaplan](https://github.com/aaronkaplan)) +- Sebastian Wagner ([sebix](https://github.com/sebix)) +- Sebastian Waldbauer ([waldbauer-certat](https://github.com/waldbauer-certat)) + +Additionally, these people significantly contributed to IntelMQ: + +- Bernhard Reiter +- Birger Schacht +- Edvard Rejthar +- Filip Pokorný +- Karl-Johan Karlsson +- Marius Karotkis +- Marius Urkus +- Mikk Margus Möll +- navtej +- Pavel Kácha +- Robert Šefr +- Tomas Bellus +- Zach Stone + +## Short history + +The idea and overall concept of a free, simple and extensible software for automated incident handling was born at a meeting of several European CSIRTs in Heraklion, Greece, in 2014.
Following the event, [Tomás Lima "SYNchroACK"](https://github.com/synchroack) (working at CERT.pt back then) created IntelMQ from scratch. IntelMQ was born on June 24th, 2014. Major support came from CERT.pt at this early stage. Aaron Kaplan (CERT.at until 2020) engaged in the long-term advancement and from 2015 on, CERT.at took the burden of the maintenance and development (Sebastian Wagner 2015-2021 at CERT.at). From 2016 onward, CERT.at started projects, initiated and led by Aaron Kaplan, receiving CEFF-funding from the European Union to support IntelMQ's development. IntelMQ became a software component of the EU-funded MeliCERTes framework for CSIRTs. In 2020, IntelMQ's organizational structure and architectural development gained new momentum through the newly founded Board and the start of the IEP process, creating more structure and more transparency in the IntelMQ community's decisions. diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 071c7319d..000000000 --- a/docs/conf.py +++ /dev/null @@ -1,140 +0,0 @@ -# SPDX-FileCopyrightText: 2020 Birger Schacht -# -# SPDX-License-Identifier: AGPL-3.0-or-later -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here.
-# -import datetime -import codecs -import os -import subprocess -import sys -from sphinx.domains import Domain - -sys.path.insert(0, os.path.abspath('../')) -sys.path.insert(0, os.path.abspath('./')) - -import autogen - -# -- Project information ----------------------------------------------------- - -year = datetime.date.today().year -exec(open(os.path.join(os.path.dirname(__file__), '../intelmq/version.py')).read()) # defines __version__ - -project = 'intelmq' -copyright = f'{year}, IntelMQ community' -author = 'IntelMQ Community' -# for compatibility with Sphinx < 2.0 as the old versions default to 'contents' -master_doc = 'index' - -# The full version, including alpha/beta/rc tags -release = __version__ - -rst_prolog = """ -.. |intelmq-users-list-link| replace:: `IntelMQ Users Mailinglist `__ -.. |intelmq-developers-list-link| replace:: `IntelMQ Developers Mailinglist `__ -.. |intelmq-manager-github-link| replace:: `IntelMQ Manager `__ -""" -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.extlinks', - 'sphinx.ext.napoleon' -] - -# Napoleon settings -# based on https://github.com/certtools/intelmq/issues/910 -#napoleon_google_docstring = True -napoleon_numpy_docstring = False -napoleon_include_init_with_doc = True -napoleon_include_private_with_doc = True -#napoleon_include_special_with_doc = True -#napoleon_use_admonition_for_examples = False -#napoleon_use_admonition_for_notes = False -#napoleon_use_admonition_for_references = False -#napoleon_use_ivar = False -#napoleon_use_param = True -#napoleon_use_rtype = True - - -extlinks = {'issue': ('https://github.com/certtools/intelmq/issues/%s', 'issue ')} - -# Add any paths that contain templates here, relative to this directory. 
-templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'source/intelmq.tests.*'] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'alabaster' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -html_theme_options = { - 'logo': 'Logo_Intel_MQ.svg', - 'github_user': 'certtools', - 'github_repo': 'intelmq', - 'font_family': "'Open Sans', sans-serif", - 'description': 'IntelMQ is a solution for IT security teams for collecting and processing security feeds using a message queuing protocol.', - 'show_powered_by': False, - } - -def run_apidoc(_): - subprocess.check_call("sphinx-apidoc --implicit-namespaces -o source ../intelmq", shell=True) - - -def run_autogen(_): - with codecs.open('dev/harmonization-fields.rst', 'w', encoding='utf-8') as handle: - handle.write(autogen.harm_docs()) - with codecs.open('user/feeds.rst', 'w', encoding='utf-8') as handle: - handle.write(autogen.feeds_docs()) - - -def setup(app): - app.connect("builder-inited", run_apidoc) - app.connect("builder-inited", run_autogen) - - - -import sphinx -if sphinx.__version__ == '5.1.0': - # see https://github.com/sphinx-doc/sphinx/issues/10701 - # workaround copied from https://github.com/datalad/datalad/pull/6883 - - # Although crash happens within NumpyDocstring, it is subclass of GoogleDocstring - # so we need to overload method there - from sphinx.ext.napoleon.docstring import GoogleDocstring - from 
functools import wraps - - - @wraps(GoogleDocstring._consume_inline_attribute) - def _consume_inline_attribute_safe(self): - try: - return self._consume_inline_attribute_safe() - except: - return "", [] - - GoogleDocstring._consume_inline_attribute = _consume_inline_attribute_safe diff --git a/docs/dev/adding-feeds.md b/docs/dev/adding-feeds.md new file mode 100644 index 000000000..78c0056f7 --- /dev/null +++ b/docs/dev/adding-feeds.md @@ -0,0 +1,129 @@ + + + +# Adding Feeds + +Adding a feed doesn't necessarily require any programming experience. There are several collector and parser bots intended for general use. Depending on the data source you are trying to add as a feed, it might be only a matter of creating a working combination of collector bot (such as URL Fetcher) configuration and a parser bot (such as CSV parser) configuration. When you are satisfied with the configurations, add them to the `intelmq/etc/feeds.yaml` file using the following template and open a [pull request](https://github.com/certtools/intelmq/pulls)! + +```yaml +: + : + description: + additional_information: + documentation: + revision: + public: + bots: + collector: + module: + parameters: + name: __FEED__ # KEEP AS IT IS + provider: __PROVIDER__ # KEEP AS IT IS + + parser: + module: + parameters: + +``` + +If the data source utilizes some unusual way of distribution or uses a custom format for the data it might be necessary to develop specialized bot(s) for this particular data source. Always try to use existing bots before you start developing your own. Please also consider extending an existing bot if your use-case is close enough to its features. If you are unsure which way to take, open an [issue](https://github.com/certtools/intelmq/issues) and you will receive guidance. + +## Feeds Wishlist + +This is a list with potentially interesting data sources, which are either currently not supported or the usage is not clearly documented in IntelMQ.
If you want to **contribute** new feeds to IntelMQ, this is a great place to start! + +!!! note + Some of the following data sources might better serve as an expert bot for enriching processed events. + +- Lists of feeds: + - [threatfeeds.io](https://threatfeeds.io) + - [TheCyberThreat](http://thecyberthreat.com/cyber-threat-intelligence-feeds/) + - [sbilly: Awesome Security](https://github.com/sbilly/awesome-security#threat-intelligence) + - [pannoniait:Backlists](https://doku.pannoniait.at/doku.php?id=know-how:blacklists) + - [hslatman:awesome-threat-intelligence](https://github.com/hslatman/awesome-threat-intelligence) + - [Zeek Intelligence Feeds](https://github.com/CriticalPathSecurity/Zeek-Intelligence-Feeds) + - [imuledx OSING feeds](https://github.com/imuledx/OSINT_sources) +- Some third party intelmq bots: [NRDCS IntelMQ fork](https://github.com/NRDCS/intelmq/tree/certlt/intelmq/bots) +- List of potentially interesting data sources: + - [Abuse.ch SSL Blacklists](https://sslbl.abuse.ch/blacklist/) + - [AbuseIPDB](https://www.abuseipdb.com/pricing) + - [Adblock Plus](https://adblockplus.org/en/subscriptions) + - [apivoid IP Reputation API](https://www.apivoid.com/api/ip-reputation/) + - [Anomali Limo Free Intel Feed](https://www.anomali.com/resources/limo) + - [APWG's ecrimex](https://www.ecrimex.net) + - [Avast Threat Intel IoCs of dark matter repository](https://github.com/avast/ioc) + - [Berkeley](https://security.berkeley.edu/aggressive_ips/ips) + - [Binary Defense](https://www.binarydefense.com/) + - [Bot Invaders Realtime tracker](http://www.marc-blanchard.com/BotInvaders/index.php) + - [Botherder Targetedthreats](https://github.com/botherder/targetedthreats/) + - [Botscout Last Caught](http://botscout.com/last_caught_cache.htm) + - [botvrij](https://www.botvrij.eu/) + - [Carbon Black Feeds](https://github.com/carbonblack/cbfeeds) + - [CERT.pl Phishing Warning List](http://hole.cert.pl/domains/) + - [Chaos Reigns](http://www.chaosreigns.com/spam/) + - 
[Critical Stack](https://intel.criticalstack.com) + - [Cruzit](http://www.cruzit.com/xwbl2txt.php) + - [Cyber Crime Tracker](http://cybercrime-tracker.net/all.php) + - [drb-ra C2IntelFeeds](https://github.com/drb-ra/C2IntelFeeds) + - [DNS DB API](https://api.dnsdb.info) + - [ESET Malware Indicators of Compromise](https://github.com/eset/malware-ioc) + - [Facebook Threat Exchange](https://developers.facebook.com/docs/threat-exchange) + - [FilterLists](https://filterlists.com) + - [Firehol IPLists](https://iplists.firehol.org/) + - [Google Webmaster Alerts](https://www.google.com/webmasters/) + - [GPF Comics DNS Blacklist](https://www.gpf-comics.com/dnsbl/export.php) + - [Greensnow](https://blocklist.greensnow.co/greensnow.txt) + - [Greynoise](https://developer.greynoise.io/reference/community-api) + - [HP Feeds](https://github.com/rep/hpfeeds) + - [IBM X-Force Exchange](https://exchange.xforce.ibmcloud.com/) + - [ImproWare AntiSpam](https://antispam.imp.ch/) + - [ISightPartners](http://www.isightpartners.com/) + - [James Brine](https://jamesbrine.com.au/) + - [Joewein](http://www.joewein.net) + - Maltrail: + - [Malware](https://github.com/stamparm/maltrail/tree/master/trails/static/images/malware) + - [Suspicious](https://github.com/stamparm/maltrail/tree/master/trails/static/images/suspicious) + - [Mass Scanners](https://github.com/stamparm/maltrail/blob/master/trails/static/images/mass_scanner.txt) + (for whitelisting) + - [Malshare](https://malshare.com/) + - [MalSilo Malware URLs](https://malsilo.gitlab.io/feeds/dumps/url_list.txt) + - [Malware Config](http://malwareconfig.com) + - [Malware DB (cert.pl)](https://mwdb.cert.pl/) + - [MalwareInt](http://malwareint.com) + - [Malware Must Die](https://malwared.malwaremustdie.org/rss.php) + - [Manity Spam IP addresses](http://www.dnsbl.manitu.net/download/nixspam-ip.dump.gz) + - [Marc Blanchard DGA Domains](http://www.marc-blanchard.com/BotInvaders/index.php) + - [MaxMind 
Proxies](https://www.maxmind.com/en/anonymous_proxies) + - [mIRC Servers](http://www.mirc.com/servers.ini) + - [MISP Warning Lists](https://github.com/MISP/misp-warninglists) + - [Monzymerza](https://github.com/monzymerza/parthenon) + - [Multiproxy](http://multiproxy.org/txt_all/proxy.txt) + - [Neo23x0 signature-base](https://github.com/Neo23x0/signature-base/tree/master/iocs) + - [OpenBugBounty](https://www.openbugbounty.org/) + - [Phishing Army](https://phishing.army/) + - [Phishstats](https://phishstats.info/) (offers JSON API and CSV download) + - [Project Honeypot (#284)](http://www.projecthoneypot.org/list_of_ips.php?rss=1) + - [RST Threat Feed](https://rstcloud.net/) (offers a free and a commercial feed) + - [SANS ISC](https://isc.sans.edu/api/) + - [ShadowServer Sandbox API](http://www.shadowserver.org/wiki/pmwiki.php/Services/Sandboxapi) + - [Shodan search API](https://shodan.readthedocs.io/en/latest/tutorial.html#searching-shodan) + - [Snort](http://labs.snort.org/feeds/ip-filter.blf) + - [stopforumspam Toxic IP addresses and domains](https://www.stopforumspam.com/downloads) + - [Spamhaus Botnet Controller List](https://www.spamhaus.org/bcl/) + - [SteveBlack Hosts File](https://github.com/StevenBlack/hosts) + - [The Haleys](http://charles.the-haleys.org/ssh_dico_attack_hdeny_format.php/hostsdeny.txt) + - [Threat Crowd](https://www.threatcrowd.org/feeds/hashes.txt) + - [Threat Grid](http://www.threatgrid.com/) + - [Threatstream](https://ui.threatstream.com/) + - [TotalHash](http://totalhash.com) + - [UCE Protect](http://wget-mirrors.uceprotect.net/) + - [Unit 42 Public Report IOCs](https://github.com/pan-unit42/iocs) + - [URI BL](http://rss.uribl.com/index.shtml) + - [urlscan.io](https://urlscan.io/products/phishingfeed/) + - [Virustotal](https://www.virustotal.com/gui/home/search) + - [virustream](https://github.com/ntddk/virustream) + - [VoIP Blacklist](http://www.voipbl.org/update/) + - [YourCMC](http://vmx.yourcmc.ru/BAD_HOSTS.IP4) diff --git 
a/docs/dev/bot-development.md b/docs/dev/bot-development.md new file mode 100644 index 000000000..9a452ec66 --- /dev/null +++ b/docs/dev/bot-development.md @@ -0,0 +1,507 @@ + + + +# Bot Development + +Here you should find everything you need to develop a new bot. + +## Steps TODO + +1. create appropriately placed and named python file +2. use correct parent class +3. code the functionality you want (with mixins, inheritance, etc) +4. create appropriately placed test file +5. prepare code for testing your bot +6. add documentation for your bot +7. add changelog and news info + +## Developing new parser bot + +There's a dummy bot including tests at `intelmq/tests/lib/test_parser_bot.py`. + +TODO message vs event vs report + +## Layout Rules + +``` +intelmq/ + lib/ + bot.py + cache.py + message.py + pipeline.py + utils.py + bots/ + collector/ + <bot name>/ + collector.py + parser/ + <bot name>/ + parser.py + expert/ + <bot name>/ + expert.py + output/ + <bot name>/ + output.py + etc/ + runtime.yaml +``` + +Assuming you want to create a bot for a new 'Abuse.ch' feed. It turns out that here it is necessary to create different +parsers for the respective kind of events (e.g. malicious URLs). Therefore, the usual hierarchy ' +intelmq/bots/parser/<feed>/parser.py' would not be suitable because it is necessary to have more parsers for each +Abuse.ch Feed. The solution is to use the same hierarchy with an additional "description" +in the file name, separated by underscore. Also see the section +*Directories and Files naming*. + +Example (including the current ones): + +``` +/intelmq/bots/parser/abusech/parser_domain.py +/intelmq/bots/parser/abusech/parser_ip.py +/intelmq/bots/parser/abusech/parser_ransomware.py +/intelmq/bots/parser/abusech/parser_malicious_url.py +``` + +#### Documentation + +Please document your added/modified code. + +For doc strings, we are using the +[sphinx-napoleon-google-type-annotation](http://www.sphinx-doc.org/en/stable/ext/napoleon.html#type-annotations).
+ +Additionally, Python's type hints/annotations are used, see +PEP484. + +#### Directories Hierarchy on Default Installation + +- Configuration Files Path: `/opt/intelmq/etc/` +- PID Files Path: `/opt/intelmq/var/run/` +- Logs Files and dumps Path: `/opt/intelmq/var/log/` +- Additional Bot Files Path, e.g. templates or databases: + `/opt/intelmq/var/lib/bots/[bot-name]/` + +#### Directories and Files naming + +Any directory and file of IntelMQ has to follow the Directories and Files naming. Any file name or folder name has to: + +- be represented with lowercase and in case the name has multiple words, the spaces between them must be removed or + replaced by underscores +- be self-explaining what the content contains. + +In the bot directories name, the name must correspond to the feed provider. If necessary and applicable the feed name +can and should be used as postfix for the filename. + +Examples: + +``` +intelmq/bots/parser/taichung/parser.py +intelmq/bots/parser/cymru/parser_full_bogons.py +intelmq/bots/parser/abusech/parser_ransomware.py +``` + +#### Class Names + +Class name of the bot (ex: PhishTank Parser) must correspond to the type of the bot (ex: Parser) +e.g. `PhishTankParserBot` + +## Choosing the parent class + +Please use the correct bot type as parent class for your bot. The `intelmq.lib.bot` module contains the following classes: + +- `CollectorBot` +- `ParserBot` +- `ExpertBot` +- `OutputBot` + + +## move this content +You can always start any bot directly from command line by calling the executable. The executable will be created during installation in a directory for binaries. After adding new bots to the code, install IntelMQ to get the files created. Don't forget to give a bot id as first argument. Also, running bots with other users than `intelmq` will raise permission errors.
+ +```bash +$ sudo -i intelmq +$ intelmqctl run file-output # if configured +$ intelmq.bots.outputs.file.output file-output +``` + +You will get all logging outputs directly on stderr as well as in the log file. + +### Template + +Please adjust the doc strings accordingly and remove the in-line comments (`#`). + +```python +""" +SPDX-FileCopyrightText: 2021 Your Name +SPDX-License-Identifier: AGPL-3.0-or-later + +Parse data from example.com, be a nice ExampleParserBot. + +Document possible necessary configurations. +""" +import sys + +# imports for additional libraries and intelmq +from intelmq.lib.bot import ParserBot + + +class ExampleParserBot(ParserBot): + option1: str = "defaultvalue" + option2: bool = False + + def process(self): + report = self.receive_message() + + event = self.new_event(report) # copies feed.name, time.observation + ... # implement the logic here + event.add('source.ip', '127.0.0.1') + event.add('extra', {"os.name": "Linux"}) + if self.option2: + event.add('extra', {"customvalue": self.option1}) + + self.send_message(event) + self.acknowledge_message() + + +BOT = ExampleParserBot +``` + +Any attributes of the bot that are not private can be set by the user using the IntelMQ configuration settings. + +There are some names with special meaning. These can be used i.e. called: + +- `stop`: Shuts the bot down. +- `receive_message` +- `send_message` +- `acknowledge_message`: see next section +- `start`: internal method to run the bot + +These can be defined: + +- `init`: called at startup, use it to set up the bot (initializing classes, loading files etc) +- `process`: processes the messages +- `shutdown`: To Gracefully stop the bot, e.g. terminate connections + +All other names can be used freely. + +### Mixins + +For common settings and methods you can use mixins from +`intelmq.lib.mixins`. To use the mixins, just let your bot inherit from the Mixin class (in addition to the inheritance +from the Bot class). 
For example: + +```python +class HTTPCollectorBot(CollectorBot, HttpMixin): +``` + +The following mixins are available: + +- `HttpMixin` +- `SqlMixin` +- `CacheMixin` + +The `HttpMixin` provides the HTTP attributes described in `common-parameters` and the following methods: + +- `http_get` takes a URL as argument. Any other arguments get passed to the `requests.Session.get` method. `http_get` + returns a + `requests.Response`. +- `http_session` can be used if you ever want to work with the session object directly. It takes no arguments and + returns the bot's `requests.Session`. + +The `SqlMixin` provides methods to connect to SQL servers. Inherit this Mixin so that it handles DB connection for you. +You do not have to bother with: + +- connecting database in the `self.init()` method, self.cur will be set in the `__init__()` +- catching exceptions, just call `self.execute()` instead of + `self.cur.execute()` +- `self.format_char` will be set to '%s' in PostgreSQL and to '?' in SQLite + +The `CacheMixin` provides methods to cache values for bots in a Redis database. It uses the following attributes: + +- `redis_cache_host: str = "127.0.0.1"` +- `redis_cache_port: int = 6379` +- `redis_cache_db: int = 9` +- `redis_cache_ttl: int = 15` +- `redis_cache_password: Optional[str] = None` + +and provides the methods: + +- `cache_exists` +- `cache_get` +- `cache_set` +- `cache_flush` +- `cache_get_redis_instance` + +### Pipeline Interactions + +We can call three methods related to the pipeline: + +- `self.receive_message()`: The pipeline handler pops one message + from the internal queue if possible. Otherwise one message from + the sources list is popped, and added to an internal queue. In + case of errors in process handling, the message can still be found + in the internal queue and is not lost. The bot class unravels the + message and creates an instance of the Event or Report class.
+- `self.send_message(event, path="_default")`: Processed + message is sent to destination queues. It is possible to change + the destination queues by optional `path` parameter. +- `self.acknowledge_message()`: Message formerly received by + `receive_message` is removed from the internal + queue. This should always be done after processing and after the + sending of the new message. In case of errors, this function is + not called and the message will stay in the internal queue waiting + to be processed again. + +### Logging + +##### Log Messages Format + +Log messages have to be clear and well formatted. The format is the following: + +Format: + +``` +<timestamp> - <bot id> - <log level> - <log message> +``` + +Rules: + +- the Log message MUST follow the common rules of a sentence, beginning with uppercase and ending with period. +- the sentence MUST describe the problem or give useful information that provides context to an inexperienced user. Pure stack traces without any further explanation are not helpful. + +When the logger instance is created, the bot id must be given as parameter anyway. The function call defines the log level, see below. + +##### Log Levels + +- *debug*: Debugging information includes retrieved and sent messages, detailed status information. Can include + sensitive information like passwords and amount can be huge. +- *info*: Logs include loaded databases, fetched reports or waiting messages. +- *warning*: Unexpected, but handled behavior. +- *error*: Errors and Exceptions. +- *critical*: Program is failing. + +##### What to Log + +- Try to keep a balance between obscuring the source code file with hundreds of log messages and having too few log + messages. +- In general, a bot MUST report error conditions. + +##### How to Log + +The Bot class creates a logger that should be used by bots. Other components won't log anyway currently.
Examples: + +```python +self.logger.info('Bot start processing.') +self.logger.error('Pipeline failed.') +self.logger.exception('Pipeline failed.') +``` + +The `exception` method automatically appends an exception traceback. The logger instance writes by default to the file +`/opt/intelmq/var/log/[bot-id].log` and to stderr. + +###### String formatting in Logs + +Parameters for string formatting are better passed as arguments to the log function, see +the [Python logging HOWTO](https://docs.python.org/3/howto/logging.html#optimization). In case of formatting problems, the error messages +will be better. For example: + +```python +self.logger.debug('Connecting to %r.', host) +``` + +### Error handling + +The bot class itself has error handling implemented. The bot itself is allowed to throw exceptions and **intended to fail**! The bot should fail in case of malicious messages, and in case of unavailable but necessary resources. The bot class handles the exception and will restart until the maximum number of tries is reached and then fail. Additionally, the message in question is dumped to the file `/opt/intelmq/var/log/[bot-id].dump` and removed from the queue. + +### Initialization + +Maybe it is necessary to set up a Cache instance or load a file into memory. Use the `init` function for this purpose: + +```python +class ExampleParserBot(Bot): + def init(self): + try: + self.database = pyasn.pyasn(self.database) + except IOError: + self.logger.error("pyasn data file does not exist or could not be " + "accessed in '%s'." % self.database) + self.logger.error("Read 'bots/experts/asn_lookup/README.md' and " + "follow the procedure.") + self.stop() +``` + +### Custom configuration checks + +Every bot can define a static method `check(parameters)` which will be called by `intelmqctl check`.
For example the check function of the ASNLookupExpert: + +```python +@staticmethod +def check(parameters): + if not os.path.exists(parameters.get('database', '')): + return [["error", "File given as parameter 'database' does not exist."]] + try: + pyasn.pyasn(parameters['database']) + except Exception as exc: + return [["error", "Error reading database: %r." % exc]] +``` + +### Examples + +- Check [Expert Bots](https://github.com/certtools/intelmq/tree/develop/intelmq/bots/experts) +- Check [Parser Bots](https://github.com/certtools/intelmq/tree/develop/intelmq/bots/parsers) + +### Parsers + +Parsers can use a different, specialized Bot-class. It allows to work on individual elements of a report, splitting the functionality of the parser into multiple functions: + +- `process`: getting and sending data, handling of failures etc. +- `parse`: Parses the report and splits it into single elements (e.g. lines). Can be overridden. +- `parse_line`: Parses elements, returns an Event. Can be overridden. +- `recover_line`: In case of failures and for the field `raw`, this function recovers a fully functional report containing only one element. Can be overridden. + +For common cases, like CSV, existing function can be used, reducing the amount of code to implement. In the best case, only `parse_line` needs to be coded, as only this part interprets the data. + +You can have a look at the implementation `intelmq/lib/bot.py` or at examples, e.g. the DummyBot in `intelmq/tests/lib/test_parser_bot.py`. This is a stub for creating a new Parser, showing the parameters and possible code: + +```python +class MyParserBot(ParserBot): + + def parse(self, report): + """A generator yielding the single elements of the data. + + Comments, headers etc. can be processed here. Data needed by + `self.parse_line` can be saved in `self.tempdata` (list). + + Default parser yields stripped lines. 
+ Override for your use or use an existing parser, e.g.: + parse = ParserBot.parse_csv + """ + for line in utils.base64_decode(report.get("raw")).splitlines(): + yield line.strip() + + def parse_line(self, line, report): + """A generator which can yield one or more messages contained in line. + + Report has the full message, thus you can access some metadata. + Override for your use. + """ + raise NotImplementedError + + def process(self): + self.tempdata = [] # temporary data for parse, parse_line and recover_line + self.__failed = [] + report = self.receive_message() + + for line in self.parse(report): + if not line: + continue + try: + # filter out None + events = list(filter(bool, self.parse_line(line, report))) + except Exception as exc: + self.logger.exception('Failed to parse line.') + self.__failed.append((exc, line)) + else: + self.send_message(*events) + + for exc, line in self.__failed: + self._dump_message(exc, self.recover_line(line)) + + self.acknowledge_message() + + def recover_line(self, line): + """Reverse of parse for single lines. + + Recovers a fully functional report with only the problematic line. + """ + return '\n'.join(self.tempdata + [line]) + + +BOT = MyParserBot +``` + +##### parse_line + +One line can lead to multiple events, thus `parse_line` can't just return one Event. Therefore, this function is a generator, which makes it easy to return multiple values. Use `yield event` for valid Events and `return` in case of a void result (not parsable line, invalid data etc.). + +### Tests + +In order to do automated tests on the bot, it is necessary to write tests including sample data. Have a look at some existing tests: + +- The DummyParserBot in `intelmq/tests/lib/test_parser_bot.py`. This test has the example data (report and event) inside the file, defined as a dictionary. +- The parser for malwaregroup at `intelmq/tests/bots/parsers/malwaregroup/test_parser_*.py`.
The latter loads a sample HTML file from the same directory, which is the raw report. +- The test for ASNLookupExpertBot has two event tests, one is an expected fail (IPv6). + +Ideally an example contains not only the ideal case which should succeed, but also a case which should fail instead. (TODO: Implement assertEventNotEqual or assertEventNotcontainsSubset or similar) Most existing bots are only tested with one message. For newly written tests it is desirable to have tests including more than one message, e.g. a parser fed with a report consisting of multiple events. + +```python +import unittest + +import intelmq.lib.test as test +from intelmq.bots.parsers.exampleparser.parser import ExampleParserBot # adjust bot class name and module + + +class TestExampleParserBot(test.BotTestCase, unittest.TestCase): # adjust test class name + """A TestCase for ExampleParserBot.""" + + @classmethod + def set_bot(cls): + cls.bot_reference = ExampleParserBot # adjust bot class name + cls.default_input_message = EXAMPLE_EVENT # adjust source of the example event (dict), by default an empty event or report (depending on bot type) + + # This is an example how to test the log output + def test_log_test_line(self): + """Test if bot does log example message.""" + self.run_bot() + self.assertRegexpMatches(self.loglines_buffer, + "INFO - Lorem ipsum dolor sit amet") + + def test_event(self): + """Test if correct Event has been produced.""" + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_REPORT) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() +``` + +When calling the file directly, only the tests in this file for the bot will be executed. Some default tests are always executed (via the `test.BotTestCase` class), such as pipeline and message checks, logging, bot naming or empty message handling. + +See the `testing` section about how to run the tests. + +### Cache + +Bots can use a Redis database as cache instance.
Use the `intelmq.lib.utils.Cache` class to set this up and/or look at existing bots, like the `cymru_whois` expert, to see how the cache can be used. Bots must set a TTL for all keys that are cached to avoid caches growing endlessly over time. Bots must use the Redis databases >= 10, but not those already used by other bots. Look at `find intelmq -type f -name '*.py' -exec grep -r 'redis_cache_db' {} +` to see which databases are already used. + +The databases < 10 are reserved for the IntelMQ core: + +- 2: pipeline +- 3: statistics +- 4: tests + + +## Testing Pre-releases + + +### Installation + +The installation procedures need to be adapted only a little bit. + +For native packages, you can find the unstable packages of the next version +here: [Installation Unstable Native Packages](https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq%3Aunstable&package=intelmq) +. The unstable repository only has a limited set of packages, so the stable repository can be enabled in parallel. For +CentOS 8 unstable, the stable repository is required. + +For the installation with pip, use the `--pre` parameter as shown in the following command: + +```bash +pip3 install --pre intelmq +``` + +All other steps are not different. Please report any issues you find in +our [Issue Tracker](https://github.com/certtools/intelmq/issues/new). diff --git a/docs/dev/data-format.md b/docs/dev/data-format.md new file mode 100644 index 000000000..e5b183347 --- /dev/null +++ b/docs/dev/data-format.md @@ -0,0 +1,286 @@ + + + +# Data Format + +Data passed between bots is called a Message. There are two types of Messages: Report and Event. Report is produced by collector bots and consists of collected raw data (CSV, JSON, HTML, etc) and feed metadata. It is passed to a parser bot which parses Report into a single or multiple Events. Expert bots and output bots handle only Events. + +All Messages (Reports and Events) are Python dictionaries (or JSONs).
The key names and according types are defined by the IntelMQ Data Format. + +The source code for the Data Format can be found in the Python module `intelmq.lib.harmonization` and the configuration is present inside the `harmonization.conf` file. (The term Harmonization is used for historical reasons.) + +## Rules for keys + +The keys are grouped together in sub-fields, e.g. `source.ip` or `source.geolocation.latitude`. + +Only the lower-case alphabet, numbers and the underscore are allowed. Further, the field name must not begin with a +number. Thus, keys must match `^[a-z_][a-z_0-9]+(\.[a-z_0-9]+)*$`. These rules also apply for the otherwise +unregulated `extra.` namespace. + +## Data Types + +This document describes the IntelMQ data types used for individual events with a description of each allowed field. + +### ASN + +ASN type. Derived from Integer with forbidden values. + +Only valid are: 0 < ASN <= 4294967295 + +See + +The first and last ASNs of the original 16-bit integers, namely 0 and 65,535, and the last ASN of the 32-bit numbers, namely 4,294,967,295 are reserved and should not be used by operators. + + +### Accuracy + +Accuracy type. A Float between 0 and 100. + + +### Base64 + +Base64 type. Always gives unicode strings. + +Sanitation encodes to base64 and accepts binary and unicode strings. + + +### Boolean + +Boolean type. Without sanitation only python bool is accepted. + +Sanitation accepts string 'true' and 'false' and integers 0 and 1. + + +### ClassificationTaxonomy + +`classification.taxonomy` type. 
+ +The mapping follows Reference Security Incident Taxonomy Working Group – RSIT WG: + + +These old values are automatically mapped to the new ones: + +- 'abusive content' -> 'abusive-content' +- 'information gathering' -> 'information-gathering' +- 'intrusion attempts' -> 'intrusion-attempts' +- 'malicious code' -> 'malicious-code' + +Allowed values are: + +- abusive-content +- availability +- fraud +- information-content-security +- information-gathering +- intrusion-attempts +- intrusions +- malicious-code +- other +- test +- vulnerable + +### ClassificationType + +`classification.type` type. + +The mapping extends Reference Security Incident Taxonomy Working Group – RSIT WG: + + + +These old values are automatically mapped to the new ones: + +- 'botnet drone' -> 'infected-system' +- 'ids alert' -> 'ids-alert' +- 'c&c' -> 'c2-server' +- 'c2server' -> 'c2-server' +- 'infected system' -> 'infected-system' +- 'malware configuration' -> 'malware-configuration' +- 'Unauthorised-information-access' -> 'unauthorised-information-access' +- 'leak' -> 'data-leak' +- 'vulnerable client' -> 'vulnerable-system' +- 'vulnerable service' -> 'vulnerable-system' +- 'ransomware' -> 'infected-system' +- 'unknown' -> 'undetermined' + +These values changed their taxonomy: +'malware': In terms of the taxonomy 'malicious-code' they can be either 'infected-system' or 'malware-distribution' but in terms of malware actually, it is now taxonomy 'other' + +Allowed values are: + +- application-compromise +- blacklist +- brute-force +- burglary +- c2-server +- copyright +- data-leak +- data-loss +- ddos +- ddos-amplifier +- dga-domain +- dos +- exploit +- harmful-speech +- ids-alert +- infected-system +- information-disclosure +- malware +- malware-configuration +- malware-distribution +- masquerade +- misconfiguration +- other +- outage +- phishing +- potentially-unwanted-accessible +- privileged-account-compromise +- proxy +- sabotage +- scanner +- sniffing +- social-engineering +- spam +- 
system-compromise +- test +- tor +- unauthorised-information-access +- unauthorised-information-modification +- unauthorized-use-of-resources +- undetermined +- unprivileged-account-compromise +- violence +- vulnerable-system +- weak-crypto + +### DateTime + +Date and time type for timestamps. + +Valid values are timestamps with time zone and in the format '%Y-%m-%dT%H:%M:%S+00:00'. +Invalid are missing times and missing timezone information (UTC). +Microseconds are also allowed. + +Sanitation normalizes the timezone to UTC, which is the only allowed timezone. + +The following additional conversions are available with the convert function: + +- `timestamp` +- `windows_nt`: From Windows NT / AD / LDAP +- `epoch_millis`: From Milliseconds since Epoch +- `from_format`: From a given format, eg. 'from_format|%H %M %S %m %d %Y %Z' +- `from_format_midnight`: Date from a given format and assume midnight, e.g. 'from_format_midnight|%d-%m-%Y' +- `utc_isoformat`: Parse date generated by datetime.isoformat() +- `fuzzy` (or None): Use dateutils' fuzzy parser, default if no specific parser is given + + +### FQDN + +Fully qualified domain name type. + +All valid lowercase domains are accepted, no IP addresses or URLs. Trailing +dot is not allowed. + +To prevent values like '10.0.0.1:8080' (#1235), we check for the +non-existence of ':'. + + +### Float + +Float type. Without sanitation only python float/integer/long is +accepted. Boolean is explicitly denied. + +Sanitation accepts strings and everything float() accepts. + + +### IPAddress + +Type for IP addresses, all families. Uses the ipaddress module. + +Sanitation accepts integers, strings and objects of ipaddress.IPv4Address and ipaddress.IPv6Address. + +Valid values are only strings. 0.0.0.0 is explicitly not allowed. + + +### IPNetwork + +Type for IP networks, all families. Uses the ipaddress module. + +Sanitation accepts strings and objects of ipaddress.IPv4Network and ipaddress.IPv6Network. 
+If host bits in strings are set, they will be ignored (e.g 127.0.0.1/32). + +Valid values are only strings. + + +### Integer + +Integer type. Without sanitation only python integer/long is accepted. +Bool is explicitly denied. + +Sanitation accepts strings and everything int() accepts. + + +### JSON + +JSON type. + +Sanitation accepts any valid JSON objects. + +Valid values are only unicode strings with JSON objects. + + +### JSONDict + +JSONDict type. + +Sanitation accepts pythons dictionaries and JSON strings. + +Valid values are only unicode strings with JSON dictionaries. + + +### LowercaseString + +Like string, but only allows lower case characters. + +Sanitation lowers all characters. + + +### Registry + +Registry type. Derived from UppercaseString. + +Only valid values: AFRINIC, APNIC, ARIN, LACNIC, RIPE. +RIPE-NCC and RIPENCC are normalized to RIPE. + + +### String + +Any non-empty string without leading or trailing whitespace. + + +### TLP + +TLP level type. Derived from UppercaseString. + +Only valid values: WHITE, GREEN, AMBER, RED. + +Accepted for sanitation are different cases and the prefix 'tlp:'. + + +### URL + +URI type. Local and remote. + +Sanitation converts hxxp and hxxps to http and https. +For local URIs (file) a missing host is replaced by localhost. + +Valid values must have the host (network location part). + + +### UppercaseString + +Like string, but only allows upper case characters. + +Sanitation uppers all characters. diff --git a/docs/dev/data-format.rst b/docs/dev/data-format.rst deleted file mode 100644 index 0b36a2912..000000000 --- a/docs/dev/data-format.rst +++ /dev/null @@ -1,217 +0,0 @@ -.. - SPDX-FileCopyrightText: 2015 Aaron Kaplan - SPDX-License-Identifier: AGPL-3.0-or-later - -################## -Data Format -################## - -.. contents:: - -Overview -======== - -In IntelMQ version 3.x+ the internal data format name changed from DHO ( IntelMQ Data Harmonization ) to IDF ( IntelMQ Data Format ). 
-The python module `intelmq.lib.harmonization` and the configuration file `harmonization.conf` keep the name `harmonization` for now. DHO and IDF have the same meaning. - -All messages (reports and events) are Python/JSON dictionaries. The key names and according types are defined by the IntelMQ Data Format. - -The purpose of this document is to list and clearly define known **fields** in Abusehelper as well as IntelMQ or similar systems. -A field is a ```key=value``` pair. For a clear and unique definition of a field, we must define the **key** (field-name) as well as the possible **values**. -A field belongs to an **event**. An event is basically a structured log record in the form ```key=value, key=value, key=value, …```. -In the :ref:`List of known fields `, each field is grouped by a **section**. We describe these sections briefly below. -Every event **MUST** contain a timestamp field. - -An `IOC `_ (Indicator of compromise) is a single observation like a log line. - -Rules for keys -============== - -The keys can be grouped together in sub-fields, e.g. `source.ip` or `source.geolocation.latitude`. - -Only the lower-case alphabet, numbers and the underscore are allowed. Further, the field name must not begin with a number. -Thus, keys must match ``^[a-z_][a-z_0-9]+(\.[a-z_0-9]+)*$``. -These rules also apply for the otherwise unregulated ``extra.`` namespace. - - -Sections -======== - -As stated above, every field is organized under some section. The following is a description of the sections and what they imply. - -Feed ----- - -Fields listed under this grouping list details about the source feed where information came from. - -Time ----- - -The time section lists all fields related to time information. -This document requires that all the timestamps MUST be normalized to UTC. If the source reports only a date, do not attempt to invent timestamps. - -Source Identity ---------------- - -This section lists all fields related to identification of the source. 
The source is the identity the IoC is about, as opposed to the destination identity, which is another identity. - -For examples see the table below. - -The abuse type of an event defines the way these events needs to be interpreted. For example, for a botnet drone they refer to the compromised machine, whereas for a command and control server they refer the server itself. - -Source Geolocation Identity -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We recognize that ip geolocation is not an exact science and analysis of the abuse data has shown that different sources attribution sources have different opinions of the geolocation of an ip. This is why we recommend to enrich the data with as many sources as you have available and make the decision which value to use for the cc IOC based on those answers. - -Source Local Identity -^^^^^^^^^^^^^^^^^^^^^ - -Some sources report an internal (NATed) IP address. - -Destination Identity --------------------- - -The abuse type of an event defines the way these IOCs needs to be interpreted. For a botnet drone they refer to the compromised machine, whereas for a command and control server they refer the server itself. - -Destination Geolocation Identity -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We recognize that ip geolocation is not an exact science and analysis of the abuse data has shown that different sources attribution sources have different opinions of the geolocation of an ip. This is why we recommend to enrich the data with as many sources as you have available and make the decision which value to use for the cc IOC based on those answers. - -Destination Local Identity -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Some sources report an internal (NATed) IP address. - -Extra values ------------- -Data which does not fit in the format can be saved in the 'extra' namespace. All keys must begin with `extra.`, there are no other rules on key names and values. The values can be get/set like all other fields. - -.. 
_data format field list: - -Fields List and data types -========================== - -A list of allowed fields and data types can be found in :doc:`format-fields`. - -.. _data format classification: - -Classification -============== - -IntelMQ classifies events using three labels: taxonomy, type and identifier. This tuple of three values can be used for deduplication of events and describes what happened. - -The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following classification scheme follows the `Reference Security Incident Taxonomy (RSIT) `_: - - -=============================== ========================================= ============================================= - Taxonomy Type Description -=============================== ========================================= ============================================= - abusive-content harmful-speech Discreditation or discrimination of somebody, e.g. cyber stalking, racism or threats against one or more individuals. - abusive content spam Or 'Unsolicited Bulk Email', this means that the recipient has not granted verifiable permission for the message to be sent and that the message is sent as part of a larger collection of messages, all having a functionally comparable content. - abusive-content violence Child pornography, glorification of violence, etc. - availability ddos Distributed Denial of Service attack, e.g. SYN-Flood or UDP-based reflection/amplification attacks. - availability dos Denial of Service attack, e.g. sending specially crafted requests to a web application which causes the application to crash or slow down. - availability misconfiguration Software misconfiguration resulting in service availability issues, e.g. DNS server with outdated DNSSEC Root Zone KSK. - availability outage Outage caused e.g. by air condition failure or natural disaster. - availability sabotage Physical sabotage, e.g cutting wires or malicious arson. 
- fraud copyright Offering or Installing copies of unlicensed commercial software or other copyright protected materials (Warez). - fraud masquerade Type of attack in which one entity illegitimately impersonates the identity of another in order to benefit from it. - fraud phishing Masquerading as another entity in order to persuade the user to reveal private credentials. - fraud unauthorized-use-of-resources Using resources for unauthorized purposes including profit-making ventures, e.g. the use of e-mail to participate in illegal profit chain letters or pyramid schemes. - information-content-security data-leak Leaked confidential information like credentials or personal data. - information-content-security data-loss Loss of data, e.g. caused by harddisk failure or physical theft. - information-content-security unauthorised-information-access Unauthorized access to information, e.g. by abusing stolen login credentials for a system or application, intercepting traffic or gaining access to physical documents. - information-content-security unauthorised-information-modification Unauthorised modification of information, e.g. by an attacker abusing stolen login credentials for a system or application or a ransomware encrypting data. - information-gathering scanner Attacks that send requests to a system to discover weaknesses. This also includes testing processes to gather information on hosts, services and accounts. Examples: fingerd, DNS querying, ICMP, SMTP (EXPN, RCPT, ...), port scanning. - information-gathering sniffing Observing and recording of network traffic (wiretapping). - information-gathering social-engineering Gathering information from a human being in a non-technical way (e.g. lies, tricks, bribes, or threats). This IOC refers to a resource, which has been observed to perform brute-force attacks over a given application protocol. - intrusion-attempts brute-force Multiple login attempts (Guessing / cracking of passwords, brute force). 
- intrusion-attempts exploit An attack using an unknown exploit. - intrusion-attempts ids-alert IOCs based on a sensor network. This is a generic IOC denomination, should it be difficult to reliably denote the exact type of activity involved for example due to an anecdotal nature of the rule that triggered the alert. - intrusions application-compromise Compromise of an application by exploiting (un)known software vulnerabilities, e.g. SQL injection. - intrusions burglary Physical intrusion, e.g. into corporate building or data center. - intrusions privileged-account-compromise Compromise of a system where the attacker gained administrative privileges. - intrusions system-compromise Compromise of a system, e.g. unauthorised logins or commands. This includes compromising attempts on honeypot systems. - intrusions unprivileged-account-compromise Compromise of a system using an unprivileged (user/service) account. - malicious-code c2-server This is a command and control server in charge of a given number of botnet drones. - malicious-code infected-system This is a compromised machine, which has been observed to make a connection to a command and control server. - malicious-code malware-configuration This is a resource which updates botnet drones with a new configuration. - malicious-code malware-distribution URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. - other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. Not in RSIT. 
- other dga-domain DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in RSIT. - other other All incidents which don't fit in one of the given categories should be put into this class. - other malware An IoC referring to a malware (sample) itself. Not in RSIT. - other proxy This refers to the use of proxies from inside your network. Not in RSIT. - test test Meant for testing. Not in RSIT. - other tor This IOC refers to incidents related to TOR network infrastructure. Not in RSIT. - other undetermined The categorisation of the incident is unknown/undetermined. - vulnerable ddos-amplifier Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled. - vulnerable information-disclosure Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis. - vulnerable potentially-unwanted-accessible Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC. - vulnerable vulnerable-system A system which is vulnerable to certain attacks. Example: misconfigured client proxy settings (example: WPAD), outdated operating system version, etc. - vulnerable weak-crypto Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks. -=============================== ========================================= ============================================= - -In the "other" taxonomy, several types are not in the RSIT, but this taxonomy is intentionally extensible. - -Meaning of source and destination identities --------------------------------------------- - -Meaning of source and destination identities for each classification type and possible ``classification.identifier`` meanings and usages. 
The identifier is often a normalized malware name, grouping many variants or the affected network protocol. -Examples of the meaning of the *source* and *destination* fields for each classification type and possible identifiers are shown here. Usually the main information is in the *source* fields. The identifier is often a normalized malware name, grouping many variants. - -======================= ================================================ ========================== =========================== - Type Source Destination Possible identifiers -======================= ================================================ ========================== =========================== - blacklist *blacklisted device* - brute-force *attacker* target - c2-server *(sinkholed) c&c server* zeus, palevo, feodo - ddos *attacker* target - dga-domain *infected device* - dropzone *server hosting stolen data* - exploit *hosting server* - ids-alert *triggering device* - infected-system *infected device* *contacted c2c server* - malware *infected device* zeus, palevo, feodo - malware configuration *infected device* - malware-distribution *server hosting malware* - phishing *phishing website* - proxy *server allowing policy and security bypass* - scanner *scanning device* scanned device http,modbus,wordpress - spam *infected device* targeted server - system-compromise *server* - vulnerable-system *vulnerable device* heartbleed, openresolver, snmp, wpad -======================= ================================================ ========================== =========================== - -Field in italics is the interesting one for CERTs. - -Example: - -If you know of an IP address that connects to a zeus c&c server, it's about the infected device, thus `classification.taxonomy` is *malicious-code*, `classification.type` is *infected-system* and the `classification.identifier` is zeus. If you want to complain about the c&c server, the event's `classification.type` is *c2server*. 
The `malware.name` can have the full name, eg. `zeus_p2p`. - -Minimum recommended requirements for events -=========================================== - -Below, we have enumerated the minimum recommended requirements for an actionable abuse event. These keys should to be present for the abuse report to make sense for the end recipient. Please note that if you choose to anonymize your sources, you can substitute **feed** with **feed.code** and that only one of the identity keys **ip**, **domain name**, **url**, **email address** must be present. All the rest of the keys are **optional**. - -================= ======================== ================= - Category Key Terminology -================= ======================== ================= - Feed feed.name Should - Classification classification.type Should - Classification classification.taxonomy Should - Time time.source Should - Time time.observation Should - Identity source.ip Should* - Identity source.fqdn Should* - Identity source.url Should* - Identity source.account Should* -================= ======================== ================= - -* only one of them - -This list of required fields is *not* enforced by IntelMQ. - -**NOTE:** This document was copied from `AbuseHelper repository `_ (now `Arctic Security Public documents `_ and improved. diff --git a/docs/dev/documentation.md b/docs/dev/documentation.md new file mode 100644 index 000000000..9bad3d02e --- /dev/null +++ b/docs/dev/documentation.md @@ -0,0 +1,41 @@ + + + +# Documentation + +The documentation is automatically published to + at every push to the repository. 
+ +To build the documentation you need three packages: + +- Sphinx +- ReCommonMark +- `sphinx-markdown-tables` + +To install them, you can use pip: + +```bash +pip3 install -r docs/requirements.txt +``` + +Then use the Makefile to build the documentation using Sphinx: + +```bash +cd docs +make html +``` + +# Documentation 2 + +The documentation is automatically published at with every push to the `develop` branch. + + +## Feeds documentation + +The feeds which are known to be working with IntelMQ are documented in +the machine-readable file `intelmq/etc/feeds.yaml`. The +human-readable documentation is generated with the Sphinx build as +described in the previous section. diff --git a/docs/dev/environment.md b/docs/dev/environment.md new file mode 100644 index 000000000..ec60f65f5 --- /dev/null +++ b/docs/dev/environment.md @@ -0,0 +1,121 @@ + + + +# Development Environment + +## Installation + +Developers can create a fork repository of IntelMQ in order to commit the new code to this repository and then be able +to do pull requests to the main repository. Otherwise you can just use 'certtools' as the username below. + +The following instructions will use `pip3 install -e`, which gives you a so-called *editable* installation. No code is copied into +the library directories, there's just a link to your code. However, configuration files are still required to be moved to +`/opt/intelmq` as the instructions show. + +The traditional way to work with IntelMQ is to install it globally and have a separate user for running it. If you wish +to separate your machine's Python libraries, e.g. for development purposes, you could alternatively use a Python +virtual environment and your local user to run IntelMQ. Please use your preferred way from the instructions below. + +#### Using globally installed IntelMQ + +```bash +sudo -s + +git clone https://github.com/<your username>/intelmq.git $INTELMQ_REPO +cd $INTELMQ_REPO + +pip3 install -e . 
+ +useradd -d $INTELMQ_ROOT_DIR -U -s /bin/bash intelmq + +intelmqsetup +``` + +#### Using virtual environment + +```bash +git clone https://github.com/<your username>/intelmq.git $INTELMQ_REPO +cd $INTELMQ_REPO + +python -m venv .venv +source .venv/bin/activate + +pip install -e . + +# If you use a non-local directory as INTELMQ_ROOT_DIR, use the following +# command to create it and change the ownership. +sudo install -g `whoami` -o `whoami` -d $INTELMQ_ROOT_DIR +# For a local directory, just create it with mkdir: +mkdir $INTELMQ_ROOT_DIR + +intelmqsetup --skip-ownership +``` + +!!! note + Please do not forget that configuration files and log files will be available in `$INTELMQ_ROOT_DIR`. However, if your development is somehow related to any shipped configuration file, you need to apply the changes in your repository `$INTELMQ_REPO/intelmq/etc/`. + +### Additional services + +Some features require additional services, like a message queue or a database. The commonly used services are gathered for development purposes in the Docker Compose file in `contrib/development-tools/docker-compose-common-services.yaml` in the repository. You can use them to run services on your machine in Docker containers, or decide to configure them in another way. To run them using Docker Compose, use the following command from the main repository directory: + +```bash +# For older Docker versions, you may need to use the `docker-compose` command +docker compose -f contrib/development-tools/docker-compose-common-services.yaml up -d +``` + +This will start containers with Redis, RabbitMQ, PostgreSQL and MongoDB in the background. + +### How to develop + +After you have successfully set up your IntelMQ development environment, you can perform any development on any `.py` file in `$INTELMQ_REPO`. 
After making your changes, you can use the normal procedure to run the bots: + +```bash +su - intelmq # Use for global installation +source .venv/bin/activate # Use for virtual environment installation + +intelmqctl start spamhaus-drop-collector + +tail -f $INTELMQ_ROOT_DIR/var/log/spamhaus-drop-collector.log +``` + +You can also add new bots by creating the new `.py` file in the proper directory inside `$INTELMQ_REPO/intelmq`. +However, your IntelMQ installation with pip3 needs to be updated. Please check the following section. + +### Update + +In case you developed a new bot, you need to update your current development installation. In order to do that, please +follow this procedure: + +1. Make sure that you have your new bot in the right place. +2. Update pip metadata and new executables: + +```bash +sudo -s # Use for global installation +source .venv/bin/activate # Use for virtual environment installation + +cd $INTELMQ_REPO +pip3 install -e . +``` + +3. If you're using the global installation, an additional step of changing permissions and ownership is necessary: + +```bash +find $INTELMQ_ROOT_DIR/ -type d -exec chmod 0770 {} \+ +find $INTELMQ_ROOT_DIR/ -type f -exec chmod 0660 {} \+ +chown -R intelmq.intelmq $INTELMQ_ROOT_DIR +## if you use the intelmq manager (adapt the webserver's group if needed): +chown intelmq.www-data $INTELMQ_ROOT_DIR/etc/*.conf +``` + +Now you can test run your new bot following this procedure: + +```bash +su - intelmq # Use for global installation +source .venv/bin/activate # Use for virtual environment installation + +intelmqctl start <bot_id> +``` + diff --git a/docs/dev/feeds-wishlist.rst b/docs/dev/feeds-wishlist.rst deleted file mode 100644 index 09d861cdf..000000000 --- a/docs/dev/feeds-wishlist.rst +++ /dev/null @@ -1,109 +0,0 @@ -.. 
- SPDX-FileCopyrightText: 2020 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -############### -Feeds wishlist -############### - -This is a list with various feeds, which are either currently not supported or the usage is not clearly documented in IntelMQ. - -If you want to **contribute** documenting how to configure existing bots in order to collect new feeds or by creating new parsers, here is a list of potentially interesting feeds. -See :ref:`feeds documentation` for more information on this. - -This list evolved from the issue :issue:`Contribute: Feeds List (#384) <384>`. - -- Lists of feeds: - - - `threatfeeds.io `_ - - `TheCyberThreat `_ - - `sbilly: Awesome Security `_ - - `pannoniait: Backlists `_ - - `hslatman: awesome-threat-intelligence `_ - - `Zeek Intelligence Feeds `_ - - `imuledx OSING feeds `_ - -- Some third party intelmq bots: `NRDCS' IntelMQ fork `_ - -- List of potentially interesting data sources: - - - `Abuse.ch SSL Blacklists `_ - - `AbuseIPDB `_ - - `Adblock Plus `_ - - `apivoid IP Reputation API `_ - - `Anomali Limo Free Intel Feed `_ - - `APWG's ecrimex `_ - - `Avast Threat Intel IoCs of dark matter repository `_ - - `Berkeley `_ - - `Binary Defense `_ - - `Bot Invaders Realtime tracker `_ - - `Botherder Targetedthreats `_ - - `Botscout Last Caught `_ - - `botvrij `_ - - `Carbon Black Feeds `_ - - `CERT.pl Phishing Warning List `_ - - `Chaos Reigns `_ - - `Critical Stack `_ - - `Cruzit `_ - - `Cyber Crime Tracker `_ - - `drb-ra C2IntelFeeds `_ - - `DNS DB API `_ - - `ESET Malware Indicators of Compromise `_ - - `Facebook Threat Exchange `_ - - `FilterLists `_ - - `Firehol IPLists `_ - - `Google Webmaster Alerts `_ - - `GPF Comics DNS Blacklist `_ - - `Greensnow `_ - - `Greynoise `_ - - `HP Feeds `_ - - `IBM X-Force Exchange `_ - - `ImproWare AntiSpam `_ - - `ISightPartners `_ - - `James Brine `_ - - `Joewein `_ - - Maltrail: - - - `Malware `_ - - `Suspicious `_ - - `Mass Scanners `_ (for whitelisting) - - `Malshare `_ - 
- `MalSilo Malware URLs `_ - - `Malware Config `_ - - `Malware DB (cert.pl) `_ - - `MalwareInt `_ - - `Malware Must Die `_ - - `Manity Spam IP addresses `_ - - `Marc Blanchard DGA Domains `_ - - `MaxMind Proxies `_ - - `mIRC Servers `_ - - `MISP Warning Lists `_ - - `Monzymerza `_ - - `Multiproxy `_ - - `Neo23x0 signature-base `_ - - `OpenBugBounty `_ - - `Phishing Army `_ - - `Phishstats `_, offers JSON ("API) and CSV download. - - `Project Honeypot (#284) `_ - - `RST Threat Feed `_ (offers a free and a commercial feed) - - `SANS ISC `_ - - `ShadowServer Sandbox API `_ - - `Shodan search API `_ - - `Snort `_ - - `stopforumspam Toxic IP addresses and domains `_ - - `Spamhaus Botnet Controller List `_ - - `SteveBlack Hosts File `_ - - `The Haleys `_ - - `Threat Crowd `_ - - `Threat Grid `_ - - `Threatstream `_ - - `TOR Project Exit addresses `_ - - `TotalHash `_ - - `UCE Protect `_ - - `Unit 42 Public Report IOCs `_ - - `URI BL `_ - - `urlscan.io `_ - - `Virustotal `_ - - `virustream `_ - - `VoIP Blacklist `_ - - `YourCMC `_ diff --git a/docs/dev/guide.rst b/docs/dev/guide.rst deleted file mode 100644 index 1857f291b..000000000 --- a/docs/dev/guide.rst +++ /dev/null @@ -1,945 +0,0 @@ -.. - SPDX-FileCopyrightText: 2015 Aaron Kaplan - SPDX-License-Identifier: AGPL-3.0-or-later - -################ -Developers Guide -################ - -.. contents:: - -***************** -Intended Audience -***************** - -This guide is for developers of IntelMQ. It explains the code architecture, coding guidelines as well as ways you can contribute code or documentation. -If you have not done so, please read the :doc:`../user/introduction` first. -Once you feel comfortable running IntelMQ with open source bots and you feel adventurous enough to contribute to the project, this guide is for you. -It does not matter if you are an experienced Python programmer or just a beginner. There are a lot of samples to help you out. 
- -However, before we go into the details, it is important to observe and internalize some overall project goals. - -Goals -===== - -It is important, that all developers agree and stick to these meta-guidelines. -IntelMQ tries to: - -* Be well tested. For developers this means, we expect you to write unit tests for bots. Every time. -* Reduce the complexity of system administration -* Reduce the complexity of writing new bots for new data feeds -* Make your code easily and pleasantly readable -* Reduce the probability of events lost in all process with persistence functionality (even system crash) -* Strictly adhere to the existing :doc:`data-format` for key-values in events -* Always use JSON format for all messages internally -* Help and support the interconnection between IntelMQ and existing tools like AbuseHelper, CIF, etc. or new tools (in other words: we will not accept data-silos!) -* Provide an easy way to store data into Log Collectors like ElasticSearch, Splunk -* Provide an easy way to create your own black-lists -* Provide easy to understand interfaces with other systems via HTTP RESTFUL API - -The main take away point from the list above is: things **MUST** stay __intuitive__ and __easy__. -How do you ultimately test if things are still easy? Let them new programmers test-drive your features and if it is not understandable in 15 minutes, go back to the drawing board. - -Similarly, if code does not get accepted upstream by the main developers, it is usually only because of the ease-of-use argument. Do not give up , go back to the drawing board, and re-submit again. - - -.. _development environment: - -*********************** -Development Environment -*********************** - -Installation -============ - -Developers can create a fork repository of IntelMQ in order to commit the new code to this repository and then be able to do pull requests to the main repository. Otherwise you can just use the 'certtools' as username below. 
- -The following instructions will use `pip3 -e`, which gives you a so called *editable* installation. No code is copied in the libraries directories, there's just a link to your code. However, configuration files still required to be moved to `/opt/intelmq` as the instructions show. - -The traditional way to work with IntelMQ is to install it globally and have a separated user for running it. If you wish to separate your machine Python's libraries, e.g. for development purposes, you could alternatively use a Python virtual environment -and your local user to run IntelMQ. Please use your preferred way from instructions below. - -Directories explained -~~~~~~~~~~~~~~~~~~~~~ - -For development purposes, you need two directories: one for a local repository copy, and the second as a root dictionary for the IntelMQ installation. - -The default IntelMQ root directory is `/opt/intelmq`. This directory is used for configurations (`/opt/intelmq/etc`), local states (`/opt/intelmq/var/lib`) and logs (`/opt/intelmq/var/log`). -If you want to change it, please set the `INTELMQ_ROOT_DIR` environment variable with a desired location. - -For repository directory, you can use any path that is accessible by users you use to run IntelMQ. For globally installed IntelMQ, the directory has to be readable by other unprivileged users (e.g. home directories on Fedora can't be read by other users by default). - -To keep commands in the guide universal, we will use environmental variables for repository and installation paths. You can set them with following commands: - -.. code-block:: - - # Adjust paths if you want to use non-standard directories - export INTELMQ_REPO=/opt/dev_intelmq - export INTELMQ_ROOT_DIR=/opt/intelmq - -.. note:: - - If using non-default installation directory, remember to keep the root directory variable set for every run of IntelMQ commands. - If you don't, then the default location `/opt/intelmq` will be used. 
- -Using globally installed IntelMQ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - sudo -s - - git clone https://github.com//intelmq.git $INTELMQ_REPO - cd $INTELMQ_REPO - - pip3 install -e . - - useradd -d $INTELMQ_ROOT_DIR -U -s /bin/bash intelmq - - intelmqsetup - - -Using virtual environment -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - git clone https://github.com//intelmq.git $INTELMQ_REPO - cd $INTELMQ_REPO - - python -m venv .venv - source .venv/bin/activate - - pip install -e . - - # If you use a non-local directory as INTELMQ_ROOT_DIR, use following - # command to create it and change the ownership. - sudo install -g `whoami` -o `whoami` -d $INTELMQ_ROOT_DIR - # For local directory, just create it with mkdir: - mkdir $INTELMQ_ROOT_DIR - - intelmqsetup --skip-ownership - - -.. note:: - - Please do not forget that configuration files, log files will be available on `$INTELMQ_ROOT_DIR`. However, if your development is somehow related to any shipped configuration file, you need to apply the changes in your repository `$INTELMQ_REPO/intelmq/etc/`. - - -Additional services -=================== - -Some features require additional services, like message queue or database. The commonly used services are gained for development purposes in the Docker Compose file in `contrib/development-tools/docker-compose-common-services.yaml` in the repository. -You can use them to run services on your machine in a docker containers, or decide to configure them in an another way. To run them using Docker Compose, use following command from the main repository directory: - -.. code-block:: bash - - # For older Docker versions, you may need to use `docker-compose` command - docker compose -f contrib/development-tools/docker-compose-common-services.yaml up -d - -This will start in the background containers with Redis, RabbitMQ, PostgreSQL and MongoDB. 
- - -How to develop -============== - -After you successfully setup your IntelMQ development environment, you can perform any development on any `.py` file on `$INTELMQ_REPO`. After you change, you can use the normal procedure to run the bots: - -.. code-block:: bash - - su - intelmq # Use for global installation - source .venv/bin/activate # Use for virtual environment installation - - intelmqctl start spamhaus-drop-collector - - tail -f $INTELMQ_ROOT_DIR/var/log/spamhaus-drop-collector.log - -You can also add new bots, creating the new `.py` file on the proper directory inside `cd $INTELMQ_REPO/intelmq`. However, your IntelMQ installation with pip3 needs to be updated. Please check the following section. - - -Update -====== - -In case you developed a new bot, you need to update your current development installation. In order to do that, please follow this procedure: - - -1. Make sure that you have your new bot in the right place. -2. Update pip metadata and new executables: - -.. code-block:: bash - - sudo -s # Use for global installation - source .venv/bin/activate # Use for virtual environment installation - - cd /opt/dev_intelmq - pip3 install -e . - -3. If you're using the global installation, an additional step of changing permissions and ownership is necessary: - -.. code-block:: bash - - find $INTELMQ_ROOT_DIR/ -type d -exec chmod 0770 {} \+ - find $INTELMQ_ROOT_DIR/ -type f -exec chmod 0660 {} \+ - chown -R intelmq.intelmq $INTELMQ_ROOT_DIR - ## if you use the intelmq manager (adapt the webservers' group if needed): - chown intelmq.www-data $INTELMQ_ROOT_DIR/etc/*.conf - -Now you can test run your new bot following this procedure: - -.. code-block:: bash - - su - intelmq # Use for global installation - source .venv/bin/activate # Use for virtual environment installation - - intelmqctl start - -Testing -======= - -Additional test requirements ----------------------------- - -Libraries required for tests are listed in the `setup.py` file. 
You can install them with pip: - -.. code-block:: bash - - pip3 install -e .[development] - -or the package management of your operating system. - -Run the tests -------------- - -All changes have to be tested and new contributions should be accompanied by according unit tests. -Please do not run the tests as root just like any other IntelMQ component for security reasons. Any other unprivileged user is possible. - -You can run the tests by changing to the directory with IntelMQ repository and running either `unittest` or `pytest`. For virtual environment -installation, please activate it and omit the `sudo -u` from examples below: - -.. code-block:: bash - - cd $INTELMQ_REPO - sudo -u intelmq python3 -m unittest {discover|filename} # or - sudo -u intelmq pytest [filename] - sudo -u intelmq python3 setup.py test # uses a build environment (no external dependencies) - -Some bots need local databases to succeed. If you only want to test one explicit test file, give the file path as argument. - -There are multiple `GitHub Action Workflows `_ setup for automatic testing, which are triggered on pull requests. You can also easily activate them for your forks. - -Environment variables ---------------------- - -There are a bunch of environment variables which switch on/off some tests: - -* `INTELMQ_TEST_DATABASES`: databases such as postgres, elasticsearch, mongodb are not tested by default. Set this environment variable to 1 to test those bots. These tests need preparation, e.g. running databases with users and certain passwords etc. Have a look at the `.github/workflows/unittests.yml` and the corresponding `.github/workflows/scripts/setup-full.sh` in IntelMQ's repository for steps to set databases up. -* `INTELMQ_SKIP_INTERNET`: tests requiring internet connection will be skipped if this is set to 1. -* `INTELMQ_SKIP_REDIS`: redis-related tests are ran by default, set this to 1 to skip those. 
-* `INTELMQ_TEST_EXOTIC`: some bots and tests require libraries which may not be available, those are skipped by default. To run them, set this to 1. -* `INTELMQ_TEST_REDIS_PASSWORD`: Set this value to the password for the local redis database if needed. -* `INTELMQ_LOOKYLOO_TEST`: Set this value to run the lookyloo tests. Public lookyloo instance will be used as default. -* `INTELMQ_TEST_INSTALLATION`: Set this value to run tests which require a local IntelMQ installation, such as for testing the command lines tools relying on configuration files, dump files etc. - -For example, to run all tests you can use: - -.. code-block:: bash - - INTELMQ_TEST_DATABASES=1 INTELMQ_TEST_EXOTIC=1 INTELMQ_TEST_INSTALLATION=1 pytest intelmq/tests/ - -Configuration test files ------------------------- - -The tests use the configuration files in your working directory, not those installed in `/opt/intelmq/etc/` or `/etc/`. You can run the tests for a locally changed intelmq without affecting an installation or -requiring root to run them. - -********************** -Development Guidelines -********************** - -Coding-Rules -============ - -Most important: **KEEP IT SIMPLE**!! -This can not be over-estimated. Feature creep can destroy any good software project. But if new folks can not understand what you wrote in 10-15 minutes, it is not good. It's not about the performance, etc. It's about readability. - - -In general, we follow :pep:`0008`. -We recommend reading it before committing code. - -There are some exceptions: sometimes it does not make sense to check for every PEP8 error (such as whitespace indentation when you want to make a dict=() assignment -look pretty. Therefore, we do have some exceptions defined in the `setup.cfg` file. - -We support Python 3 only. - -Unicode -------- - -* Each internal object in IntelMQ (Event, Report, etc) that has strings, their strings MUST be in UTF-8 Unicode format. 
-* Any data received from external sources MUST be transformed into UTF-8 Unicode format before add it to IntelMQ objects. - -Back-end independence and Compatibility ---------------------------------------- - -Any component of the IntelMQ MUST be independent of the message queue technology (Redis, RabbitMQ, etc...). - -License Header --------------- - -Please add a license and copyright header to your bots. There is a Github action that tests for `reuse compliance `_ of your code files. - -Layout Rules -============ - -.. code-block:: bash - - intelmq/ - lib/ - bot.py - cache.py - message.py - pipeline.py - utils.py - bots/ - collector/ - / - collector.py - parser/ - / - parser.py - expert/ - / - expert.py - output/ - / - output.py - /conf - runtime.yaml - -Assuming you want to create a bot for a new 'Abuse.ch' feed. It turns out that here it is necessary to create different parsers for the respective kind of events (e.g. malicious URLs). Therefore, the usual hierarchy ‘intelmq/bots/parser//parser.py’ would not be suitable because it is necessary to have more parsers for each Abuse.ch Feed. The solution is to use the same hierarchy with an additional "description" in the file name, separated by underscore. Also see the section *Directories and Files naming*. - -Example (including the current ones): - -.. code-block:: - - /intelmq/bots/parser/abusech/parser_domain.py - /intelmq/bots/parser/abusech/parser_ip.py - /intelmq/bots/parser/abusech/parser_ransomware.py - - /intelmq/bots/parser/abusech/parser_malicious_url.py - -Documentation -------------- - -Please document your added/modified code. - -For doc strings, we are using the `sphinx-napoleon-google-type-annotation `_. - -Additionally, Python's type hints/annotations are used, see :pep:`484`. 
- -Directories Hierarchy on Default Installation ---------------------------------------------- - -* Configuration Files Path: `/opt/intelmq/etc/` -* PID Files Path: `/opt/intelmq/var/run/` -* Logs Files and dumps Path: `/opt/intelmq/var/log/` -* Additional Bot Files Path, e.g. templates or databases: `/opt/intelmq/var/lib/bots/[bot-name]/` - -Directories and Files naming ----------------------------- - -Any directory and file of IntelMQ has to follow the Directories and Files naming. Any file name or folder name has to -* be represented with lowercase and in case of the name has multiple words, the spaces between them must be removed or replaced by underscores; -* be self-explaining what the content contains. - -In the bot directories name, the name must correspond to the feed provider. If necessary and applicable the feed name can and should be used as postfix for the filename. - -Examples: - -.. code-block:: - - intelmq/bots/parser/taichung/parser.py - intelmq/bots/parser/cymru/parser_full_bogons.py - intelmq/bots/parser/abusech/parser_ransomware.py - -Class Names ------------ - -Class name of the bot (ex: PhishTank Parser) must correspond to the type of the bot (ex: Parser) e.g. `PhishTankParserBot` - - -IntelMQ Data Format Rules -========================= - -Any component of IntelMQ MUST respect the IntelMQ Data Format. - -**Reference:** IntelMQ Data Format - :doc:`data-format` - - -Code Submission Rules -===================== - -Releases, Repositories and Branches ------------------------------------ - - * The main repository is in `github.com/certtools/intelmq `_. - * There are a couple of forks which might be regularly merged into the main repository. They are independent and can have incompatible changes and can deviate from the upstream repository. - * We use `semantic versioning `_. A short summary: - * a.x are stable releases - * a.b.x are bugfix/patch releases - * a.x must be compatible to version a.0 (i.e. 
API/Config-compatibility) - * If you contribute something, please fork the repository, create a separate branch and use this for pull requests, see section below. - -Branching model ---------------- - - * "master" is the stable branch. It hold the latest stable release. Non-developers should only work on this branch. The recommended log level is WARNING. Code is only added by merges from the maintenance branches. - * "maintenance/a.b.x" branches accumulate (cherry-picked) patches for a maintenance release (a.b.x). Recommended for experienced users which deploy intelmq themselves. No new features will be added to these branches. - * "develop" is the development branch for the next stable release (a.x). New features must go there. Developers may want to work on this branch. This branch also holds all patches from maintenance releases if applicable. The recommended log level is DEBUG. - * Separate branches to develop features or bug fixes may be used by any contributor. - -How to Contribute ------------------ - - * Make separate pull requests / branches on GitHub for changes. This allows us to discuss things via GitHub. - * We prefer one Pull Request per feature or change. If you have a bunch of small fixes, please don't create one RP per fix :) - * Only very small and changes (docs, ...) might be committed directly to development branches without Pull Request by the `core-team `_. - * Keep the balance between atomic commits and keeping the amount of commits per PR small. You can use interactive rebasing to squash multiple small commits into one (`rebase -i [base-branch]`). Only do rebasing if the code you are rebasing is yet not used by others or is already merged - because then others may need to run into conflicts. - * Make sure your PR is merge able in the develop branch and all tests are successful. - * If possible `sign your commits with GPG `_. - -Workflow --------- - -We assume here, that origin is your own fork. We first add the upstream repository: - -.. 
code-block:: bash - - > git remote add upstream https://github.com/certtools/intelmq.git - -Syncing develop: - -.. code-block:: bash - - > git checkout develop - > git pull upstream develop - > git push origin develop - -You can do the same with the branches `master` and `maintenance`. - -Create a separate feature-branch to work on, sync develop with upstream. Create working branch from develop: - -.. code-block:: bash - - > git checkout develop - > git checkout -b bugfix - # your work - > git commit - -Or, for bugfixes create a separate bugfix-branch to work on, sync maintenance with upstream. Create working branch from maintenance: - -.. code-block:: bash - - > git checkout maintenance - > git checkout -b new-feature - # your work - > git commit - -Getting upstream's changes for master or any other branch: - -.. code-block:: bash - - > git checkout develop - > git pull upstream develop - > git push origin develop - -There are 2 possibilities to get upstream's commits into your branch. Rebasing and Merging. Using rebasing, your history is rewritten, putting your changes on top of all other commits. You can use this if your changes are not published yet (or only in your fork). - -.. code-block:: bash - - > git checkout bugfix - > git rebase develop - -Using the `-i` flag for rebase enables interactive rebasing. You can then remove, reorder and squash commits, rewrite commit messages, beginning with the given branch, e.g. develop. - -Or using merging. This doesn't break the history. It's considered more , but also pollutes the history with merge commits. - -.. code-block:: bash - - > git checkout bugfix - > git merge develop - -You can then create a PR with your branch `bugfix` to our upstream repository, using GitHub's web interface. 
- -Commit Messages ---------------- - -If it fixes an existing issue, please use GitHub syntax, e.g.: `fixes certtools/intelmq#` - -Prepare for Discussion in GitHub --------------------------------- - -If we don't discuss it, it's probably not tested. - -License and Author files -======================== - -License and Authors files can be found at the root of repository. - -* License file **MUST NOT** be modified except by the explicit written permission by CNCS/CERT.PT or CERT.at -* Credit to the authors file must be always retained. When a new contributor (person and/or organization) improves in some way the repository content (code or documentation), he or she might add his name to the list of contributors. - -License and authors must be only listed in an external file but not inside the code files. - - -*************** -System Overview -*************** - -In the `intelmq/lib/` directory you can find some libraries: - -* Bots: Defines base structure for bots and handling of startup, stop, messages etc. -* Cache: For some expert bots it does make sense to cache external lookup results. Redis is used here. -* Harmonization: For defined types, checks and sanitation methods are implemented. -* Message: Defines Events and Reports classes, uses harmonization to check validity of keys and values according to config. -* Pipeline: Writes messages to message queues. Implemented for productions use is only Redis, AMQP is beta. -* Test: Base class for bot tests with predefined test and assert methods. -* Utils: Utility functions used by system components. - -Code Architecture -================= - -.. image:: /_static/intelmq-arch-schema.png - :alt: Code Architecture - -Pipeline -======== - - * collector bot - * **TBD** - - -******************* -Bot Developer Guide -******************* - -There's a dummy bot including tests at `intelmq/tests/lib/test_parser_bot.py`. - -Please use the correct bot type as parent class for your bot. 
The `intelmq.lib.bot` module contains the classes `CollectorBot`, `ParserBot`, `ExpertBot` and `OutputBot`. - -You can always start any bot directly from command line by calling the executable. -The executable will be created during installation a directory for binaries. After adding new bots to the code, install IntelMQ to get the files created. -Don't forget to give an bot id as first argument. Also, running bots with other users than `intelmq` will raise permission errors. - -.. code-block:: bash - - $ sudo -i intelmq - $ intelmqctl run file-output # if configured - $ intelmq.bots.outputs.file.output file-output - -You will get all logging outputs directly on stderr as well as in the log file. - - -Template -======== - -Please adjust the doc strings accordingly and remove the in-line comments (`#`). - -.. code-block:: python - - """ - SPDX-FileCopyrightText: 2021 Your Name - SPDX-License-Identifier: AGPL-3.0-or-later - - Parse data from example.com, be a nice ExampleParserBot. - - Document possible necessary configurations. - """ - import sys - - # imports for additional libraries and intelmq - from intelmq.lib.bot import ParserBot - - - class ExampleParserBot(ParserBot): - - option1: str = "defaultvalue" - option2: bool = False - - def process(self): - report = self.receive_message() - - event = self.new_event(report) # copies feed.name, time.observation - ... # implement the logic here - event.add('source.ip', '127.0.0.1') - event.add('extra', {"os.name": "Linux"}) - if self.option2: - event.add('extra', {"customvalue": self.option1}) - - self.send_message(event) - self.acknowledge_message() - - - BOT = ExampleParserBot - -Any attributes of the bot that are not private can be set by the user using the IntelMQ configuration settings. - -There are some names with special meaning. These can be used i.e. called: - -* `stop`: Shuts the bot down. 
-* `receive_message`, `send_message`, `acknowledge_message`: see next section -* `start`: internal method to run the bot - -These can be defined: - -* `init`: called at startup, use it to set up the bot (initializing classes, loading files etc) -* `process`: processes the messages -* `shutdown`: To Gracefully stop the bot, e.g. terminate connections - -All other names can be used freely. - -Mixins -====== - -For common settings and methods you can use mixins from :code:`intelmq.lib.mixins`. To use the mixins, just let your bot inherit from the Mixin class (in addition to the inheritance from the Bot class). For example: - -.. code-block:: python - - class HTTPCollectorBot(CollectorBot, HttpMixin): - -The following mixins are available: - -* `HttpMixin` -* `SqlMixin` -* `CacheMixin` - -The `HttpMixin` provides the HTTP attributes described in :ref:`common-parameters` and the following methods: - -* :code:`http_get` takes an URL as argument. Any other arguments get passed to the :code:`request.Session.get` method. :code:`http_get` returns a :code:`requests.Response`. - -* :code:`http_session` can be used if you ever want to work with the session object directly. It takes no arguments and returns the bots request.Session. - -The `SqlMixin` provides methods to connect to SQL servers. Inherit this Mixin so that it handles DB connection for you. -You do not have to bother: - -* connecting database in the :code:`self.init()` method, self.cur will be set in the :code:`__init__()` -* catching exceptions, just call :code:`self.execute()` instead of :code:`self.cur.execute()` -* :code:`self.format_char` will be set to '%s' in PostgreSQL and to '?' in SQLite - -The `CacheMixin` provides methods to cache values for bots in a Redis database. 
It uses the following attributes: - -* :code:`redis_cache_host: str = "127.0.0.1"` -* :code:`redis_cache_port: int = 6379` -* :code:`redis_cache_db: int = 9` -* :code:`redis_cache_ttl: int = 15` -* :code:`redis_cache_password: Optional[str] = None` - -and provides the methods: - -* :code:`cache_exists` -* :code:`cache_get` -* :code:`cache_set` -* :code:`cache_flush` -* :code:`cache_get_redis_instance` - -Pipeline interactions -===================== - -We can call three methods related to the pipeline: - - - `self.receive_message()`: The pipeline handler pops one message from the internal queue if possible. Otherwise one message from the sources list is popped, and added it to an internal queue. In case of errors in process handling, the message can still be found in the internal queue and is not lost. The bot class unravels the message a creates an instance of the Event or Report class. - - `self.send_message(event, path="_default")`: Processed message is sent to destination queues. It is possible to change the destination queues by optional `path` parameter. - - `self.acknowledge_message()`: Message formerly received by `receive_message` is removed from the internal queue. This should always be done after processing and after the sending of the new message. In case of errors, this function is not called and the message will stay in the internal queue waiting to be processed again. - -Logging -======= - -Log Messages Format -------------------- - -Log messages have to be clear and well formatted. The format is the following: - -Format: - -.. code-block:: - - - - - - -Rules: - -* the Log message MUST follow the common rules of a sentence, beginning with uppercase and ending with period. -* the sentence MUST describe the problem or has useful information to give to an inexperienced user a context. Pure stack traces without any further explanation are not helpful. - -When the logger instance is created, the bot id must be given as parameter anyway. 
The function call defines the log level, see below. - -Log Levels ----------- - -* *debug*: Debugging information includes retrieved and sent messages, detailed status information. Can include sensitive information like passwords and amount can be huge. -* *info*: Logs include loaded databases, fetched reports or waiting messages. -* *warning*: Unexpected, but handled behavior. -* *error*: Errors and Exceptions. -* *critical* Program is failing. - -What to Log ------------ - -* Try to keep a balance between obscuring the source code file with hundreds of log messages and having too little log messages. -* In general, a bot MUST report error conditions. - -How to Log ----------- - -The Bot class creates a logger with that should be used by bots. Other components won't log anyway currently. Examples: - -.. code-block::python - - self.logger.info('Bot start processing.') - self.logger.error('Pipeline failed.') - self.logger.exception('Pipeline failed.') - -The `exception` method automatically appends an exception traceback. The logger instance writes by default to the file `/opt/intelmq/var/log/[bot-id].log` and to stderr. - -String formatting in Logs -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Parameters for string formatting are better passed as argument to the log function, see https://docs.python.org/3/library/logging.html#logging.Logger.debug -In case of formatting problems, the error messages will be better. For example: - -.. code-block::python - - self.logger.debug('Connecting to %r.', host) - -Error handling -============== - -The bot class itself has error handling implemented. The bot itself is allowed to throw exceptions and **intended to fail**! The bot should fail in case of malicious messages, and in case of unavailable but necessary resources. The bot class handles the exception and will restart until the maximum number of tries is reached and fail then. 
Additionally, the message in question is dumped to the file `/opt/intelmq/var/log/[bot-id].dump` and removed from the queue. - -Initialization -============== - -Maybe it is necessary so setup a Cache instance or load a file into memory. Use the `init` function for this purpose: - -.. code-block::python - - class ExampleParserBot(Bot): - def init(self): - try: - self.database = pyasn.pyasn(self.database) - except IOError: - self.logger.error("pyasn data file does not exist or could not be " - "accessed in '%s'." % self.database) - self.logger.error("Read 'bots/experts/asn_lookup/README.md' and " - "follow the procedure.") - self.stop() - -Custom configuration checks -=========================== - -Every bot can define a static method `check(parameters)` which will be called by `intelmqctl check`. -For example the check function of the ASNLookupExpert: - -.. code-block::python - - @staticmethod - def check(parameters): - if not os.path.exists(parameters.get('database', '')): - return [["error", "File given as parameter 'database' does not exist."]] - try: - pyasn.pyasn(parameters['database']) - except Exception as exc: - return [["error", "Error reading database: %r." % exc]] - -Examples -======== - -* Check `Expert Bots `_ -* Check `Parser Bots `_ - -Parsers -======= - -Parsers can use a different, specialized Bot-class. It allows to work on individual elements of a report, splitting the functionality of the parser into multiple functions: - - * `process`: getting and sending data, handling of failures etc. - * `parse`: Parses the report and splits it into single elements (e.g. lines). Can be overridden. - * `parse_line`: Parses elements, returns an Event. Can be overridden. - * `recover_line`: In case of failures and for the field `raw`, this function recovers a fully functional report containing only one element. Can be overridden. - -For common cases, like CSV, existing function can be used, reducing the amount of code to implement. 
In the best case, only `parse_line` needs to be coded, as only this part interprets the data. - -You can have a look at the implementation `intelmq/lib/bot.py` or at examples, e.g. the DummyBot in `intelmq/tests/lib/test_parser_bot.py`. This is a stub for creating a new Parser, showing the parameters and possible code: - -.. code-block::python - - class MyParserBot(ParserBot): - - def parse(self, report): - """A generator yielding the single elements of the data. - - Comments, headers etc. can be processed here. Data needed by - `self.parse_line` can be saved in `self.tempdata` (list). - - Default parser yields stripped lines. - Override for your use or use an existing parser, e.g.: - parse = ParserBot.parse_csv - """ - for line in utils.base64_decode(report.get("raw")).splitlines(): - yield line.strip() - - def parse_line(self, line, report): - """A generator which can yield one or more messages contained in line. - - Report has the full message, thus you can access some metadata. - Override for your use. - """ - raise NotImplementedError - - def process(self): - self.tempdata = [] # temporary data for parse, parse_line and recover_line - self.__failed = [] - report = self.receive_message() - - for line in self.parse(report): - if not line: - continue - try: - # filter out None - events = list(filter(bool, self.parse_line(line, report))) - except Exception as exc: - self.logger.exception('Failed to parse line.') - self.__failed.append((exc, line)) - else: - self.send_message(*events) - - for exc, line in self.__failed: - self._dump_message(exc, self.recover_line(line)) - - self.acknowledge_message() - - def recover_line(self, line): - """Reverse of parse for single lines. - - Recovers a fully functional report with only the problematic line. - """ - return '\n'.join(self.tempdata + [line]) - - - BOT = MyParserBot - -parse_line ----------- - -One line can lead to multiple events, thus `parse_line` can't just return one Event. 
Thus, this function is a generator, which allows to easily return multiple values. Use `yield event` for valid Events and `return` in case of a void result (not parsable line, invalid data etc.). - -Tests -===== - -In order to do automated tests on the bot, it is necessary to write tests including sample data. Have a look at some existing tests: - - - The DummyParserBot in `intelmq/tests/lib/test_parser_bot.py`. This test has the example data (report and event) inside the file, defined as dictionary. - - The parser for malwaregroup at `intelmq/tests/bots/parsers/malwaregroup/test_parser_*.py`. The latter loads a sample HTML file from the same directory, which is the raw report. - - The test for ASNLookupExpertBot has two event tests, one is an expected fail (IPv6). - -Ideally an example contains not only the ideal case which should succeed, but also a case where should fail instead. (TODO: Implement assertEventNotEqual or assertEventNotcontainsSubset or similar) -Most existing bots are only tested with one message. For newly written test it is appreciable to have tests including more then one message, e.g. a parser fed with an report consisting of multiple events. - -.. 
code-block::python - - import unittest - - import intelmq.lib.test as test - from intelmq.bots.parsers.exampleparser.parser import ExampleParserBot # adjust bot class name and module - - - class TestExampleParserBot(test.BotTestCase, unittest.TestCase): # adjust test class name - """A TestCase for ExampleParserBot.""" - - @classmethod - def set_bot(cls): - cls.bot_reference = ExampleParserBot # adjust bot class name - cls.default_input_message = EXAMPLE_EVENT # adjust source of the example event (dict), by default an empty event or report (depending on bot type) - - # This is an example how to test the log output - def test_log_test_line(self): - """Test if bot does log example message.""" - self.run_bot() - self.assertRegexpMatches(self.loglines_buffer, - "INFO - Lorem ipsum dolor sit amet") - - def test_event(self): - """Test if correct Event has been produced.""" - self.run_bot() - self.assertMessageEqual(0, EXAMPLE_REPORT) - - - if __name__ == '__main__': # pragma: no cover - unittest.main() - -When calling the file directly, only the tests in this file for the bot will be expected. Some default tests are always executed (via the `test.BotTestCase` class), such as pipeline and message checks, logging, bot naming or empty message handling. - -See the :ref:`testing` section about how to run the tests. - -Cache -===== - -Bots can use a Redis database as cache instance. Use the `intelmq.lib.utils.Cache` class to set this up and/or look at existing bots, like the `cymru_whois` expert how the cache can be used. -Bots must set a TTL for all keys that are cached to avoid caches growing endless over time. -Bots must use the Redis databases `>=` 10, but not those already used by other bots. Look at `find intelmq -type f -name '*.py' -exec grep -r 'redis_cache_db' {} \+` to see which databases are already used. 
- -The databases `<` 10 are reserved for the IntelMQ core: - * 2: pipeline - * 3: statistics - * 4: tests - -************* -Documentation -************* - -The documentation is automatically published to https://intelmq.readthedocs.io/ at every push to the repository. - -To build the documentation you need three packages: -- Sphinx -- ReCommonMark -- `sphinx-markdown-tables` - -To install them, you can use pip: - -.. code-block:: bash - - pip3 install -r docs/requirements.txt - -Then use the Makefile to build the documentation using Sphinx: - -.. code-block:: bash - - cd docs - make html - -.. _feeds documentation: - -Feeds documentation -=================== - -The feeds which are known to be working with IntelMQ are documented in the machine-readable file `intelmq/etc/feeds.yaml`. The human-readable documentation is in generated with the Sphinx build as described in the previous section. - -.. _testing: - -******************** -Testing Pre-releases -******************** - -Installation -============ - -The :doc:`installation procedures <../user/installation>` need to be adapted only a little bit. - -For native packages, you can find the unstable packages of the next version here: `Installation Unstable Native Packages `_. -The unstable only has a limited set of packages, so enabling the stable repository can be activated in parallel. For CentOS 8 unstable, the stable repository is required. - -For the installation with pip, use the `--pre` parameter as shown here following command: - -.. code-block:: bash - - pip3 install --pre intelmq - -All other steps are not different. Please report any issues you find in our `Issue Tracker `_. diff --git a/docs/dev/guidelines.md b/docs/dev/guidelines.md new file mode 100644 index 000000000..7723514e5 --- /dev/null +++ b/docs/dev/guidelines.md @@ -0,0 +1,151 @@ + + + +# Development Guidelines + +## Coding-Rules + +Most important: **KEEP IT SIMPLE**! This can not be over-estimated. 
Feature creep can destroy any good software +project. But if new folks can not understand what you wrote in 10-15 minutes, it is not good. It's not about the +performance, etc. It's about readability. + +In general, we follow [PEP8](https://pep8.org/). We recommend reading it before committing code. + +There are some exceptions: sometimes it does not make sense to check for every PEP8 error (such as whitespace +indentation when you want to make a dict=() assignment look pretty). Therefore, we do have some exceptions defined in the `setup.cfg` file. + +We support Python 3 only. + +#### Unicode + +- Each internal object in IntelMQ (Event, Report, etc) that has strings, their strings MUST be in UTF-8 Unicode format. +- Any data received from external sources MUST be transformed into UTF-8 Unicode format before adding it to IntelMQ + objects. + +#### Back-end independence and Compatibility + +Any component of the IntelMQ MUST be independent of the message queue technology (Redis, RabbitMQ, etc...). + +#### License Header + +Please add a license and copyright header to your bots. There is a Github action that tests +for [reuse compliance](https://reuse.software/) of your code files. + +## IntelMQ Data Format Rules + +Any component of IntelMQ MUST respect the [IntelMQ Data Format](data-format.md). + +## Code Submission Rules + +#### Releases, Repositories and Branches + +- The main repository is in [github.com/certtools/intelmq](https://github.com/certtools/intelmq). +- We use [semantic versioning](http://semver.org/). +- If you contribute something, please fork the repository, create a separate branch and use this for pull requests, see section below. +- There are a couple of forks which might be regularly merged into the main repository. They are independent and can have incompatible changes and can deviate from the upstream repository. + +#### Branching model + +- "master" is the stable branch. It holds the latest stable release.
Non-developers should only work on this branch. The recommended log level is WARNING. Code is only added by merges from the maintenance branches. +- "maintenance/a.b.x" branches accumulate (cherry-picked) patches for a maintenance release (a.b.x). Recommended for + experienced users who deploy intelmq themselves. No new features will be added to these branches. +- "develop" is the development branch for the next stable release + (a.x). New features must go there. Developers may want to work on this branch. This branch also holds all patches from + maintenance releases if applicable. The recommended log level is DEBUG. +- Separate branches to develop features or bug fixes may be used by any contributor. + +#### How to Contribute + +- Make separate pull requests / branches on GitHub for changes. This allows us to discuss things via GitHub. +- We prefer one Pull Request per feature or change. If you have a bunch of small fixes, please don't create one PR per fix :) +- Only very small changes (docs, ...) might be committed directly to development branches without Pull Request by the [core-team](https://github.com/orgs/certtools/teams/core). +- Keep the balance between atomic commits and keeping the amount of commits per PR small. You can use interactive + rebasing to squash multiple small commits into one (`rebase -i [base-branch]`). Only do rebasing if the code you are rebasing is not yet used by others and is not already merged - because otherwise others may run into conflicts. +- Make sure your PR is mergeable in the develop branch and all tests are successful. +- If possible [sign your commits with GPG](https://help.github.com/articles/signing-commits-using-gpg/). + +#### Workflow + +We assume here that origin is your own fork.
We first add the upstream repository: + +```bash + git remote add upstream https://github.com/certtools/intelmq.git +``` + +Syncing develop: + +```bash + git checkout develop + git pull upstream develop + git push origin develop +``` + +You can do the same with the branches `master` and `maintenance`. + +Create a separate feature-branch to work on, sync develop with upstream. Create working branch from develop: + +```bash + git checkout develop + git checkout -b bugfix +# your work + git commit +``` + +Or, for bugfixes create a separate bugfix-branch to work on, sync maintenance with upstream. Create working branch from +maintenance: + +```bash +git checkout maintenance +git checkout -b new-feature +# your work +git commit +``` + +Getting upstream's changes for master or any other branch: + +```bash +git checkout develop +git pull upstream develop +git push origin develop +``` + +There are 2 possibilities to get upstream's commits into your branch. Rebasing and Merging. Using rebasing, your history +is rewritten, putting your changes on top of all other commits. You can use this if your changes are not published yet (or only in your fork). + +```bash +git checkout bugfix +git rebase develop +``` + +Using the `-i` flag for rebase enables interactive rebasing. You can then remove, reorder and squash commits, rewrite commit messages, beginning with the given branch, e.g. develop. + +Or using merging. This doesn't break the history. It's considered more , but also pollutes the history with merge +commits. + +```bash +git checkout bugfix +git merge develop +``` + +You can then create a PR with your branch `bugfix` to our upstream repository, using GitHub's web interface. + +#### Commit Messages + +If it fixes an existing issue, please use GitHub syntax, e.g.: `fixes certtools/intelmq#` + +#### Prepare for Discussion in GitHub + +If we don't discuss it, it's probably not tested. 
+ +## License and Author files + +License and Authors files can be found at the root of the repository. + +- License file **MUST NOT** be modified except by the explicit written permission by CNCS/CERT.PT or CERT.at +- Credit to the authors file must be always retained. When a new contributor (person and/or organization) improves in + some way the repository content (code or documentation), he or she might add his or her name to the list of contributors. + +License and authors must be only listed in an external file but not inside the code files. \ No newline at end of file diff --git a/docs/dev/intro.md b/docs/dev/intro.md new file mode 100644 index 000000000..ebda1022c --- /dev/null +++ b/docs/dev/intro.md @@ -0,0 +1,42 @@ + + + +# Intro + +This guide is for developers of IntelMQ. It explains the code architecture, coding guidelines as well as ways you can contribute code or documentation. If you have not done so, please read the +User Guide and the Administrator Guide first. Once you feel comfortable running IntelMQ with open source bots and you feel adventurous enough to contribute to the project, this guide is for you. It does not matter if you are an experienced Python programmer or just a beginner. There are a lot of examples to help you out. + +However, before we go into the details, it is important to observe and internalize some overall project goals. + +## Goals + +It is important that all developers agree and stick to these meta-guidelines. IntelMQ tries to: + +- Be well tested. For developers this means, we expect you to write unit tests for bots. Every time. +- Reduce the complexity of system administration. +- Reduce the complexity of writing new bots for new data feeds. +- Make your code easily and pleasantly readable. +- Reduce the probability of events lost in all processes with persistence functionality (even system crash). +- Strictly adhere to the existing format for keys and values in events. +- Always use JSON format for all messages internally.
+- Help and support the interconnection between IntelMQ and existing tools like AbuseHelper, CIF, etc. or new tools (in other words: we will not accept data-silos!). +- Provide an easy way to store data into log collectors such as ElasticSearch or Splunk. +- Provide an easy way to create your own black-lists. +- Provide easy to understand interfaces with other systems via an HTTP RESTful API. + +The main takeaway from the list above is: things **MUST** stay _intuitive_ and _easy_. How do you ultimately test if things are still easy? Let new programmers test-drive your features and if they are not understandable in 15 minutes, go back to the drawing board. + +Similarly, if code does not get accepted upstream by the main developers, it is usually only because of the ease-of-use argument. Do not give up, go back to the drawing board, and re-submit. + +## Mailing list + +There is a separate mailing list for developers to discuss development topics: +The [IntelMQ-DevArchive](https://lists.cert.at/pipermail/intelmq-dev/) is public as well. + +## GitHub + +The ideal way to propose changes and additions to IntelMQ is to open +a [Pull Request](https://github.com/certtools/intelmq/pulls) on GitHub. \ No newline at end of file diff --git a/docs/dev/library.md b/docs/dev/library.md new file mode 100644 index 000000000..a524c61d9 --- /dev/null +++ b/docs/dev/library.md @@ -0,0 +1,75 @@ + + +# Running IntelMQ as Library + +## Introduction + +The feature is specified in +[IEP007](https://github.com/certtools/ieps/tree/iep-007/007/). + +## Quickstart + +First, import the Python module and a helper. More about the +`BotLibSettings` later. + +```python +from intelmq.lib.bot import BotLibSettings +from intelmq.bots.experts.domain_suffix.expert import DomainSuffixExpertBot +``` + +Then we need to initialize the bot's instance.
We pass two parameters: + +* `bot_id`: The id of the bot +* `settings`: A Python dictionary of runtime configuration parameters, see +`runtime-configuration`. The bot first +loads the runtime configuration file if it exists. Then we update them +with the `BotLibSettings` which are some accumulated settings disabling +the logging to files and configure the pipeline so that we can send and +receive messages directly to/from the bot. +Last but not least, the actual bot parameters, taking the highest priority. + +```python +domain_suffix = DomainSuffixExpertBot('domain-suffix', # bot id +settings=BotLibSettings | { +'field': 'fqdn', +'suffix_file': '/usr/share/publicsuffix/public_suffix_list.dat'}) +``` + +As the bot is now fully initialized, we can process messages now. +Inserting a message as dictionary: + +```python +queues = domain_suffix.process_message({'source.fqdn': 'www.example.com'}) +``` + +The return value is a dictionary of queues, e.g. the output queue and +the error queue. More details below. + +The method accepts multiple messages as positional arguments: + +```python +domain_suffix.process_message( + {'source.fqdn': 'www.example.com'}, + {'source.fqdn': 'www.example.net'} +) +domain_suffix.process_message(*[ + {'source.fqdn': 'www.example.com'}, + {'source.fqdn': 'www.example.net'} +]) +``` + +Select the output queue (as defined in `destination_queues`), first message, access the field `source.domain_suffix`: +```python +>>> queues['output'][0]['source.domain_suffix'] +'com' +``` + +## Configuration + +Configuration files are not required to run IntelMQ as library. Contrary +to IntelMQ's normal behavior, if the files `runtime.yaml` and +`harmonization.conf` do not exist, IntelMQ won't raise any errors. For +the harmonization configuration, internal defaults are loaded. diff --git a/docs/dev/library.rst b/docs/dev/library.rst deleted file mode 100644 index 8553e79f8..000000000 --- a/docs/dev/library.rst +++ /dev/null @@ -1,74 +0,0 @@ -.. 
- SPDX-FileCopyrightText: 2023 Bundesamt für Sicherheit in der Informationstechnik (BSI) - SPDX-License-Identifier: AGPL-3.0-or-later - -########################## -Running IntelMQ as Library -########################## - -.. contents:: - -************ -Introduction -************ - -The feature is specified in `IEP007 `_. - -********** -Quickstart -********** - -First, import the Python module and a helper. More about the ``BotLibSettings`` later. - -.. code-block:: python - - from intelmq.lib.bot import BotLibSettings - from intelmq.bots.experts.domain_suffix.expert import DomainSuffixExpertBot - -Then we need to initialize the bot's instance. -We pass two parameters: -* ``bot_id``: The id of the bot -* ``settings``: A Python dictionary of runtime configuration parameters, see :ref:`runtime-configuration`. - The bot first loads the runtime configuration file if it exists. - Then we update them with the ``BotLibSettings`` which are some accumulated settings disabling the logging to files and configure the pipeline so that we can send and receive messages directly to/from the bot. - Last by not least, the actual bot parameters, taking the highest priority. - -.. code-block:: python - - domain_suffix = DomainSuffixExpertBot('domain-suffix', # bot id - settings=BotLibSettings | { - 'field': 'fqdn', - 'suffix_file': '/usr/share/publicsuffix/public_suffix_list.dat'} - -As the bot is not fully initialized, we can process messages now. -Inserting a message as dictionary: - -.. code-block:: python - - queues = domain_suffix.process_message({'source.fqdn': 'www.example.com'}) - -The return value is a dictionary of queues, e.g. the output queue and the error queue. -More details below. - -The methods accepts multiple messages as positional argument: - -.. 
code-block:: python - - domain_suffix.process_message({'source.fqdn': 'www.example.com'}, {'source.fqdn': 'www.example.net'}) - domain_suffix.process_message(*[{'source.fqdn': 'www.example.com'}, {'source.fqdn': 'www.example.net'}]) - - -Select the output queue (as defined in `destination_queues`), first message, access the field 'source.domain_suffix': - -.. code-block:: python - - >>> output['output'][0]['source.domain_suffix'] - 'com' - -************* -Configuration -************* - -Configuration files are not required to run IntelMQ as library. -Contrary to IntelMQ normal behavior, if the files ``runtime.yaml`` and ``harmonization.conf`` do not exist, IntelMQ won't raise any errors. -For the harmonization configuration, internal defaults are loaded. diff --git a/docs/dev/release-procedure.rst b/docs/dev/release-procedure.rst deleted file mode 100644 index 1227e8f62..000000000 --- a/docs/dev/release-procedure.rst +++ /dev/null @@ -1,218 +0,0 @@ -.. - SPDX-FileCopyrightText: 2017-2022 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -################# -Release procedure -################# - -.. contents:: - -General assumption: You are working on branch maintenance, the next version is a bug fix release. For feature releases it is slightly different. - -************ -Check before -************ - -* Make sure the current state is really final ;) - You can test most of the steps described here locally before doing it real. -* Check the upgrade functions in `intelmq/lib/upgrades.py`. -* Close the milestone on GitHub and move any open issues to the next one. -* `docs/user/installation.rst`: Update supported operating systems. - -************* -Documentation -************* - -These apply to all projects: - - * CHANGELOG.MD and - * NEWS.MD: Update the latest header, fix the order, remove empty sections and (re)group the entries if necessary. 
- * ``debian/changelog``: Insert a new section for the new version with the tool ``dch`` or update the version of the existing last item if yet unreleased. Don't forget the revision after the version number! - -IntelMQ -^^^^^^^ - - * ``intelmq/version.py``: Update the version. - -Eventually adapt the default log levels if necessary. Should be INFO for stable releases. - -IntelMQ API -^^^^^^^^^^^ - - * ``intelmq_api/version.py``: Update the version. - -IntelMQ Manager -^^^^^^^^^^^^^^^ - - * ``intelmq_manager/version.py``: Update the version. - * ``intelmq_manager/static/js/about.js``: Update the version. - -****************************** -Commit, push, review and merge -****************************** - -Commit your changes in a separate branch, the final commit message should start with :code:`REL:`. Push and create a pull request to maintenance and after that from maintenance to master. Someone else should review the changes. Eventually fix them, make sure the :code:`REL:` is the last commit, you can also push that one at last, after the reviews. - -Why a separate branch? Because if problems show up, you can still force-push to that one, keeping the release commit the latest one. - -*************** -Tag and release -*************** - -Tag the commit with ``git tag -s version HEAD``, merge it into master, push the branches *and* the tag. The tag is just ``a.b.c``, not prefixed with ``v`` (that was necessary only with SVN a long time ago...). - -Go to https://github.com/certtools/intelmq/tags and enter the release notes (from the CHANGELOG) for the new tag, then it's considered a *release* by GitHub. - -***************** -Tarballs and PyPI -***************** - -* Build the source and binary (wheel) distribution: - -.. code-block:: bash - - rm -r build/ - python3 setup.py sdist bdist_wheel - - -* Upload the files including signatures to PyPI with e.g. twine: `twine upload -u __token__ -p $APITOKEN dist/intelmq...` (or set the API Token in `.pypirc`). 
- -************* -Documentation -************* - -Got to `the version settings on readthedocs `_ and activate build for the new version. - -******** -Packages -******** - -We are currently using the public Open Build Service instance of openSUSE: http://build.opensuse.org/project/show/home:sebix:intelmq - -First, test all the steps first with the `unstable-repository `_ and check that at least installations succeed. - -* Create the tarballs with the script `create-archives.sh`. -* Update the dsc and spec files for new filenames and versions. -* Update the .changes file -* Build locally for all distributions. -* Commit. - -************ -Docker Image -************ - -Releasing a new Docker image is very easy. - -* Clone `IntelMQ Docker Repository `_ with ``git clone https://github.com/certat/intelmq-docker.git --recursive`` as this repository contains submodules -* If the ``intelmq-docker`` repository is not updated yet, use `git pull --recurse-submodules` to pull the latest changes from their respective repository. -* Run ``./build.sh``, check your console if the build was successful. -* Run ``./test.sh`` - It will run nosetests3 with the exotic flag. All errors/warnings will be displayed. -* Change the ``build_version`` in ``publish.sh`` to the new version you want to release. -* Change the ``namespace`` variable in `publish.sh`. -* If no error/warning was shown, you can release with ``./publish.sh``. -* Update the `DockerHub ReadMe `_ and add the latest version. -* Commit and push the updates to the ``intelmq-docker`` repository`` - -************* -Announcements -************* - -Announce the new version at the mailinglists intelmq-users, intelmq-dev. -For bigger releases, probably also at IHAP, Twitter, etc. Ask your favorite social media consultant. - -******************* -Prepare new version -******************* - -Increase the version in `intelmq/version.py` and declare it as alpha version. -Add the new version in `intelmq/lib/upgrades.py`. 
-Add a new entry in `debian/changelog` with `dch -v [version] -c debian/changelog`. - -Add new entries to `CHANGELOG.md` and `NEWS.md`. - -IntelMQ -^^^^^^^ - -For ``CHANGELOG.md``: - -.. code-block:: markdown - - ### Configuration - - ### Core - - ### Development - - ### Data Format - - ### Bots - #### Collectors - - #### Parsers - - #### Experts - - #### Outputs - - ### Documentation - - ### Packaging - - ### Tests - - ### Tools - - ### Contrib - - ### Known issues - -And for ``NEWS.md``: - -.. code-block:: markdown - - ### Requirements - - ### Tools - - ### Data Format - - ### Configuration - - ### Libraries - - ### Postgres databases - -IntelMQ API -^^^^^^^^^^^ - -An empty section of ``CHANGELOG.rst``. - -IntelMQ Manager -^^^^^^^^^^^^^^^ - -For ``CHANGELOG.md``: - -.. code-block:: markdown - - ### Pages - - #### Landing page - - #### Configuration - - #### Management - - #### Monitor - - #### Check - - ### Documentation - - ### Third-party libraries - - ### Packaging - - ### Known issues - -And an empty section in the ``NEWS.md`` file. diff --git a/docs/dev/release.md b/docs/dev/release.md new file mode 100644 index 000000000..25e0dd753 --- /dev/null +++ b/docs/dev/release.md @@ -0,0 +1,204 @@ + + + +# Release procedure + +General assumption: You are working on branch maintenance, the next +version is a bug fix release. For feature releases it is slightly +different. + +## Check before + +- Make sure the current state is really final ;) You can test most of + the steps described here locally before doing it real. +- Check the upgrade functions in `intelmq/lib/upgrades.py`. +- Close the milestone on GitHub and move any open issues to the next + one. +- `docs/user/installation.rst`: Update supported operating systems. + +## Documentation + +These apply to all projects: + +- CHANGELOG.MD and NEWS.MD: Update the latest header, fix the order, remove empty sections and (re)group the entries if necessary. 
+- debian/changelog: Insert a new section for the new version with the tool `dch` or update the version of the existing last item if yet unreleased. Don't forget the revision after the version number! + +### IntelMQ + +- `intelmq/version.py`: Update the version. + +Adapt the default log levels if necessary; they should be INFO for stable releases. + +### IntelMQ API + +- `intelmq_api/version.py`: Update the version. + +### IntelMQ Manager + +- `intelmq_manager/version.py`: Update the version. +- `intelmq_manager/static/js/about.js`: Update the version. + +## Commit, push, review and merge + +Commit your changes in a separate branch, the final commit message +should start with `REL:`. Push and create a pull request to maintenance +and after that from maintenance to master. Someone else should review +the changes. Fix any issues found, and make sure the `REL:` commit is the last +commit; you can also push that one last, after the reviews. + +Why a separate branch? Because if problems show up, you can still +force-push to that one, keeping the release commit the latest one. + +## Tag and release + +Tag the commit with `git tag -s version HEAD`, merge it into master, +push the branches *and* the tag. The tag is just `a.b.c`, not prefixed +with `v` (that was necessary only with SVN a long time ago...). + +Go to <https://github.com/certtools/intelmq/tags> and enter the release +notes (from the CHANGELOG) for the new tag, then it's considered a +*release* by GitHub. + +## Tarballs and PyPI + +- Build the source and binary (wheel) distribution: + +```bash +rm -r build/ +python3 setup.py sdist bdist_wheel +``` + +* Upload the files including signatures to PyPI with e.g. twine: `twine upload -u __token__ -p $APITOKEN dist/intelmq...` (or set the API Token in `.pypirc`). + + +## Documentation + +Go to the version settings on readthedocs and activate the build for the new version. 
+ +## Packages + +We are currently using the public Open Build Service instance of +openSUSE: <http://build.opensuse.org/project/show/home:sebix:intelmq> + +First, test all the steps with the [unstable-repository](http://build.opensuse.org/project/show/home:sebix:intelmq:unstable) and check that at least installations succeed. + +- Create the tarballs with the script `create-archives.sh`. +- Update the dsc and spec files for new filenames and versions. +- Update the .changes file. +- Build locally for all distributions. +- Commit. + +## Docker Image + +Releasing a new Docker image is very easy. + +- Clone [IntelMQ Docker Repository](https://github.com/certat/intelmq-docker) with `git clone https://github.com/certat/intelmq-docker.git --recursive` as this repository contains submodules +- If the `intelmq-docker` repository is not updated yet, use `git pull --recurse-submodules` to pull the latest changes from their respective repository. +- Run `./build.sh`, check your console if the build was successful. +- Run `./test.sh` - It will run nosetests3 with the exotic flag. All + errors/warnings will be displayed. +- Change the `build_version` in `publish.sh` to the new version you + want to release. +- Change the `namespace` variable in `publish.sh`. +- If no error/warning was shown, you can release with `./publish.sh`. +- Update the [DockerHub ReadMe](https://hub.docker.com/repository/docker/certat/intelmq-full) and add the latest version. +- Commit and push the updates to the `intelmq-docker` repository + +## Announcements + +Announce the new version at the mailing lists intelmq-users, intelmq-dev. +For bigger releases, probably also at IHAP, Twitter, etc. Ask your +favorite social media consultant. + +## Prepare new version + +Increase the version in `intelmq/version.py` and declare it as alpha version. Add the new version in +`intelmq/lib/upgrades.py`. Add a new entry in `debian/changelog` with `dch -v [version] -c debian/changelog`. + +Add new entries to `CHANGELOG.md` and `NEWS.md`. 
+ +### IntelMQ + +For `CHANGELOG.md`: + +```markdown +### Configuration + +### Core + +### Development + +### Data Format + +### Bots +#### Collectors + +#### Parsers + +#### Experts + +#### Outputs + +### Documentation + +### Packaging + +### Tests + +### Tools + +### Contrib + +### Known issues +``` + +And for `NEWS.md`: + +```markdown +### Requirements + +### Tools + +### Data Format + +### Configuration + +### Libraries + +### Postgres databases +``` + +### IntelMQ API + +An empty section of `CHANGELOG.rst`. + +### IntelMQ Manager + +For `CHANGELOG.md`: + +```markdown +### Pages + +#### Landing page + +#### Configuration + +#### Management + +#### Monitor + +#### Check + +### Documentation + +### Third-party libraries + +### Packaging + +### Known issues +``` + +And an empty section in the `NEWS.md` file. diff --git a/docs/dev/structure.md b/docs/dev/structure.md new file mode 100644 index 000000000..e7885dc8a --- /dev/null +++ b/docs/dev/structure.md @@ -0,0 +1,58 @@ + + + +# System Overview + +In the `intelmq/lib/` directory you can find some libraries: + +- Bots: Defines base structure for bots and handling of startup, stop, + messages etc. +- Cache: For some expert bots it does make sense to cache external + lookup results. Redis is used here. +- Harmonization: For defined types, checks and sanitation methods are + implemented. +- Message: Defines Events and Reports classes, uses harmonization to + check validity of keys and values according to config. +- Pipeline: Writes messages to message queues. Implemented for + production use is only Redis, AMQP is beta. +- Test: Base class for bot tests with predefined test and assert + methods. +- Utils: Utility functions used by system components. + +### Code Architecture + +![Code Architecture](../static/images/intelmq-arch-schema.png) + +### Directories + +For development purposes, you need two directories: one for a local +repository copy, and the second as a root directory for the IntelMQ +installation. 
+ +The default IntelMQ root directory is `/opt/intelmq`. This +directory is used for configurations (`/opt/intelmq/etc`), +local states (`/opt/intelmq/var/lib`) and logs +(`/opt/intelmq/var/log`). If you want to change it, please +set the `INTELMQ_ROOT_DIR` environment variable with a +desired location. + +For repository directory, you can use any path that is accessible by +users you use to run IntelMQ. For globally installed IntelMQ, the +directory has to be readable by other unprivileged users (e.g. home +directories on Fedora can't be read by other users by default). + +To keep commands in the guide universal, we will use environmental +variables for repository and installation paths. You can set them with +following commands: + +```bash +# Adjust paths if you want to use non-standard directories +export INTELMQ_REPO=/opt/dev_intelmq +export INTELMQ_ROOT_DIR=/opt/intelmq +``` + +!!! note + If using non-default installation directory, remember to keep the root directory variable set for every run of IntelMQ commands. If you don't, then the default location `/opt/intelmq` will be used. \ No newline at end of file diff --git a/docs/dev/testing.md b/docs/dev/testing.md new file mode 100644 index 000000000..6e42b64fd --- /dev/null +++ b/docs/dev/testing.md @@ -0,0 +1,64 @@ + + + +# Testing + +## Additional test requirements + +Libraries required for tests are listed in the `setup.py` file. You can install them with pip: + +```bash +pip3 install -e .[development] +``` + +or the package management of your operating system. + +## Run the tests + +All changes have to be tested and new contributions should be accompanied by according unit tests. Please do not run the +tests as root just like any other IntelMQ component for security reasons. Any other unprivileged user is possible. + +You can run the tests by changing to the directory with IntelMQ repository and running either `unittest` or +`pytest`. 
For virtual environment installation, please activate it and omit the `sudo -u` from examples below: + +```bash +cd $INTELMQ_REPO +sudo -u intelmq python3 -m unittest {discover|filename} # or +sudo -u intelmq pytest [filename] +sudo -u intelmq python3 setup.py test # uses a build environment (no external dependencies) +``` + +Some bots need local databases to succeed. If you only want to test one explicit test file, give the file path as +argument. + +There are multiple [GitHub Action Workflows](https://github.com/certtools/intelmq/actions) set up for automatic testing, +which are triggered on pull requests. You can also easily activate them for your forks. + +## Environment variables + +There are a bunch of environment variables which switch on/off some tests: + +- `INTELMQ_TEST_DATABASES`: databases such as postgres, elasticsearch, mongodb are not tested by default. Set this + environment variable to 1 to test those bots. These tests need preparation, e.g. running databases with users and + certain passwords etc. Have a look at the `.github/workflows/unittests.yml` and the corresponding `.github/workflows/scripts/setup-full.sh` in IntelMQ's repository for steps to set databases up. +- `INTELMQ_SKIP_INTERNET`: tests requiring internet connection will be skipped if this is set to 1. +- `INTELMQ_SKIP_REDIS`: redis-related tests are run by default, set this to 1 to skip those. +- `INTELMQ_TEST_EXOTIC`: some bots and tests require libraries which may not be available, those are skipped by + default. To run them, set this to 1. +- `INTELMQ_TEST_REDIS_PASSWORD`: Set this value to the password for the local redis database if needed. +- `INTELMQ_LOOKYLOO_TEST`: Set this value to run the lookyloo tests. Public lookyloo instance will be used as + default. +- `INTELMQ_TEST_INSTALLATION`: Set this value to run tests which require a local IntelMQ installation, such as for testing the command line tools relying on configuration files, dump files etc. 
+ +For example, to run all tests you can use: + +```bash +INTELMQ_TEST_DATABASES=1 INTELMQ_TEST_EXOTIC=1 pytest intelmq/tests/ +``` + +## Configuration test files + +The tests use the configuration files in your working directory, not those installed in `/opt/intelmq/etc/` or `/etc/`. You can run the tests for a locally changed intelmq without affecting an installation or requiring root to run them. \ No newline at end of file diff --git a/docs/help.md b/docs/help.md new file mode 100644 index 000000000..ca18f2c1c --- /dev/null +++ b/docs/help.md @@ -0,0 +1,39 @@ + + +# Getting help + +In case you are lost, you need assistance or something is not discussed in this guide, you can ask the community for help. To be most efficient in seeking help, please describe your problem or question with all necessary information, for example: + +- Name and version of the operating system +- Way of installation (deb/rpm packages, PyPI, docker, local git repository) +- Used bots and configuration +- Logs of bots or terminal output +- Any other useful messages, screenshots + +Please report any errors and suggest improvements via [issues](https://github.com/certtools/intelmq/issues). Thank you! + +## GitHub + +GitHub offers a [discussion platform](https://github.com/certtools/intelmq/discussions) where you can ask questions and seek assistance. + +To report bugs, [GitHub issues](https://github.com/certtools/intelmq/issues) are the ideal place to do so. Every IntelMQ component has its own repository on GitHub, with a separate Issue tracker. + +To participate on GitHub, you first need to create an account on the platform. + +## Mailing list + +The most traditional way is to ask your question, make a proposal or discuss a topic on the +[IntelMQ Users mailing list](https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-users). 
You need to subscribe to the mailing list before posting, but the archive is publicly available: [IntelMQ Users Archive](https://lists.cert.at/pipermail/intelmq-users/). + +## Assistance + +If your organisation is a member of the [CSIRTs Network](https://csirtsnetwork.eu/), you are eligible for support in the [MeliCERTes project](https://melicertes.github.io/docs/). You can also ask for individual support; some members offer support, including, but not limited to: + +- [Aaron Kaplan](https://github.com/aaronkaplan/) (founder of IntelMQ) +- [Institute for Common Good Technology](https://commongoodtechnology.org/) (chairman Sebastian Wagner is an IntelMQ maintainer and developer) +- [Intevation GmbH](https://intevation.de/) (Develops and maintains several IntelMQ components) + + diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..ca9b21abc --- /dev/null +++ b/docs/index.md @@ -0,0 +1,59 @@ + + + + +# Introduction + +**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs, +abuse departments, etc.) for collecting and processing security feeds +(such as log files) using a message queuing protocol. It's a community +driven initiative called **IHAP**[^1] (Incident Handling Automation Project) +which was conceptually designed by European CERTs/CSIRTs during several +InfoSec events. Its main goal is to give incident responders an easy +way to collect & process threat intelligence thus improving the incident +handling processes of CERTs. + +IntelMQ is frequently used for: + +- automated incident handling +- situational awareness +- automated notifications +- as data collector for other tools +- and more! 
+ +The design was influenced by +[AbuseHelper](https://github.com/abusesa/abusehelper) however it was +re-written from scratch and aims at: + +- Reducing the complexity of system administration +- Reducing the complexity of writing new bots for new data feeds +- Reducing the probability of events lost in all process with persistence functionality (even system crash) +- Use and improve the existing Data Harmonization Ontology +- Use JSON format for all messages +- Provide easy way to store data into databases and log collectors such as PostgreSQL, Elasticsearch and Splunk +- Provide easy way to create your own black-lists +- Provide easy communication with other systems via HTTP RESTful API + +It follows the following basic meta-guidelines: + +- Don't break simplicity - KISS +- Keep it open source - forever +- Strive for perfection while keeping a deadline +- Reduce complexity/avoid feature bloat +- Embrace unit testing +- Code readability: test with inexperienced programmers +- Communicate clearly + +## Contribute + +- Subscribe to the [IntelMQ Developers mailing list](https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-dev) and engage in discussions +- Report any errors and suggest improvements via [issues](https://github.com/certtools/intelmq/issues) +- Read the Developer Guide and open a [pull request](https://github.com/certtools/intelmq/pulls) + +[^1]: [Incident Handling Automation Project](https://www.enisa.europa.eu/activities/cert/support/incident-handling-automation), mailing list: ihap@lists.trusted-introducer.org \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 1fbf7aafe..000000000 --- a/docs/index.rst +++ /dev/null @@ -1,102 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Birger Schacht - SPDX-License-Identifier: AGPL-3.0-or-later - -IntelMQ -------- - - -.. figure:: _static/Logo_Intel_MQ.png - :alt: IntelMQ - -|Build Status| |CII Badge| - -.. 
|Build Status| image:: https://github.com/certtools/intelmq/workflows/Nosetest%20test%20suite/badge.svg - :target: https://github.com/certtools/intelmq/actions -.. |CII Badge| image:: https://bestpractices.coreinfrastructure.org/projects/4186/badge - :target: https://bestpractices.coreinfrastructure.org/projects/4186/ - -**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs abuse -departments, etc.) for collecting and processing security feeds (such as -log files) using a message queuing protocol. It's a community driven -initiative called **IHAP** (Incident Handling Automation Project) which -was conceptually designed by European CERTs/CSIRTs during several -InfoSec events. Its main goal is to give to incident responders an easy -way to collect & process threat intelligence thus improving the incident -handling processes of CERTs. - -General information -=================== - -.. toctree:: - :maxdepth: 1 - - user/introduction - user/organization - user/support - -User guide -========== - -.. toctree:: - :maxdepth: 1 - - user/hardware-requirements - user/installation - user/upgrade - user/configuration-management - user/bots - user/intelmqctl - user/feeds - user/intelmq-api - user/intelmq-manager - user/FAQ - -Connecting with other systems -============================= - -.. toctree:: - :maxdepth: 1 - - user/universe - user/ELK-Stack - user/MISP-Integrations - user/n6-integrations - user/CIFv3-Integrations - user/eventdb - user/abuse-contacts - - -Getting involved -================ - -.. toctree:: - :maxdepth: 1 - - dev/guide - dev/library - dev/data-format - dev/harmonization-fields - dev/release-procedure - dev/feeds-wishlist - Code documentation - -Licence -======= - -This software is licensed under GNU Affero General Public License version 3 - -Funded by -========= - -This project was partially funded by the CEF framework - -.. 
figure:: https://ec.europa.eu/inea/sites/default/files/ceflogos/en_horizontal_cef_logo_2.png - :alt: Co-financed by the Connecting Europe Facility of the European Union - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/dev/IntelMQ-3.0-Architecture.md b/docs/nonlinked/IntelMQ-3.0-Architecture.md similarity index 100% rename from docs/dev/IntelMQ-3.0-Architecture.md rename to docs/nonlinked/IntelMQ-3.0-Architecture.md diff --git a/docs/nonlinked/botnet-concept.md b/docs/nonlinked/botnet-concept.md new file mode 100644 index 000000000..57eef1569 --- /dev/null +++ b/docs/nonlinked/botnet-concept.md @@ -0,0 +1,19 @@ + + + +#### Botnet Concept + +The "botnet" represents all currently configured bots which are explicitly enabled. It is, in essence, the graph of +the bots which are connected together via their input source queues and destination queues. + +To get an overview which bots are running, use `intelmqctl status` or use the IntelMQ Manager. Set `"enabled": true` in +the runtime configuration to add a bot to the botnet. By default, bots will be configured as `"enabled": true`. +See the bots documentation for more details on configuration. + +Disabled bots can still be started explicitly using +`intelmqctl start <bot-id>`, but will remain in the state `disabled` if stopped (and not be implicitly enabled by +the `start` command). They are not started by `intelmqctl start` in analogy to the behavior of widely used +initialization systems. diff --git a/docs/nonlinked/fields-table.md b/docs/nonlinked/fields-table.md new file mode 100644 index 000000000..c2ac79aa3 --- /dev/null +++ b/docs/nonlinked/fields-table.md @@ -0,0 +1,90 @@ + + + +| Name | Type | Description | +|----------------------------------|-----------------------------|-------------| +| classification.identifier | [String](#string) | The lowercase identifier defines the actual software or service (e.g. 
``heartbleed`` or ``ntp_version``) or standardized malware name (e.g. ``zeus``). Note that you MAY overwrite this field during processing for your individual setup. This field is not standardized across IntelMQ setups/users. | +| classification.taxonomy | [ClassificationTaxonomy](#classificationtaxonomy) | We recognize the need for the CSIRT teams to apply a static (incident) taxonomy to abuse data. With this goal in mind the type IOC will serve as a basis for this activity. Each value of the dynamic type mapping translates to a an element in the static taxonomy. The European CSIRT teams for example have decided to apply the eCSIRT.net incident classification. The value of the taxonomy key is thus a derivative of the dynamic type above. For more information about check `ENISA taxonomies `_. | +| classification.type | [ClassificationType](#classificationtype) | The abuse type IOC is one of the most crucial pieces of information for any given abuse event. The main idea of dynamic typing is to keep our ontology flexible, since we need to evolve with the evolving threatscape of abuse data. In contrast with the static taxonomy below, the dynamic typing is used to perform business decisions in the abuse handling pipeline. Furthermore, the value data set should be kept as minimal as possible to avoid *type explosion*, which in turn dilutes the business value of the dynamic typing. In general, we normally have two types of abuse type IOC: ones referring to a compromised resource or ones referring to pieces of the criminal infrastructure, such as a command and control servers for example. | +| comment | [String](#string) | Free text commentary about the abuse event inserted by an analyst. | +| destination.abuse_contact | [LowercaseString](#lowercasestring) | Abuse contact for destination address. A comma separated list. | +| destination.account | [String](#string) | An account name or email address, which has been identified to relate to the destination of an abuse event. 
| +| destination.allocated | [DateTime](#datetime) | Allocation date corresponding to BGP prefix. | +| destination.as_name | [String](#string) | The autonomous system name to which the connection headed. | +| destination.asn | [ASN](#asn) | The autonomous system number to which the connection headed. | +| destination.domain_suffix | [FQDN](#fqdn) | The suffix of the domain from the public suffix list. | +| destination.fqdn | [FQDN](#fqdn) | A DNS name related to the host from which the connection originated. DNS allows even binary data in DNS, so we have to allow everything. A final point is stripped, string is converted to lower case characters. | +| destination.geolocation.cc | [UppercaseString](#uppercasestring) | Country-Code according to ISO3166-1 alpha-2 for the destination IP. | +| destination.geolocation.city | [String](#string) | Some geolocation services refer to city-level geolocation. | +| destination.geolocation.country | [String](#string) | The country name derived from the ISO3166 country code (assigned to cc field). | +| destination.geolocation.latitude | [Float](#float) | Latitude coordinates derived from a geolocation service, such as MaxMind geoip db. | +| destination.geolocation.longitude | [Float](#float) | Longitude coordinates derived from a geolocation service, such as MaxMind geoip db. | +| destination.geolocation.region | [String](#string) | Some geolocation services refer to region-level geolocation. | +| destination.geolocation.state | [String](#string) | Some geolocation services refer to state-level geolocation. | +| destination.ip | [IPAddress](#ipaddress) | The IP which is the target of the observed connections. | +| destination.local_hostname | [String](#string) | Some sources report an internal hostname within a NAT related to the name configured for a compromised system | +| destination.local_ip | [IPAddress](#ipaddress) | Some sources report an internal (NATed) IP address related to a compromised system. N.B.
RFC1918 IPs are OK here. | +| destination.network | [IPNetwork](#ipnetwork) | CIDR for an autonomous system. Also known as BGP prefix. If multiple values are possible, select the most specific. | +| destination.port | [Integer](#integer) | The port to which the connection headed. | +| destination.registry | [Registry](#registry) | The IP registry a given ip address is allocated by. | +| destination.reverse_dns | [FQDN](#fqdn) | Reverse DNS name acquired through a reverse DNS query on an IP address. N.B. Record types other than PTR records may also appear in the reverse DNS tree. Furthermore, unfortunately, there is no rule prohibiting people from writing anything in a PTR record. Even JavaScript will work. A final point is stripped, string is converted to lower case characters. | +| destination.tor_node | [Boolean](#boolean) | If the destination IP was a known tor node. | +| destination.url | [URL](#url) | A URL denotes an IOC, which refers to a malicious resource, whose interpretation is defined by the abuse type. A URL with the abuse type phishing refers to a phishing resource. | +| destination.urlpath | [String](#string) | The path portion of an HTTP or related network request. | +| event_description.target | [String](#string) | Some sources denominate the target (organization) of an attack. | +| event_description.text | [String](#string) | A free-form textual description of an abuse event. | +| event_description.url | [URL](#url) | A description URL is a link to a further description of the abuse event in question. | +| event_hash | [UppercaseString](#uppercasestring) | Computed event hash with specific keys and values that identify a unique event. At present, the hash should default to using the SHA1 function. Please note that for an event hash to be able to match more than one event (deduplication) the receiver of an event should calculate it based on a minimal set of keys and values present in the event.
Using for example the observation time in the calculation will most likely render the checksum useless for deduplication purposes. | +| extra | [JSONDict](#jsondict) | All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, etc. **Note**: this is only intended for mapping any fields which can not map naturally into the data harmonization. It is not intended for extending the data harmonization with your own fields. | +| feed.accuracy | [Accuracy](#accuracy) | A float between 0 and 100 that represents how accurate the data in the feed is | +| feed.code | [String](#string) | Code name for the feed, e.g. DFGS, HSDAG etc. | +| feed.documentation | [String](#string) | A URL or hint where to find the documentation of this feed. | +| feed.name | [String](#string) | Name for the feed, usually found in collector bot configuration. | +| feed.provider | [String](#string) | Name for the provider of the feed, usually found in collector bot configuration. | +| feed.url | [URL](#url) | The URL of a given abuse feed, where applicable | +| malware.hash.md5 | [String](#string) | A string depicting an MD5 checksum for a file, be it a malware sample for example. | +| malware.hash.sha1 | [String](#string) | A string depicting a SHA1 checksum for a file, be it a malware sample for example. | +| malware.hash.sha256 | [String](#string) | A string depicting a SHA256 checksum for a file, be it a malware sample for example. | +| malware.name | [LowercaseString](#lowercasestring) | The malware name in lower case. | +| malware.version | [String](#string) | A version string for an identified artifact generation, e.g. a crime-ware kit. | +| misp.attribute_uuid | [LowercaseString](#lowercasestring) | MISP - Malware Information Sharing Platform & Threat Sharing UUID of an attribute. | +| misp.event_uuid | [LowercaseString](#lowercasestring) | MISP - Malware Information Sharing Platform & Threat Sharing UUID. 
| +| output | [JSON](#json) | Event data converted into foreign format, intended to be exported by output plugin. | +| protocol.application | [LowercaseString](#lowercasestring) | e.g. vnc, ssh, sip, irc, http or smtp. | +| protocol.transport | [LowercaseString](#lowercasestring) | e.g. tcp, udp, icmp. | +| raw | [Base64](#base64) | The original line of the event, encoded in base64. | +| rtir_id | [Integer](#integer) | Request Tracker Incident Response ticket id. | +| screenshot_url | [URL](#url) | Some sources may report URLs related to an image generated of a resource without any metadata. Or a URL pointing to a resource, which has been rendered into a webshot, e.g. a PNG image and the relevant metadata related to its retrieval/generation. | +| source.abuse_contact | [LowercaseString](#lowercasestring) | Abuse contact for source address. A comma separated list. | +| source.account | [String](#string) | An account name or email address, which has been identified to relate to the source of an abuse event. | +| source.allocated | [DateTime](#datetime) | Allocation date corresponding to BGP prefix. | +| source.as_name | [String](#string) | The autonomous system name from which the connection originated. | +| source.asn | [ASN](#asn) | The autonomous system number from which the connection originated. | +| source.domain_suffix | [FQDN](#fqdn) | The suffix of the domain from the public suffix list. | +| source.fqdn | [FQDN](#fqdn) | A DNS name related to the host from which the connection originated. DNS allows even binary data in DNS, so we have to allow everything. A final point is stripped, string is converted to lower case characters. | +| source.geolocation.cc | [UppercaseString](#uppercasestring) | Country-Code according to ISO3166-1 alpha-2 for the source IP. | +| source.geolocation.city | [String](#string) | Some geolocation services refer to city-level geolocation.
| +| source.geolocation.country | [String](#string) | The country name derived from the ISO3166 country code (assigned to cc field). | +| source.geolocation.cymru_cc | [UppercaseString](#uppercasestring) | The country code denoted for the ip by the Team Cymru asn to ip mapping service. | +| source.geolocation.geoip_cc | [UppercaseString](#uppercasestring) | MaxMind Country Code (ISO3166-1 alpha-2). | +| source.geolocation.latitude | [Float](#float) | Latitude coordinates derived from a geolocation service, such as MaxMind geoip db. | +| source.geolocation.longitude | [Float](#float) | Longitude coordinates derived from a geolocation service, such as MaxMind geoip db. | +| source.geolocation.region | [String](#string) | Some geolocation services refer to region-level geolocation. | +| source.geolocation.state | [String](#string) | Some geolocation services refer to state-level geolocation. | +| source.ip | [IPAddress](#ipaddress) | The ip observed to initiate the connection | +| source.local_hostname | [String](#string) | Some sources report an internal hostname within a NAT related to the name configured for a compromised system | +| source.local_ip | [IPAddress](#ipaddress) | Some sources report an internal (NATed) IP address related to a compromised system. N.B. RFC1918 IPs are OK here. | +| source.network | [IPNetwork](#ipnetwork) | CIDR for an autonomous system. Also known as BGP prefix. If multiple values are possible, select the most specific. | +| source.port | [Integer](#integer) | The port from which the connection originated. | +| source.registry | [Registry](#registry) | The IP registry a given ip address is allocated by. | +| source.reverse_dns | [FQDN](#fqdn) | Reverse DNS name acquired through a reverse DNS query on an IP address. N.B. Record types other than PTR records may also appear in the reverse DNS tree. Furthermore, unfortunately, there is no rule prohibiting people from writing anything in a PTR record. Even JavaScript will work.
A final point is stripped, string is converted to lower case characters. | +| source.tor_node | [Boolean](#boolean) | If the source IP was a known tor node. | +| source.url | [URL](#url) | A URL denotes an IOC, which refers to a malicious resource, whose interpretation is defined by the abuse type. A URL with the abuse type phishing refers to a phishing resource. | +| source.urlpath | [String](#string) | The path portion of an HTTP or related network request. | +| status | [String](#string) | Status of the malicious resource (phishing, dropzone, etc), e.g. online, offline. | +| time.observation | [DateTime](#datetime) | The time the collector of the local instance processed (observed) the event. | +| time.source | [DateTime](#datetime) | The time of occurrence of the event as reported by the feed (source). | +| tlp | [TLP](#tlp) | Traffic Light Protocol level of the event. | \ No newline at end of file diff --git a/docs/nonlinked/intelmqctl-more.md b/docs/nonlinked/intelmqctl-more.md new file mode 100644 index 000000000..908e43968 --- /dev/null +++ b/docs/nonlinked/intelmqctl-more.md @@ -0,0 +1,84 @@ + + + +## Command-line interface: intelmqctl + +**Syntax** see `intelmqctl -h` + +- Starting a bot: `intelmqctl start bot-id` +- Stopping a bot: `intelmqctl stop bot-id` +- Reloading a bot: `intelmqctl reload bot-id` +- Restarting a bot: `intelmqctl restart bot-id` +- Get status of a bot: `intelmqctl status bot-id` +- Run a bot directly for debugging purposes and temporarily raise the logging level to DEBUG: `intelmqctl run bot-id` +- Get a pdb (or ipdb if installed) live console. + `intelmqctl run bot-id console` +- See the message that waits in the input queue. + `intelmqctl run bot-id message get` +- See additional help for further explanation.
+ `intelmqctl run bot-id --help` +- Starting the botnet (all bots): `intelmqctl start` +- Starting a group of bots: `intelmqctl start --group experts` +- Get a list of all configured bots: `intelmqctl list bots` +- Get a list of all queues: `intelmqctl list queues` If -q is given, only queues with more than one item are listed. +- Get a list of all queues and status of the bots: + `intelmqctl list queues-and-status` +- Clear a queue: `intelmqctl clear queue-id` +- Get logs of a bot: `intelmqctl log bot-id number-of-lines log-level` + Reads the last lines from bot log. Log level should be one of DEBUG, INFO, ERROR or CRITICAL. Default is INFO. Number + of lines defaults to 10, -1 gives all. Result can be longer due to our logging format! +- Upgrade from a previous version: `intelmqctl upgrade-config` Make a backup of your configuration first, also including + bot's configuration files. + + +#### Reloading + +Whilst restart is a mere stop & start, performing +`intelmqctl reload bot-id` will not stop the bot, permitting it to keep the state: the same common behavior as for ( +Linux) daemons. It will initialize again (including reading all configuration again) after the current action is +finished. Also, the rate limit/sleep is continued +(with the *new* time) and not interrupted like with the restart command. So if you have a collector with a rate limit of +24 h, the reload does not trigger a new fetching of the source at the time of the reload, but just 24 h after the last +run -- with the new configuration. Which state the bots are keeping depends on the bots of course. + +#### Forcing reset pipeline and cache (be careful) + +If you are using the default broker (Redis), in some test situations you may need to quickly clear all pipelines and +caches. Use the following procedure: + +```bash +redis-cli FLUSHDB +redis-cli FLUSHALL +``` + +## Management + +IntelMQ has a modular structure consisting of bots.
There are four types of bots: + +- `collector bots`{.interpreted-text role="ref"} retrieve data from internal or external sources, the output are + *reports* consisting of many individual data sets / log lines. +- `parser bots`{.interpreted-text role="ref"} parse the (report) data by splitting it into individual *events* (log + lines) and giving them a defined structure, see also `/dev/data-format`{.interpreted-text role="doc"} for the list of + fields an event may be split up into. +- `expert bots`{.interpreted-text role="ref"} enrich the existing events by e.g. looking up information such as DNS + reverse records, geographic location information (country code) or abuse contacts for an IP address or domain name. +- `output bots`{.interpreted-text role="ref"} write events to files, databases, (REST)-APIs or any other data sink that + you might want to write to. + +Each bot has one source queue (except collectors) and can have multiple destination queues (except outputs). But +multiple bots can write to the same pipeline (queue), resulting in multiple inputs for the next bot. + +Every bot runs in a separate process. A bot is identifiable by a *bot id*. + +Currently only one instance (i.e. *with the same bot id*) of a bot can run at the same time. Concepts for +multiprocessing are being discussed, see this issue: +`Multiprocessing per queue is not supported #186 <186>`{.interpreted-text role="issue"}. Currently you can run multiple +processes of the same bot +(with *different bot ids*) in parallel. + +Example: multiple gethostbyname bots (with different bot ids) may run in parallel, with the same input queue and sending +to the same output queue. Note that the bot providing the input queue **must** have the +`load_balance` option set to `true`.
diff --git a/docs/nonlinked/shadowserver.md b/docs/nonlinked/shadowserver.md new file mode 100644 index 000000000..1c7c2918e --- /dev/null +++ b/docs/nonlinked/shadowserver.md @@ -0,0 +1,24 @@ + + + +# Shadowserver Parser + +**Structure of this Parser Bot** + +The parser consists of two files: + +- `_config.py` + +- `parser.py` or `parser_json.py` + +Both files are required for the parser to work properly. + +**Add new Feedformats** + +Add a new feed format and conversions if required to the file +`_config.py`. Don't forget to update the `mapping` dict. It is required to look up the correct configuration. + +Look at the documentation in the bot's `_config.py` file for more information. diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 000000000..a52467c94 --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,139 @@ + + +# Overview + +The complete IntelMQ universe consists of the following components: + +* IntelMQ +* IntelMQ API +* IntelMQ Manager +* additional tools +* useful scripts + +## IntelMQ + +This project contains the core functionality. + +The Core includes all the components required for processing data feeds. This includes the bots, configuration, pipeline, the internal data format, management tools etc. + +→ [Repository: IntelMQ](https://github.com/certtools/intelmq/) + +## IntelMQ API + +This is an extension of IntelMQ providing a [hug](http://hug.rest)-based REST API for remote management. + +→ [Repository: IntelMQ API](https://github.com/certtools/intelmq-api/) + +## IntelMQ Manager + +The Manager is the best-known software and can be seen as the face of IntelMQ. Its goal is to provide an intuitive web interface to allow non-programmers to specify the data flow in IntelMQ. + +→ [Repository: IntelMQ Manager](https://github.com/certtools/intelmq-manager/) + +![IntelMQ Manager Landing page](static/images/intelmq-manager/landing_page.png) + +## Additional tools + +Here you can find a list of additional tools.
If you think something is missing, please let us know! + +Unless stated otherwise, the tools are maintained by the IntelMQ community. + +### IntelMQ Webinput CSV + +A web-based interface to ingest CSV data into IntelMQ with on-line validation and live feedback. + +This interface allows inserting "one-shot" data feeds into IntelMQ without the need to configure bots in IntelMQ. + +Developed and maintained by [CERT.at](https://cert.at). + +→ [Repository: intelmq-webinput-csv](https://github.com/certat/intelmq-webinput-csv) + +![IntelMQ Webinput CSV Preview page](https://raw.githubusercontent.com/certat/intelmq-webinput-csv/c20413a401c2077140dd17fb7651db1132fde648/docs/images/screenshot.png) + +### IntelMQ Mailgen + +A solution allowing an IntelMQ setup with a complex contact database, managed by a web interface and sending out aggregated email reports. In other words: to send grouped notifications to network owners using SMTP. + +Developed and maintained by [Intevation](https://intevation.de), initially funded by [BSI](https://bsi.bund.de/). + +It consists of the following three components, which can also be used on their own. + +#### IntelMQ CertBUND Contact + +The certbund-contact consists of two IntelMQ expert bots, which fetch and process the information from the contact database, and scripts to import RIPE data into the contact database. Based on user-defined rules, the experts determine to which contact the event is to be sent, and which e-mail template and attachment format to use. + +→ [Repository: intelmq-certbund-contact](https://github.com/Intevation/intelmq-certbund-contact) + +#### IntelMQ Fody + +Fody is a web based interface for Mailgen. It allows reading and editing contacts, querying sent mails (tickets) and calling up data from the PostgreSQL database. + +It can also be used to just query the database without using Mailgen.
+ +![IntelMQ Fody Dashboard](https://raw.githubusercontent.com/Intevation/intelmq-fody/6e41b836d0a2c350a5f2c5c95a4b3be4d3f46027/docs/images/landing_page.png) + +→ [Repository: intelmq-fody](https://github.com/Intevation/intelmq-fody) + +→ [Repository: +intelmq-fody-backend](https://github.com/Intevation/intelmq-fody-backend) + +#### intelmq-mailgen + +Sends emails with grouped event data to the contacts determined by the certbund-contact. Mails can be encrypted with +PGP. + +→ [Repository: +intelmq-mailgen](https://github.com/Intevation/intelmq-mailgen) + +### "Constituency Portal" tuency + +A web application helping CERTs to enable members of their constituency to self-administrate how they get warnings related to their network objects (IP addresses, IP ranges, autonomous systems, domains). *tuency* is developed by [Intevation](https://intevation.de/) for [CERT.at](https://cert.at). + +It features organizational hierarchies, contact roles, self-administration and network objects per organization (Autonomous systems, network ranges, (sub)domains, RIPE organization handles). A network object claiming and approval process prevents abuse. A hierarchical rule-system on the network objects allows fine-grained settings. The tagging system for contacts and organizations complements the contact-management features of the portal. Authentication is based on keycloak, which enables the re-use of the user accounts in the portal. The integrated API enables IntelMQ to query the portal for the right abuse contact and notification settings with the `intelmq.bots.experts.tuency.expert` expert bot. + +![Tuency Netobjects Overview](https://gitlab.com/intevation/tuency/tuency/-/raw/64b95ec0/docs/images/netobjects.png) + +→ [Repository: tuency](https://gitlab.com/Intevation/tuency/tuency) + +### "Constituency Portal" do-portal (deprecated) + +!!! warning + The *do-portal* is deprecated and succeeded by *tuency*.
+ +A contact portal with organizational hierarchies, role functionality and network objects based on RIPE, allows +self-administration by the contacts. Can be queried from IntelMQ and integrates the stats-portal. + +Originally developed by [CERT-EU](https://cert.europa.eu/), then adapted by [CERT.at](https://cert.at). + +→ [Repository: do-portal](https://github.com/certat/do-portal) + +### Stats Portal + +A Grafana-based statistics portal for the `eventdb`{.interpreted-text role="doc"}. Can be integrated into do-portal. It uses aggregated data to serve statistical data quickly. + +![Stats Portal Architecture](https://raw.githubusercontent.com/certtools/stats-portal/38515266aabdf661a0b4becd8e921b03f32429fa/architecture-overview-stats-portal-screen.png) + +→ [Repository: stats-portal](https://github.com/certtools/stats-portal) + +### Malware Name Mapping + +A mapping for malware names of different feeds with different names to a common family name. + +→ [Repository: malware_name_mapping](https://github.com/certtools/malware_name_mapping) + +### IntelMQ-Docker + +A repository with tools for IntelMQ docker instance. + +Developed and maintained by [CERT.at](https://cert.at). + +→ [Repository: intelmq-docker](https://github.com/certat/intelmq-docker) + +## Useful scripts + +The list of useful scripts contributed to the IntelMQ universe can be found in the main repository. 
+ +→ [Repository: intelmq/contrib](https://github.com/certtools/intelmq/tree/develop/contrib) \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index f5dfcbae8..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-FileCopyrightText: 2020 Birger Schacht -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -Sphinx>=3.2.1 -dnspython>=2.0.0 -psutil>=1.2.1 -python-dateutil>=2.5 -python-termstyle>=0.1.10 -redis>=2.10 -requests>=2.2.0 -ruamel.yaml diff --git a/docs/security.md b/docs/security.md new file mode 120000 index 000000000..9d571381d --- /dev/null +++ b/docs/security.md @@ -0,0 +1 @@ +../SECURITY.md \ No newline at end of file diff --git a/docs/_static/Logo_Intel_MQ.png b/docs/static/images/Logo_Intel_MQ.png similarity index 100% rename from docs/_static/Logo_Intel_MQ.png rename to docs/static/images/Logo_Intel_MQ.png diff --git a/docs/_static/Logo_Intel_MQ.png.license b/docs/static/images/Logo_Intel_MQ.png.license similarity index 100% rename from docs/_static/Logo_Intel_MQ.png.license rename to docs/static/images/Logo_Intel_MQ.png.license diff --git a/docs/_static/Logo_Intel_MQ.svg b/docs/static/images/Logo_Intel_MQ.svg similarity index 100% rename from docs/_static/Logo_Intel_MQ.svg rename to docs/static/images/Logo_Intel_MQ.svg diff --git a/docs/_static/Logo_Intel_MQ.svg.license b/docs/static/images/Logo_Intel_MQ.svg.license similarity index 100% rename from docs/_static/Logo_Intel_MQ.svg.license rename to docs/static/images/Logo_Intel_MQ.svg.license diff --git a/docs/_static/eventdb_stats.png b/docs/static/images/eventdb_stats.png similarity index 100% rename from docs/_static/eventdb_stats.png rename to docs/static/images/eventdb_stats.png diff --git a/docs/_static/eventdb_stats.png.license b/docs/static/images/eventdb_stats.png.license similarity index 100% rename from docs/_static/eventdb_stats.png.license rename to docs/static/images/eventdb_stats.png.license diff --git 
a/docs/_static/intelmq-arch-schema.png b/docs/static/images/intelmq-arch-schema.png similarity index 100% rename from docs/_static/intelmq-arch-schema.png rename to docs/static/images/intelmq-arch-schema.png diff --git a/docs/_static/intelmq-arch-schema.png.license b/docs/static/images/intelmq-arch-schema.png.license similarity index 100% rename from docs/_static/intelmq-arch-schema.png.license rename to docs/static/images/intelmq-arch-schema.png.license diff --git a/docs/_static/intelmq-arch-schema.vsd b/docs/static/images/intelmq-arch-schema.vsd similarity index 100% rename from docs/_static/intelmq-arch-schema.vsd rename to docs/static/images/intelmq-arch-schema.vsd diff --git a/docs/_static/intelmq-arch-schema.vsd.license b/docs/static/images/intelmq-arch-schema.vsd.license similarity index 100% rename from docs/_static/intelmq-arch-schema.vsd.license rename to docs/static/images/intelmq-arch-schema.vsd.license diff --git a/docs/_static/intelmq-manager/configuration-path-form.png b/docs/static/images/intelmq-manager/configuration-path-form.png similarity index 100% rename from docs/_static/intelmq-manager/configuration-path-form.png rename to docs/static/images/intelmq-manager/configuration-path-form.png diff --git a/docs/_static/intelmq-manager/configuration-path-form.png.license b/docs/static/images/intelmq-manager/configuration-path-form.png.license similarity index 100% rename from docs/_static/intelmq-manager/configuration-path-form.png.license rename to docs/static/images/intelmq-manager/configuration-path-form.png.license diff --git a/docs/_static/intelmq-manager/configuration-path-set.png b/docs/static/images/intelmq-manager/configuration-path-set.png similarity index 100% rename from docs/_static/intelmq-manager/configuration-path-set.png rename to docs/static/images/intelmq-manager/configuration-path-set.png diff --git a/docs/_static/intelmq-manager/configuration-path-set.png.license 
b/docs/static/images/intelmq-manager/configuration-path-set.png.license similarity index 100% rename from docs/_static/intelmq-manager/configuration-path-set.png.license rename to docs/static/images/intelmq-manager/configuration-path-set.png.license diff --git a/docs/_static/intelmq-manager/configuration.png b/docs/static/images/intelmq-manager/configuration.png similarity index 100% rename from docs/_static/intelmq-manager/configuration.png rename to docs/static/images/intelmq-manager/configuration.png diff --git a/docs/_static/intelmq-manager/configuration.png.license b/docs/static/images/intelmq-manager/configuration.png.license similarity index 100% rename from docs/_static/intelmq-manager/configuration.png.license rename to docs/static/images/intelmq-manager/configuration.png.license diff --git a/docs/_static/intelmq-manager/configuration2.png b/docs/static/images/intelmq-manager/configuration2.png similarity index 100% rename from docs/_static/intelmq-manager/configuration2.png rename to docs/static/images/intelmq-manager/configuration2.png diff --git a/docs/_static/intelmq-manager/configuration2.png.license b/docs/static/images/intelmq-manager/configuration2.png.license similarity index 100% rename from docs/_static/intelmq-manager/configuration2.png.license rename to docs/static/images/intelmq-manager/configuration2.png.license diff --git a/docs/_static/intelmq-manager/landing_page.png b/docs/static/images/intelmq-manager/landing_page.png similarity index 100% rename from docs/_static/intelmq-manager/landing_page.png rename to docs/static/images/intelmq-manager/landing_page.png diff --git a/docs/_static/intelmq-manager/landing_page.png.license b/docs/static/images/intelmq-manager/landing_page.png.license similarity index 100% rename from docs/_static/intelmq-manager/landing_page.png.license rename to docs/static/images/intelmq-manager/landing_page.png.license diff --git a/docs/_static/intelmq-manager/management.png 
b/docs/static/images/intelmq-manager/management.png similarity index 100% rename from docs/_static/intelmq-manager/management.png rename to docs/static/images/intelmq-manager/management.png diff --git a/docs/_static/intelmq-manager/management.png.license b/docs/static/images/intelmq-manager/management.png.license similarity index 100% rename from docs/_static/intelmq-manager/management.png.license rename to docs/static/images/intelmq-manager/management.png.license diff --git a/docs/_static/intelmq-manager/monitor.png b/docs/static/images/intelmq-manager/monitor.png similarity index 100% rename from docs/_static/intelmq-manager/monitor.png rename to docs/static/images/intelmq-manager/monitor.png diff --git a/docs/_static/intelmq-manager/monitor.png.license b/docs/static/images/intelmq-manager/monitor.png.license similarity index 100% rename from docs/_static/intelmq-manager/monitor.png.license rename to docs/static/images/intelmq-manager/monitor.png.license diff --git a/docs/_static/intelmq-manager/monitor2.png b/docs/static/images/intelmq-manager/monitor2.png similarity index 100% rename from docs/_static/intelmq-manager/monitor2.png rename to docs/static/images/intelmq-manager/monitor2.png diff --git a/docs/_static/intelmq-manager/monitor2.png.license b/docs/static/images/intelmq-manager/monitor2.png.license similarity index 100% rename from docs/_static/intelmq-manager/monitor2.png.license rename to docs/static/images/intelmq-manager/monitor2.png.license diff --git a/docs/_static/intelmq_logo.jpg b/docs/static/images/intelmq_logo.jpg similarity index 100% rename from docs/_static/intelmq_logo.jpg rename to docs/static/images/intelmq_logo.jpg diff --git a/docs/_static/intelmq_logo.jpg.license b/docs/static/images/intelmq_logo.jpg.license similarity index 100% rename from docs/_static/intelmq_logo.jpg.license rename to docs/static/images/intelmq_logo.jpg.license diff --git a/docs/_static/n6/data-flow.png b/docs/static/images/n6/data-flow.png similarity index 100% 
rename from docs/_static/n6/data-flow.png rename to docs/static/images/n6/data-flow.png diff --git a/docs/_static/n6/data-flow.png.license b/docs/static/images/n6/data-flow.png.license similarity index 100% rename from docs/_static/n6/data-flow.png.license rename to docs/static/images/n6/data-flow.png.license diff --git a/docs/_static/n6/intelmq-to-n6.png b/docs/static/images/n6/intelmq-to-n6.png similarity index 100% rename from docs/_static/n6/intelmq-to-n6.png rename to docs/static/images/n6/intelmq-to-n6.png diff --git a/docs/_static/n6/intelmq-to-n6.png.license b/docs/static/images/n6/intelmq-to-n6.png.license similarity index 100% rename from docs/_static/n6/intelmq-to-n6.png.license rename to docs/static/images/n6/intelmq-to-n6.png.license diff --git a/docs/_static/n6/intelmq-to-n6.svg b/docs/static/images/n6/intelmq-to-n6.svg similarity index 100% rename from docs/_static/n6/intelmq-to-n6.svg rename to docs/static/images/n6/intelmq-to-n6.svg diff --git a/docs/_static/n6/intelmq-to-n6.svg.license b/docs/static/images/n6/intelmq-to-n6.svg.license similarity index 100% rename from docs/_static/n6/intelmq-to-n6.svg.license rename to docs/static/images/n6/intelmq-to-n6.svg.license diff --git a/docs/_static/n6/n6-schemat2.png b/docs/static/images/n6/n6-schemat2.png similarity index 100% rename from docs/_static/n6/n6-schemat2.png rename to docs/static/images/n6/n6-schemat2.png diff --git a/docs/_static/n6/n6-schemat2.png.license b/docs/static/images/n6/n6-schemat2.png.license similarity index 100% rename from docs/_static/n6/n6-schemat2.png.license rename to docs/static/images/n6/n6-schemat2.png.license diff --git a/docs/_static/n6/n6-to-intelmq.png b/docs/static/images/n6/n6-to-intelmq.png similarity index 100% rename from docs/_static/n6/n6-to-intelmq.png rename to docs/static/images/n6/n6-to-intelmq.png diff --git a/docs/_static/n6/n6-to-intelmq.png.license b/docs/static/images/n6/n6-to-intelmq.png.license similarity index 100% rename from 
docs/_static/n6/n6-to-intelmq.png.license rename to docs/static/images/n6/n6-to-intelmq.png.license diff --git a/docs/_static/n6/n6-to-intelmq.svg b/docs/static/images/n6/n6-to-intelmq.svg similarity index 100% rename from docs/_static/n6/n6-to-intelmq.svg rename to docs/static/images/n6/n6-to-intelmq.svg diff --git a/docs/_static/n6/n6-to-intelmq.svg.license b/docs/static/images/n6/n6-to-intelmq.svg.license similarity index 100% rename from docs/_static/n6/n6-to-intelmq.svg.license rename to docs/static/images/n6/n6-to-intelmq.svg.license diff --git a/docs/_static/rabbitmq-user-monitoring.png b/docs/static/images/rabbitmq-user-monitoring.png similarity index 100% rename from docs/_static/rabbitmq-user-monitoring.png rename to docs/static/images/rabbitmq-user-monitoring.png diff --git a/docs/_static/rabbitmq-user-monitoring.png.license b/docs/static/images/rabbitmq-user-monitoring.png.license similarity index 100% rename from docs/_static/rabbitmq-user-monitoring.png.license rename to docs/static/images/rabbitmq-user-monitoring.png.license diff --git a/docs/tutorials/intelmq-manager.md b/docs/tutorials/intelmq-manager.md new file mode 100644 index 000000000..b3a274442 --- /dev/null +++ b/docs/tutorials/intelmq-manager.md @@ -0,0 +1,7 @@ + + + +# Example tutorial on using IntelMQ Manager diff --git a/docs/user/CIFv3-Integrations.rst b/docs/user/CIFv3-Integrations.rst deleted file mode 100644 index 01cd913a4..000000000 --- a/docs/user/CIFv3-Integrations.rst +++ /dev/null @@ -1,16 +0,0 @@ -.. - SPDX-FileCopyrightText: 2022 REN-ISAC - SPDX-License-Identifier: AGPL-3.0-or-later - -CIFv3 integrations in IntelMQ -============================ - -CIF creates an accessible indicator store. A REST API is exposed to interact with the store and quickly process/share indicators. -CIFv3 can correlate indicators via the UUID attribute. - -CIF3 API Output -------------------------------- - -Can be used to submit indicators to a CIFv3 instance by using the `CIFv3 API `_. 
- -Look at the :ref:`Bots' documentation ` for more information. diff --git a/docs/user/ELK-Stack.rst b/docs/user/ELK-Stack.rst deleted file mode 100644 index f35aaa51b..000000000 --- a/docs/user/ELK-Stack.rst +++ /dev/null @@ -1,117 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020 gethvi - SPDX-License-Identifier: AGPL-3.0-or-later - -ELK Stack -========= - -If you wish to run IntelMQ with ELK (Elasticsearch, Logstash, Kibana) it is entirely possible. This guide assumes the reader is familiar with basic configuration of ELK and does not aim to cover using ELK in general. It is based on the version 6.8.0 (ELK is a fast moving train therefore things might change). Assuming you have IntelMQ (and Redis) installation in place, lets dive in. - -Configuring IntelMQ for Logstash --------------------------------- - -In order to pass IntelMQ events to Logstash we will utilize already installed Redis. Add a new Redis Output Bot to your pipeline. As the minimum fill in the following parameters: `bot-id`, `redis_server_ip` (can be hostname), `redis_server_port`, `redis_password` (if required, else set to empty!), `redis_queue` (name for the queue). It is recommended to use a different `redis_db` parameter than used by the IntelMQ (specified as `source_pipeline_db`, `destination_pipeline_db` and `statistics_database`). - -Example values: - -.. code-block:: json - - bot-id: logstash-output - redis_server_ip: 10.10.10.10 - redis_server_port: 6379 - redis_db: 4 - redis_queue: logstash-queue - -**Notes** - -* Unfortunately you will not be able to monitor this redis queue via IntelMQ Manager. - - -Configuring Logstash --------------------- - -Logstash defines pipeline as well. In the pipeline configuration of Logstash you need to specify where it should look for IntelMQ events, what to do with them and where to pass them. - -Input -^^^^^ - -This part describes how to receive data from Redis queue. -See the example configuration and comments below: - -.. 
code-block:: - - input { - redis { - host => "10.10.10.10" - port => 6379 - db => 4 - data_type => "list" - key => "logstash-queue" - } - } - -* `host` - same as redis_server_ip from the Redis Output Bot -* `port` - the redis_server_port from the Redis Output Bot -* `db` - the redis_db parameter from the Redis Output Bot -* `data_type` - set to `list` -* `key` - same as redis_queue from the Redis Output Bot - -**Notes** - -* You can also use syntax like this: `host => "${REDIS_HOST:10.10.10.10}"`\ - The value will be taken from environment variable `$REDIS_HOST`. If the environment variable is not defined then the default value of `10.10.10.10` will be used instead. - -Filter (optional) -^^^^^^^^^^^^^^^^^ - -Before passing the data to the database you can apply certain changes. This is done with filters. See an example: - -.. code-block:: json - - filter { - mutate { - lowercase => ["source.geolocation.city", "classification.identifier"] - remove_field => ["__type", "@version"] - } - date { - match => ["time.observation", "ISO8601"] - } - } - -**Notes** - -* It is not recommended to apply any modifications to the data (within the `mutate` key) outside of the IntelMQ. All necessary modifications should be done only by appropriate IntelMQ bots. This example only demonstrates the possibility. - -* It is recommended to use the `date` filter: generally we have two timestamp fields - `time.source` (provided by the feed source this can be understood as when the event happened; however it is not always present) and `time.observation` (when IntelMQ collected this event). Logstash also adds another field `@timestamp` with time of processing by Logstash. While it can be useful for debugging, I recommend to set the `@timestamp` to the same value as `time.observation`. - -Output -^^^^^^ - -The pipeline also needs output, where we define our database (Elasticsearch). The simplest way of doing so is defining an output like this: - -.. 
code-block:: json - - output { - elasticsearch { - hosts => ["http://10.10.10.11:9200", "http://10.10.10.12:9200"] - index => "intelmq-%{+YYYY.MM}" - } - } - -* `hosts` - Elasticsearch host (or more) with the correct port (9200 by default) -* `index` - name of the index where to insert data - -**Notes** - -* Authors experience, hardware equipment and the amount of events collected led to having a separate index for each month. This might not necessarily suit your needs, but is a suggested option. - -* By default the ELK stack uses insecure HTTP. It is possible to setup Security for secure connections and basic user management. This is possible with the Basic (free) licence since versions 6.8.0 and 7.1.0. - -Configuring Elasticsearch -------------------------- - -Configuring Elasticsearch is entirely up to you and should be consulted with the `official documentation `_. What you will most likely need is something called `index template `_ mappings. IntelMQ provides a tool for generating such mappings. See `ElasticMapper Tool `_. - -**Notes** - -* Default installation of Elasticsearch database allows anyone with cURL and connection capability administrative access to the database. Make sure you secure your toys! diff --git a/docs/user/FAQ.rst b/docs/user/FAQ.rst deleted file mode 100644 index 35a0a8cf7..000000000 --- a/docs/user/FAQ.rst +++ /dev/null @@ -1,159 +0,0 @@ -.. - SPDX-FileCopyrightText: 2014 Tomás Lima , 2016-2021 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -Frequently asked questions -========================== - -.. contents:: - -For questions about the API, have a look at the :doc:`API documentation page ` - -Send IntelMQ events to Splunk -------------------------------------------------------------------- - -1. Go to Splunk and configure in order to be able to receive logs(intelmq events) to a TCP port -2. Use TCP output bot and configure accordingly to the Splunk configuration that you applied. 
- -Permission denied when using Redis Unix socket -------------------------------------------------------------------- - -If you get an error like this: - -.. code-block:: - - intelmq.lib.exceptions.PipelineError: pipeline failed - ConnectionError('Error 13 connecting to unix socket: /var/run/redis/redis.sock. Permission denied.',) - -Make sure the intelmq user as sufficient permissions for the socket. - -In ``/etc/redis/redis.conf`` (or wherever your configuration is), check the permissions and set it for example to group-writeable: - -.. code-block:: - - unixsocketperm 770 - -And add the user intelmq to the redis-group: - -.. code-block:: bash - - usermod -aG redis intelmq - -Why is the time invalid? -------------------------------------------------------------------- - -If you wonder why you are getting errors like this: - -.. code-block:: python - - intelmq.lib.exceptions.InvalidValue: invalid value '2017-03-06T07:36:29' () for key 'time.source' - -IntelMQ requires time zone information for all timestamps. Without a time zone, the time is ambiguous and therefore rejected. - -How can I improve the speed? -------------------------------------------------------------------- - -In most cases the bottlenecks are look-up experts. In these cases you can easily use the integrated load balancing features. - -Multithreading -^^^^^^^^^^^^^^ - -When using the AMQP broker, you can make use of Multi-threading. See the :ref:`multithreading` section. - -"Classic" load-balancing (Multiprocessing) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Before Multithreading was available in IntelMQ, and in case you use Redis as broker, the only way to do load balancing involves more work. -Create multiple instances of the same bot and connect them all to the same source and destination bots. Then set the parameter ``load_balance`` to ``true`` for the bot which sends the messages to the duplicated bot. 
Then, the bot sends messages to only one of the destination queues and not to all of them. - -True Multi*processing* is not available in IntelMQ. See also this :issue:`discussion on a possible enhanced load balancing <186>`. - -Other options -^^^^^^^^^^^^^ - -For any bottleneck based on (online) lookups, optimize the lookup itself and if possible use local databases. - -It is also possible to use multiple servers to spread the workload. To get the messages from one system to the other you can either directly connect to the other's pipeline or use a fast exchange mechanism such as the TCP Collector/Output (make sure to secure the network by other means). - -.. _faq-remove-raw-data: - -Removing raw data for higher performance and less space usage -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you do not need the raw data, you can safely remove it. For events (after parsers), it keeps the original data, eg. a line of a CSV file. In reports it keeps the actual data to be parsed, so don't delete the raw field in Reports - between collectors and parsers. - -The raw data consumes about 50% - 30% of the messages' size. The size of course depends on how many additional data you add to it and how much data the report includes. Dropping it, will improve the speed as less data needs to be transferred and processed at each step. - - -**In a bot** - -You can do this for example by using the *Field Reducer Expert*. The configuration could be: - - * ``type``: ``blacklist`` - * ``keys``: ``raw`` - -Other solutions are the *Modify* bot and the *Sieve* bot. The last one is a good choice if you already use it and you only need to add the command: - -``remove raw`` - -**In the database** - -In case you store data in the database and you want to keep its size small, you can (periodically) delete the raw data there. - -To remove the raw data for a events table of a PostgreSQL database, you can use something like: - -.. 
code-block:: sql - - UPDATE events SET raw = NULL WHERE "time.source" < '2018-07-01'; - -If the database is big, make sure only update small parts of the database by using an appropriate ``WHERE`` clause. If you do not see any negative performance impact, you can increase the size of the chunks, otherwise the events in the output bot may queue up. The ``id`` column can also be used instead of the source's time. - -Another way of reducing the ``raw``-data from the database is described in the EventDB documentation: :ref:`eventdb_raws_table` - -My bot(s) died on startup with no errors logged -------------------------------------------------------------------- - -Rather than starting your bot(s) with ``intelmqctl start``, try ``intelmqctl run [bot]``. This will provide valuable debug output you might not otherwise see, pointing to issues like system configuration errors. - -Orphaned Queues -------------------------------------------------------------------- - -This section has been moved to the section :ref:`orphan-queues`. - -.. _faq multithreading not avail: -Multithreading is not available for this bot -------------------------------------------------------------------- - -Multithreading is not available for some bots and AMQP broker is necessary. Possible reasons why a certain bot or a setup does not support Multithreading include: - - * Multithreading is only available when using the AMQP broker. - * For most collectors, Multithreading is disabled. Otherwise this would lead to duplicated data, as the data retrieval is not atomic. - * Some bots use libraries which are not thread safe. Look a the bot's documentation for more information. - * Some bots' operations are not thread safe. Look a the bot's documentation for more information. - -If you think this mapping is wrong, please report a bug. - -.. 
_docker security headers: -Docker: Security Headers -------------------------------------------------------------------- - -If you run our docker image in production, we recommend you to set security headers. -You can do this by creating a new file called ``example_config/nginx/security.conf`` in the cloned ``intelmq-docker`` repository. - -Write the following inside the configuration file, and change the ``http(s)://`` to your domain name. - -.. code-block:: bash - - server_tokens off; # turn off server_token, instead of nginx/13.2 now it will only show nginx - add_header X-Frame-Options SAMEORIGIN; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options - add_header X-Content-Type-Options nosniff; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Content-Type-Options - add_header X-XSS-Protection "1; mode=block"; # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection - add_header Content-Security-Policy "script-src 'self' 'unsafe-inline' http(s)://; frame-src 'self' http(s)://; object-src 'self' http(s)://"; # https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP - -After you created the file, edit the ``docker-compose.yml`` and mount it to the ``nginx`` with - -.. code-block:: bash - - volumes: - - ./example_config/nginx/security.conf:/etc/nginx/conf.d/security.conf - -**IMPORTANT** Mount the exact name & not the directory, because otherwise you would overwrite the whole directory and the other files would be gone inside the container. diff --git a/docs/user/MISP-Integrations.rst b/docs/user/MISP-Integrations.rst deleted file mode 100644 index 114c983a1..000000000 --- a/docs/user/MISP-Integrations.rst +++ /dev/null @@ -1,49 +0,0 @@ -.. 
- SPDX-FileCopyrightText: 2019-2021 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -MISP integrations in IntelMQ -============================ - -While MISP and IntelMQ seem to solve similar problems in the first hindsight, their intentions and strengths differ significantly. - -In a nutshell, MISP *stores* manually curated indicators (called *attributes*) grouped in *events*. An event can have an arbitrary number of attributes. -MISP correlates these indicators with each other and can synchronize the data between multiple MISP instances. - -On the other side, IntelMQ in it's essence (not considering the :doc:`EventDB `) has no state or database, but is stream-oriented. -IntelMQ acts as a toolbox which can be configured as needed to automate processes of mass data with little or no human interaction -At the end of the processing the data may land in some database or be sent to other systems. - -Both systems do not intend to replace each other or do compete. -They integrate seamless and combine each other enabling more use-cases and - -MISP API Collector -------------------------------- - -The MISP API Collector fetches data from MISP via the `MISP API `_. - -Look at the :ref:`Bots' documentation ` for more information. - -MISP Expert -------------------------------- - -The MISP Expert searches MISP by using the `MISP API `_ -for attributes/events matching the ``source.ip`` of the event. -The MISP Attribute UUID and MISP Event ID of the newest attribute are added to the event. - -Look at the :ref:`Bots' documentation ` for more information. - -MISP Feed Output -------------------------------- - -This bot creates a complete `MISP feed `_ ready to be configured in MISP as incoming data source. - -Look at the :ref:`Bots' documentation ` for more information. - - -MISP API Output -------------------------------- - -Can be used to directly create MISP events in a MISP instance by using the `MISP API `_. 
- -Look at the :ref:`Bots' documentation ` for more information. diff --git a/docs/user/abuse-contacts.md b/docs/user/abuse-contacts.md new file mode 100644 index 000000000..daa9dc474 --- /dev/null +++ b/docs/user/abuse-contacts.md @@ -0,0 +1,65 @@ + + + +# Abuse-contact look-ups + +The right decision whom to contact about a specific incident is vital to get the incident resolved as quickly as possible. Different types of events may require different abuse-contacts to be selected. For example, issues about a device, e.g. a vulnerability in the operating system or an application, are better sent to the hoster which can inform the server administrator. For website-related issues, like defacements or phishing, the domain owner (maintaining the content of the website) could be the better and more direct contact. Additionally, different CERTs have different approaches and different contact databases. Multiple information sources have different information, and some sources are more accurate than others. IntelMQ can query multiple sources of abuse-contacts and combine them. Internal databases, like a Constituency Portal provide high-quality and first-hand contact information. The RIPE document [Sources of Abuse Contact Information for Abuse Handlers](https://www.ripe.net/publications/docs/ripe-658) contains a good summary of the complex of themes. + +## Sources for abuse-contacts + +All these bots add the queried contacts to the IntelMQ events in the field `source.abuse_contact` if not stated otherwise in the documentation. + +## Sources for domain-based abuse-contacts + +These bots are suitable for domain-based abuse-contact look-ups. + +- `intelmq.bots.experts.rdap.expert` expert queries private and public RDAP servers for `source.fqdn` and adds the contact information to the event as `source.abuse_contact`.
+- `intelmq.bots.experts.trusted_introducer_lookup.expert` expert queries a locally cached [Trusted Introducer team directory](https://www.trusted-introducer.org/directory/teams.json) for the TLD or domain (first match) of `source.fqdn`. + +## Sources for IP address-based abuse-contacts + +These bots are suitable for IP address and ASN based abuse-contact look-ups. + +- `intelmq.bots.experts.abusix.expert` expert queries the online Abusix service. +- `intelmq.bots.experts.do_portal.expert` expert queries an instance of the do-portal software (deprecated). +- `intelmq.bots.experts.tuency.expert` expert queries an instance of the **tuency** Constituency Portal for the IP address. The Portal also takes into account any notification rules, which are saved + additionally in the event. +- `intelmq.bots.experts.ripe.expert` expert queries the online RIPE database for IP-Address and AS contacts. +- `intelmq.bots.experts.trusted_introducer_lookup.expert` expert queries a locally + cached [Trusted Introducer team directory](https://www.trusted-introducer.org/directory/teams.json) + for the Autonomous system `source.asn`. + +## Generic sources for abuse-contacts + +- `intelmq.bots.experts.generic_db_lookup.expert` expert for local data sources, like + database tables mapping ASNs to abuse-contact or Country Codes to abuse-contact. +- `intelmq.bots.experts.uwhoisd.expert` expert for fetching whois-data, not extracting + abuse-contact information + +## Helpful other bots for pre-processing + +- `intelmq.bots.experts.asn_lookup.expert` queries locally cached database to lookup ASN. +- `intelmq.bots.experts.cymru_whois.expert` to lookup ASN, Geolocation, and BGP prefix + for `*.ip`. +- `intelmq.bots.experts.domain_suffix.expert` to lookup the public suffix of the domain + in `*.fqdn`. +- `intelmq.bots.experts.format_field.expert` +- `intelmq.bots.experts.gethostbyname.expert` resolve `*.ip` from `*.fqdn`. 
+- `intelmq.bots.experts.maxmind_geoip.expert` to lookup Geolocation information for `*.ip` + . +- `intelmq.bots.experts.reverse_dns.expert` to resolve `*.reverse_dns` from `*.ip`. +- `intelmq.bots.experts.ripe.expert` to lookup `*.asn` and Geolocation information + for `*.ip`. +- `intelmq.bots.experts.tor_nodes.expert` for filtering out TOR nodes. +- `intelmq.bots.experts.url2fqdn.expert` to extract `*.fqdn`/`*.ip` from `*.url`. + +## Combining the lookup approaches + +In order to get the best contact, it may be necessary to combine multiple abuse-contact sources. IntelMQ's modularity provides methods to arrange and configure the bots as needed. Among others, the following bots can help in getting the best result: + +- `intelmq.bots.experts.filter.expert` Your lookup process may be different for different types of data. E.g. website-related issues may be better addressed at the domain owner and device-related issues may be better addressed to the hosting provider. +- `intelmq.bots.experts.modify.expert` Allows you to set values based on filter and also format values based on the value of other fields. +- `intelmq.bots.experts.sieve.expert` Very powerful expert which allows filtering, routing (to different subsequent bots) based on if-expressions. It supports set-operations (field value is in list) as well as sub-network operations for IP address networks in CIDR notation for the expression-part. You can as well set the abuse-contact directly. diff --git a/docs/user/abuse-contacts.rst b/docs/user/abuse-contacts.rst deleted file mode 100644 index 8da8c43b4..000000000 --- a/docs/user/abuse-contacts.rst +++ /dev/null @@ -1,103 +0,0 @@ -.. - SPDX-FileCopyrightText: 2021 IntelMQ-Team - SPDX-License-Identifier: AGPL-3.0-or-later - -###################### -Abuse-contact look-ups -###################### - -The right decision whom to contact about a specific incident is vital to get the -incident resolved as quick as possible. 
-Different types of events may required different abuse-contact to be selected. -For example, issues about a device, e.g. a vulnerability in the operating system -or an application, is better sent to the hoster which can inform the server -administrator. -For website-related issues, like defacements or phishing, the domain owner -(maintaining the content of the website) could be the better and more direct -contact. -Additionally, different CERT's have different approaches and different contact -databases. -Multiple information sources have different information, and some sources are -more accurate than others. -IntelMQ can query multiple sources of abuse-contacts and combine them. -Internal databases, like a Constituency Portal (see :doc:`ecosystem`) -provide high-quality and first-hand contact information. -The RIPE document `Sources of Abuse Contact Information for Abuse Handlers `_ -contains a good summary of the complex of themes. - -Sources for abuse-contacts --------------------------- - -All these bots add the queried contacts to the IntelMQ events in the field -`source.abuse_contact` if not state otherwise in the documentation. - -Sources for domain-based abuse-contacts ---------------------------------------- - -These bots are suitable for domain-based abuse-contact look-ups. - -* :ref:`intelmq.bots.experts.rdap.expert` expert queries private and public RDAP servers for `source.fqdn` and add the contact information to the event as `source.abuse_contact`. -* :ref:`intelmq.bots.experts.trusted_introducer_lookup.expert` expert - queries a locally cached - `Trusted Introducer team directory `_ - for the TLD or domain (first match) of *source.fqdn*. - -Sources for IP address-based abuse-contacts -------------------------------------------- - -These bots are suitable for IP address- and ASN-based abuse-contact look-ups. - -* :ref:`intelmq.bots.experts.abusix.expert` expert queries the online Abusix service. 
-* :ref:`intelmq.bots.experts.do_portal.expert` expert queries an instance of the do-portal software (deprecated). -* :ref:`intelmq.bots.experts.tuency.expert` expert queries an instance of the - *tuency* Constituency Portal for the IP address. The Portal also takes into - account any notification rules, which are saved additionally in the event. -* :ref:`intelmq.bots.experts.ripe.expert` expert queries the online RIPE database for IP-Address and AS contacts. -* :ref:`intelmq.bots.experts.trusted_introducer_lookup.expert` expert - queries a locally cached - `Trusted Introducer team directory `_ - for the Autonomous system *source.asn*. - -Generic sources for abuse-contacts ----------------------------------- - -* :ref:`intelmq.bots.experts.generic_db_lookup.expert` expert for local data - sources, like database tables mapping ASNs to abuse-contact or Country Codes - to abuse-contact. -* :ref:`intelmq.bots.experts.uwhoisd.expert` expert for fetching whois-data, - not extracting abuse-contact information - -Helpful other bots for pre-processing -------------------------------------- - -* :ref:`intelmq.bots.experts.asn_lookup.expert` -* :ref:`intelmq.bots.experts.cymru_whois.expert` to lookup ASN, Geolocation, and BGP prefix for ``*.ip``. -* :ref:`intelmq.bots.experts.domain_suffix.expert` to lookup the public suffix of the domain in ``*.fqdn``. -* :ref:`intelmq.bots.experts.format_field.expert` -* :ref:`intelmq.bots.experts.gethostbyname.expert` resolve ``*.ip`` from ``*.fqdn``. -* :ref:`intelmq.bots.experts.maxmind_geoip.expert` to lookup Geolocation information for ``*.ip``. -* :ref:`intelmq.bots.experts.reverse_dns.expert` to resolve ``*.reverse_dns`` from ``*.ip``. -* :ref:`intelmq.bots.experts.ripe.expert` to lookup ``*.asn`` and Geolocation information for ``*.ip``. -* :ref:`intelmq.bots.experts.tor_nodes.expert` for filtering out TOR nodes. -* :ref:`intelmq.bots.experts.url2fqdn.expert` to extract ``*.fqdn``/``*.ip`` from ``*.url``. 
- -Combining the lookup approaches -------------------------------- - -In order to get the best contact, it may be necessary to combine multiple -abuse-contact sources. -IntelMQ's modularity provides methods to arrange and configure the bots as -needed. -Among others, the following bots can help in getting the best result: - -* :ref:`intelmq.bots.experts.filter.expert` expert: Your lookup process may be - different for different types of data. E.g. website-related issues may be - better addressed at the domain owner and device-related issues may be better - addressed to the hoster. -* :ref:`intelmq.bots.experts.modify.expert` expert: Allows you to set values - based on filter and also format values based on the value of other fields. -* :ref:`intelmq.bots.experts.sieve.expert` expert: Very powerful expert which - allows filtering, routing (to different subsequent bots) based on - if-expressions . It support set-operations (field value is in list) as well as - sub-network operations for IP address networks in CIDR notation for the - expression-part. You can as well set the abuse-contact directly. diff --git a/docs/user/api.md b/docs/user/api.md new file mode 100644 index 000000000..c4a045b0c --- /dev/null +++ b/docs/user/api.md @@ -0,0 +1,58 @@ + + + +# Using IntelMQ API + +TODO describe endpoints + +## Usage from programs + + +The IntelMQ API can also be used from programs, not just browsers. To do +so, first send a POST-Request with JSON-formatted data to + + +```json +{ + "username": "$your_username", + "password": "$your_password" +} +``` + +With valid credentials, the JSON-formatted response contains the +`login_token`. This token can be used like an API key in the +Authorization header for the next API calls: + +```bash +Authorization: $login_token +``` + +Here is a full example using **curl**: + +1. 
Authentication step: + ```bash + curl --location --request POST "http://localhost/intelmq/v1/api/login/" \ + --header "Content-Type: application/x-www-form-urlencoded" \ + --data-urlencode "username=$username"\ + --data-urlencode "password=$password" + ``` + ```json + {"login_token":"68b329da9893e34099c7d8ad5cb9c940","username":"$username"} + ``` + +2. Using the login token to fetch data: + ```bash + curl --location "http://localhost/intelmq/v1/api/version" \ + --header "Authorization: 68b329da9893e34099c7d8ad5cb9c940" + ``` + ```json + {"intelmq":"3.0.0rc1","intelmq-manager":"2.3.1"} + ``` + +The same approach also works for *Ansible*, as you can see here: + +1. +2. \ No newline at end of file diff --git a/docs/user/bots.md b/docs/user/bots.md new file mode 100644 index 000000000..29977f56e --- /dev/null +++ b/docs/user/bots.md @@ -0,0 +1,5464 @@ + + +# Bots Inventory + +This document contains a complete reference of bots implemented by IntelMQ and how to configure them from the user's perspective (meaning via IntelMQ Manager). Some of the bots are intended for general use and some of them are for processing particular data sources. + +## Individual Bot Configuration + +Each bot has its own configuration. The configuration consists of two types of parameters: + +- **Generic parameters** that are common to all the bots and need to be set for each bot. + +- **Runtime parameters** are needed by the bot itself during runtime. Some of these parameters can be inherited from the [global configuration](../admin/configuration/intelmq.md#runtimeyaml) (which is applied to all the bots), but can be overridden in the individual bot configuration. + +## Generic Parameters + +These parameters must be set for each bot (at least the required ones). + +### `id` + +(required, string) This must be a unique identifier. Commonly it looks something like this: `abusech-feodo-tracker-collector`. It is safer to avoid using spaces. 
+ +### `name` + +(required, string) Human readable name of the bot. + +### `description` + +(required, string) The description of the bot. + +### `module` + +(required, string) The executable (should be in `PATH` environment variable) which will be started. + +### `group` + +(optional, string) The group of the bot. Can be `Collector`, `Parser`, `Expert` or `Output`. Only used for visualization by other tools. + +### `enabled` + +(optional, boolean) Whether the bot will start when the whole botnet is started. You can still start a disabled bot explicitly. Defaults to `true`. + +### `run_mode` + +(optional, string) There are two run modes, `continuous` or `scheduled`. In the first case, the bot will be running +forever until stopped or exits because of errors (depending on the configuration). In the latter case, the bot will stop +after one successful run. This is especially useful when scheduling bots via cron or systemd. +Check [Configuration](../admin/configuration/intelmq.md) section for more details. Defaults to `continuous`. + +## HTTP Parameters + +Common HTTP runtime parameters used in multiple bots. + +### `http_timeout_sec` + +(optional, float) A tuple of floats or only one float describing the timeout (seconds) of the HTTP connection. Can be a +tuple of two floats (read and connect timeout) or just one float (applies for both timeouts). See +also . Defaults to 30. + +### `http_timeout_max_tries` + +(optional, integer) An integer depicting how many times a connection is retried, when a timeout occurred. Defaults to 3. + +### `http_username` + +(optional, string) Username for basic HTTP authentication. + +### `http_password` + +(optional, string) Password for basic HTTP authentication. + +### `http_proxy` + +(optional, string) Proxy to use for HTTP. + +### `https_proxy` + +(optional, string) Proxy to use for HTTPS. + +### `http_user_agent` + +(optional, string) User-Agent to be used for HTTP requests. 
+ +### `http_verify_cert` + +(optional, boolean/string) Path to trusted CA bundle or directory, `false` to ignore verifying SSL certificates, +or `true` to verify SSL certificates. Defaults to `true`. + +### `ssl_client_certificate` + +(optional, string) Path to client certificate to use for TLS connections. + +### `ssl_ca_certificate` + +(optional, string) Path to trusted CA certificate. Only used by some bots. + +## Cache Parameters + +Common Redis cache runtime parameters used in multiple bots (mainly lookup experts). + +### `redis_cache_host` + +(required, string) Hostname of the Redis database. + +### `redis_cache_port` + +(required, string) Port of the Redis database. + +### `redis_cache_db` + +(required, integer) Database number. + +### `redis_cache_ttl` + +(required, integer) TTL used for caching. + +### `redis_cache_password` + +(optional, string) Password for the Redis database. + +## Collector Bots + +Multithreading is disabled for all Collectors, as this would lead to duplicated data. + +### Feed Parameters + +These runtime parameters must be set for each collector bot (at least the required ones). + +#### `name` + +(required, string) Name of the feed ([feed.name](<>)). + +#### `accuracy` + +(optional, float) Accuracy of the data from the feed ([feed.accuracy](<>)). + +#### `code` + +(optional, string) Code for the feed ([feed.code](<>)). + +#### `documentation` + +(optional, string) Link to documentation for the feed ([feed.documentation](<>)). + +#### `provider` + +(optional, string) Name of the provider of the feed ([feed.provider](<>)). + +#### `rate_limit` + +(optional, integer) Time interval (in seconds) between fetching data if applicable. Defaults to 0. + +### Alien Vault OTX
+ +Collects report messages from Alien Vault OTX. + +**Module:** `intelmq.bots.collectors.alienvault_otx.collector` + +**Requirements** + +Install the library from GitHub, as there is no package in PyPi: + +```bash +pip3 install -r intelmq/bots/collectors/alienvault_otx/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`api_key`** + +(required, string) API Key + +**`modified_pulses_only`** + +(optional, boolean) Whether to get only modified pulses instead of all. Defaults to false. + +**`interval`** + +(optional, integer) When `modified_pulses_only` is set, define the time in hours (integer value) to get modified pulses +since then. Defaults to 24 (hours). + +--- + +### AMQP
+ +This bot collects data from (remote) AMQP servers, for both IntelMQ as well as external data. Currently only fetching +from a queue is supported; this can be extended in the future. Messages will be acknowledged at AMQP after they are sent to the +pipeline. Requires the [pika](https://pypi.org/project/pika/) library, minimum version 1.0.0. + +**Module:** `intelmq.bots.collectors.amqp.collector_amqp` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`connection_host`** + +(optional, string) Hostname of the AMQP server. Defaults to 127.0.0.1. + +**`connection_port`** + +(optional, integer) Port of the AMQP server. Defaults to 5672. + +**`connection_attempts`** + +(optional, integer) The number of connection attempts to the defined server. Defaults to 3. + +**`connection_heartbeat`** + +(optional, integer) Heartbeat to server (seconds). Defaults to 3600. + +**`connection_vhost`** + +(optional, string) Virtual host to connect, on an HTTP(S) connection would be . + +**`expect_intelmq_message`** + +(optional, boolean) This parameter denotes whether the data is from IntelMQ or not. If true, then the data can be +any Report or Event and will be passed to the next bot as is. Otherwise a new Report is created with the raw data. +Defaults to false. + +**`queue_name`** + +(optional, string) The name of the queue to fetch the data from. + +**`username`** + +(optional, string) Username for authentication to the AMQP server. + +**`password`** + +(optional, string) Password for authentication to the AMQP server. + +**`use_ssl`** + +(optional, boolean) Use of TLS for the connection. Make sure to also set the correct port. Defaults to false. + +--- + +### API
+ +This bot collects data from HTTP or Socket REST API. The API is available at `/intelmq/push` when the HTTP interface is +used. Requires the [tornado](https://pypi.org/project/tornado/) library. + +**Module:** `intelmq.bots.collectors.api.collector` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`port`** + +(optional, integer) The local port at which the API is available. Defaults to 5000. + +**`use_socket`** + +(optional, boolean) If true, the socket will be opened at the location given with `socket_path`. Defaults to false. + +**`socket_path`** + +(optional, string) Location of the socket. Defaults to `/tmp/imq_api_default_socket`. + +--- + +### Generic URL Fetcher
+ +This bot collects data from remote hosts using HTTP protocol. If the HTTP response's status code is not 2xx, this is +treated as error. In Debug logging level, the request's and response's headers and body are logged for further +inspection. + +**Module:** `intelmq.bots.collectors.http.collector_http` + +**Parameters (also expects [feed parameters](#feed-parameters) and [HTTP parameters](#http-parameters)):** + +**`http_url`** + +(required, string) Location of the resource to download. + +**`http_url_formatting`** + +(optional, boolean/object) When true, `{time[format]}` will be replaced by the current time in local timezone formatted +by the given format. E.g. if the URL is `http://localhost/{time[%Y]}`, then the resulting URL is `http://localhost/2019` +for the year 2019. ( +Python's [Format Specification Mini-Language](https://docs.python.org/3/library/string.html#formatspec) is used for +this.). You may use a JSON specifying [time-delta](https://docs.python.org/3/library/datetime.html#datetime.timedelta) +parameters to shift the current time accordingly. For example use `days: -1` for yesterday's date; the +URL `http://localhost/{time[%Y-%m-%d]}` will get translated to `http://localhost/2018-12-31` for the 1st Jan of 2019. +Defaults to false. + +**`extract_files`** + +(optional, boolean/array of strings) If true, the retrieved (compressed) file or archive will be uncompressed/unpacked +and the files are extracted. If the parameter is a list of strings, only the files matching the filenames are extracted. +Extraction handles gzipped files and both compressed and uncompressed tar-archives as well as zip archives. For +extracted files, every extracted file is sent in its own report. Every report has a field named `extra.file_name` with +the file name in the archive the content was extracted from. Defaults to false. + +**`verify_pgp_signatures`** + +(optional, boolean) When true, signature file is downloaded and report file is checked. 
On error (missing signature, +mismatch, ...), the error is logged and the report is not processed. Public key has to be imported in local keyring. +This requires the [python-gnupg](https://pypi.org/project/python-gnupg/) library. Defaults to false. + +**`signature_url`** + +(optional, string) Location of the signature file for the downloaded content. + +**`signature_url_formatting`** + +(optional, boolean/object) Same as `http_url_formatting`. Defaults to false. + +**`gpg_keyring`** + +(optional, string) If specified, the string represents path to keyring file. Otherwise the PGP keyring file of the +current `intelmq` user is used. + +--- + +### Generic URL Stream Fetcher
+ +Opens a streaming connection to the URL and collects the received lines. + +If the stream is interrupted, the connection will be aborted using the timeout parameter. No error will be logged if the +number of consecutive connection failures does not reach the parameter `error_max_retries`. Instead of errors, an INFO +message is logged. This is a measure against too frequent ERROR logging messages. The consecutive connection failures +are reset if a data line has been successfully transferred. If the consecutive connection failures reach the +parameter `error_max_retries`, an exception will be thrown and `rate_limit` applies, if not null. + +**Module:** `intelmq.bots.collectors.http.collector_http_stream` + +**Parameters (also expects [feed parameters](#feed-parameters) and [HTTP parameters](#http-parameters)):** + +Uses the same parameters as [Generic URL Fetcher](#intelmq.bots.collectors.http.collector_http). The +parameter `http_timeout_max_tries` is of no use in this collector. + +**`strip_lines`** + +(optional, boolean) Whether the single lines should be stripped (removing whitespace from the beginning and the end of +the line) or not. Defaults to true. + +--- + +### Generic Mail URL Fetcher
+ +Extracts URLs from e-mail messages and downloads the content from the URLs. + +The resulting reports contain the following special fields: + +- `feed.url`: The URL the data was downloaded from. +- `extra.email_date`: The content of the email's `Date` header. +- `extra.email_subject`: The subject of the email. +- `extra.email_from`: The email's from address. +- `extra.email_message_id`: The email's message ID. +- `extra.file_name`: The file name of the downloaded file (extracted from the HTTP Response Headers if possible). + +**Chunking** + +For line-based inputs the bot can split up large reports into smaller chunks. This is particularly important for setups +that use Redis as a message queue which has a per-message size limitation of 512 MB. To configure chunking, +set `chunk_size` to a value in bytes. `chunk_replicate_header` determines whether the header line should be repeated for +each chunk that is passed on to a parser bot. Specifically, to configure a large file input to work around Redis size +limitation set `chunk_size` to something like 384000000 (~384 MB). + +**Module:** `intelmq.bots.collectors.mail.collector_mail_url` + +**Parameters (also expects [feed parameters](#feed-parameters) and [HTTP parameters](#http-parameters)):** + +**`mail_host`** + +(required, string) Hostname of the mail server. + +**`mail_port`** + +(optional, integer) IMAP server port: 143 without TLS, 993 with TLS. Defaults to 143. + +**`mail_user`** + +(required, string) Username of the email account. + +**`mail_password`** + +(required, string) Password associated with the user account. + +**`mail_ssl`** + +(optional, boolean) Whether the mail server uses TLS or not. Defaults to true. + +**`folder`** + +(optional, string) Folder in which to look for e-mail messages. Defaults to INBOX. + +**`subject_regex`** + +(optional, string) Regular expression to look for in the e-mail subject. 
+ +**`url_regex`** + +(optional, string) Regular expression of the feed URL to look for in the e-mail body. + +**`sent_from`** + +(optional, string) Filter messages by the sender. + +**`sent_to`** + +(optional, string) Filter messages by the recipient. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided +certificate is not found, the IMAP connection will fail on handshake. Defaults to no certificate. + +--- + +### Generic Mail Attachment Fetcher
+ +This bot collects messages from mailboxes and downloads the attachments. + +The resulting reports contain the following special fields: + +- `extra.email_date`: The content of the email's `Date` header +- `extra.email_subject`: The subject of the email +- `extra.email_from`: The email's from address +- `extra.email_message_id`: The email's message ID +- `extra.file_name`: The file name of the attachment or the file name in the attached archive if the attachment is + uncompressed. + +**Module:** `intelmq.bots.collectors.mail.collector_mail_attach` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`mail_host`** + +(required, string) Hostname of the mail server. + +**`mail_port`** + +(optional, integer) IMAP server port: 143 without TLS, 993 with TLS. Defaults to 143. + +**`mail_user`** + +(required, string) Username of the email account. + +**`mail_password`** + +(required, string) Password associated with the user account. + +**`mail_ssl`** + +(optional, boolean) Whether the mail server uses TLS or not. Defaults to true. + +**`folder`** + +(optional, string) Folder in which to look for e-mail messages. Defaults to INBOX. + +**`subject_regex`** + +(optional, string) Regular expression to look for in the e-mail subject. + +**`attach_regex`** + +(optional, string) Regular expression of the name of the attachment. Defaults to csv.zip. + +**`extract_files`** + +(optional, boolean) Whether to extract compressed files from the attachment. Defaults to true. + +**`sent_from`** + +(optional, string) Only process messages sent from this address. Defaults to null (any sender). + +**`sent_to`** + +(optional, string) Only process messages sent to this address. Defaults to null (any recipient). + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided +certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. 
+ +--- + +### Generic Mail Body Fetcher
+ +This bot collects messages from mailboxes and forwards the bodies as reports. Each non-empty body with the matching content +type is sent as an individual report. + +The resulting reports contain the following special fields: + +- `extra.email_date`: The content of the email's `Date` header +- `extra.email_subject`: The subject of the email +- `extra.email_from`: The email's from address +- `extra.email_message_id`: The email's message ID + +**Module:** `intelmq.bots.collectors.mail.collector_mail_body` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`mail_host`** + +(required, string) Hostname of the mail server. + +**`mail_port`** + +(optional, integer) IMAP server port: 143 without TLS, 993 with TLS. Defaults to 143. + +**`mail_user`** + +(required, string) Username of the email account. + +**`mail_password`** + +(required, string) Password associated with the user account. + +**`mail_ssl`** + +(optional, boolean) Whether the mail server uses TLS or not. Defaults to true. + +**`folder`** + +(optional, string) Folder in which to look for e-mail messages. Defaults to INBOX. + +**`subject_regex`** + +(optional, string) Regular expression to look for in the e-mail subject. + +**`url_regex`** + +(optional, string) Regular expression of the feed URL to look for in the e-mail body. + +**`sent_from`** + +(optional, string) Filter messages by the sender. + +**`sent_to`** + +(optional, string) Filter messages by the recipient. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided +certificate is not found, the IMAP connection will fail on handshake. Defaults to no certificate. + +**`content_types`** + +(optional, boolean/array of strings) Which bodies to use based on the content_type. Defaults to `true` (same +as `['html', 'plain']`) for all: + +- string with comma separated values, e.g. 
`['html', 'plain']` +- `true`, `false`, `null`: Same as default value - `string`, e.g. `plain` + +--- + +### Github API
+ +Collects files matched by regular expression from a GitHub repository via the GitHub API. Optionally with GitHub +credentials, which are used as the Basic HTTP authentication. + +**Workflow** + +The optional authentication parameters provide a higher limit for GitHub API requests. With GitHub user +authentication, the requests are rate limited to 5000 per hour, otherwise to 60 requests per hour. + +The collector recursively searches for `regex`-defined files in the provided `repository`. Additionally it adds extra +file metadata defined by the `extra_fields`. + +The bot always sets the URL from which the file was downloaded as `feed.url`. + +**Module:** `intelmq.bots.collectors.github_api.collector_github_contents_api` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`personal_access_token`** + +(required, string) GitHub account personal access +token [GitHub documentation: Creating a personal access token]() + +**`repository`** + +(required, string) GitHub target repository (`/`) + +**`regex`** + +(optional, string) Valid regular expression of target files within the repository. Defaults to `.*.json`. + +**`extra_fields`** + +(optional, array of strings) Comma-separated list of extra fields +from [GitHub contents API](https://developer.github.com/v3/repos/contents/). + +--- + +### File
+ +This bot is capable of reading files from the local file-system. This is handy for testing purposes, or when you need to +react to spontaneous events. In combination with the Generic CSV parser this should work great. + +The resulting reports contain the following special fields: + +- `feed.url`: The URI using the `file://` scheme and localhost, with the full path to the processed file. +- `extra.file_name`: The file name (without path) of the processed file. + +**Chunking** + +Additionally, for line-based inputs the bot can split up large reports into smaller chunks. + +This is particularly important for setups that use Redis as a message queue which has a per-message size limitation of +512 MB. + +To configure chunking, set `chunk_size` to a value in bytes. `chunk_replicate_header` determines whether the header line +should be repeated for each chunk that is passed on to a parser bot. + +Specifically, to configure a large file input to work around Redis' size limitation set `chunk_size` to something like +384000000, i.e., ~384 MB. + +**Workflow** + +The bot loops over all files in `path` and tests if their file name matches `*postfix`, e.g. `*.csv`. If yes, the file +will be read and inserted into the queue. + +If `delete_file` is set, the file will be deleted after processing. If deletion is not possible, the bot will stop. + +To prevent data loss, the bot also stops when no `postfix` is set and `delete_file` was set. This cannot be overridden. + +The bot always sets the file name as `feed.url`. + +**Module:** `intelmq.bots.collectors.file.collector_file` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`path`** + +(required, string) Path to file. + +**`postfix`** + +(required, string) The postfix (file ending) of the files to look for. For example [.csv]. + +**`delete_file`** + +(optional, boolean) Whether to delete the file after reading. Defaults to false. + +--- + +### FireEye
+ +This bot is capable of collecting hashes and URLs from a FireEye MAS appliance. + +The Python library `xmltodict` is required to run this bot. + +**Workflow** + +The bot collects all alerts which occurred during the specified duration. After this we make a second call and check if +there is additional information like domains and hashes available. After collecting the openioc data we send this +information to the Fireeye parser. + +**Module:** `intelmq.bots.collectors.fireeye.collector_fireeye` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`host`** + +(required, string) DNS name of the target appliance. + +**`request_duration`** + +(required, string) Allowed values: `24_hours` or `48_hours`. Length of the query in the past, e.g. collect alerts from the last 24 hours/48 hours. + +**`http_username`** + +(required, string) Username for authentication. + +**`http_password`** + +(required, string) Password for authentication. + +--- + +### Kafka
+ +Requires the [kafka python library](https://pypi.org/project/kafka/). + +**Module:** `intelmq.bots.collectors.kafka.collector` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`topic`** + +(required, string) Kafka topic the collector should get messages from. + +**`bootstrap_servers`** + +(required, string) Kafka server(s) and port the collector should connect to. Defaults to `localhost:9092` + +**`ssl_check_hostname`** + +(optional, boolean) Whether to verify TLS certificates. Defaults to true. + +**`ssl_client_certificate`** + +(optional, string) Path to client certificate to use for TLS connections. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. + +--- + +### MISP Generic
+ +Collects messages from [MISP](https://github.com/MISP), a malware information sharing platform server. + +**Workflow** + +This collector will search for events on a MISP server that have a [to_process] tag attached to them (see +the [misp_tag_to_process] parameter) and collect them for processing by IntelMQ. Once the MISP event has been processed +the [to_process] tag is removed from the MISP event and a [processed] tag is then attached (see the [misp_tag_processed] +parameter). + +**NB.** The MISP tags must be configured to be 'exportable' otherwise they will not be retrieved by the collector. + +**Module:** `intelmq.bots.collectors.misp.collector` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`misp_url`** + +(required, string) URL of MISP server (with trailing '/'). + +**`misp_key`** + +(required, string) MISP Authkey. + +**`misp_tag_to_process`** + +(required, string) MISP tag for events to be processed. + +**`misp_tag_processed`** + +(optional, string) MISP tag for processed events. + +**`http_verify_cert`** + +(optional, boolean) Verify the TLS certificate of the server. Defaults to true. + +--- + +### Request Tracker
+ +Request Tracker Collector fetches attachments from an RTIR instance. + +This rt bot will connect to RT and inspect the given `search_queue` for tickets matching all criteria in `search_*`. Any +matches will be inspected. For each match, all (RT-) attachments of the matching RT tickets are iterated over and within +this loop, the first matching filename in the attachment is processed. If none of the filename matches apply, the +contents of the first (RT-) "history" item is matched against the regular expression for the URL (`url_regex`). + +The parameter `http_timeout_max_tries` is of no use in this collector. + +**Search** + +The parameters prefixed with `search_` allow configuring the ticket search. + +Empty strings and null as value for search parameters are ignored. + +**File downloads** + +Attachments can be optionally unzipped, remote files are downloaded with the `http_*` settings applied. + +If `url_regex` or `attachment_regex` are empty strings, false or null, they are ignored. + +**Ticket processing** + +Optionally, the RT bot can "take" RT tickets (i.e. the `user` is assigned this ticket now) and/or the status can be changed (leave `set_status` empty in case you don't want to change the status). Please note however that you **MUST** do one of the following: either "take" the ticket or set the status (`set_status`). Otherwise, the search will find the ticket every time and get stuck in an endless loop. + +In case a resource needs to be fetched and this resource is permanently not available (status code is 4xx), the ticket +status will be set according to the configuration to avoid processing the ticket over and over. For temporary failures +the status is not modified, instead the ticket will be skipped in this run. + +**Time search** + +To find only tickets newer than a given absolute or relative time, you can use the `search_not_older_than` parameter. Absolute time specification can be anything parseable by dateutil, best use an ISO format. 
+ +Relative time must be in this format: `[NUMBER] [TIMESPAN]s`, e.g. `3 days`. Timespan can be hour, day, week, month or year. Trailing 's' is supported for all timespans. Relative times are subtracted from the current time directly before the search is performed. + +The resulting reports contain the following special fields: + +- `rtir_id`: The ticket ID +- `extra.email_subject` and `extra.ticket_subject`: The subject of the ticket +- `extra.email_from` and `extra.ticket_requestors`: Comma separated list of the ticket's requestor's email addresses. +- `extra.ticket_owner`: The ticket's owner name +- `extra.ticket_status`: The ticket's status +- `extra.ticket_queue`: The ticket's queue +- `extra.file_name`: The name of the extracted file, the name of the downloaded file or the attachments' filename without `.gz` postfix. +- `time.observation`: The creation time of the ticket or attachment. + +**Requirements** + +You need the rt-library >= 1.9 and < 3.0 from nic.cz, available via [pypi](https://pypi.org/project/rt/): `pip3 install "rt<3"` + +**Module:** `intelmq.bots.collectors.rt.collector_rt` + +**Parameters (also expects [feed parameters](#feed-parameters) and [HTTP parameters](#http-parameters)):** + +**`extract_attachment`** + +(optional, boolean/array of strings) See documentation of the Generic URL Fetcher parameter `extract_files` for more details. + +**`extract_download`** + +(optional, boolean/array of strings) See documentation of the Generic URL Fetcher parameter `extract_files` for more details. + +**`uri`** + +(optional, string) URL of the REST interface of the RT. Defaults to `http://localhost/rt/REST/1.0`. + +**`user`** + +(optional, string) RT username. Defaults to intelmq. + +**`password`** + +(optional, string) RT password. Defaults to password. + +**`search_not_older_than`** + +(optional, string) Absolute time (use ISO format) or relative time, e.g. `3 days`. + +**`search_owner`** + +(optional, string) Owner of the ticket to search for. 
Defaults to nobody. + +**`search_queue`** + +(optional, string) Queue of the ticket to search for. Defaults to Incident Reports. + +**`search_requestor`** + +(optional, string) E-mail address of the requestor. + +**`search_status`** + +(optional, string) Status of the ticket to search for. Defaults to new. + +**`search_subject_like`** + +(optional, string/array of strings) Part of the subject of the ticket to search for. Defaults to "Report". + + +**`search_subject_notlike`** + +(optional, string/array of strings) Exclude subject containing given value, use list for multiple excluding values. + +**`set_status`** + +(optional, string) Status to set the ticket to after processing. Use false or null to keep current status. Defaults to open. + +**`take_ticket`** + +(optional, boolean) Whether to take the ticket. Defaults to true. + +**`url_regex`** + +(optional, string) Regular expression of a URL to search for in the ticket. Defaults to `https://dl.shadowserver.org/[a-zA-Z0-9?_-]*`. + +**`attachment_regex`** + +(optional, string) Regular expression of an attachment in the ticket. Defaults to `\.csv\.zip$`. + +--- + +### Rsync
+ +This bot downloads a file via rsync and then loads data from the downloaded file. The downloaded file is located in +`var/lib/bots/rsync_collector`. + +Requires the rsync executable. + +**Module:** `intelmq.bots.collectors.rsync.collector_rsync` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`file`** + +(required, string) The filename to process, combined with `rsync_path`. + +**`rsync_path`** + +(required, string) Path to the directory of the file. Allowed values are local directory (such as `/home/username/`) or remote directory (such as `:/home/username/directory`). + +**`rsync_file_path_formatting`** + +(optional, boolean) Whether the file and rsync_path should be formatted by the given format. E.g. if the path is `/path/to/file/{time[%Y]}`, then the resulting path is `/path/to/file/2023` for the year 2023. (Python's [Format Specification Mini-Language](https://docs.python.org/3/library/string.html#formatspec) is used for this.) You may use a `JSON` specifying [time-delta](https://docs.python.org/3/library/datetime.html#datetime.timedelta) parameters to shift the current time accordingly. For example use `{"days": -1}` for yesterday's date; the path `/path/to/file/{time[%Y-%m-%d]}` will get translated to `/path/to/file/2022-12-31` for the 1st Jan of 2023. Defaults to false. + +**`extra_params`** + +(optional, array of strings) A list of extra parameters to pass to rsync. + +**`private_key`** + +(optional, string) Private key to use for rsync authentication. + +**`private_key_path`** + +(optional, string) Path to private key to use for rsync authentication. Use `private_key` or `private_key_path`, not both. + +**`strict_host_key_checking`** + +(optional, boolean) Whether the host key should be checked. Defaults to false. + +**`temp_directory`** + +(optional, string) The temporary directory for rsync to use for collected files. Defaults to `/opt/intelmq/var/run/{BOT-ID}` or `/var/run/intelmq/{BOT-ID}`. + +--- + +### Shadowserver Reports API
+ +Connects to the [Shadowserver API](https://www.shadowserver.org/what-we-do/network-reporting/api-documentation/), +requests a list of all the reports for a specific country and processes the ones that are new. + +The Cache is required to memorize which files have already been processed (TTL needs to be high enough to cover the +oldest files available!). + +The resulting reports contain the following special field: + +- `extra.file_name`: The name of the downloaded file, with fixed filename extension. The API returns file names with the + extension `.csv`, although the files are JSON, not CSV. Therefore, for clarity and better error detection in the parser, the file name in `extra.file_name` uses `.json` as extension. + +**Module:** `intelmq.bots.collectors.shadowserver.collector_reports_api` + +**Parameters (also expects [feed parameters](#feed-parameters) and [cache parameters](#cache-parameters)):** + +**`country`** + +(required, string) **Deprecated:** The country you want to download the reports for. Will be removed in IntelMQ version +4.0.0, use *reports* instead. + +**`apikey`** + +(required, string) Your Shadowserver API key. + +**`secret`** + +(required, string) Your Shadowserver API secret. + +**`reports`** + +(required, string/array of strings) An array of strings (or a list of comma-separated values) of the mailing lists you want to process. + +**`types`** + +(optional, string/array of strings) An array of strings (or a list of comma-separated values) with the names of report types you want to process. If you leave this empty, all the available reports will be downloaded and processed (i.e. 'scan', 'drones', 'intel', 'sandbox_connection', 'sinkhole_combined'). The possible report types are equivalent to the file names given in the section Supported Reports of the [Shadowserver parser](#intelmq.bots.parsers.shadowserver.parser_json). + +--- + +### Shodan Stream
+ +Queries the Shodan Streaming API. + +Requires the shodan library to be installed: + +- + +- + +**Module:** `intelmq.bots.collectors.shodan.collector_stream` + +**Parameters (also expects [feed parameters](#feed-parameters) and [HTTP parameters](#http-parameters)):** + +Only the proxy is used (requires `shodan-python > 1.8.1`). Certificate is always verified. + +**`countries`** + +(string/array of strings) A list of countries to query for. If it is a string, it will be split by `,`. + +If the stream is interrupted, the connection will be aborted using the timeout parameter. No error will be logged if the +number of consecutive connection failures does not reach the parameter +`error_max_retries`. Instead of errors, an INFO message is logged. This is a measure against too frequent ERROR +logging messages. The consecutive connection failures are reset if a data line has been successfully transferred. If the +consecutive connection failures reach the parameter `error_max_retries`, an exception will be thrown and `rate_limit` +applies, if not null. + +--- + +### TCP
+ +TCP is the bot responsible for receiving events on a TCP port (e.g. from the TCP Output of another IntelMQ instance). Might not +be working on Python 3.4.6. + +**Response** + +The TCP collector just sends an "OK" message after every received message; this should not pose a problem for an arbitrary +input. If you intend to link two IntelMQ instances via TCP, have a look at the TCP output bot documentation. + +**Module:** `intelmq.bots.collectors.tcp.collector` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`ip`** + +(required, string) IP of the destination server. + +**`port`** + +(required, integer) Port of destination server. + +--- + +### Blueliv Crimeserver
+ +Collects report messages from Blueliv API. + +For more information visit + +**Module:** `intelmq.bots.collectors.blueliv.collector_crimeserver` + +**Requirements** + +Install the required library: + +```bash +pip3 install -r intelmq/bots/collectors/blueliv/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`api_key`** + +(required, string) location of information resource, see + +**`api_url`** + +(optional, string) The optional API endpoint. Defaults to `https://freeapi.blueliv.com`. + +--- + +### Calidog Certstream
+ +A Bot to collect data from the Certificate Transparency Log (CTL). This bot works based on certstream library +() + +**Module:** `intelmq.bots.collectors.calidog.collector_certstream` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +--- + +### ESET ETI
+ +Collects data from ESET ETI TAXII server. + +For more information visit . + +**Module:** `intelmq.bots.collectors.eset.collector` + +**Requirements** + +Install the required `cabby` library: + +```bash +pip3 install -r intelmq/bots/collectors/eset/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`username`** + +(required, string) Your username. + +**`password`** + +(required, string) Your password. + +**`endpoint`** + +(optional, string) Defaults to `eti.eset.com`. + +**`time_delta`** + +(optional, integer) The time span (in seconds) to look back. Defaults to 3600. + +**`collection`** + +(required, string) The collection to fetch. + +--- + +### McAfee openDXL
+ +Collects messages via McAfee openDXL. + +**Module:** `intelmq.bots.collectors.opendxl.collector` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`dxl_config_file`** + +(required, string) Path to the configuration file containing required information to connect. + +**`dxl_topic`** + +(optional, string) Name of the DXL topic to subscribe to. Defaults to `/mcafee/event/atd/file/report`. + +--- + +### Microsoft Azure
+ +Collects blobs from Microsoft Azure using their library. + +Iterates over all blobs in all containers in an Azure storage. The Cache is required to memorize which files have +already been processed (TTL needs to be high enough to cover the oldest files available!). + +This bot significantly changed in a backwards-incompatible way in IntelMQ Version 2.2.0 to support current versions of +the Microsoft Azure Python libraries. `azure-storage-blob>=12.0.0` is required. + +**Module:** `intelmq.bots.collectors.microsoft.collector_azure` + +**Parameters (also expects [feed parameters](#feed-parameters) and [cache parameters](#cache-parameters)):** + +**`connection_string`** + +(required, string) Connection string as given by Microsoft. + +**`container_name`** + +(required, string) Name of the container to connect to. + +--- + +### Microsoft Interflow
+ +This bot collects files from Microsoft Interflow API. + +Iterates over all files available by this API. Make sure to limit the files to be downloaded with the parameters, +otherwise you will get a lot of data! The cache is used to remember which files have already been downloaded. Make sure +the TTL is high enough, higher than `not_older_than`. + +**Module:** `intelmq.bots.collectors.microsoft.collector_interflow` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`api_key`** + +(required, string) API generated in their portal. + +**`file_match`** + +(optional, string) Regular expression to match file names. + +**`not_older_than`** + +(optional, integer/datetime) an optional relative (minutes) or absolute time (UTC is assumed) expression to determine +the oldest time of a file to be downloaded. + +**`redis_cache_*` and especially `redis_cache_ttl`** + +Settings for the cache where file names of downloaded files are saved. The cache's TTL must always be bigger than +`not_older_than`. + +**Additional functionalities** + +Files are automatically ungzipped if the filename ends with `.gz`. + +--- + +### STOMP
+ +Collects messages from a STOMP server. + +**Module:** `intelmq.bots.collectors.stomp.collector` + +**Requirements** + +Install the `stomp.py` library from PyPI: + +```bash +pip3 install -r intelmq/bots/collectors/stomp/REQUIREMENTS.txt +``` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`server`** + +(required, string) Hostname of the STOMP server. + +**`port`** + +(optional, integer) Defaults to 61614. + +**`exchange`** + +(required, string) STOMP *destination* to subscribe to, e.g. "/exchange/my.org/*.*.*.*" + +**`username`** + +(optional, string) Username to use. + +**`password`** + +(optional, string) Password to use. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. + +**`auth_by_ssl_client_certificate`** + +(optional, boolean) Whether to authenticate using TLS certificate. (Set to false for new *n6* auth.) Defaults to true. + +**`ssl_client_certificate`** + +(optional, string) Path to client certificate to use for TLS connections. + +**`ssl_client_certificate_key`** + +(optional, string) Path to client private key to use for TLS connections. + +--- + +### Twitter (REMOVE?)
+ +Collects tweets. + +Collects tweets from target_timelines. Up to tweet_count tweets from each user and up to timelimit back in time. The +tweet text is sent separately and if allowed, links to pastebin are followed and the text sent in a separate report. + +**Module:** `intelmq.bots.collectors.twitter.collector_twitter` + +**Parameters (also expects [feed parameters](#feed-parameters)):** + +**`target_timelines`** + +() screen_names of twitter accounts to be followed + +**`tweet_count`** + +() number of tweets to be taken from each account + +**`timelimit`** + +() maximum age of the tweets collected in seconds + +**`follow_urls`** + +() list of screen_names for which URLs will be followed + +**`exclude_replies`** + +() exclude replies of the followed screen_names + +**`include_rts`** + +() whether to include retweets by given screen_name + +**`consumer_key`** + +() Twitter API login data + +**`consumer_secret`** + +() Twitter API login data + +**`access_token_key`** + +() Twitter API login data + +**`access_token_secret`** + +() Twitter API login data + +## Parser Bots + +### Common parameters + +#### `default_fields` + +(optional, object) Map of statically added fields to each event (only applied if parsing the event doesn't set the +value). + +Example usage: + +```yaml +default_fields: + classification.type: c2-server + protocol.transport: tcp +``` + +--- + +### Abuse.ch Feodo Tracker
+ +Parses data from Abuse.ch Feodo Tracker (JSON format). + +**Module:** `intelmq.bots.parsers.abusech.parser_feodotracker` + +No additional parameters. + +--- + +### AlienVault API + +Parses data from AlienVault API. + +**Module:** `intelmq.bots.parsers.alienvault.parser` + +No additional parameters. + +--- + +### AlienVault OTX + +Parses data from AlienVault Open Threat Exchange (OTX). + +**Module:** `intelmq.bots.parsers.alienvault.parser_otx` + +No additional parameters. + +--- + +### AnubisNetworks Cyberfeed Stream
+ +Parses data from AnubisNetworks Cyberfeed Stream. + +The feed format changes over time. The parser supports at least data from 2016 and 2020. + +Events with the Malware "TestSinkholingLoss" are ignored, as they are for the feed provider's internal purpose only and +should not be processed at all. + +**Module:** `intelmq.bots.parsers.anubisnetworks.parser` + +**Parameters:** + +**`use_malware_family_as_classification_identifier`** + +(optional, boolean) Use the `malw.family` field as `classification.type`. If false, check if the same +as `malw.variant`. If it is the same, it is ignored. Otherwise saved as `extra.malware.family`. Defaults to true. + +--- + +### Bambenek
+ +Parses data from Bambenek DGA, Domain, and IP feeds. + +**Module:** `intelmq.bots.parsers.bambenek.parser` + +No additional parameters. + +--- + +### Blocklist.de
+ +Parses data from Blocklist.de feeds. + +**Module:** `intelmq.bots.parsers.blocklistde.parser` + +No additional parameters. + +--- + +### Blueliv Crimeserver
+ +Parses data from Blueliv Crimeserver feed. + +**Module:** `intelmq.bots.parsers.blueliv.parser_crimeserver` + +No additional parameters. + +--- + +### Calidog Certstream
+ +Parses data from Certificate Transparency Log. + +For each domain in the `leaf_cert.all_domains` object one event with the domain in `source.fqdn` (and `source.ip` as fallback) is produced. The seen-date is saved in `time.source` and the classification type is `other`. + +**Module:** `intelmq.bots.parsers.calidog.parser_certstream` + +No additional parameters. + +--- + +### CERT-EU
+ +Parses data from CERT-EU feed (CSV). + +**Module:** `intelmq.bots.parsers.certeu.parser_csv` + +No additional parameters. + +--- + +### CI Army
+ +Parses data from CI Army feed. + +**Module:** `intelmq.bots.parsers.ci_army.parser` + +No additional parameters. + +--- + +### CleanMX
+ +Parses data from CleanMX feed. + +**Module:** `intelmq.bots.parsers.cleanmx.parser` + +No additional parameters. + +--- + +### Team Cymru CAP
+ +Parses data from Team Cymru's CSIRT Assistance Program (CAP) feed. + +There are two different feeds available: + +- `infected_$date.txt` ("old") +- `$certname_$date.txt` ("new") + +The new will replace the old at some point in time, currently you need to fetch both. The parser handles both formats. + +**Old feed** + +As little information on the format is available, the mappings might not be correct in all cases. Some reports are not +implemented at all as there is no data available to check if the parsing is correct at all. If you do get errors +like `Report ... not implement` or similar please open an issue and report the (anonymized) example data. Thanks. + +The information about the event could be better in many cases but as Cymru does not want to be associated with the +report, we can't add comments to the events in the parser, because then the source would be easily identifiable for the +recipient. + +**Module:** `intelmq.bots.parsers.cymru.parser_cap_program` + +No additional parameters. + +--- + +### Team Cymru Full Bogons
+ +Parses data from full bogons feed. + + + +**Module:** `intelmq.bots.parsers.cymru.parser_full_bogons` + +No additional parameters. + +--- + +### CZ.NIC HaaS
+ +Parses data from CZ.NIC Honeypot as a service (HaaS) feed. + +**Module:** `intelmq.bots.parsers.cznic.parser_haas` + +No additional parameters. + +--- + +### CZ.NIC PROKI
+ +Parses data from CZ.NIC PROKI API. + +**Module:** `intelmq.bots.parsers.cznic.parser_proki` + +No additional parameters. + +--- + +### Danger Rulez
+ +Parses data from Danger Rulez SSH blocklist. + +**Module:** `intelmq.bots.parsers.danger_rulez.parser` + +No additional parameters. + +--- + +### Dataplane
+ +Parses data from Dataplane feed. + +**Module:** `intelmq.bots.parsers.dataplane.parser` + +No additional parameters. + +--- + +### DShield ASN
+ +Parses data from DShield ASN feed. + +**Module:** `intelmq.bots.parsers.dshield.parser_asn` + +No additional parameters. + +--- + +### DShield Block
+ +Parses data from DShield Block feed. + +**Module:** `intelmq.bots.parsers.dshield.parser_block` + +No additional parameters. + +--- + +### ESET
+ +Parses data from ESET ETI TAXII server. + +Supported collections: + +- "ei.urls (json)" +- "ei.domains v2 (json)" + +**Module:** `intelmq.bots.parsers.eset.parser` + +No additional parameters. + +--- + +### Dyn (TODO) + +--- + +### FireEye
+ +Parses data from FireEye MAS appliance. + +**Module:** `intelmq.bots.parsers.fireeye.parser` + +No additional parameters. + +--- + +### Fraunhofer DGA
+ +Parses data from Fraunhofer DGA feed. + +**Module:** `intelmq.bots.parsers.fraunhofer.parser_dga` + +No additional parameters. + +--- + +### Generic CSV
+ +Parses CSV data. + +Lines starting with `#` are skipped. Headers won't be interpreted. + +**Module:** `intelmq.bots.parsers.generic.parser_csv` + +**Parameters** + +**`columns`** + +(required, string/array of strings) A list of strings or a string of comma-separated values with field names. The names +must match the IntelMQ Data Format field names. Empty column specifications and columns named `__IGNORE__` are ignored. +E.g. + +```yaml +columns: + - "source.ip" + - "source.fqdn" + - "extra.http_host_header" + - "__IGNORE__" +``` + +is equivalent to: + +```yaml +columns: "source.ip,source.fqdn,extra.http_host_header,__IGNORE__" +``` + +The fourth column is not used in this example. + +It is possible to specify multiple columns using the `|` character. E.g. + +```yaml +columns: + - "source.url|source.fqdn|source.ip" + - "source.fqdn" + - "extra.http_host_header" + - "__IGNORE__" +``` + +First, the bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will +try to parse it as IP, if that fails, an error will be raised. Some use cases: + +- Mixed data set, e.g. URL/FQDN/IP/NETMASK: + +```yaml +columns: + - "source.url|source.fqdn|source.ip|source.network" +``` + +- Parse a value and ignore if it fails: + +```yaml +columns: + - "source.url|__IGNORE__" +``` + +**`column_regex_search`** + +(optional, object) A dictionary mapping field names (as given per the columns parameter) to regular expression. The +field is evaluated using `re.search`. Eg. to get the ASN out of `AS1234` use: `{"source.asn": +"[0-9]*"}`. Make sure to properly escape any backslashes in your regular expression (see also +this [issue](https://github.com/certtools/intelmq/issues/1579)). + +**`compose_fields`** + +(optional, object) Compose fields from multiple columns, e.g. 
with data like this: + +```csv +# Host,Path +example.com,/foo/ +example.net,/bar/ +``` + +Using this parameter: + +```yaml +compose_fields: + source.url: "http://{0}{1}" +``` + +You get: + +``` +http://example.com/foo/ +http://example.net/bar/ +``` + +in the respective `source.url` fields. The value in the dictionary mapping is formatted whereas the columns are +available with their index. + +**`default_url_protocol`** + +(optional, string) For URLs you can give a default protocol which will be prepended to the data. Defaults to null. + +**`delimiter`** + +(optional, string) Character used for columns separation. Defaults to `,` (comma). + +**`skip_header`** + +(optional, boolean/integer) Whether to skip the first N lines of the input (True -> 1, False -> 0). Lines starting +with `#` will be skipped additionally, make sure you do not skip more lines than needed! + +**`time_format`** + +(optional, string) Allowed values: `timestamp`, `windows_nt` or `epoch_millis`. When `null` then fuzzy time parsing is +used. Defaults to null. + +**`type`** + +(optional, string) Set the `classification.type` statically. Deprecated in favour of [`default_fields`](#default_fields) +. Will be removed in IntelMQ 4.0.0. + +**`data_type`** + +(optional, object) Sets the data of specific type, currently only `json` is a supported value. + +Example: + +```yaml +columns: + - source.ip + - source.url + - extra.tags +data_type: + extra.tags: json +``` + +It will ensure that `extra.tags` is treated as JSON. + +**`filter_text`** + +(optional, string) Only process the lines containing or not containing specified text. It is expected to be used in +conjunction with `filter_type`. + +**`filter_type`** + +(optional, string) Allowed values: `whitelist` or `blacklist`. When `whitelist` is used, only lines containing the text +specified in `filter_text` option will be processed. When `blacklist` is used, only lines NOT containing the text will +be processed. 
+ +Example (processing ipset format files): + +```yaml +filter_text: 'ipset add ' +filter_type: whitelist +columns: + - __IGNORE__ + - __IGNORE__ + - __IGNORE__ + - source.ip +``` + +**`type_translation`** + +(optional, object) If the source does have a field with information for `classification.type`, but it does not +correspond to IntelMQ's types, you can map them to the correct ones. The `type_translation` field can hold a dictionary, +or a string with a JSON dictionary which maps the feed's values to IntelMQ's. + +Example: + +```yaml +type_translation: + malware_download: "malware-distribution" +``` + +**`columns_required`** + +(optional, array of booleans) An array of true/false for each column. By default, it is true for every column. + +--- + +### Github Feed
+ +Parses data publicly available on GitHub (should receive from `github_api` collector). + +**Module:** `intelmq.bots.parsers.github_feed.parser` + +No additional parameters. + +--- + +### Have I Been Pwned Callback
+ +Parses data from the callback of Have I Been Pwned Enterprise Subscription. + +Parses breaches and pastes and creates one event per e-mail address. The e-mail address is stored in `source.account`. +`classification.type` is `leak` and `classification.identifier` is `breach` or `paste`. + +**Module:** `intelmq.bots.parsers.hibp.parser_callback` + +No additional parameters. + +--- + +### HTML Table
+ +Parses tables in HTML documents. + +**Module:** `intelmq.bots.parsers.html_table.parser` + +**Parameters:** + +**`columns`** + +(required, string/array of strings) A list of strings or a string of comma-separated values with field names. The names +must match the IntelMQ Data Format field names. Empty column specifications and columns named `__IGNORE__` are ignored. +E.g. + +```yaml +columns: + - "source.ip" + - "source.fqdn" + - "extra.http_host_header" + - "__IGNORE__" +``` + +is equivalent to: + +```yaml +columns: "source.ip,source.fqdn,extra.http_host_header,__IGNORE__" +``` + +The fourth column is not used in this example. + +It is possible to specify multiple columns using the `|` character. E.g. + +```yaml +columns: + - "source.url|source.fqdn|source.ip" + - "source.fqdn" + - "extra.http_host_header" + - "__IGNORE__" +``` + +First, the bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will +try to parse it as IP, if that fails, an error will be raised. Some use cases: + +- Mixed data set, e.g. URL/FQDN/IP/NETMASK: + +```yaml +columns: + - "source.url|source.fqdn|source.ip|source.network" +``` + +- Parse a value and ignore if it fails: + +```yaml +columns: + - "source.url|__IGNORE__" +``` + +**`ignore_values`** + +(optional, string/array of strings) A list of strings or a string of comma-separated values which are ignored when +encountered. + +Example: + +```yaml +ignore_values: + - "" + - "unknown" + - "Not listed" +``` + +The following configuration will lead to assigning all values to `malware.name` and `extra.SBL` except `unknown` +and `Not listed` respectively. + +```yaml +columns: + - source.url + - malware.name + - extra.SBL +ignore_values: + - '' + - unknown + - Not listed +``` + +Parameters `columns` and `ignore_values` **must have same length!** + +**`attribute_name`** + +(optional, string) Filtering table with table attributes. To be used in conjunction with `attribute_value`. E.g. `class`, `id`, `style`.
+ +**`attribute_value`** + +(optional, string) To filter all tables with attribute `class='details'` use + +```yaml +attribute_name: "class" +attribute_value: "details" +``` + +**`table_index`** + +(optional, integer) Index of the table if multiple tables present. If `attribute_name` and `attribute_value` given, +index according to tables remaining after filtering with table attribute. Defaults to 0. + +**`split_column`** + +(optional, string) Padded column to be split to get values, to be used in conjunction with `split_separator` and `split_index`. + +**`split_separator`** + +(optional, string) Delimiter string for padded column. + +**`split_index`** + +(optional, integer) Index of unpadded string in returned list from splitting `split_column` with `split_separator` as +delimiter string. Defaults to 0. + +Example: + +```yaml +split_column: "source.fqdn" +split_separator: " " +split_index: 1 +``` + +With above configuration, column corresponding to `source.fqdn` with value `D lingvaworld.ru` will be assigned +as `source.fqdn: lingvaworld.ru`. + +**`skip_table_head`** + +(optional, boolean) Skip the first row of the table. Defaults to true. + +**`default_url_protocol`** + +(optional, string) For URLs you can give a default protocol which will be prepended to the data. Defaults to `http://`. + +**`time_format`** + +(optional, string) Allowed values: `timestamp`, `windows_nt` or `epoch_millis`. When `null` then fuzzy time parsing is +used. Defaults to null. + +**`html_parser`** + +(optional, string) The HTML parser to use. Allowed values: `html.parser` or `lxml` (see +also ). Defaults to `html.parser`. + +--- + +### JSON (TODO)
+ +TODO + +**Module:** `intelmq.bots.parsers.json.parser` + +--- + +### Key=Value Parser
+ +Parses text lines in key=value format, for example FortiGate firewall logs. + +**Parsing limitations** + +The input must not have (quoted) occurrences of the separator in the values. For example, this is not parsable (with +space as separator): + +``` +key="long value" key2="other value" +``` + +In firewall logs like FortiGate, this does not occur. These logs usually look like: + +``` +srcip=192.0.2.1 srcmac="00:00:5e:00:17:17" +``` + +**Module:** `intelmq.bots.parsers.key_value.parser` + +**Parameters:** + +**`pair_separator`** + +(optional, string) String separating key=value pairs. Defaults to space. + +**`kv_separator`** + +(optional, string) String separating the key and the value. Defaults to `=`. + +**`keys`** + +(optional, object) Mapping of original key names to IntelMQ Data Format. + +Example: + +```yaml +keys: + srcip: source.ip + dstip: destination.ip +``` + +The value mapped to `time.source` is parsed. If the value is numeric, it is interpreted. Otherwise, or if it fails, it +is parsed fuzzy with dateutil. If the value cannot be parsed, a warning is logged per line. + +**`strip_quotes`** + +(optional, boolean) Whether to remove opening and closing quotes from values. Defaults to true. + +--- + +### MalwarePatrol
+ +Parses data from MalwarePatrol feed. + +**Module:** `intelmq.bots.parsers.malwarepatrol.parser_dansguardian` + +No additional parameters. + +--- + +### MalwareURL
+ +Parses data from MalwareURL feed. + +**Module:** `intelmq.bots.parsers.malwareurl.parser` + +No additional parameters. + +--- + +### McAfee Advanced Threat Defense File
+ +Parse IoCs from McAfee Advanced Threat Defense reports (hash, IP, URL). + +**Module:** `intelmq.bots.parsers.mcafee.parser_atd` + +**Parameters:** + +**`verdict_severity`** + +(optional, integer) Minimum report severity to parse. Defaults to 4. + +--- + +### Microsoft CTIP
+ +Parses data from the Microsoft CTIP feed. + +Can parse the JSON format provided by the Interflow interface (lists of dictionaries) as well as the format provided by +the Azure interface (one dictionary per line). The provided data differs between the two formats/providers. + +The parser is capable of parsing both feeds: + +- `ctip-c2` +- `ctip-infected-summary` + +The feeds only differ by a few fields, not in the format. + +The feeds contain a field called `Payload` which is nearly always a base64 encoded JSON structure. If decoding works, +the contained fields are saved as `extra.payload.*`, otherwise the field is saved as `extra.payload.text`. + +**Module:** `intelmq.bots.parsers.microsoft.parser_ctip` + +**Parameters:** + +**`overwrite`** + +(optional, boolean) Overwrite an existing field `feed.name` with `DataFeed` of the source. Defaults to false. + +--- + +### MISP
+ +Parses MISP events. + +MISP events collected by the MISPCollectorBot are passed to this parser for processing. Supported MISP event categories +and attribute types are defined in the `SUPPORTED_MISP_CATEGORIES` and `MISP_TYPE_MAPPING` class constants. + +**Module:** `intelmq.bots.parsers.misp.parser` + +No additional parameters. + +--- + +### N6
+ +Parses n6 data into IntelMQ format. + +Test messages are ignored, this is logged with debug logging level. Also contains a mapping for the classification ( +results in taxonomy, type and identifier). The `name` field is normally used as `malware.name`, if that fails due to +disallowed characters, these characters are removed and the original value is saved as `event_description.text`. This +can happen for names like `further iocs: text with invalid ' char`. + +If a n6 message contains multiple IP addresses, multiple events are generated, resulting in events only differing in the +address information. + +**Module:** `intelmq.bots.parsers.n6.parser_n6stomp` + +No additional parameters. + +--- + +### OpenPhish Free
+ +Parses data from OpenPhish Free feed. + +**Module:** `intelmq.bots.parsers.openphish.parser` + +No additional parameters. + +--- + +### OpenPhish Premium
+ +Parses data from OpenPhish Premium feed (JSON). + +**Module:** `intelmq.bots.parsers.openphish.parser_commercial` + +No additional parameters. + +--- + +### Phishtank
+ +Parses data from Phishtank feed. + +**Module:** `intelmq.bots.parsers.phishtank.parser` + +No additional parameters. + +--- + +### Shadowserver
+ +Parses various reports from Shadowserver. + +There are two Shadowserver parsers, one for data in `CSV` format and one for data in `JSON` format. The latter was added +in IntelMQ 2.3 and is meant to be used together with the Shadowserver API collector. + +**How this bot works?** + +There are two possibilities for the bot to determine which report type the data belongs to in order to determine the +correct mapping of the columns: + +1. **Automatic report type detection** + + Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. + + When processing a report, this bot takes `extra.file_name` from the report and looks in `config.py` how the report + should be parsed. If this lookup is not possible, and the `feedname` is not given as parameter, the feed cannot be + parsed. + + The field `extra.file_name` has the following structure: `%Y-%m-%d-${report_name}[-suffix].csv` where the optional + suffix can be something like `country-geo`. For example, some possible filenames + are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is the `report_name`, + between the date and the suffix. Since version 2.1.2 the date in the filename is optional, so filenames + like `scan_tftp.csv` are also detected. + +2. **Fixed report type** + + If the method above is not possible and for upgraded instances, the report type can be set with the `feedname` + parameter. Report type is derived from the subject of Shadowserver e-mails. A list of possible values of + the `feedname` parameter can be found in the table below in the column "Report Type". + +**Module:** + +`intelmq.bots.parsers.shadowserver.parser` (for CSV data) +`intelmq.bots.parsers.shadowserver.parser_json` (for JSON data) + +**Parameters:** + +**`feedname`** + +(optional, string) Name of the Shadowserver report, see list below for possible values. + +**`overwrite`** + +(optional, boolean) If an existing `feed.name` should be overwritten. 
+ +**Supported reports:** + +These are the supported report types and their corresponding file name for automatic detection: + +| Report Type (`feedname`) | File Name | +|-----------|-----------| +| Accessible-ADB | `scan_adb` | +| Accessible-AFP | `scan_afp` | +| Accessible-AMQP | `scan_amqp` | +| Accessible-ARD | `scan_ard` | +| Accessible-Cisco-Smart-Install | `cisco_smart_install` | +| Accessible-CoAP | `scan_coap` | +| Accessible-CWMP | `scan_cwmp` | +| Accessible-MS-RDPEUDP | `scan_msrdpeudp` | +| Accessible-FTP | `scan_ftp` | +| Accessible-Hadoop | `scan_hadoop` | +| Accessible-HTTP | `scan_http` | +| Accessible-Radmin | `scan_radmin` | +| Accessible-RDP | `scan_rdp` | +| Accessible-Rsync | `scan_rsync` | +| Accessible-SMB | `scan_smb` | +| Accessible-Telnet | `scan_telnet` | +| Accessible-Ubiquiti-Discovery-Service | `scan_ubiquiti` | +| Accessible-VNC | `scan_vnc` | +| Blacklisted-IP (deprecated) | `blacklist` | +| Blocklist | `blocklist` | +| Compromised-Website| `compromised_website` | +| Device-Identification-IPv4 | `device_id` | +| Device-Identification-IPv6 | `device_id6` | +| DNS-Open-Resolvers | `scan_dns` | +| Honeypot-Amplification-DDoS-Events | `event4_honeypot_ddos_amp` | +| Honeypot-Brute-Force-Events | `event4_honeypot_brute_force` | +| Honeypot-Darknet | `event4_honeypot_darknet` | +| Honeypot-HTTP-Scan | `event4_honeypot_http_scan` | +| HTTP-Scanners | `hp_http_scan` | +| ICS-Scanners | `hp_ics_scan` | +| IP-Spoofer-Events | `event4_ip_spoofer` | +| Microsoft-Sinkhole-Events-IPv4 | `event4_microsoft_sinkhole` | +| Microsoft-Sinkhole-Events-HTTP | `event4_microsoft_sinkhole_http` | +| NTP-Monitor | `scan_ntpmonitor` | +| NTP-Version | `scan_ntp` | +| Open-Chargen | `scan_chargen` | +| Open-DB2-Discovery-Service | `scan_db2` | +| Open-Elasticsearch | `scan_elasticsearch` | +| Open-IPMI| `scan_ipmi` | +| Open-IPP | `scan_ipp` | +| Open-LDAP | `scan_ldap` | +| Open-LDAP-TCP | `scan_ldap_tcp` | +| Open-mDNS | `scan_mdns` | +| Open-Memcached | 
`scan_memcached` | +| Open-MongoDB | `scan_mongodb` | +| Open-MQTT | `scan_mqtt` | +| Open-MSSQL | `scan_mssql` | +| Open-NATPMP | `scan_nat_pmp` | +| Open-NetBIOS-Nameservice | `scan_netbios` | +| Open-Netis | `netis_router` | +| Open-Portmapper | `scan_portmapper` | +| Open-QOTD | `scan_qotd` | +| Open-Redis | `scan_redis` | +| Open-SNMP | `scan_snmp` | +| Open-SSDP | `scan_ssdp` | +| Open-TFTP | `scan_tftp` | +| Open-XDMCP | `scan_xdmcp` | +| Outdated-DNSSEC-Key| `outdated_dnssec_key` | +| Outdated-DNSSEC-Key-IPv6 | `outdated_dnssec_key_v6` | +| Sandbox-URL | `cwsandbox_url` | +| Sinkhole-DNS | `sinkhole_dns` | +| Sinkhole-Events | `event4_sinkhole` | +| Sinkhole-Events IPv4 | `event4_sinkhole` | +| Sinkhole-Events IPv6 | `event6_sinkhole` | +| Sinkhole-HTTP-Events | `event4_sinkhole_http`/`event6_sinkhole_http` | +| Sinkhole-HTTP-Events IPv4 | `event4_sinkhole_http` | +| Sinkhole-HTTP-Events IPv6 | `event6_sinkhole_http` | +| Sinkhole-Events-HTTP-Referer| `event4_sinkhole_http_referer`/`event6_sinkhole_http_referer` | +| Sinkhole-Events-HTTP-Referer IPv4 | `event4_sinkhole_http_referer` | +| Sinkhole-Events-HTTP-Referer IPv6 | `event6_sinkhole_http_referer` | +| Spam-URL | `spam_url` | +| SSL-FREAK-Vulnerable-Servers | `scan_ssl_freak` | +| SSL-POODLE-Vulnerable-Servers | `scan_ssl_poodle`/`scan6_ssl_poodle` | +| Vulnerable-Exchange-Server* | `scan_exchange` | +| Vulnerable-ISAKMP | `scan_isakmp` | +| Vulnerable-HTTP | `scan_http` | +| Vulnerable-SMTP | `scan_smtp_vulnerable` | + +\* This report can also contain data on active webshells (column `tag` is `exchange;webshell`), and are therefore not +only vulnerable but also actively infected. 
+ +In addition, the following legacy reports are supported: + +| Legacy Report Type | Successor Report Type | File Name | +|--------------------|-----------------------|-----------| +| Amplification-DDoS-Victim | Honeypot-Amplification-DDoS-Events | `ddos_amplification` | +| CAIDA-IP-Spoofer | IP-Spoofer-Events | `caida_ip_spoofer` | +| Darknet | Honeypot-Darknet | `darknet` | +| Drone | Sinkhole-Events | `botnet_drone` | +| Drone-Brute-Force | Honeypot-Brute-Force-Events, Sinkhole-HTTP-Events | `drone_brute_force` | +| Microsoft-Sinkhole | Sinkhole-HTTP-Events | `microsoft_sinkhole` | +| Sinkhole-HTTP-Drone | Sinkhole-HTTP-Events | `sinkhole_http_drone` | +| IPv6-Sinkhole-HTTP-Drone | Sinkhole-HTTP-Events | `sinkhole6_http` | + +More information on these legacy reports can be found +in [Changes in Sinkhole and Honeypot Report Types and Formats](https://www.shadowserver.org/news/changes-in-sinkhole-and-honeypot-report-types-and-formats/) +. + +--- + +### Shodan
+ +Parses data from Shodan (search, stream, etc.). + +The parser is far from complete as there are a lot of fields in a big nested structure. There is a minimal mode +available which only parses the important/most useful fields and also saves everything in `extra.shodan` keeping the original structure. When not using the minimal mode it may be useful to ignore errors as many +parsing errors can happen with the incomplete mapping. + +**Module:** `intelmq.bots.parsers.shodan.parser` + +**Parameters:** + +**`ignore_errors`** + +(optional, boolean) Defaults to true. + +**`minimal_mode`** + +(optional, boolean) Defaults to false. + +--- + +### Spamhaus DROP
+ +Parses data from Spamhaus DROP feed. + +**Module:** `intelmq.bots.parsers.spamhaus.parser_drop` + +No additional parameters. + +--- + +### Spamhaus CERT
+ +Parses data from Spamhaus CERT feed. + +**Module:** `intelmq.bots.parsers.spamhaus.parser_cert` + +No additional parameters. + +--- + +### Surbl
+ +Parses data from surbl feed. + +**Module:** `intelmq.bots.parsers.surbl.parser` + +No additional parameters. + +--- + +### Threatminer
+ +Parses data from Threatminer feed. + +**Module:** `intelmq.bots.parsers.threatminer.parser` + +No additional parameters. + +--- + +### Turris
+ +Parses data from Turris Greylist feed. + +**Module:** `intelmq.bots.parsers.turris.parser` + +No additional parameters. + +--- + +### Twitter
+ +Extracts URLs from text, fuzzy, aimed at parsing tweets. + +**Module:** `intelmq.bots.parsers.twitter.parser` + +**Parameters:** + +**`domain_whitelist`** + +(optional, array of strings) domains to be filtered out + +**`substitutions`** + +(optional, string) Semicolon delimited list of even length of pairs of substitutions (for example: `.;.;,;.` +substitutes `.` for `.` and `,` for `.`). + +**`classification_type`** + +(optional, string) Statically set `classification.type`. + +**`default_scheme`** + +(optional, string) Default scheme for URLs if not given. See also the next section. + +**Default scheme** + +The dependency `url-normalize` changed its behavior in version 1.4.0 from using `http://` as default scheme to +`https://`. Version 1.4.1 added the possibility to specify it. Thus you can only use the `default_scheme` +parameter with a current version of this library >= 1.4.1, with 1.4.0 you will always get `https://` +as default scheme and for older versions < 1.4.0 `http://` is used. + +This does not affect URLs which already include the scheme. + +--- + +### VxVault
+ +Parses data from VxVault feed. + +**Module:** `intelmq.bots.parsers.vxvault.parser` + +No additional parameters. + +--- + +### ZoneH
+ +Parses data from ZoneH. + +This bot is designed to consume defacement reports from zone-h.org. It expects fields normally present in CSV files +distributed by email. + +**Module:** `intelmq.bots.parsers.zoneh.parser` + +No additional parameters. + +## Expert Bots + +Expert bots are used for enriching, filtering and/or other data manipulation. + +### Abusix
+ +This bot adds `source.abuse_contact` and `destination.abuse_contact` e-mail addresses. They are obtained via DNS TXT +queries to Abusix servers. + +**Requirements** + +This bot can optionally use the python module *querycontacts* by Abusix itself: + +```bash +pip3 install querycontacts +``` + +If the package is not installed, our own routines are used. + +**Module:** `intelmq.bots.experts.abusix.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +No additional parameters. + +--- + +### Aggregate
+ +Aggregates events based upon given fields & timespan. + +Define specific fields to filter incoming events and aggregate them. Also set the timespan you want the events to get +aggregated. + +The "cleanup" procedure sends out the aggregated events or drops them based upon the given threshold value. It is +called on every incoming message and on the bot's initialization. If you're potentially running on low traffic ( no +incoming events within the given timestamp ) it is recommended to reload or restart the bot via cronjob each 30 +minutes (adapt to your configured timespan). Otherwise you might lose information. + +I. e.: + +```bash +crontab -e + +0,30 * * * * intelmqctl reload my-aggregate-bot +``` + +For reloading/restarting please check the `intelmqctl` documentation. + +**Module:** `intelmq.bots.experts.aggregate.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +!!! warning + `redis_cache_ttl` is not used as it would result in data loss. + +**`fields`** + +(required, string) Given fields which are used to aggregate like `classification.type`, `classification.identifier`. + +**`threshold`** + +(required, integer) If the aggregated event is lower than the given threshold after the timespan, the event will get +dropped. + +**`timespan`** + +(required, string) Timespan to aggregate events during the given time. I. e. `1 hour` + +--- + +### ASN Lookup
+ +This bot uses an offline database to add `source.asn` and `destination.asn` based on the respective IP address. + +**Requirements** + +Install `pyasn` module. + +```bash +pip3 install pyasn +``` + +**Module:** `intelmq.bots.experts.asn_lookup.expert` + +**Parameters:** + +**`database`** + +(required, string) Path to the downloaded database. + +**Database** + +Use this command to create/update the database and reload the bot: + +```bash +intelmq.bots.experts.asn_lookup.expert --update-database +``` + +The database is fetched from [routeviews.org](http://www.routeviews.org/routeviews/) and licensed under the Creative +Commons Attribution 4.0 International license (see +the [routeviews FAQ](http://www.routeviews.org/routeviews/index.php/faq/#faq-6666)). + +--- + +### CSV Converter
+ +Converts an event to CSV format, saved in the `output` field. + +To use the CSV-converted data in an output bot - for example in a file output, use the configuration +parameter `single_key` of the output bot and set it to `output`. + +**Module:** `intelmq.bots.experts.csv_converter.expert` + +**Parameters:** + +**`delimiter`** + +(optional, string) Defaults to `,`. + +**`fieldnames`** + +(required, string) Comma-separated list of field names, e.g. `"time.source,classification.type,source.ip"`. + +--- + +### Team Cymru Whois
+ +This bot adds geolocation, ASN and BGP prefix based on IP address. + +Public documentation: + +**Module:** `intelmq.bots.experts.cymru_whois.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to true. + +--- + +### Remove Affix
+ +Remove part of string from string fields, example: `www.` from `source.fqdn`. + +**Module:** `intelmq.bots.experts.remove_affix.expert` + +**Parameters:** + +**`remove_prefix`** + +(optional, boolean) True - cut from start, False - cut from end. Defaults to true. + +**`affix`** + +(required, string) example 'www.' + +**`field`** + +(required, string) Which field to modify. 'source.fqdn' + +--- + +### Domain Suffix
+ +This bots uses an offline database to add the public suffix to the event, derived by a domain. See or information on the +public suffix list: . Only rules for ICANN domains are processed. The list can (and +should) contain Unicode data, punycode conversion is done during reading. + +Note that the public suffix is not the same as the top level domain (TLD). E.g. `co.uk` is a public suffix, but the TLD +is `uk`. Privately registered suffixes (such as `blogspot.co.at`) which are part of the public suffix list too, are +ignored. + +**Rule processing** + +A short summary how the rules are processed: + +The simple ones: + +``` +com +at +gv.at +``` + +`example.com` leads to `com`, +`example.gv.at` leads to `gv.at`. + +Wildcards: + +``` +*.example.com +``` + +`www.example.com` leads to `www.example.com`. + +And additionally the exceptions, together with the above wildcard rule: + +``` +!www.example.com +``` + +`www.example.com` does now not lead to `www.example.com`, but to `example.com`. + +**Module:** `intelmq.bots.experts.domain_suffix.expert` + +**Parameters:** + +**`field`** + +(required, string) Allowed values: `fqdn` or `reverse_dns`. + +**`suffix_file`** + +(required, string) path to the suffix file + +**Database** + +Use this command to create/update the database and reload the bot: + +```bash +intelmq.bots.experts.domain_suffix.expert --update-database +``` + +--- + +### Domain Valid
+ +Checks if a domain is valid by performing multiple validity checks (see below). + +If the field given in `domain_field` does not exist in the event, the event is dropped. If the domain contains +underscores (`_`), the event is dropped. If the domain is not valid according to +the [validators library](https://pypi.org/project/validators/), the event is dropped. If the domain's last part (the +TLD) is not in the TLD-list configured by parameter `tlds_domains_list`, the field is dropped. Latest TLD +list: + +**Module:** `intelmq.bots.experts.domain_valid.expert` + +**Parameters:** + +**`domain_field`** + +(required, string) The name of the field to be validated. + +**`tlds_domains_list`** + +(required, string) Path to a local file with all valid TLDs. Defaults to `/opt/intelmq/var/lib/bots/domain_valid/tlds-alpha-by-domain.txt` + + +--- + +### Deduplicator
+ +Bot responsible for dropping duplicate events. Deduplication can be performed based on an arbitrary set of fields. + +**Module:** `intelmq.bots.experts.deduplicator.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`bypass`** + +(optional, boolean) Whether to bypass the deduplicator or not. When set to true, messages will not be deduplicated. +Defaults to false. + +**`filter_type`** + +(optional, string) Allowed values: `blacklist` or `whitelist`. The filter type will be used to define how Deduplicator +bot will interpret the parameter `filter_keys` in order to decide whether an event has already been seen or not, i.e., +duplicated event or a completely new event. + +- `whitelist` configuration: only the keys listed in `filter_keys` will be considered to verify if an event is + duplicated or not. +- `blacklist` configuration: all keys except those in `filter_keys` will be considered to verify if an event is + duplicated or not. + +**`filter_keys`** + +(optional, string) string with multiple keys separated by comma. Please note that `time.observation` key will not be +considered even if defined, because the system always ignore that key. + +When using a whitelist field pattern and a small number of fields (keys), it becomes more important, that these fields +exist in the events themselves. If a field does not exist, but is part of the hashing/deduplication, this field will be +ignored. If such events should not get deduplicated, you need to filter them out before the deduplication process, e.g. +using a sieve expert. See +also [this discussion thread](https://lists.cert.at/pipermail/intelmq-users/2021-July/000370.html) on the mailing-list. + +**Configuration Example** + +*Example 1* + +The bot with this configuration will detect duplication only based on `source.ip` and `destination.ip` keys. 
+ +```yaml +parameters: + redis_cache_db: 6 + redis_cache_host: "127.0.0.1" + redis_cache_password: null + redis_cache_port: 6379 + redis_cache_ttl: 86400 + filter_type: "whitelist" + filter_keys: "source.ip,destination.ip" +``` + +*Example 2* + +The bot with this configuration will detect duplication based on all keys, except `source.ip` and `destination.ip` keys. + +```yaml +parameters: + redis_cache_db: 6 + redis_cache_host: "127.0.0.1" + redis_cache_password: null + redis_cache_port: 6379 + redis_cache_ttl: 86400 + filter_type: "blacklist" + filter_keys: "source.ip,destination.ip" +``` + +**Flushing the cache** + +To flush the deduplicator's cache, you can use the `redis-cli` tool. Enter the database used by the bot and submit +the `flushdb` command: + +```bash +redis-cli -n 6 +flushdb +``` + +--- + +### DO Portal
+ +The DO portal retrieves the contact information from a DO portal instance: + + +**Module:** `intelmq.bots.experts.do_portal.expert` + +**Parameters:** + +**`mode`** + +(required, string) Allowed values: `replace` or `append`. How to handle new abuse contacts in case there are existing +ones. + +**`portal_url`** + +(required, string) The URL to the portal, without the API-path. The used URL +is `$portal_url + '/api/1.0/ripe/contact?cidr=%s'`. + +**`portal_api_key`** + +(required, string) The API key of the user to be used. Must have sufficient privileges. + +--- + +### Field Reducer
+ +The field reducer bot is capable of removing fields from events. + +**Module:** `intelmq.bots.experts.field_reducer.expert` + +**Parameters:** + +**`type`** + +(required, string) Allowed values: `whitelist` or `blacklist`. When `whitelist` is set, only the fields in `keys` will +be passed along. When `blacklist` is set then the fields in `keys` will be removed from events. + +**`keys`** + +(required, array of strings) Can be an array of field names or a string with a comma-separated list of field names. + +--- + +### Filter
+ +The filter bot is capable of filtering specific events. + +A simple filter for messages (drop or pass) based on a exact string comparison or regular expression. + +**Module:** `intelmq.bots.experts.filter.expert` + +**Parameters:** + +*Parameters for filtering with key/value attributes* + +**`filter_key`** + +() - key from data format + +**`filter_value`** + +() - value for the key + +**`filter_action`** + +() - action when a message match to the criteria +(possible actions: keep/drop) + +**`filter_regex`** + +() - attribute determines if the `filter_value` shall be treated as regular expression or not. + +If this attribute is not empty (can be `true`, `yes` or whatever), the bot uses python's `` `re.search `` +<>`_ function to evaluate the filter with regular expressions. If +this attribute is empty or evaluates to false, an exact string comparison is performed. A check on string * +inequality* can be achieved with the usage of *Paths* described below. + +*Parameters for time based filtering* + +**`not_before`** + +(optional, string) Events before this time will be dropped. Example: `1 week`. + +**`not_after`** + +(optional, string) - Events after this time will be dropped. + +Both parameters accept string values describing absolute or relative time: + +- absolute +- basically anything parseable by datetime parser, eg. + +``` +2015-09-12T06:22:11+00:00 +``` + +**`time.source`** + +(optional, string) Taken from the event will be compared to this value to decide the filter behavior. 
+ +- relative +- accepted string formatted like this " ", where epoch could be any of following strings (could + optionally end with trailing 's'): hour, day, week, month, year +- time.source taken from the event will be compared to the value (now - relative) to decide the filter behavior + +*Examples of time filter definition* + +- `not_before: "2015-09-12T06:22:11+00:00"` - events older than the specified time will be dropped +- `not_after: "6 months"` - just events older than 6 months will be passed through the pipeline + +**Possible paths** + +- `_default`: default path, according to the configuration +- `action_other`: Negation of the default path +- `filter_match`: For all events the filter matched on +- `filter_no_match`: For all events the filter does not match + +| action | match | \_default | action_other | filter_match | filter_no_match | +| ------ | ----- | --------- | ------------ | ------------ | --------------- | +| keep | ✓ | ✓ | ✗ | ✓ | ✗ | +| keep | ✗ | ✗ | ✓ | ✗ | ✓ | +| drop | ✓ | ✗ | ✓ | ✓ | ✗ | +| drop | ✗ | ✓ | ✗ | ✗ | ✓ | + +In `DEBUG` logging level, one can see that the message is sent to both matching paths, also if one of the paths is not +configured. Of course the message is only delivered to the configured paths. + +--- + +### Format Field
+ +String method operations on column values. + +**Module:** `intelmq.bots.experts.format_field.expert` + +**Parameters:** + +*Parameters for stripping chars* + +**`strip_columns`** +(optional, string/array of strings) A list of strings or a string of comma-separated values with field names. The names +must match the IntelMQ Data Format field names. + +For example: + +```yaml +columns: + - malware.name + - extra.tags +``` + +is equivalent to: + +```yaml +columns: "malware.name,extra.tags" +``` + +**`strip_chars`** + +(optional, string) Set of characters to remove as leading/trailing characters. Defaults to space. + +*Parameters for replacing chars* + +**`replace_column`** + +() key from data format + +**`old_value`** + +() the string to search for + +**`new_value`** + +() the string to replace the old value with + +**`replace_count`** +() number specifying how many occurrences of the old value you want to replace(default: [1]) + +*Parameters for splitting string to list of string* + +**`split_column`** + +() key from data format + +**`split_separator`** + +() specifies the separator to use when splitting the string(default: `,`) + +Order of operation: `strip -> replace -> split`. These three methods can be combined such as first strip and then split. + +--- + +### Generic DB Lookup
+ +This bot is capable of enriching intelmq events by lookups to a database. Currently only PostgreSQL and SQLite are +supported. + +If more than one result is returned, a ValueError is raised. + +**Module:** `intelmq.bots.experts.generic_db_lookup.expert` + +**Parameters:** + +*Connection* + +**`engine`** + +(required, string) Allowed values: `postgresql` or `sqlite`. + +**`database`** + +(optional, string) Database name or the SQLite filename. Defaults to `intelmq`. + +**`table`** + +(optional, string) Name of the table. Defaults to `contacts`. + +*PostgreSQL specific parameters* + +**`host`** + +(optional, string) Hostname of the PostgreSQL server. Defaults to `localhost`. + +**`port`** + +(optional, integer) Port of the PostgreSQL server. Defaults to 5432. + +**`user`** + +(optional, string) Username for accessing PostgreSQL. Defaults to `intelmq`. + +**`password`** + +(optional, string) Password for accessing PostgreSQL. Defaults to ?. + +**`sslmode`** + +(optional, string) Type of TLS mode to use. Defaults to `require`. + +*Lookup* + +**`match_fields`** + +(optional, object) The value is a key-value mapping an arbitrary number of IntelMQ field names to table column names. The +values are compared with `=` only. Defaults to `source.asn: "asn"`. + +*Replace fields* + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to false. + +**`replace_fields`** + +(optional, object) Key-value mapping an arbitrary number of table column names to IntelMQ field names. Defaults +to `{"contact": "source.abuse_contact"}`. + +--- + +### Gethostbyname
+ +This bot resolves to IP address (`source.ip` and `destination.ip`). Can possibly use also the `source.url` +and `destination.url` for extracting FQDN. + +This bot resolves the DNS name (`source.fqdn` and `destination.fqdn`) using the `gethostbyname` syscall to an IP +address (`source.ip` and `destination.ip`). The following gaierror resolution errors are ignored and treated as if the +hostname cannot be resolved: + +* `-2`/`EAI_NONAME`: NAME or SERVICE is unknown +* `-4`/`EAI_FAIL`: Non-recoverable failure in name res. +* `-5`/`EAI_NODATA`: No address associated with NAME. +* `-8`/`EAI_SERVICE`: SERVICE not supported for `ai_socktype`. +* `-11`/`EAI_SYSTEM`: System error returned in `errno`. + +Other errors result in an exception if not ignored by the parameter `gaierrors_to_ignore`. All gaierrors can be found here: + +**Module:** `intelmq.bots.experts.gethostbyname.expert` + +**Parameters:** + +**`fallback_to_url`** + +(optional, boolean) When true and no `source.fqdn` present, use `source.url` instead for producing `source.ip`. + +**`gaierrors_to_ignore`** + +(optional, array of integers) Gaierror codes to ignore, e.g. `-3` for EAI_AGAIN (Temporary failure in name resolution). +Only accepts the integer values, not the names. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing `source.ip` and/or `destination.ip` fields. Defaults to false. + +--- + +### HTTP Status
+ +The bot fetches the HTTP status for a given URL and saves it in the event. + +**Module:** `intelmq.bots.experts.http.expert_status` + +**Parameters:** + +**`field`** + +(required, string) The name of the field containing the URL to be checked. + +**`success_status_codes`** + +(optional, array of integers) An array of success status codes. If this parameter is omitted or the list is empty, +successful status codes are the ones between 200 and 400. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing `status` field. Defaults to false. + +--- + +### HTTP Content
+ +Fetches an HTTP resource and checks if it contains a specific string. + +The bot fetches an HTTP resource and checks if it contains a specific string. + +**Module:** `intelmq.bots.experts.http.expert_content` + +**Parameters:** + +**`field`** + +(optional, string) The name of the field containing the URL to be checked. Defaults to `source.url`. + +**`needle`** + +(optional, string) The string that the content available on URL is checked for. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing `status` field. Defaults to false. + +--- + +### IDEA Converter
+ +Converts the event to IDEA format and saves it as JSON in the field `output`. All other fields are not modified. + +Documentation about IDEA: + +**Module:** `intelmq.bots.experts.idea.expert` + +**Parameters:** + +**`test_mode`** + +(optional, boolean) Adds `Test` category to mark all outgoing IDEA events as informal (meant to simplify setting up and +debugging new IDEA producers). Defaults to true. + +--- + +### Jinja2 Template
+ +This bot lets you modify the content of your IntelMQ message fields using Jinja2 templates. + +Documentation about Jinja2 templating language: + +**Module:** `intelmq.bots.experts.jinja.expert` + +**Parameters:** + +**`fields`** + +(required, object) a dict containing as key the name of the field where the result of the Jinja2 template should be +written to and as value either a Jinja2 template or a filepath to a Jinja2 template file (starting with `file:///`). +Because the expert decides if it is a filepath based on the value starting with `file:///` it is not possible to simply +write values starting with `file:///` to fields. The object containing the existing message will be passed to the Jinja2 +template with the name `msg`. + +```yaml +fields: + output: The provider is {{ msg['feed.provider'] }}! + feed.url: "{{ msg['feed.url'] | upper }}" + extra.somejinjaoutput: file:///etc/intelmq/somejinjatemplate.j2 +``` + +--- + +### Lookyloo
+ +Lookyloo is a website screenshotting and analysis tool. For more information and installation instructions visit + + +The bot sends a request for `source.url` to the configured Lookyloo instance and saves the retrieved website screenshot +link in the field `screenshot_url`. Lookyloo only *queues* the website for screenshotting, therefore the screenshot may +not be directly ready after the bot requested it. The `pylookyloo` library is required for this bot. +The `http_user_agent` parameter is passed on, but not other HTTP-related parameter like proxies. + +Events without `source.url` are ignored. + +**Module:** `intelmq.bots.experts.lookyloo.expert` + +**Parameters:** + +**`instance_url`** + +(required, string) LookyLoo instance to connect to. + +--- + +### MaxMind GeoIP
+ +This bot uses an offline database for adding geolocation information based on the IP address (`source.ip` and `destination.ip`). + +**Requirements** + +The bot requires the MaxMind's `geoip2` Python library, version 2.2.0 has been tested. + +To download the database a free license key is required. More information can be found +at . + +**Module:** `intelmq.bots.experts.maxmind_geoip.expert` + +**Parameters:** + +**`database`** + +(required, string) Path to the local database file. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to true. + +**`use_registered`** + +(optional, boolean) MaxMind has two country ISO codes: One for the physical location of the address and one for the +registered location. See also for a short explanation. Defaults +to `false` (backwards-compatibility). + +**`license_key`** + +(required, string) MaxMind license key is necessary for downloading the GeoLite2 database. + +**Database** + +Use this command to create/update the database and reload the bot: + +```bash +intelmq.bots.experts.maxmind_geoip.expert --update-database +``` + +--- + +### MISP
+ +Queries a MISP instance for the `source.ip` and adds the MISP Attribute UUID and MISP Event ID of the newest attribute +found. + +**Module:** `intelmq.bots.experts.misp.expert` + +**Parameters:** + +**`misp_key`** + +(required, string) MISP Authkey. + +**`misp_url`** + +(required, string) URL of MISP server (with trailing '/') + +**`http_verify_cert`** + +(optional, boolean) Verify the TLS certificate of the server. Default to `true`. + +--- + +### McAfee Active Response Lookup
+ +Queries DXL bus for hashes, IP addresses or FQDNs. + +**Module:** `intelmq.bots.experts.mcafee.expert_mar` + +**Parameters:** + +**`dxl_config_file`** + +(required, string) Location of the file containing required information to connect to DXL bus. + +**`lookup_type`** + +(required, string) Allowed values: + +- `Hash` - Looks up `malware.hash.md5`, `malware.hash.sha1` and `malware.hash.sha256`. +- `DestSocket` - Looks up `destination.ip` and `destination.port`. +- `DestIP` - Looks up `destination.ip`. +- `DestFQDN` - Looks up in `destination.fqdn`. + +--- + +### Modify
+ +This bots allows you to change arbitrary field values of events using a configuration file. + +**Module:** `intelmq.bots.experts.modify.expert` + +**Parameters:** + +**`configuration_path`** + +(required, string) Location of the configuration file. + +**`case_sensitive`** + +(optional, boolean) Defaults to true. + +**`maximum_matches`** + +(optional, boolean) Maximum number of matches. Processing stops after the limit is reached. Defaults to null (no limit). + +**`overwrite`** + +(optional, boolean) Overwrite any existing fields by matching rules. Defaults to false. + +**Configuration File** + +The modify expert bot allows you to change arbitrary field values of events just using a configuration file. Thus it is +possible to adapt certain values or adding new ones only by changing JSON-files without touching the code of many other +bots. + +The configuration is called `modify.conf` and looks like this: + +```json +[ + { + "rulename": "Standard Protocols http", + "if": { + "source.port": "^(80|443)$" + }, + "then": { + "protocol.application": "http" + } + }, + { + "rulename": "Spamhaus Cert conficker", + "if": { + "malware.name": "^conficker(ab)?$" + }, + "then": { + "classification.identifier": "conficker" + } + }, + { + "rulename": "bitdefender", + "if": { + "malware.name": "bitdefender-(.*)$" + }, + "then": { + "malware.name": "{matches[malware.name][1]}" + } + }, + { + "rulename": "urlzone", + "if": { + "malware.name": "^urlzone2?$" + }, + "then": { + "classification.identifier": "urlzone" + } + }, + { + "rulename": "default", + "if": { + "feed.name": "^Spamhaus Cert$" + }, + "then": { + "classification.identifier": "{msg[malware.name]}" + } + } +] +``` + +In our example above we have five groups labeled `Standard Protocols http`, `Spamhaus Cert conficker`, +`bitdefender`, `urlzone` and `default`. All sections will be considered, in the given order (from top to bottom). + +Each rule consists of *conditions* and *actions*. 
Conditions and actions are dictionaries holding the field names of +events and regular expressions to match values (selection) or set values (action). All matching rules will be applied in +the given order. The actions are only performed if all selections apply. + +If the value for a condition is an empty string, the bot checks if the field does not exist. This is useful to apply +default values for empty fields. + +**Actions** + +You can set the value of the field to a string literal or number. + +In addition you can use the [standard Python string format syntax](https://docs.python.org/3/library/string.html#format-string-syntax) to access the values from the processed event as `msg` and the match groups of the conditions as `matches`, see the bitdefender example above. Group 0 ([`0`]) contains the full matching string. See also the documentation on [re.Match.group](https://docs.python.org/3/library/re.html?highlight=re%20search#re.Match.group). + +Note that `matches` will also contain the match groups from the default conditions if there were any. + +**Examples** + +We have an event with `feed.name = Spamhaus Cert` and `malware.name = confickerab`. The expert loops over all sections +in the file and eventually enters section `Spamhaus Cert`. First, the default condition is checked, it matches! +OK, going on. Otherwise the expert would have selected a different section that has not yet been considered. Now, go +through the rules, until we hit the rule `conficker`. We combine the conditions of this rule with the default +conditions, and both rules match! So we can apply the action: `classification.identifier` is set to `conficker`, the +trivial name. + +Assume we have an event with `feed.name = Spamhaus Cert` and `malware.name = feodo`. The default condition matches, but +no others. So the default action is applied. The value for `classification.identifier` will be set to `feodo` +by `{msg[malware.name]}`. 
+ +**Types** + +If the rule is a string, a regular expression search is performed, also for numeric values (`str()` is called on them). +If the rule is numeric for numeric values, a simple comparison is done. If other types are mixed, a warning will be +thrown. + +For boolean values, the comparison value needs to be `true` or `false` as in JSON they are written all-lowercase. + +--- + +### National CERT Contact Lookup by CERT.AT
+ + offers an IP address to national CERT contact (and cc) mapping. + +**Module:** `intelmq.bots.experts.national_cert_contact_certat.expert` + +**Parameters:** + +**`filter`** + +(optional, boolean) Whether to act as a filter for AT. + +**`overwrite_cc`** + +(optional, boolean) Set to true if you want to overwrite any potentially existing cc fields in the event. Defaults to +false. + +--- + +### RDAP
+ +This bot queries RDAP servers for additional information about a domain. + +**Module:** `intelmq.bots.experts.rdap.expert` + +**Parameters:** + +**`rdap_order`** + +(optional, array of strings) Search order of contacts with these roles. Defaults to `["abuse", "technical"]`. + +**`rdap_bootstrapped_servers`** + +(optional, object) Customized RDAP servers. Do not forget the trailing slash. For example: + +```json +{ + "at": { + "url": "rdap.server.at/v1/", + "auth": { + "type": "jwt", + "token": "ey..." + } + }, + "de": "rdap.service:1337/v1/" +} +``` + +--- + +### RecordedFuture IP Risk
+ +This bot tags events with the score found in RecordedFuture large IP risklist. + +Record risk score associated to source and destination IP if they are present. Assigns 0 to IP addresses not in the RF +list. + +For both `source.ip` and `destination.ip` the corresponding risk score is fetched from a local database created from +RecordedFuture's API. The score is recorded in `extra.rf_iprisk.source` and `extra.rf_iprisk.destination`. If a lookup +for an IP fails a score of 0 is recorded. + +See and speak with your RecordedFuture representative for more +information. + +The list is obtained from recorded future API and needs a valid API TOKEN The large list contains all IP's with a risk +score of 25 or more. If IP's are not present in the database a risk score of 0 is given. + +**Module:** `intelmq.bots.experts.recordedfuture_iprisk.expert` + +**Parameters:** + +**`database`** + +(required, string) Path to the local database file. + +**`api_token`** + +(required, string) This needs to contain valid API token to download the latest database data. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to false. + +**Database** + +Use this command to create/update the database and reload the bot: + +```bash +intelmq.bots.experts.recordedfuture_iprisk.expert --update-database +``` + +--- + +### Reverse DNS
+ +For both `source.ip` and `destination.ip` the PTR record is fetched and the first valid result is used +for `source.reverse_dns` or `destination.reverse_dns`. + +**Module:** `intelmq.bots.experts.reverse_dns.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`cache_ttl_invalid_response`** + +(required, integer) The TTL for cached invalid responses. + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to false. + +--- + +### RFC1918
+ +Several RFCs define ASNs, IP Addresses and Hostnames (and TLDs) reserved for *documentation*. Events or fields of events +can be dropped if they match the criteria of either being reserved for documentation (e.g. AS 64496, +Domain `example.com`) or belonging to a local area network (e.g. `192.168.0.0/24`). These checks can be applied to URLs, IP +Addresses, FQDNs and ASNs. + +It is configurable if the whole event should be dropped ("policies") or just the field removed, as well as which fields +should be checked. + +Sources: + +- `1918` +- `2606` +- `3849` +- `4291` +- `5737` +- +- + +**Module:** `intelmq.bots.experts.rfc1918.expert` + +**Parameters:** + +**`fields`** + +(required, string) Comma-separated list of fields. Allowed values: + +- `destination.asn` & `source.asn` +- `destination.fqdn` & `source.fqdn` +- `destination.ip` & `source.ip` +- `destination.url` & `source.url` + +**`policy`** + +(required, string) Comma-separated list of policies. Allowed values: + +- `drop` - the entire event is dropped +- `del` - the affected field is removed + +For example, with `fields` set to `destination.ip,source.asn,source.url` and `policy` set to `del,drop,drop`, this means that: + +- If a `destination.ip` value is part of a reserved network block, the field will be removed (policy `del`). +- If a `source.asn` value is in the range of reserved AS numbers, the event will be removed altogether (policy `drop`). +- If a `source.url` value contains a host with either an IP address part of a reserved network block, or a reserved + domain name (or with a reserved TLD), the event will be dropped (policy `drop`). + +--- + +### RIPE
+ +Online RIPE Abuse Contact and Geolocation Finder for IP addresses and Autonomous Systems. + +**Module:** `intelmq.bots.experts.ripe.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`mode`** + +(optional, string) Allowed values: `append` or `replace`. Defaults to `append`. + +**`query_ripe_db_asn`** + +(optional, boolean) Query for ASNs at `http://rest.db.ripe.net/abuse-contact/as%s.json`. Defaults to true. + +**`query_ripe_db_ip`** + +(optional, boolean) Query for IPs at `http://rest.db.ripe.net/abuse-contact/%s.json`. Defaults to true. + +**`query_ripe_stat_asn`** + +(optional, boolean) Query for ASNs at `https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=%s`. Defaults +to true. + +**`query_ripe_stat_ip`** + +(optional, boolean) Query for IPs at `https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=%s`. Defaults +to true. + +**`query_ripe_stat_geolocation`** + +(optional, boolean) Query for IPs at `https://stat.ripe.net/data/maxmind-geo-lite/data.json?resource=%s`. Defaults to +true. + +--- + +### Sieve
+ +This bot is used to filter and/or modify events based on a set of rules. The rules are specified in an external +configuration file and with a syntax *similar* to the [Sieve language](http://sieve.info) used for mail filtering. + +Each rule defines a set of matching conditions on received events. Events can be matched based on keys and values in the +event. Conditions can be combined using parenthesis and the boolean operators `&&` and `||`. If the processed event +matches a rule's conditions, the corresponding actions are performed. Actions can specify whether the event should be +kept or dropped in the pipeline (filtering actions) or if keys and values should be changed (modification actions). + +**Requirements** + +To use this bot, you need to install the required dependencies: + +```bash +pip3 install -r intelmq/bots/experts/sieve/REQUIREMENTS.txt +``` + +**Module:** `intelmq.bots.experts.sieve.expert` + +**Parameters:** + +**`file`** + +(required, string) Path to sieve file. Syntax can be validated with `intelmq_sieve_expert_validator`. + +**Examples** + +The following excerpts illustrate some of the basic features of the sieve file format: + +``` +if :exists source.fqdn { + keep // aborts processing of subsequent rules and forwards the event. +} + + +if :notexists source.abuse_contact || source.abuse_contact =~ '.*@example.com' { + drop // aborts processing of subsequent rules and drops the event. +} + +if source.ip << '192.0.0.0/24' { + add! comment = 'bogon' // sets the field comment to this value and overwrites existing values + path 'other-path' // the message is sent to the given path +} + +if classification.type :in ['phishing', 'malware-distribution'] && source.fqdn =~ '.*.(ch|li)$' { + add! comment = 'domainabuse' + keep +} elif classification.type == 'scanner' { + add! 
comment = 'ignore' + drop +} else { + remove comment +} +``` + +**Reference** + +*Sieve File Structure* + +The sieve file contains an arbitrary number of rules of the form: + +``` +if EXPRESSION { + ACTIONS +} elif EXPRESSION { + ACTIONS +} else { + ACTIONS +} +``` + +Nested if-statements and mixed if statements and rules in the same scope are possible. + +*Expressions* + +Each rule specifies one or more expressions to match an event based on its keys and values. Event keys are specified as +strings without quotes. String values must be enclosed in single quotes. Numeric values can be specified as integers or +floats and are unquoted. IP addresses and network ranges (IPv4 and IPv6) are specified with quotes. List values for use +with list/set operators are specified as string, float, int and bool literals separated by commas and enclosed +in square brackets. Expression statements can be combined and chained using parentheses and the boolean operators `&&` +and `||`. The following operators may be used to match events: + +- `:exists` and `:notexists` match if a given key exists (or does not exist, respectively), for example: + +``` +if :exists source.fqdn { ... } +``` + +- `==` and `!=` match for equality of strings, numbers, and booleans, for example: + +``` +if feed.name != 'acme-security' || feed.accuracy == 100 || extra.false_positive == false { ... } +``` + +- `:contains` matches on substrings. + +- `=~` matches strings based on the given regular expression. `!~` is the inverse regular expression match. + +- Numerical comparisons are evaluated with `<`, `<=`, `>`, `>=`. + +- `<<` matches if an IP address is contained in the specified network range: + +``` +if source.ip << '10.0.0.0/8' { ... } +``` + +- String values to match against can also be specified as lists of strings, which have separate operators. For example: + +``` +if source.ip :in ['8.8.8.8', '8.8.4.4'] { ... 
} +``` + +In this case, the event will match if it contains a key `source.ip` with either value `8.8.8.8` or `8.8.4.4`. + +There are also `:containsany` to match at least one of a list of substrings, and `:regexin` to match at least one of a +list of regular expressions, similar to the `:contains` and `=~` operators. + +- Lists of numeric values support `:in` to check for inclusion in a list of numbers: + +``` +if source.port :in [80, 443] { ... } +``` + +- `:equals` tests for equality between lists, including order. Example for checking a hostname-port pair: + +``` +if extra.host_tuple :equals ['dns.google', 53] { ... } +``` + +- `:setequals` tests for set-based equality (ignoring duplicates and value order) between a list of given values. + Example for checking for the first nameserver of two domains, regardless of the order they are given in the list: + +``` +if extra.hostnames :setequals ['ns1.example.com', 'ns1.example.mx'] { ... } +``` + +- `:overlaps` tests if there is at least one element in common between the list specified by a key and a list of values. + Example for checking if at least one of the ICS, database or vulnerable tags is given: + +``` +if extra.tags :overlaps ['ics', 'database', 'vulnerable'] { ... } +``` + +- `:subsetof` tests if the list of values from the given key only contains values from a set of values specified as the + argument. Example for checking for a host that has only ns1.example.com and/or ns2.* as its apparent hostname: + +``` +if extra.hostnames :subsetof ['ns1.example.com', 'ns2.example.com'] { ... } +``` + +- `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. + Example for matching hosts with at least the IoT and vulnerable tags: + +``` +if extra.tags :supersetof ['iot', 'vulnerable'] { ... } +``` + +* `:before` tests if the date value occurred before given time ago. The time might be absolute (basically anything parseable by pendulum parser, eg. 
“2015-09-12T06:22:11+00:00”) or relative (accepted string formatted like this “`<integer> <epoch>`”, where epoch could be any of following strings (could optionally end with trailing ‘s’): hour, day, week, month, year) + +``` +if time.observation :before '1 week' { ... } +``` + +* `:after` tests if the date value occurred after given time ago; see `:before` + +``` +if time.observation :after '2015-09-12' { ... } # happened after midnight the 12th Sep +``` + +- Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: + +``` +if extra.has_known_vulns == true { ... } +``` + +- The combination of multiple expressions can be done using parentheses and boolean operators: + +``` +if (source.ip == '127.0.0.1') && (comment == 'add field' || classification.taxonomy == 'vulnerable') { ... } +``` + +- Any single expression or a parenthesised group of expressions can be negated using `!`: + +``` +if ! source.ip :contains '127.0.0.' || ! ( source.ip == '172.16.0.5' && source.port == 25 ) { ... } +``` + +!!! note +    Since 3.0.0, list-based operators are used on list values, such as `foo :in [1, 2, 3]` instead +    of `foo == [1, 2, 3]` and `foo :regexin ['.mx', '.zz']` rather than `foo =~ ['.mx', '.zz']`, and similarly +    for `:containsany` vs `:contains`. Besides that, `:notcontains` has been removed, with +    e.g. `foo :notcontains ['.mx', '.zz']` now being represented using negation as `! foo :contains ['.mx', '.zz']`. + +*Actions* + +If part of a rule matches the given conditions, the actions enclosed in `{` and `}` are applied. By default, all events +that are matched or not matched by rules in the sieve file will be forwarded to the next bot in the pipeline, unless +the `drop` action is applied. + +- `add` adds a key value pair to the event. It can be a string, number, or boolean. This action only applies if the key + is not yet defined in the event. If the key is already defined, the action is ignored. 
Example: + +``` +add comment = 'hello, world' +``` + +Some basic mathematical expressions are possible, but currently only relative time specifications are +supported. For example: + +``` +add time.observation += '1 hour' +add time.observation -= '10 hours' +``` + +- `add!` same as above, but will force overwrite the key in the event. + +- `update` modifies an existing value for a key. Only applies if the key is already defined. If the key is not defined + in the event, this action is ignored. This supports mathematical expressions like above. Example: + +``` +update feed.accuracy = 50 +``` + +Some basic mathematical expressions are possible, but currently only relative time specifications are +supported. For example: + +``` +update time.observation += '1 hour' +update time.observation -= '10 hours' +``` + +- `remove` removes a key/value from the event. Action is ignored if the key is not defined in the event. Example: + +``` +remove extra.comments +``` + +- `keep` sends the message to the next bot in the pipeline (same as the default behaviour), and stops sieve rules + processing. + +- `path` sets the path (named queue) the message should be sent to (implicitly or with the command `keep`). The named + queue needs to be configured in the pipeline; see the User Guide for more information. + +``` +path 'named-queue' +``` + +You can as well set multiple destination paths with the same syntax as for value lists: + +``` +path ['one', 'two'] +``` + +This will result in two identical messages, one sent to the path `one` and the other sent to the path `two`. + +If the path is not configured, the error looks like: + +``` +File "/path/to/intelmq/intelmq/lib/pipeline.py", line 353, in send for destination_queue in self.destination_queues[path]: KeyError: 'one' +``` + +- `drop` marks the event to be dropped. The event will not be forwarded to the next bot in the pipeline. The sieve file + processing is interrupted upon reaching this action. 
No other actions may be specified besides the `drop` action + within `{` and `}`. + +*Comments* + +Comments may be used in the sieve file: all characters after `//` and until the end of the line will be ignored. + +--- + +### Splunk Saved Search Lookup
+ +Runs a saved search in Splunk using fields in an event, adding fields from the search result into the event. + +Splunk documentation on saved +searches: + +The saved search should take parameters according to the `search_parameters` configuration and deliver results according +to `result_fields`. The parameter examples below match a saved search of this format: + +``` +index="dhcp" ipv4address="$ip$" | ... | fields _time username ether +``` + +The time window used is the one saved with the search. + +Waits for Splunk to return an answer for each message, so slow searches will delay the entire botnet. If you anticipate +a load of more than one search every few seconds, consider running multiple load-balanced copies of this bot. + +**Module:** `intelmq.bots.experts.splunk_saved_search.expert` + +**Parameters (also expects [HTTP parameters](#http-parameters)):** + +**`auth_token`** + +(required, string) Splunk API authentication token. + +**`url`** + +(required, string) Base URL of the Splunk REST API. + +**`retry_interval`** + +(optional, integer) Number of seconds to wait between polling for search results to be available. Defaults to 5. + +**`saved_search`** + +(required, string) Name of Splunk saved search to run. + +**`search_parameters`** + +(optional, object) Mapping of IntelMQ event fields containing the data to search for to parameters of the Splunk saved +search. Defaults to `{}`. Example: + +```yaml +search_parameters: + source.ip: ip +``` + +**`result_fields`** + +(optional, object) Mapping of Splunk saved search result fields to IntelMQ event fields to store the results in. +Defaults to `{}`. Example: + +```yaml +result_fields: + username: source.account +``` + +**`not_found`** + +(optional, array of strings) How to handle empty search results. Allowed values: + +- `warn` - log a warning message +- `send` - send the event on unmodified +- `drop` - drop the message +- `send` and `drop` are mutually exclusive + +All specified actions are performed. 
Defaults to `[ "warn", "send" ]`. + +**`multiple_result_handling`** + +(optional, array of strings) How to handle more than one search result. Allowed values: + +- `limit` - limit the search so that duplicates are impossible +- `warn` - log a warning message +- `use_first` - use the first search result +- `ignore` - do not modify the event +- `send` - send the event on +- `drop` - drop the message +- `limit` cannot be combined with any other value +- `send` and `drop` are mutually exclusive +- `ignore` and `use_first` are mutually exclusive + +All specified actions are performed. Defaults to `["warn", "use_first", "send" ]`. + +**`overwrite`** + +(optional, boolean/null) Whether search results overwrite values already in the message or not. If null, attempting to add a field that already exists throws an exception. Defaults to null. + +--- + +### Taxonomy
+ +This bot adds the `classification.taxonomy` field according to the RSIT taxonomy. + +Please note that there is a slight mismatch of IntelMQ's taxonomy to the upstream taxonomy. See also this [issue](https://github.com/certtools/intelmq/issues/1409). + +Information on the "Reference Security Incident Taxonomy" can be found here: + +For brevity, "type" means `classification.type` and "taxonomy" means `classification.taxonomy`. + +- If taxonomy is missing, and type is given, the corresponding taxonomy is set. +- If neither taxonomy nor type is given, taxonomy is set to "other" and type to "unknown". +- If taxonomy is given, but type is not, type is set to "unknown". + +**Module:** `intelmq.bots.experts.taxonomy.expert` + +No additional parameters. + +--- + +### Threshold
+ +Check if the number of similar messages during a specified time interval exceeds a set value. + +**Limitations** + +This bot has certain limitations and is not a true threshold filter (yet). It works like this: + +1. Every incoming message is hashed according to the `filter_*` parameters. +2. The hash is looked up in the cache and the count is incremented by 1, and the TTL of the key is (re-)set to the timeout. +3. If the new count matches the threshold exactly, the message is forwarded. Otherwise it is dropped. + +!!! note + Even if a message is sent, any further identical messages are dropped, if the time difference to the last message is less than the timeout! The counter is not reset if the threshold is reached. + +**Module:** `intelmq.bots.experts.threshold.expert` + +**Parameters (also expects [cache parameters](#cache-parameters)):** + +**`filter_keys`** + +(required, string/array of strings) Array or comma-separated list of field names to consider or ignore when determining which messages are similar. + +**`filter_type`** + +(required, string) Allowed values: `whitelist` or `blacklist`. When `whitelist` is used, only the fields listed in +`filter_keys` are considered when determining which messages are similar. When `blacklist` is used, the fields listed in +`filter_keys` are ignored and all other fields are considered. + +**`threshold`** + +(required, integer) Number of messages required before propagating one. In forwarded messages, the threshold is saved in the message as `extra.count`. + +**`add_keys`** + +(optional, object) List of keys and their respective values to add to the propagated messages. Example: + +```yaml +add_keys: + classification.type: "spam" + comment: "Started more than 10 SMTP connections" +``` + +--- + +### Tor Exit Node
+ +This bot uses an offline database to determine whether the host is a Tor exit node. + +**Module:** `intelmq.bots.experts.tor_nodes.expert` + +**Parameters:** + +**`database`** + +(required, string) Path to the database file. + +**Database** + +Use this command to create/update the database and reload the bot: + +```bash +intelmq.bots.experts.tor_nodes.expert --update-database +``` + +--- + +### Trusted Introducer Lookup
+ +Looks up data from the Trusted Introducer public teams list. + +**Module:** `intelmq.bots.experts.trusted_introducer_lookup.expert` + +**Parameters:** + +**`order`** + +(required, string) Allowed values: `domain` and `asn`. You can set multiple values, so first match wins. + +- When `domain` is set, it will look up the `source.fqdn` field. It will go from high-order to low-order, i.e. + `1337.super.example.com -> super.example.com -> example.com -> .com` +- If `asn` is set, it will look up `source.asn`. + +After a match, the abuse contact will be fetched from the trusted introducer teams list and will be stored in the event +as `source.abuse_contact`. If there is no match, the event will not be enriched and will be sent to the next configured +step. + +--- + +### Tuency
+ +Queries the [IntelMQ API](https://gitlab.com/intevation/tuency/tuency/-/blob/master/backend/docs/IntelMQ-API.md) +of a [Tuency Contact Database](https://gitlab.com/intevation/tuency/tuency/) instance. + +*Tuency* is a contact management database addressing the needs of CERTs. Users of *tuency* can configure contact +addresses and delivery settings for IP objects (addresses, netblocks), Autonomous Systems, and +(sub-)domains. This expert queries the information for `source.ip` and +`source.fqdn` using the following other fields: + +- `classification.taxonomy` +- `classification.type` +- `feed.provider` +- `feed.name` + +These fields therefore need to exist, otherwise the message is skipped. + +The API parameter "feed_status" is currently set to "production" constantly, until IntelMQ supports this field. + +The API answer is processed as following. For the notification interval: + +- If *suppress* is true, then `extra.notify` is set to false. +- Otherwise: +- If the interval is *immediate*, then `extra.ttl` is set to 0. +- Otherwise the interval is converted into seconds and saved in + `extra.ttl`. + +For the contact lookup: For both fields *ip* and *domain*, the +*destinations* objects are iterated and its *email* fields concatenated to a comma-separated list +in `source.abuse_contact`. + +The IntelMQ fields used by this bot may change in the next IntelMQ release, as soon as better suited fields are +available. + +**Module:** `intelmq.bots.experts.tuency.expert` + +**Parameters:** + +**`url`** + +(required, string) Tuency instance URL. Without the API path. + +**`authentication_token`** + +(required, string) The Bearer authentication token. Without the `Bearer` prefix. + +**`overwrite`** + +(optional, boolean) Whether the existing data in `source.abuse_contact` should be overwritten. Defaults to true. + +--- + +### Truncate By Delimiter
+ +Cut string if length is bigger than maximum length. + +**Module:** `intelmq.bots.experts.truncate_by_delimiter.expert` + +**Parameters:** + +**`delimiter`** + +(required, string) The delimiter to be used for truncating. Defaults to `.`. + +**`max_length`** + +(required, integer) The maximum string length. + +**`field`** + +(required, string) The field to be truncated, e.g. `source.fqdn`. The given field is truncated step-by-step using the delimiter from the beginning, until the field is shorter than `max_length`. + +Example: Cut through a long domain with a dot. The string is truncated until the domain does not exceed the configured +maximum length. + +- Input domain (e.g. `source.fqdn`): `www.subdomain.web.secondsubomain.test.domain.com` +- `delimiter`: `.` +- `max_length`: 20 +- Resulting value `test.domain.com` (length: 15 characters) + +--- + +### URL
+ +This bot extracts additional information from `source.url` and `destination.url` fields. It can fill the following +fields: + +- `source.fqdn` +- `source.ip` +- `source.port` +- `source.urlpath` +- `source.account` +- `destination.fqdn` +- `destination.ip` +- `destination.port` +- `destination.urlpath` +- `destination.account` +- `protocol.application` +- `protocol.transport` + +**Module:** `intelmq.bots.experts.url.expert` + +**Parameters:** + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to false. + +**`skip_fields`** + +(optional, array of string) An array of field names that shouldn't be extracted from the URL. + +--- + +### Url2FQDN
+ +This bot is deprecated and will be removed in version 4.0. Use [URL Expert](#intelmq.bots.experts.url.expert) bot instead. + +This bot extracts the Host from the `source.url` and `destination.url` fields and writes it to `source.fqdn` or `destination.fqdn` if it is a hostname, or `source.ip` or `destination.ip` if it is an IP address. + +**Module:** `intelmq.bots.experts.url2fqdn.expert` + +**Parameters:** + +**`overwrite`** + +(optional, boolean) Whether to overwrite existing fields. Defaults to false. + +--- + +### uWhoisd
+ +[uWhoisd](https://github.com/Lookyloo/uwhoisd) is a universal Whois server that supports caching and stores whois +entries for historical purposes. + +The bot sends a request for `source.url`, `source.fqdn`, `source.ip` or `source.asn` to the configured uWhoisd instance and saves the retrieved whois entry: + +- If both `source.url` and `source.fqdn` are present, it will only do a request for `source.fqdn`, as the hostname of `source.url` should be the same as `source.fqdn`. The whois entry will be saved in `extra.whois.fqdn`. +- If `source.ip` is present, the whois entry will be saved in `extra.whois.ip`. +- If `source.asn` is present, the whois entry will be saved in `extra.whois.asn`. + +Events without `source.url`, `source.fqdn`, `source.ip`, or `source.asn`, are ignored. + +!!! note + Requesting a whois entry for a fully qualified domain name (FQDN) only works if the request only contains the domain. uWhoisd will automatically strip the subdomain part if it is present in the request. + +Example: `https://www.theguardian.co.uk` + +- TLD: `co.uk` (uWhoisd uses the [Mozilla public suffix list](https://publicsuffix.org/list/) as a reference) +- Domain: `theguardian.co.uk` +- Subdomain: `www` + +The whois request will be for `theguardian.co.uk` + +**Module:** `intelmq.bots.experts.uwhoisd.expert` + +**Parameters:** + +**`server`** + +(optional, string) Hostname of the uWhoisd server. Defaults to localhost. + +**`port`** + +(optional, integer) Port of the uWhoisd server. Defaults to 4243. + +--- + +### Wait
+ +Waits for some time or until a queue size is lower than a given number. + +Only one of the two modes is possible. If a queue name is given, the queue mode is active. If the sleep_time is a +number, sleep mode is active. Otherwise the dummy mode is active, the events are just passed without an additional +delay. + +Note that SIGHUPs and reloads interrupt the sleeping. + +**Module:** `intelmq.bots.experts.wait.expert` + +**Parameters:** + +**`queue_db`** + +(optional, integer) Database number of the database. Defaults to 2. + +**`queue_host`** + +(optional, string) Hostname of the database. Defaults to localhost. + +**`queue_name`** + +(optional, string) Name of the queue to be watched. This is not the name of a bot but the queue's name. Defaults to null. + +**`queue_password`** + +(optional, string) Password for the database. Defaults to null. + +**`queue_polling_interval`** + +(required, float) Interval to poll the list length in seconds. Defaults to ?. + +**`queue_port`** + +(optional, integer) Port of the database. Defaults to 6379. + +**`queue_size`** + +(optional, integer) Maximum size of the queue. Defaults to 0. + +**`sleep_time`** + +(optional, integer) Time to sleep before sending the event. Defaults to null. + +## Output Bots + +### AMQP Topic
+ +Sends the event to a specified topic of an AMQP server. + +Sends data to an AMQP Server See + for more details on amqp topic exchange. + +Requires the [pika python library](https://pypi.org/project/pika/). + +**Module:** `intelmq.bots.outputs.amqptopic.output` + +**Parameters:** + +**`connection_attempts`** + +(optional, integer) The number of connection attempts to defined server. Defaults to 3. + +**`connection_heartbeat`** + +(optional, integer) Heartbeat to server (in seconds). Defaults to 3600. + +**`connection_host`** + +(optional, string) Hostname of the AMQP server. Defaults to 127.0.0.1. + +**`connection_port`** + +(optional, integer) Port of the AMQP server. Defaults to 5672. + +**`connection_vhost`** + +(optional, string) Virtual host to connect, on an http(s) connection would be `http://IP/`. + +**`content_type`** + +(optional, string) Content type to deliver to AMQP server. Currently only supports `application/json`. + +**`delivery_mode`** + +(optional, integer) Allowed values: + +- `1` - Non-persistent delivery. +- `2` - Persistent delivery. Messages are delivered to 'durable' queues and will be saved to disk. + +**`exchange_durable`** + +(optional, boolean) When set to true, the exchange will survive broker restart, otherwise will be a transient exchange. + +**`exchange_name`** + +(optional, string) The name of the exchange to use. + +**`exchange_type`** + +(optional, string) Type of the exchange, e.g. `topic`, `fanout` etc. + +**`keep_raw_field`** + +(optional, boolean) Whether to keep the `raw` field or not. Defaults to false. + +**`password`** + +(optional, string) Password for authentication on your AMQP server. Leave empty if authentication is not required. + +**`require_confirmation`** + +(optional, boolean) If set to True, an exception will be raised if a confirmation error is received. + +**`routing_key`** + +(required, string) The routing key for your amqptopic. 
+ +**`single_key`** + +(optional, boolean) Only send the field instead of the full event (expecting a field name as string). Defaults to false. + +**`username`** + +(required, string) Username for authentication on your AMQP server. + +**`use_ssl`** + +(optional, boolean) Use ssl for the connection, make sure to also set the correct port, usually 5671. Defaults to false. + +**`message_hierarchical_output`** + +(optional, boolean) Convert the message to hierarchical JSON. Defaults to false. + +**`message_with_type`** + +(optional, boolean) Whether to include the type in the sent message. Defaults to false. + +**`message_jsondict_as_string`** + +(optional, boolean) Whether to convert JSON fields (`extra`) to string. Defaults to false. + +**Examples of usage** + +- Useful to send events to a RabbitMQ exchange topic to be further processed in other platforms. + +**Confirmation** + +If routing key or exchange name are invalid or non-existent, the message is accepted by the server but we receive no +confirmation. If parameter `require_confirmation` is True and no confirmation is received, an error is raised. + +**Common errors** + +*Unroutable messages / Undefined destination queue* + +The destination exchange and queue need to exist beforehand, with your preferred settings (e.g. durable, [lazy queue](https://www.rabbitmq.com/lazy-queues.html)). If the error message says that the message is "unroutable", the queue doesn't exist. + +--- + +### Blackhole
+ +This bot discards all incoming messages. + +**Module:** `intelmq.bots.outputs.blackhole.output` + +No additional parameters. + +--- + +### Bro File
+ +This bot outputs to BRO (zeek) file. + +File example: + +``` +#fields indicator indicator_type meta.desc meta.cif_confidence meta.source xxx.xxx.xxx.xxx Intel::ADDR phishing 100 MISP XXX www.testdomain.com Intel::DOMAIN apt 85 CERT +``` + +**Module:** `intelmq.bots.outputs.bro_file.output` + +No additional parameters. + +--- + +### CIFv3 API
+ +This bot outputs to a CIFv3 API instance and adds new indicator if not there already. + +By default, CIFv3 does an upsert check and will only insert entirely new indicators. Otherwise, +upsert matches will have their count increased by 1. By default, the CIF3 output bot will batch indicators +up to 500 at a time prior to doing a single bulk send. If the output bot doesn't receive a full 500 +indicators within 5 seconds of the first received indicator, it will send what it has so far. + +CIFv3 should be able to process indicators as fast as IntelMQ can +send them. + +**Module:** `intelmq.bots.outputs.cif3.output` + +**Parameters:** + +**`add_feed_provider_as_tag`** + +(required, boolean) Use `false` when in doubt. + +**`cif3_additional_tags`** + +(required, array of strings) An array of tags to set on submitted indicator(s). + +**`cif3_feed_confidence`** + +(required, float) Used when mapping a feed's confidence fails or if static confidence parameter is true. + +**`cif3_static_confidence`** + +(required, boolean) Whether to always use `cif3_feed_confidence` value as confidence rather than dynamically interpret feed value (use `false` when in doubt). + +**`cif3_token`** + +(required, string) Token key for accessing CIFv3 API. + +**`cif3_url`** + +(required, string) URL of the CIFv3 instance. + +**`fireball`** + +(required, integer) Used to batch events before submitting to a CIFv3 instance, use 0 to disable batch and send each event as received. Defaults to 500. + +**`http_verify_cert`** + +(optional, boolean) Verify the TLS certificate of the server. Defaults to true. + + + +--- + +### Elasticsearch
+ +This bot outputs to Elasticsearch. + +**Module:** `intelmq.bots.outputs.elasticsearch.output` + +- `lookup`: yes +- `public`: yes +- `cache`: no +- `description`: Output Bot that sends events to Elasticsearch + +Only ElasticSearch version 7 is supported. + +It is also possible to feed data into ElasticSearch using ELK-Stack via Redis and Logstash, see the ELK-Stack +documentation for more information. This method supports various different versions of ElasticSearch. + +**Parameters:** + +**`elastic_host`** + +(optional, string) Name/IP for the Elasticsearch server. Defaults to 127.0.0.1. + +**`elastic_port`** + +(optional, integer) Port for the Elasticsearch server. Defaults to 9200. + +**`elastic_index`** + +(optional, string) Index for the Elasticsearch output. Defaults to intelmq. + +**`rotate_index`** + +(optional, string) Allowed values: `never`, `daily`, `weekly`, `monthly` or `yearly`. If set, will index events using the date information associated with the event. Defaults to never. + +Using 'intelmq' as the `elastic_index`, the following are examples of the generated index names: + +``` +'never' --> intelmq +'daily' --> intelmq-2018-02-02 +'weekly' --> intelmq-2018-42 +'monthly' --> intelmq-2018-02 +'yearly' --> intelmq-2018 +``` + +**`http_username`** + +(optional, string) HTTP basic authentication username. + +**`http_password`** + +(optional, string) HTTP basic authentication password. + +**`use_ssl`** + +(optional, boolean) Whether to use SSL/TLS when connecting to Elasticsearch. Defaults to false. + +**`http_verify_cert`** + +(optional, boolean) Whether to require verification of the server's certificate. Defaults to false. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. + +**`ssl_show_warnings`** + +(optional, boolean) Whether to show warnings if the server's certificate cannot be verified. Defaults to true. 
+ +**`replacement_char`** + +(optional, string) If set, dots ('.') in field names will be replaced with this character prior to indexing. This is for backward compatibility with ES 2.X. Defaults to null. Recommended for Elasticsearch 2.X: `_` + +**`flatten_fields`** + +(optional, array of strings) In ES, some query and aggregations work better if the fields are flat and not JSON. Here you can provide a list of fields to convert. Defaults to `['extra']`. + +Can be a list of strings (fieldnames) or a string with field names separated by a comma (,). eg `extra,field2` or `['extra', 'field2']`. + +See `contrib/elasticsearch/elasticmapper` for a utility for creating Elasticsearch mappings and templates. + +If using `rotate_index`, the resulting index name will be of the form `elastic_index`-`event date`. To query all intelmq +indices at once, use an alias (), or a multi-index query. + +The data in ES can be retrieved with the HTTP-Interface: + +```bash + curl -XGET 'http://localhost:9200/intelmq/events/_search?pretty=True' +``` + +--- + +### File
+ +This bot outputs messages (reports or events) to a file. + +Multithreading is disabled for this bot, as this would lead to corrupted files. + +**Module:** `intelmq.bots.outputs.file.output` + +**Parameters:** + +**`encoding_errors_mode`** + +(optional, string) See <https://docs.python.org/3/library/functions.html#open> for more details and options. For example with `backslashreplace` all characters which cannot be properly encoded will be written escaped with backslashes. Defaults to `strict`. + +**`file`** + +(optional, string) Path to the output file. Missing directories will be created if possible with the mode 755. Defaults to `/opt/intelmq/var/lib/bots/file-output/events.txt`. + +**`format_filename`** + +(optional, boolean) Whether the file name should be formatted. Defaults to false. + +Uses Python formatted strings. See: <https://docs.python.org/3/library/string.html#formatstrings> + +Example: + +- The filename `.../{event[source.abuse_contact]}.txt` will be (for example) `.../abuse@example.com.txt`. +- `.../{event[time.source]:%Y-%m-%d}` results in the date of the event used as filename. + +If the field used in the format string is not defined, `None` will be used as fallback. + +**`hierarchical_output`** + +(optional, boolean) Whether the resulting dictionary should be hierarchical (field names split by a dot). Defaults to false. + +**`single_key`** + +(optional, string) Output only a single specified key. In case of `raw` key the data is base64 decoded. Defaults to null (output the whole message). + +--- + +### Files
+ +This bot outputs each message to a separate file. + +**Module:** `intelmq.bots.outputs.files.output` + +**Parameters:** + +**`dir`** + +(optional, string) Path to the output directory. Defaults to `/opt/intelmq/var/lib/bots/files-output/incoming`. + +**`tmp`** + +(optional, string) Temporary directory to use (must reside on the same filesystem as `dir`). Defaults to `/opt/intelmq/var/lib/bots/files-output/tmp`. + +**`suffix`** + +(optional, string) Extension of created files. Defaults to `.json`. + +**`hierarchical_output`** + +(optional, boolean) Whether the resulting dictionary should be hierarchical (field names split by a dot). Defaults to false. + +**`single_key`** + +(optional, string) Output only a single specified key. In case of `raw` key the data is base64 decoded. Defaults to null (output the whole message). + +--- + +### McAfee Enterprise Security Manager
+ +This bot outputs messages to McAfee Enterprise Security Manager watchlist. + +**Module:** `intelmq.bots.outputs.mcafee.output_esm_ip` + +**Parameters:** + +- **Feed parameters** (see above) + +**`esm_ip`** + +(optional, string) Hostname of the ESM server. Defaults to 1.2.3.4. + +**`esm_user`** + +(optional, string) Username of user entitled to write to watchlist. Defaults to NGCP. + +**`esm_pw`** + +(required, string) Password of user entitled to write to watchlist. + +**`esm_watchlist`** + +(required, string) Name of the watchlist to write to. + +**`field`** + +(optional, string) Name of the IntelMQ field to be written to ESM. Defaults to source.ip. + +--- + +### MISP Feed
+ +Create a directory layout in the MISP Feed format. + +The PyMISP library >= 2.4.119.1 is required, see +[REQUIREMENTS.txt](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/REQUIREMENTS.txt). + +**Module:** `intelmq.bots.outputs.misp.output_feed` + +**Parameters:** + +- **Feed parameters** (see above) + +**`misp_org_name`** + +() Org name which creates the event, string + +**`misp_org_uuid`** + +() Org UUID which creates the event, string + +**`output_dir`** + +() Output directory path, e.g. +`/opt/intelmq/var/lib/bots/mispfeed-output`. Will be created if it does not exist and creation is possible. + +**`interval_event`** + +() The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 +hour", string. + +**Usage in MISP** + +Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. +See [the MISP documentation on Feeds](https://www.circl.lu/doc/misp/managing-feeds) +for more information. + +--- + +### MISP API
+ +**Module:** `intelmq.bots.outputs.misp.output_api` + +Connect to a MISP instance and add event as MISPObject if not there already. + +The PyMISP library >= 2.4.120 is required, see +[REQUIREMENTS.txt](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/REQUIREMENTS.txt). + +**Parameters:** + +- **Feed parameters** (see above) + +**`add_feed_provider_as_tag`** + +() boolean (use [true] when in doubt) + +**`add_feed_name_as_tag`** + +() boolean (use [true] when in doubt) + +**`misp_additional_correlation_fields`** + +() list of fields for which the correlation flags will be enabled (in addition to those which are in significant_fields) + +**`misp_additional_tags`** + +() list of tags to set not be searched for when looking for duplicates + +**`misp_key`** + +() string, API key for accessing MISP + +**`misp_publish`** + +() boolean, if a new MISP event should be set to "publish". + +Expert setting as MISP may really make it "public"! (Use +[false] when in doubt.) + +**`misp_tag_for_bot`** + +() string, used to mark MISP events + +**`misp_to_ids_fields`** + +() list of fields for which the +[to_ids] flags will be set + +**`misp_url`** + +() string, URL of the MISP server + +**`significant_fields`** + +() list of intelmq field names + +The `significant_fields` values will be searched for in all MISP attribute values and if all values are found in the +same MISP event, no new MISP event will be created. Instead if the existing MISP events have the same feed.provider and +match closely, their timestamp will be updated. + +If a new MISP event is inserted the `significant_fields` and the `misp_additional_correlation_fields` +will be the attributes where correlation is enabled. + +Make sure to build the IntelMQ Botnet in a way the rate of incoming events is what MISP can handle, as IntelMQ can +process many more events faster than MISP (which is by design as MISP is for manual handling). 
Also remove the fields of +the IntelMQ events with an expert bot that you do not want to be inserted into MISP. + +(More details can be found in the docstring of +[output_api.py](https://github.com/certtools/intelmq/blob/master/intelmq/bots/outputs/misp/output_api.py).) + +--- + +### MongoDB
+ +MongoDB is the bot responsible for sending events to a MongoDB database. + +Saves events in a MongoDB either as hierarchical structure or flat with full key names. `time.observation` +and `time.source` are saved as datetime objects, not as ISO formatted strings. + +**Module:** `intelmq.bots.outputs.mongodb.output` + +**Requirements** + +```bash +pip3 install 'pymongo>=2.7.1' +``` + +The bot has been tested with pymongo versions 2.7.1, 3.4 and 3.10.1 +(server versions 2.6.10 and 3.6.8). + +**Parameters:** + +**`host`** + +(optional, string) Hostname of the MongoDB server. Defaults to localhost. + +**`port`** + +(optional, integer) Port of the MongoDB server. Defaults to 27017. + +**`database`** + +(required, string) Name of the MongoDB database to use. + +**`db_user`** + +(optional, string) User that should be used if authentication is required. + +**`db_pass`** + +(optional, string) Password. + +**`collection`** + +(required, string) Name of the MongoDB collection to use. + +**`hierarchical_output`** + +(optional, boolean) MongoDB does not allow saving keys with dots, we split the dictionary in sub-dictionaries. Defaults to true. + +**`replacement_char`** + +(optional, string) Replacement character for replacing the dots in key names if hierarchical output is not used. Defaults to `_`. + +--- + +### Redis
+ +This bot outputs events to a remote Redis server/queue. + +**Examples of usage** + +- Can be used to send events to be processed in another system. E.g.: send events to Logstash. +- In a multi tenant installation can be used to send events to external/remote IntelMQ instance. Any expert bot queue + can receive the events. +- In a complex configuration can be used to create logical sets in IntelMQ-Manager. + +**Module:** `intelmq.bots.outputs.redis.output` + +**Parameters:** + +**`redis_server_ip`** + +(optional, string) Hostname of the Redis server. Defaults to 127.0.0.1. + +**`redis_server_port`** + +(optional, integer) Port of the Redis server. Defaults to 6379. + +**`redis_db`** + +(optional, integer) Redis database number. Defaults to 2. + +**`redis_password`** + +(optional, string) Redis server password. Defaults to null. + +**`redis_queue`** + +(required, string) Redis queue name (such as `remote-server-queue`). + +**`redis_timeout`** + +(optional, integer) Connection timeout, in milliseconds. Defaults to 5000. + +**`hierarchical_output`** + +(optional, boolean) Whether the resulting dictionary should be hierarchical (field names split by a dot). Defaults to false. + +**`with_type`** + +(optional, boolean) Whether to include `__type` field. Defaults to true. + +--- + +### Request Tracker
+ +Output Bot that creates Request Tracker tickets from events. + +**Module:** `intelmq.bots.outputs.rt.output` + +**Description** + +The bot creates tickets in Request Tracker and uses event fields for the ticket body text. The bot follows the workflow +of the RTIR: + +- create ticket in Incidents queue (or any other queue) +- all event fields are included in the ticket body, +- event attributes are assigned to tickets' CFs according to the attribute mapping, +- ticket taxonomy can be assigned according to the CF mapping. If you use taxonomy different + from [ENISA RSIT](https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force), consider using some + extra attribute field and do value mapping with modify or sieve bot, +- create linked ticket in Investigations queue, if these conditions are met +- if first ticket destination was Incidents queue, +- if there is source.abuse_contact is specified, +- if description text is specified in the field appointed by configuration, +- RT/RTIR supposed to do relevant notifications by script working on condition "On Create", +- configuration option investigation_fields specifies which event fields has to be included in the investigation, +- Resolve Incident ticket, according to configuration (Investigation ticket status should depend on RT script + configuration), + +Take extra caution not to flood your ticketing system with enormous amount of tickets. Add extra filtering for that to +pass only critical events to the RT, and/or deduplicating events. + +**Parameters:** + +**`rt_uri`** + +() + +**`rt_user`** + +() + +**`rt_password`** + +() + +**`verify_cert`** + +() RT API endpoint connection details, string. + +**`queue`** + +() ticket destination queue. If set to 'Incidents', 'Investigations' ticket will be created if create_investigation is set to true, string. + +**`CF_mapping`** + +(optional, object) Mapping event fields to ticket CFs. 
Defaults to: + +```yaml +classification.taxonomy: Classification +classification.type: Incident Type +event_description.text: Description +extra.incident.importance: Importance +extra.incident.severity: Incident Severity +extra.organization.name: Customer +source.ip: IP +``` + +**`final_status`** + +(optional, string) The final status for the created ticket. Defaults to resolved. The linked Investigation ticket will be resolved automatically by RTIR scripts. + +**`create_investigation`** + +(optional, boolean) Whether an Investigation ticket should be created (in case of RTIR workflow). Defaults to false. + +**`investigation_fields`** + +(optional, string) Comma-separated string of attributes to include in an Investigation ticket. Defaults to `time.source,source.ip,source.port,source.fqdn,source.url,classification.taxonomy,classification.type,classification.identifier,event_description.url,event_description.text,malware.name,protocol.application,protocol.transport`. + +**`description_attr`** + +(optional, string) Event field to be used as a text message being sent to the recipient. If it is not specified or not found in the event, the Investigation ticket is not going to be created. Defaults to `event_decription.text`. + +--- + +### REST API
+ +REST API is the bot responsible for sending events to a REST API listener through POST. + +**Module:** `intelmq.bots.outputs.restapi.output` + +**Parameters:** + +**`host`** + +(required, string) Destination URL of the POST request. + +**`auth_type`** + +(required, string) Allowed values: `http_basic_auth` or `http_header`. Type of authentication to use. + +**`auth_token`** + +(required, string) Username or HTTP header key. + +**`auth_token_name`** + +(required, string) Password or HTTP header value. + +**`hierarchical_output`** + +(optional, boolean) Whether the resulting dictionary should be hierarchical (field names split by a dot). Defaults to false. + +**`use_json`** + +(optional, boolean) Whether to use JSON. Defaults to true. + +--- + +### RPZ File
+ +This bot outputs events into DNS RPZ blocklist file used for "DNS firewall". + +The prime motivation for creating this feature was to protect users from badness on the Internet related to +known-malicious global identifiers such as host names, domain names, IP addresses, or nameservers. More +information: + +Example: +``` +$TTL 3600 @ SOA rpz.yourdomain.eu. hostmaster.rpz.yourdomain.eu. 2105260601 60 60 432000 60 NS localhost. ; ; +yourdomain.eu. CERT.XX Response Policy Zones (RPZ) ; Last updated: 2021-05-26 06:01:41 (UTC) ; ; Terms Of +Use: https://rpz.yourdomain.eu ; For questions please contact rpz [at] yourdomain.eu ; *.maliciousdomain.com CNAME +rpz.yourdomain.eu. *.secondmaliciousdomain.com CNAME rpz.yourdomain.eu. +``` + +**Module:** `intelmq.bots.outputs.rpz_file.output` + +**Parameters:** + +**`cname`** + +(optional, string) example rpz.yourdomain.eu + +**`organization_name`** + +(optional, string) Your organisation name + +**`rpz_domain`** + +(optional, string) Information website about RPZ + +**`hostmaster_rpz_domain`** + +() Technical website + +**`rpz_email`** + +() Contact email + +**`ttl`** + +() Time to live + +**`ncachttl`** + +() DNS negative cache + +**`serial`** + +() Time stamp or another numbering + +**`refresh`** + +() Refresh time + +**`retry`** + +() Retry time + +**`expire`** + +() Expiration time + +**`test_domain`** + +() For test domain, it's added in first rpz file (after header) + +--- + +### SMTP
+ +Sends a MIME Multipart message containing the text and the event as CSV for every single event. + +**Module:** `intelmq.bots.outputs.smtp.output` + +**Parameters:** + +**`fieldnames`** + +(optional, string/array of strings) Array of field names (or comma-separated list) to be included in the email. If empty, no attachment is sent - this can be useful if the actual data is already in the body (parameter `text`) or the `subject`. + +**`mail_from`** + +(optional, string) Sender's e-mail address. Defaults to `cert@localhost`. + +**`mail_to`** + +(required, string) Comma-separated string of recipient email addresses. Supports formatting. + +**`smtp_host`** + +(optional, string) Hostname of the SMTP server. Defaults to `localhost`. + +**`smtp_password`** + +(optional, string) Password for authentication to your SMTP server. Defaults to `null`. + +**`smtp_port`** + +(optional, integer) Port of the SMTP server. Defaults to 25. + +**`smtp_username`** + +(optional, string) Username for authentication to your SMTP server. Defaults to `null`. + +**`fail_on_errors`** + +(optional, boolean) Whether any error should cause the bot to fail (raise an exception) or otherwise rollback. If false, the bot eventually waits and retries (e.g. re-connects) to solve the issue. If true, the bot raises an exception and - depending on the IntelMQ error handling configuration - stops. Defaults to false. + +**`ssl`** + +(optional, boolean) Defaults to false. + +**`starttls`** + +(optional, boolean) Defaults to true. + +**`subject`** + +(optional, string) Subject of the e-mail message. Supports formatting. Defaults to `Incident in your AS {ev[source.asn]}`. + +**`text`** + +(optional, string) Body of the e-mail message. Supports formatting. Defaults to +``` +Dear network owner, + +We have been informed that the following device might have security problems.
+ +Your localhost CERT +``` + +For several strings you can use values from the event using the [standard Python string format syntax](https://docs.python.org/3/library/string.html#format-string-syntax). Access the event's values with `{ev[source.ip]}` and similar. Any non-existent field will result in `None`. For example, to set the recipient(s) to the value given in the event's `source.abuse_contact` field, use this as `mail_to` parameter: `{ev[source.abuse_contact]}` + +Authentication is optional. If both username and password are given, these mechanisms are tried: CRAM-MD5, PLAIN, and LOGIN. + +Client certificates are not supported. If `http_verify_cert` is true, TLS certificates are checked. + +--- + +### SQL
+ +SQL is the bot responsible to send events to a PostgreSQL, SQLite, or MSSQL Database. + +!!! note + When activating autocommit, transactions are not used. See: + +**Module:** `intelmq.bots.outputs.sql.output` + +**Parameters:** + +The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Check the [libpq parameter documentation](https://www.postgresql.org/docs/current/static/images/libpq-connect.html#LIBPQ-PARAMKEYWORDS) for the versions you are using. + +**`autocommit`** + +(optional, boolean) [Psycopg's autocommit mode](http://initd.org/psycopg/docs/connection.html?#connection.autocommit). Defaults to true. + +**`engine`** + +(required, string) Allowed values are `postgresql`, `sqlite`, or `mssql`. + +**`database`** + +(optional, string) Database name or SQLite database file. Defaults to intelmq-events. + +**`host`** + +(optional, string) Hostname of the database server. Defaults to localhost. + +**`jsondict_as_string`** + +(optional, boolean) Whether to save JSON fields as JSON string. Defaults to true. + +**`message_jsondict_as_string`** + +(optional, boolean) Whether to save JSON fields as JSON string. Defaults to true. + +**`port`** + +(optional, integer) Port of the database server. Defaults to 5432. + +**`user`** + +(optional, string) Username for connecting to the database system. Defaults to intelmq. + +**`password`** + +(optional, string) Password for connecting to the database system. Defaults to null. + +**`sslmode`** + +(optional, string) Database sslmode, Allowed values: `disable`, `allow`, `prefer`, `require`, `verify-ca` or `verify-full`. See: . Defaults to `require`. + +**`table`** + +(optional, string) Name of the database table to use. Defaults to events. + +**`fields`** + +(optional, array) Array of event fields to output to the database. Defaults to null (use all fields). + +**`reconnect_delay`** + +(optional, integer) Number of seconds to wait before reconnecting in case of an error. Defaults to 0. 
+ +**`fail_on_errors`** + +(optional, boolean) Whether an error should cause the bot to fail (raise an exception) or otherwise rollback. If false, the bot eventually waits and re-try (e.g. re-connect) etc. to solve the issue. If true, the bot raises an exception and - depending on the IntelMQ error handling configuration - stops. Defaults to false. + + +### STOMP + +This bot pushes data to any STOMP stream. STOMP stands for Streaming Text Oriented Messaging Protocol. See: + +**Module:** `intelmq.bots.outputs.stomp.output` + +**Requirements** + +Install the stomp.py library, e.g. [apt install python3-stomp.py] or [pip install stomp.py]. + +You need a CA certificate, client certificate and key file from the organization / server you are connecting to. Also +you will need a so called "exchange point". + +**Parameters:** + +**`exchange`** + +(optional, string) The exchange to push to. Defaults to `/exchange/_push`. + +**`username`** + +(optional, string) Username to use. + +**`password`** + +(optional, string) Password to use. + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. + +**`auth_by_ssl_client_certificate`** + +(optional, boolean) Whether to authenticate using TLS certificate. (Set to false for new *n6* auth.) Defaults to true. + +**`heartbeat`** + +(optional, integer) Defaults to 60000. + +**`message_hierarchical_output`** + +(optional, boolean) Defaults to false. + +**`message_jsondict_as_string`** + +(optional, boolean) Defaults to false. + +**`message_with_type`** + +(optional, boolean) Defaults to false. + +**`port`** + +(optional, integer) Defaults to 61614. + +**`server`** + +(optional, string) Hostname of the STOMP server. + +**`single_key`** + +(optional, string) Output only a single specified key. In case of `raw` key the data is base64 decoded. Defaults to null (output the whole message). + +**`ssl_ca_certificate`** + +(optional, string) Path to trusted CA certificate. 
+ +**`ssl_client_certificate`** + +(optional, string) Path to client certificate to use for TLS connections. + +**`ssl_client_certificate_key`** + +(optional, string) Path to client private key to use for TLS connections. + +--- + +### TCP
+ +TCP is the bot responsible for sending events to a TCP port (Splunk, another IntelMQ, etc.). + +Multithreading is disabled for this bot. + +**Sending to an IntelMQ TCP collector** + +If you intend to link two IntelMQ instances via TCP, set the parameter `counterpart_is_intelmq` to true. The bot then awaits an "Ok" message to be received after each message is sent. The TCP collector just sends "Ok" after every message it gets. + +**Module:** `intelmq.bots.outputs.tcp.output` + +**Parameters:** + +**`counterpart_is_intelmq`** + +(optional, boolean) Whether the receiver is an IntelMQ TCP collector bot. Defaults to true. + +**`ip`** + +(required, string) Hostname of the destination server. + +**`hierarchical_output`** + +(optional, boolean) True for a nested JSON, false for a flat JSON (when sending to a TCP collector). + +**`port`** + +(required, integer) Port of destination server. + +**`separator`** + +(optional, string) Separator of messages, e.g. `"\n"`. When sending to a TCP collector, this parameter shouldn't be present. In that case, the output waits until every message is acknowledged by the "Ok" message the TCP collector bot implements. + +--- + +### Templated SMTP
+ +Sends a MIME Multipart message built from an event and static text using Jinja2 templates. + +See the Jinja2 documentation at . + +Authentication is attempted only if both username and password are specified. + +Templates are in Jinja2 format with the event provided in the variable `event`. E.g.: + +```yaml +mail_to: "{{ event['source.abuse_contact'] }}" +``` + +As an extension to the Jinja2 environment, the function `from_json` is available for parsing JSON strings into Python +structures. This is useful if you want to handle complicated structures in the `output` field of an event. In that case, +you would start your template with a line like: + +``` +{%- set output = from_json(event['output']) %} +``` + +and can then use `output` as a regular Python object in the rest of the template. + +Attachments are templated strings, especially useful for sending structured data. E.g. to send a JSON document including +`malware.name` and all other fields starting with `source.`: + +```yaml +attachments: + - content-type: application/json + text: | + { + "malware": "{{ event['malware.name'] }}", + {%- set comma = joiner(", ") %} + {%- for key in event %} + {%- if key.startswith('source.') %} + {{ comma() }}"{{ key }}": "{{ event[key] }}" + {%- endif %} + {%- endfor %} + } + name: report.json +``` + +You are responsible for making sure that the text produced by the template is valid according to the content-type. 
+ +If you are migrating from the SMTP output bot that produced CSV format attachments, use the following configuration to +produce a matching format: + +```yaml +attachments: + - content-type: text/csv + text: | + {%- set fields = ["classification.taxonomy", "classification.type", "classification.identifier", "source.ip","source.asn", "source.port"] %} + {%- set sep = joiner(";") %} + {%- for field in fields %}{{ sep() }}{{ field }}{%- endfor %} + {% set sep = joiner(";") %} + {%- for field in fields %}{{ sep() }}{{ event[field] }}{%- endfor %} + name: event.csv +``` + +**Module:** `intelmq.bots.outputs.templated_smtp.output` + +**Requirements** + +Install the required `jinja2` library: + +```bash +pip3 install -r intelmq/bots/collectors/templated_smtp/REQUIREMENTS.txt +``` + +**Parameters:** + +**`attachments`** + +(required, array of objects) Each object must have `content-type`, `text` (attachment text) and `name` (filename of the attachment) fields. + +```yaml +- content-type: simple string/jinja template + text: simple string/jinja template + name: simple string/jinja template +``` + +**`body`** + +(optional, string) Simple string or Jinja template. The default body template prints every field in the event except `raw`, in undefined order, one field per line, as "field: value". + +**`mail_from`** + +(optional, string) Simple string or Jinja template. Sender's address. + +**`mail_to`** + +(required, string) Simple string or Jinja template. Comma-separated array of recipient addresses. + +**`smtp_host`** + +(optional, string) Hostname of the SMTP server. Defaults to localhost. + +**`smtp_password`** + +(optional, string) Password (if any) for authenticated SMTP. Defaults to null. + +**`smtp_port`** + +(optional, integer) TCP port to connect to. Defaults to 25. + +**`smtp_username`** + +(optional, string) Username (if any) for authenticated SMTP. Defaults to null. + +**`tls`** + +(optional, boolean) Whether to use use SMTPS. 
When true, also set smtp_port to the SMTPS port. Defaults to false. + +**`starttls`** + +(optional, boolean) Whether to use opportunistic STARTTLS over SMTP. Defaults to true. + +**`subject`** + +(optional, string) Simple string or Jinja template. E-mail subject line. Defaults to "IntelMQ event". + +**`verify_cert`** + +(optional, boolean) Whether to verify the server certificate in STARTTLS or SMTPS. Defaults to true. + +--- + +### Touch
+ +Touches a file for every event received. Does not output the event! + +**Module:** `intelmq.bots.outputs.touch.output` + +**Parameters:** + +**`path`** + +(optional, string) Path to the file to touch. + +--- + +### UDP
+ +Output Bot that sends events to a remote UDP server. + +Multihreading is disabled for this bot. + +**Module:** `intelmq.bots.outputs.udp.output` + +**Parameters:** + +**`format`** + +(optional, string) Allowed values: `json` or `delimited`. The JSON format outputs the event 'as-is'. Delimited will deconstruct the event and print each field:value separated by the field delimit. See examples below. + +**`field_delimiter`** + +(optional, string) If the `format` is `delimited` then this parameter is used as a delimiter between fields. Defaults to `|`. + +**`header`** + +(required, string) Header text to be sent in the UDP datagram. + +**`keep_raw_field`** + +(optional, boolean) Whether to keep `raw` field. Defaults to false. + +**`udp_host`** + +(optional, string) Hostname of the destination server. + +**`udp_port`** + +(required, integer) Port of the destination server. + +**Examples of usage** + +Consider the following event: + +```json +{ + "raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", + "source.asn": 8972, + "source.ip": "85.25.160.114", + "source.url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", + "source.reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com", + "classification.type": "malware-distribution", + "event_description.text": "Angler EK", + "feed.url": "http://www.malwaredomainlist.com/updatescsv.php", + "feed.name": "Malware Domain List", + "feed.accuracy": 100, + "time.observation": "2016-04-29T10:59:34+00:00", + "time.source": "2016-04-25T11:39:00+00:00" +} +``` + +With the following parameters: + +```yaml +format: json +header: header example +keep_raw_field: true +ip: 127.0.0.1 +port: 514 +``` + +Resulting line in syslog: + +``` +Apr 29 11:01:29 header example {"raw": 
"MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware-distribution"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} +``` + +With the following Parameters: + +```yaml +field_delimiter: | +format: delimited +header: IntelMQ-event +keep_raw_field: false +ip: 127.0.0.1 +port: 514 +``` + + +Resulting line in syslog: + +``` +Apr 29 11:17:47 localhost IntelMQ-event|source.ip: 85.25.160.114|time.source:2016-04-25T11:39:00+00:00|feed.url:http://www.malwaredomainlist.com/updatescsv.php|time.observation:2016-04-29T11:17:44+00:00|source.reverse_dns:static-ip-85-25-160-114.inaddr.ip-pool.com|feed.name:Malware Domain List|event_description.text:Angler EK|source.url:http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/|source.asn:8972|classification.type:malware-distribution|feed.accuracy:100.0 +``` diff --git a/docs/user/bots.rst b/docs/user/bots.rst deleted file mode 100644 index 2fbe27df8..000000000 --- a/docs/user/bots.rst +++ /dev/null @@ -1,4545 +0,0 @@ -.. - SPDX-FileCopyrightText: 2015-2022 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -############## -Bots inventory -############## - -.. contents:: - -*************** -General remarks -*************** - -By default all of the bots are started when you start the whole botnet, however there is a possibility to -*disable* a bot. 
This means that the bot will not start every time you start the botnet, but you can start -and stop the bot if you specify the bot explicitly. To disable a bot, add the following to your -``runtime.yaml``: `"enabled": false`. Be aware that this is **not** a normal parameter (like the others -described in this file). It is set outside of the `parameters` object in ``runtime.yaml``. Check out -:doc:`configuration-management` for an example. - -There are two different types of parameters: The initialization parameters are need to start the bot. The runtime parameters are needed by the bot itself during runtime. - -The initialization parameters are in the first level, the runtime parameters live in the `parameters` sub-dictionary: - -.. code-block:: yaml - - bot-id: - parameters: - runtime parameters... - initialization parameters... - -For example: - -.. code-block:: yaml - - abusech-feodo-domains-collector: - parameters: - provider: Abuse.ch - name: Abuse.ch Feodo Domains - http_url: http://example.org/feodo-domains.txt - name: Generic URL Fetcher - group: Collector - module: intelmq.bots.collectors.http.collector_http - description: collect report messages from remote hosts using http protocol - enabled: true - run_mode: scheduled - -This configuration resides in the file `runtime.yaml` in your IntelMQ's configuration directory for each configured bot. - -************************* -Initialization parameters -************************* - -* `name` and `description`: The name and description of the bot. See also ``intelmqctl list --configured bots``. -* `group`: Can be `"Collector"`, `"Parser"`, `"Expert"` or `"Output"`. Only used for visualization by other tools. -* `module`: The executable (should be in `$PATH`) which will be started. -* `enabled`: If the parameter is set to `true` (which is NOT the default value if it is missing as a protection) the bot will start when the botnet is started (`intelmqctl start`). 
If the parameter was set to `false`, the Bot will not be started by `intelmqctl start`, however you can run the bot independently using `intelmqctl start `. Check :doc:`configuration-management` for more details. -* `run_mode`: There are two run modes, "continuous" (default run mode) or "scheduled". In the first case, the bot will be running forever until stopped or exits because of errors (depending on configuration). In the latter case, the bot will stop after one successful run. This is especially useful when scheduling bots via cron or systemd. Default is `continuous`. Check :doc:`configuration-management` for more details. - -.. _common-parameters: - -************************* -Common parameters -************************* - -Feed parameters -^^^^^^^^^^^^^^^ - -Common configuration options for all collectors. - -* `name`: Name for the feed (`feed.name`). In IntelMQ versions smaller than 2.2 the parameter name `feed` is also supported. -* `accuracy`: Accuracy for the data of the feed (`feed.accuracy`). -* `code`: Code for the feed (`feed.code`). -* `documentation`: Link to documentation for the feed (`feed.documentation`). -* `provider`: Name of the provider of the feed (`feed.provider`). -* `rate_limit`: time interval (in seconds) between fetching data if applicable. - -HTTP parameters -^^^^^^^^^^^^^^^ - -Common URL fetching parameters used in multiple bots. - -* `http_timeout_sec`: A tuple of floats or only one float describing the timeout of the HTTP connection. Can be a tuple of two floats (read and connect timeout) or just one float (applies for both timeouts). The default is 30 seconds in default.conf, if not given no timeout is used. See also https://requests.readthedocs.io/en/master/user/advanced/#timeouts -* `http_timeout_max_tries`: An integer depicting how often a connection is retried, when a timeout occurred. Defaults to 3 in default.conf. -* `http_username`: username for basic authentication. -* `http_password`: password for basic authentication. 
-* `http_proxy`: proxy to use for HTTP -* `https_proxy`: proxy to use for HTTPS -* `http_user_agent`: user agent to use for the request. -* `http_verify_cert`: path to trusted CA bundle or directory, `false` to ignore verifying SSL certificates, or `true` (default) to verify SSL certificates -* `ssl_client_certificate`: SSL client certificate to use. -* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Only used by some bots. -* `http_header`: HTTP request headers - -Cache parameters -^^^^^^^^^^^^^^^^ - -Common Redis cache parameters used in multiple bots (mainly lookup experts): - -* `redis_cache_host`: Hostname of the Redis database. -* `redis_cache_port`: Port of the Redis database. -* `redis_cache_db`: Database number. -* `redis_cache_ttl`: TTL used for caching. -* `redis_cache_password`: Optional password for the Redis database (default: none). - -.. _collector bots: - -************** -Collector Bots -************** - -Multithreading is disabled for all Collectors, as this would lead to duplicated data. - -.. _intelmq.bots.collectors.amqp.collector_amqp: - -AMQP -^^^^ - -Requires the `pika python library `_, minimum version 1.0.0. - -**Information** - -* `name`: intelmq.bots.collectors.amqp.collector_amqp -* `lookup`: yes -* `public`: yes -* `cache (redis db)`: none -* `description`: collect data from (remote) AMQP servers, for both IntelMQ as well as external data - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `connection_attempts`: The number of connection attempts to defined server, defaults to 3 -* `connection_heartbeat`: Heartbeat to server, in seconds, defaults to 3600 -* `connection_host`: Name/IP for the AMQP server, defaults to 127.0.0.1 -* `connection_port`: Port for the AMQP server, defaults to 5672 -* `connection_vhost`: Virtual host to connect, on an HTTP(S) connection would be http:/IP/ -* `expect_intelmq_message`: Boolean, if the data is from IntelMQ or not. Default: `false`.
If true, then the data can be any Report or Event and will be passed to the next bot as is. Otherwise a new report is created with the raw data. -* `password`: Password for authentication on your AMQP server -* `queue_name`: The name of the queue to fetch data from -* `username`: Username for authentication on your AMQP server -* `use_ssl`: Use ssl for the connection, make sure to also set the correct port, usually 5671 (`true`/`false`) - -Currently only fetching from a queue is supported; this can be extended in the future. Messages are acknowledged at AMQP after they are sent to the pipeline. - - -.. _intelmq.bots.collectors.api.collector: - -API -^^^ - -**Information** - -* `name:` intelmq.bots.collectors.api.collector -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect report messages from an HTTP or Socket REST API - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `port`: Optional, integer. Default: 5000. The local port, the API will be available at. -* `use_socket`: Optional, boolean. Default: false. If true, the socket will be opened at the location given with `socket_path`. -* `socket_path`: Optional, string. Default: ``/tmp/imq_api_default_socket`` - -The API is available at `/intelmq/push` if the HTTP interface is used (default). -The `tornado` library is required. - - -.. _intelmq.bots.collectors.http.collector_http: - -Generic URL Fetcher -^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.http.collector_http -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect report messages from remote hosts using HTTP protocol - -**Configuration Parameters** - -* **Feed parameters** (see above) -* **HTTP parameters** (see above) -* `extract_files`: Optional, boolean or list of strings. If it is true, the retrieved (compressed) file or archive will be uncompressed/unpacked and the files are extracted.
If the parameter is a list for strings, only the files matching the filenames are extracted. Extraction handles gzipped files and both compressed and uncompressed tar-archives as well as zip archives. -* `http_url`: location of information resource (e.g. https://feodotracker.abuse.ch/blocklist/?download=domainblocklist) -* `http_url_formatting`: (`bool|JSON`, default: `false`) If `true`, `{time[format]}` will be replaced by the current time in local timezone formatted by the given format. E.g. if the URL is `http://localhost/{time[%Y]}`, then the resulting URL is `http://localhost/2019` for the year 2019. (Python's `Format Specification Mini-Language `_ is used for this.). You may use a `JSON` specifying `time-delta `_ parameters to shift the current time accordingly. For example use `{"days": -1}` for the yesterday's date; the URL `http://localhost/{time[%Y-%m-%d]}` will get translated to "http://localhost/2018-12-31" for the 1st Jan of 2019. -* `verify_pgp_signatures`: `bool`, defaults to `false`. If `true`, signature file is downloaded and report file is checked. On error (missing signature, mismatch, ...), the error is logged and the report is not processed. Public key has to be imported in local keyring. This requires the `python-gnupg` library. -* `signature_url`: Location of signature file for downloaded content. For path `http://localhost/data/latest.json` this may be for example `http://localhost/data/latest.asc`. -* `signature_url_formatting`: (`bool|JSON`, default: `false`) The same as `http_url_formatting`, only for the signature file. -* `gpg_keyring`: `string` or `none` (default). If specified, the string represents path to keyring file, otherwise the PGP keyring file for current `intelmq` user is used. - -Zipped files are automatically extracted if detected. - -For extracted files, every extracted file is sent in its own report. Every report has a field named `extra.file_name` with the file name in the archive the content was extracted from. 
- -**HTTP Response status code checks** - -If the HTTP response' status code is not 2xx, this is treated as error. - -In Debug logging level, the request's and response's headers and body are logged for further inspection. - - -.. _intelmq.bots.collectors.http.collector_http_stream: - -Generic URL Stream Fetcher -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.http.collector_http_stream -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Opens a streaming connection to the URL and sends the received lines. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* **HTTP parameters** (see above) -* `strip_lines`: boolean, if single lines should be stripped (removing whitespace from the beginning and the end of the line) - -If the stream is interrupted, the connection will be aborted using the timeout parameter. -No error will be logged if the number of consecutive connection fails does not reach the parameter `error_max_retries`. Instead of errors, an INFO message is logged. This is a measurement against too frequent ERROR logging messages. The consecutive connection fails are reset if a data line has been successfully transferred. -If the consecutive connection fails reaches the parameter `error_max_retries`, an exception will be thrown and `rate_limit` applies, if not null. - -The parameter `http_timeout_max_tries` is of no use in this collector. - - -.. _intelmq.bots.collectors.mail.collector_mail_url: - -Generic Mail URL Fetcher -^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.mail.collector_mail_url -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect messages from mailboxes, extract URLs from that messages and download the report messages from the URLs. 
- -**Configuration Parameters** - -* **Feed parameters** (see above) -* **HTTP parameters** (see above) -* `mail_host`: FQDN or IP of mail server -* `mail_user`: user account of the email account -* `mail_password`: password associated with the user account -* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) -* `mail_ssl`: whether the mail account uses SSL (default: `true`) -* `folder`: folder in which to look for mails (default: `INBOX`) -* `subject_regex`: regular expression to look for a subject -* `url_regex`: regular expression of the feed URL to search for in the mail body -* `sent_from`: filter messages by sender -* `sent_to`: filter messages by recipient -* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. - -The resulting reports contains the following special fields: - -* `feed.url`: The URL the data was downloaded from -* `extra.email_date`: The content of the email's `Date` header -* `extra.email_subject`: The subject of the email -* `extra.email_from`: The email's from address -* `extra.email_message_id`: The email's message ID -* `extra.file_name`: The file name of the downloaded file (extracted from the HTTP Response Headers if possible). - -**Chunking** - -For line-based inputs the bot can split up large reports into smaller chunks. - -This is particularly important for setups that use Redis as a message queue -which has a per-message size limitation of 512 MB. - -To configure chunking, set `chunk_size` to a value in bytes. -`chunk_replicate_header` determines whether the header line should be repeated -for each chunk that is passed on to a parser bot. - -Specifically, to configure a large file input to work around Redis' size -limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB. - - -.. 
_intelmq.bots.collectors.mail.collector_mail_attach: - -Generic Mail Attachment Fetcher -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.mail.collector_mail_attach -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect messages from mailboxes, download the report messages from the attachments. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `extract_files`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher for more details. -* `mail_host`: FQDN or IP of mail server -* `mail_user`: user account of the email account -* `mail_password`: password associated with the user account -* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) -* `mail_ssl`: whether the mail account uses SSL (default: `true`) -* `folder`: folder in which to look for mails (default: `INBOX`) -* `subject_regex`: regular expression to look for a subject -* `attach_regex`: regular expression of the name of the attachment -* `attach_unzip`: whether to unzip the attachment. Only extracts the first file. Deprecated, use `extract_files` instead. -* `sent_from`: filter messages by sender -* `sent_to`: filter messages by recipient -* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. - -The resulting reports contains the following special fields: - -* `extra.email_date`: The content of the email's `Date` header -* `extra.email_subject`: The subject of the email -* `extra.email_from`: The email's from address -* `extra.email_message_id`: The email's message ID -* `extra.file_name`: The file name of the attachment or the file name in the attached archive if attachment is to uncompress. - - -.. 
_intelmq.bots.collectors.mail.collector_mail_body: - -Generic Mail Body Fetcher -^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.mail.collector_mail_body -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect messages from mailboxes, forwards the bodies as reports. Each non-empty body with the matching content type is sent as individual report. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `mail_host`: FQDN or IP of mail server -* `mail_user`: user account of the email account -* `mail_password`: password associated with the user account -* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) -* `mail_ssl`: whether the mail account uses SSL (default: `true`) -* `folder`: folder in which to look for mails (default: `INBOX`) -* `subject_regex`: regular expression to look for a subject -* `sent_from`: filter messages by sender -* `sent_to`: filter messages by recipient -* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. -* `content_types`: Which bodies to use based on the content_type. Default: `true`/`['html', 'plain']` for all: - - string with comma separated values, e.g. `['html', 'plain']` - - `true`, `false`, `null`: Same as default value - - `string`, e.g. `'plain'` - -The resulting reports contains the following special fields: - -* `extra.email_date`: The content of the email's `Date` header -* `extra.email_subject`: The subject of the email -* `extra.email_from`: The email's from address -* `extra.email_message_id`: The email's message ID - - -.. 
_intelmq.bots.collectors.github_api.collector_github_contents_api: - -Github API -^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.github_api.collector_github_contents_api -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Collects files matched by regular expression from GitHub repository via the GitHub API. - Optionally with GitHub credentials, which are used as the Basic HTTP authentication. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `personal_access_token:` GitHub account personal access token [GitHub documentation: Creating a personal access token](https://developer.github.com/changes/2020-02-14-deprecating-password-auth/#removal) -* `repository:` GitHub target repository (`/`) -* `regex:` Valid regular expression of target files within the repository (defaults to `.*.json`) -* `extra_fields:` Comma-separated list of extra fields from `GitHub contents API `_. - -**Workflow** - -The optional authentication parameters provide a high limit of the GitHub API requests. -With the git hub user authentication, the requests are rate limited to 5000 per hour, otherwise to 60 requests per hour. - -The collector recursively searches for `regex`-defined files in the provided `repository`. -Additionally it adds extra file metadata defined by the `extra_fields`. - -The bot always sets the url, from which downloaded the file, as `feed.url`. - - -.. _intelmq.bots.collectors.file.collector_file: - -Fileinput -^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.file.collector_file -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` This bot is capable of reading files from the local file-system. - This is handy for testing purposes, or when you need to react to spontaneous - events. In combination with the Generic CSV Parser this should work great. 
- -**Configuration Parameters** - -* **Feed parameters** (see above) -* `path`: path to file -* `postfix`: The postfix (file ending) of the files to look for. For example `.csv`. -* `delete_file`: whether to delete the file after reading (default: `false`) - -The resulting reports contain the following special fields: - -* `feed.url`: The URI using the `file://` scheme and localhost, with the full path to the processed file. -* `extra.file_name`: The file name (without path) of the processed file. - -**Chunking** - -Additionally, for line-based inputs the bot can split up large reports into -smaller chunks. - -This is particularly important for setups that use Redis as a message queue -which has a per-message size limitation of 512 MB. - -To configure chunking, set `chunk_size` to a value in bytes. -`chunk_replicate_header` determines whether the header line should be repeated -for each chunk that is passed on to a parser bot. - -Specifically, to configure a large file input to work around Redis' size -limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB. - -**Workflow** - -The bot loops over all files in `path` and tests if their file name matches -*postfix, e.g. `*.csv`. If yes, the file will be read and inserted into the -queue. - -If `delete_file` is set, the file will be deleted after processing. If deletion -is not possible, the bot will stop. - -To prevent data loss, the bot also stops when no `postfix` is set and -`delete_file` was set. This cannot be overridden. - -The bot always sets the file name as feed.url - - -.. _intelmq.bots.collectors.fireeye.collector_fireeye: - -Fireeye -^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.collectors.fireeye.collector_fireeye` -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` This bot is capable of collecting hashes and URLs from a Fireeye MAS appliance. - -The Python library `xmltodict` is required to run this bot.
- -**Configuration Parameters** - -* **Feed parameters** (see above) -* `dns_name`: DNS name of the target appliance. -* `request_duration`: Length of the query in past eg. collect alerts from last 24hours/48hours. -* `http_username`: Username for authentication. -* `http_password`: Password for authentication. - -**Workflow** - -The bot collects all alerts which occurred during specified duration. After this we -make a second call and check if there is additional information like domains and hashes available. -After collecting the openioc data we send this information to the Fireeye parser. - - -.. _intelmq.bots.collectors.kafka.collector: - -Kafka -^^^^^ - -Requires the `kafka python library `_. - -**Information** - -* `name:` intelmq.bots.collectors.kafka.collector - -**Configuration parameters** - -* `topic:` the kafka topic the collector should get messages from -* `bootstrap_servers:` the kafka server(s) the collector should connect to. Defaults to `localhost:9092` -* `ssl_check_hostname`: `false` to ignore verifying SSL certificates, or `true` (default) to verify SSL certificates -* `ssl_client_certificate`: SSL client certificate to use. -* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Only used by some bots. - - -.. _intelmq.bots.collectors.misp.collector: - -MISP Generic -^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.misp.collector -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect messages from `MISP `_, a malware information sharing platform server.
- -**Configuration Parameters** - -* **Feed parameters** (see above) -* `misp_url`: URL of MISP server (with trailing '/') -* `misp_key`: MISP Authkey -* `misp_tag_to_process`: MISP tag for events to be processed -* `misp_tag_processed`: MISP tag for processed events, optional - -Generic parameters used in this bot: - -* `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`) - -**Workflow** -This collector will search for events on a MISP server that have a -`to_process` tag attached to them (see the `misp_tag_to_process` parameter) -and collect them for processing by IntelMQ. Once the MISP event has been -processed the `to_process` tag is removed from the MISP event and a -`processed` tag is then attached (see the `misp_tag_processed` parameter). - -**NB.** The MISP tags must be configured to be 'exportable' otherwise they will -not be retrieved by the collector. - - -.. _intelmq.bots.collectors.rt.collector_rt: - -Request Tracker -^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.rt.collector_rt -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Request Tracker Collector fetches attachments from an RTIR instance. - -You need the rt-library >= 1.9 and < 3.0 from nic.cz, available via `pypi `_: `pip3 install 'rt<3'` - -.. warning:: - - At the moment, the bot only supports `python-rt` versions below 3.0. - -This rt bot will connect to RT and inspect the given `search_queue` for tickets matching all criteria in `search_*`, -Any matches will be inspected. For each match, all (RT-) attachments of the matching RT tickets are iterated over and within this loop, the first matching filename in the attachment is processed. -If none of the filename matches apply, the contents of the first (RT-) "history" item is matched against the regular expression for the URL (`url_regex`). 
- -**Configuration Parameters** - -* **Feed parameters** (see above) -* **HTTP parameters** (see above) -* `extract_attachment`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher parameter `extract_files` for more details. -* `extract_download`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher parameter `extract_files` for more details. -* `uri`: URL of the REST interface of the RT -* `user`: RT username -* `password`: RT password -* `search_not_older_than`: Absolute time (use ISO format) or relative time, e.g. `3 days`. -* `search_owner`: owner of the ticket to search for (default: `nobody`) -* `search_queue`: queue of the ticket to search for (default: `Incident Reports`) -* `search_requestor`: the e-mail address of the requestor -* `search_status`: status of the ticket to search for (default: `new`) -* `search_subject_like`: part of the subject of the ticket to search for (default: `Report`); use list for multiple required values, -* `search_subject_notlike`: exclude subject containing given value, use list for multiple excluding values, -* `set_status`: status to set the ticket to after processing (default: `open`). `false` or `null` to not set a different status. -* `take_ticket`: whether to take the ticket (default: `true`) -* `url_regex`: regular expression of an URL to search for in the ticket -* `attachment_regex`: regular expression of an attachment in the ticket -* `unzip_attachment`: whether to unzip a found attachment. Only the first file in the archive is used. Deprecated in favor of `extract_attachment`. - -The parameter `http_timeout_max_tries` is of no use in this collector. - -The resulting reports contains the following special fields: - -* `rtir_id`: The ticket ID -* `extra.email_subject` and `extra.ticket_subject`: The subject of the ticket -* `extra.email_from` and `extra.ticket_requestors`: Comma separated list of the ticket's requestor's email addresses. 
-* `extra.ticket_owner`: The ticket's owner name -* `extra.ticket_status`: The ticket's status -* `extra.ticket_queue`: The ticket's queue -* `extra.file_name`: The name of the extracted file, the name of the downloaded file or the attachments' filename without `.gz` postfix. -* `time.observation`: The creation time of the ticket or attachment. - -**Search** - -The parameters prefixed with `search_` allow configuring the ticket search. - -Empty strings and `null` as value for search parameters are ignored. - -**File downloads** - -Attachments can be optionally unzipped, remote files are downloaded with the `http_*` settings applied. - -If `url_regex` or `attachment_regex` are empty strings, false or null, they are ignored. - -**Ticket processing** - -Optionally, the RT bot can "take" RT tickets (i.e. the `user` is assigned this ticket now) and/or the status can be changed (leave `set_status` empty in case you don't want to change the status). Please note however that you **MUST** do one of the following: either "take" the ticket or set the status (`set_status`). Otherwise, the search will find the ticket every time and we will have generated an endless loop. - -In case a resource needs to be fetched and this resource is permanently not available (status code is 4xx), the ticket status will be set according to the configuration to avoid processing the ticket over and over. -For temporary failures the status is not modified, instead the ticket will be skipped in this run. - -**Time search** - -To find only tickets newer than a given absolute or relative time, you can use the `search_not_older_than` parameter. Absolute time specification can be anything parseable by dateutil, best use a ISO format. - -Relative must be in this format: `[number] [timespan]s`, e.g. `3 days`. `timespan` can be hour, day, week, month, year. Trailing 's' is supported for all timespans. Relative times are subtracted from the current time directly before the search is performed. - - -.. 
_intelmq.bots.collectors.rsync.collector_rsync: - -Rsync -^^^^^ - -Requires the rsync executable - -**Information** - -* `name:` intelmq.bots.collectors.rsync.collector_rsync -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Bot downloads a file by rsync and then loads data from downloaded file. Downloaded file is located in `var/lib/bots/rsync_collector.` - -**Configuration Parameters** - -* `rsync_path`: Rsync server connection and path. It can be "/home/username/directory/" or "username@remote_host:/home/username/directory/". Supports formatting, see below. -* `file`: The filename to process, combined with `rsync_path`. Supports formatting, see below. -* `rsync_file_path_formatting`: Boolean if the file and rsync_path should be formatted by the given format (default: `false`). E.g. if the path is `/path/to/file/{time[%Y]}`, then the resulting path is `/path/to/file/2023` for the year 2023. (Python's `Format Specification Mini-Language `_ is used for this.). You may use a `JSON` specifying `time-delta `_ parameters to shift the current time accordingly. For example use `{"days": -1}` for the yesterday's date; the path `/path/to/file/{time[%Y-%m-%d]}` will get translated to "/path/to/file/2022-12-31" for the 1st Jan of 2023. -* `extra_params`: A list of extra parameters to pass to rsync. Optional. -* `private_key`: Private key to use for rsync authentication. Optional. -* `private_key_path`: Path to private key to use for rsync authentication. Optional. (Use `private_key` or `private_key_path`, not both.) -* `strict_host_key_checking`: Boolean if the host key should be checked (default: `false`). -* `temp_directory`: The temporary directory for rsync to use for rsync'd files. Optional. Default: `$VAR_STATE_PATH/rsync_collector`. `$VAR_STATE_PATH` is `/var/run/intelmq/` or `/opt/intelmq/var/run/`. - - -..
_intelmq.bots.collectors.shadowserver.collector_reports_api: - -Shadowserver Reports API -^^^^^^^^^^^^^^^^^^^^^^^^ - -The Cache is required to memorize which files have already been processed (TTL needs to be high enough to cover the oldest files available!). - -**Information** - -* `name`: `intelmq.bots.collectors.shadowserver.collector_reports_api` -* `description`: Connects to the `Shadowserver API `_, requests a list of all the reports for a specific country and processes the ones that are new. - -**Configuration Parameters** - -* `country`: **Deprecated:** The country you want to download the reports for. Will be removed in IntelMQ version 4.0.0, use *reports* instead. -* `apikey`: Your Shadowserver API key -* `secret`: Your Shadowserver API secret -* `reports`: A list of strings or a comma-separated list of the mailing lists you want to process. -* `types`: A list of strings or a string of comma-separated values with the names of report types you want to process. If you leave this empty, all the available reports will be downloaded and processed (i.e. 'scan', 'drones', 'intel', 'sandbox_connection', 'sinkhole_combined'). The possible report types are equivalent to the file names given in the section :ref:`Supported Reports ` of the Shadowserver parser. -* **Cache parameters** (see in section :ref:`common-parameters`, the default TTL is set to 10 days) - -The resulting reports contain the following special field: - -* `extra.file_name`: The name of the downloaded file, with fixed filename extension. The API returns file names with the extension `.csv`, although the files are JSON, not CSV. Therefore, for clarity and better error detection in the parser, the file name in `extra.file_name` uses `.json` as extension. - - -.. 
_intelmq.bots.collectors.shodan.collector_stream: - -Shodan Stream -^^^^^^^^^^^^^ - -Requires the shodan library to be installed: - * https://github.com/achillean/shodan-python/ - * https://pypi.org/project/shodan/ - -**Information** - -* `name:` intelmq.bots.collectors.shodan.collector_stream -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Queries the Shodan Streaming API - -**Configuration Parameters** - -* **Feed parameters** (see above) -* **HTTP parameters** (see above). Only the proxy is used (requires `shodan-python > 1.8.1`). Certificate is always verified. -* `countries`: A list of countries to query for. If it is a string, it will be split by `,`. - -If the stream is interrupted, the connection will be aborted using the timeout parameter. -No error will be logged if the number of consecutive connection fails does not reach the parameter `error_max_retries`. Instead of errors, an INFO message is logged. This is a measure against too frequent ERROR logging messages. The consecutive connection fails are reset if a data line has been successfully transferred. -If the consecutive connection fails reaches the parameter `error_max_retries`, an exception will be thrown and `rate_limit` applies, if not null. - - -.. _intelmq.bots.collectors.tcp.collector: - -TCP -^^^ - -**Information** - -* `name:` intelmq.bots.collectors.tcp.collector -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` TCP is the bot responsible to receive events on a TCP port (ex: from TCP Output of another IntelMQ instance). Might not be working on Python3.4.6. - -**Configuration Parameters** - -* `ip`: IP of destination server -* `port`: port of destination server - -**Response** - -TCP collector just sends an "Ok" message after every received message, this should not pose a problem for an arbitrary input. -If you intend to link two IntelMQ instances via TCP, have a look at the TCP output bot documentation. - - -..
_intelmq.bots.collectors.alienvault_otx.collector: - -Alien Vault OTX -^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.alienvault_otx.collector -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` collect report messages from Alien Vault OTX API - -**Requirements** - - -Install the library from GitHub, as there is no package in PyPI: - -.. code-block:: bash - - pip3 install -r intelmq/bots/collectors/alienvault_otx/REQUIREMENTS.txt - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `api_key`: API Key -* `modified_pulses_only`: get only modified pulses instead of all, set it to true or false, default false -* `interval`: if "modified_pulses_only" is set, define the time in hours (integer value) to get modified pulses since then, default 24 hours - - -.. _intelmq.bots.collectors.blueliv.collector_crimeserver: - -Blueliv Crimeserver -^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.blueliv.collector_crimeserver -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect report messages from Blueliv API - -For more information visit https://github.com/Blueliv/api-python-sdk - -**Requirements** - - -Install the required library: - -.. code-block:: bash - - pip3 install -r intelmq/bots/collectors/blueliv/REQUIREMENTS.txt - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `api_key`: location of information resource, see https://map.blueliv.com/?redirect=get-started#signup -* `api_url`: The optional API endpoint, by default `https://freeapi.blueliv.com`. - - -..
_intelmq.bots.collectors.calidog.collector_certstream: - -Calidog Certstream -^^^^^^^^^^^^^^^^^^ - -A Bot to collect data from the Certificate Transparency Log (CTL) -This bot works based on certstream library (https://github.com/CaliDog/certstream-python) - -**Information** - -* `name:` intelmq.bots.collectors.calidog.collector_certstream -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect data from Certificate Transparency Log - -**Configuration Parameters** - -* **Feed parameters** (see above) - - -.. _intelmq.bots.collectors.eset.collector: - -ESET ETI -^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.eset.collector -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect data from ESET ETI TAXII server - -For more information visit https://www.eset.com/int/business/services/threat-intelligence/ - -**Requirements** - - -Install the required `cabby` library: - -.. code-block:: bash - - pip3 install -r intelmq/bots/collectors/eset/REQUIREMENTS.txt - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `username`: Your username -* `password`: Your password -* `endpoint`: `eti.eset.com` -* `time_delta`: The time span to look back, in seconds. Default `3600`. -* `collection`: The collection to fetch. - - -.. _intelmq.bots.collectors.opendxl.collector: - -McAfee openDXL -^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.opendxl.collector -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect messages via openDXL - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `dxl_config_file`: location of the configuration file containing required information to connect $ -* `dxl_topic`: the name of the DXL topic to subscribe - - -.. _intelmq.bots.collectors.microsoft.collector_azure: - -Microsoft Azure -^^^^^^^^^^^^^^^ - -Iterates over all blobs in all containers in an Azure storage. 
-The Cache is required to memorize which files have already been processed (TTL needs to be high enough to cover the oldest files available!). - -This bot significantly changed in a backwards-incompatible way in IntelMQ Version 2.2.0 to support current versions of the Microsoft Azure Python libraries. -``azure-storage-blob>=12.0.0`` is required. - -**Information** - -* `name`: intelmq.bots.collectors.microsoft.collector_azure -* `lookup`: yes -* `public`: no -* `cache (redis db)`: 5 -* `description`: collect blobs from Microsoft Azure using their library - -**Configuration Parameters** - -* **Cache parameters** (see above) -* **Feed parameters** (see above) -* `connection_string`: connection string as given by Microsoft -* `container_name`: name of the container to connect to - - -.. _intelmq.bots.collectors.microsoft.collector_interflow: - -Microsoft Interflow -^^^^^^^^^^^^^^^^^^^ - -Iterates over all files available through this API. Make sure to limit the files to be downloaded with the parameters, otherwise you will get a lot of data! -The cache is used to remember which files have already been downloaded. Make sure the TTL is high enough, higher than `not_older_than`. - -**Information** - -* `name:` intelmq.bots.collectors.microsoft.collector_interflow -* `lookup:` yes -* `public:` no -* `cache (redis db):` 5 -* `description:` collect files from Microsoft Interflow using their API - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `api_key`: API key generated in their portal -* `file_match`: an optional regular expression to match file names -* `not_older_than`: an optional relative (minutes) or absolute time (UTC is assumed) expression to determine the oldest time of a file to be downloaded -* `redis_cache_*` and especially `redis_cache_ttl`: Settings for the cache where file names of downloaded files are saved. The cache's TTL must always be bigger than `not_older_than`.
- -**Additional functionalities** - -* Files are automatically ungzipped if the filename ends with `.gz`. - -.. _stomp collector bot: - - -.. _intelmq.bots.collectors.stomp.collector: - -Stomp -^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.stomp.collector -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect messages from a stomp server - -**Requirements** - - -Install the `stomp.py` library from PyPI: - -.. code-block:: bash - - pip3 install -r intelmq/bots/collectors/stomp/REQUIREMENTS.txt - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `exchange`: STOMP *destination* to subscribe to, e.g. "/exchange/my.org/*.*.*.*" -* `port`: 61614 -* `server`: hostname, e.g. "n6stream.cert.pl" -* `ssl_ca_certificate`: path to CA file -* `auth_by_ssl_client_certificate`: Boolean, default: true (note: set to false for new *n6* auth) -* `ssl_client_certificate`: path to client cert file, used only if `auth_by_ssl_client_certificate` is true -* `ssl_client_certificate_key`: path to client cert key file, used only if `auth_by_ssl_client_certificate` is true -* `username`: STOMP *login* (e.g., *n6* user login), used only if `auth_by_ssl_client_certificate` is false -* `password`: STOMP *passcode* (e.g., *n6* user API key), used only if `auth_by_ssl_client_certificate` is false - - -.. _intelmq.bots.collectors.twitter.collector_twitter: - -Twitter -^^^^^^^ - -Collects tweets from target_timelines. Up to tweet_count tweets from each user and up to timelimit back in time. 
The tweet text is sent separately and if allowed, links to pastebin are followed and the text sent in a separate report - -**Information** - -* `name:` intelmq.bots.collectors.twitter.collector_twitter -* `lookup:` yes -* `public:` yes -* `cache (redis db):` none -* `description:` Collects tweets - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `target_timelines`: screen_names of twitter accounts to be followed -* `tweet_count`: number of tweets to be taken from each account -* `timelimit`: maximum age of the tweets collected in seconds -* `follow_urls`: list of screen_names for which URLs will be followed -* `exclude_replies`: exclude replies of the followed screen_names -* `include_rts`: whether to include retweets by given screen_name -* `consumer_key`: Twitter API login data -* `consumer_secret`: Twitter API login data -* `access_token_key`: Twitter API login data -* `access_token_secret`: Twitter API login data - - -.. _intelmq.bots.collectors.api.collector_api: - -API collector bot -^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.collectors.api.collector_api -* `lookup:` no -* `public:` no -* `cache (redis db):` none -* `description:` Bot for collecting data using API, you need to post JSON to /intelmq/push endpoint - -example usage: - -.. code-block:: bash - - curl -X POST http://localhost:5000/intelmq/push -H 'Content-Type: application/json' --data '{"source.ip": "127.0.0.101", "classification.type": "system-compromise"}' - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `port`: 5000 - - -.. _parser bots: - -*********** -Parser Bots -*********** - -Not complete -^^^^^^^^^^^^ - -This list is not complete. Look at ``intelmqctl list bots`` or the list of parsers shown in the manager. But most parsers do not need configuration parameters. 
- -TODO - -**Configuration Parameters** - -* `default_fields`: map of statically added fields to each event (only applied if parsing the event doesn't set the value) - -example usage: - -.. code-block:: yaml - - defaults_fields: - classification.type: c2-server - protocol.transport: tcp - - -.. _intelmq.bots.parsers.anubisnetworks.parser: - -AnubisNetworks Cyberfeed Stream -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.parsers.anubisnetworks.parser` -* `lookup`: no -* `public`: yes -* `cache (redis db)`: none -* `description`: parsers data from AnubisNetworks Cyberfeed Stream - -**Description** - -The feed format changes over time. The parser supports at least data from 2016 and 2020. - -Events with the Malware "TestSinkholingLoss" are ignored, as they are for the feed provider's internal purpose only and should not be processed at all. - -**Configuration parameters** - -* `use_malware_familiy_as_classification_identifier`: default: `true`. Use the `malw.family` field as `classification.type`. If `false`, check if the same as `malw.variant`. If it is the same, it is ignored. Otherwise saved as `extra.malware.family`. - - -.. _intelmq.bots.parsers.generic.parser_csv: - -Generic CSV Parser -^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.parsers.generic.parser_csv` -* `lookup`: no -* `public`: yes -* `cache (redis db)`: none -* `description`: Parses CSV data - -Lines starting with `'#'` will be ignored. Headers won't be interpreted. - -**Configuration parameters** - - * `"columns"`: A list of strings or a string of comma-separated values with field names. The names must match the IntelMQ Data Format field names. Empty column specifications and columns named `"__IGNORE__"` are ignored. E.g. - - .. code-block:: json - - "columns": [ - "", - "source.fqdn", - "extra.http_host_header", - "__IGNORE__" - ], - - is equivalent to: - - .. 
code-block:: json - - "columns": ",source.fqdn,extra.http_host_header," - - The first and the last column are not used in this example. - - It is possible to specify multiple columns using the `|` character. E.g. - - .. code-block:: - - "columns": "source.url|source.fqdn|source.ip" - - First, bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will try to parse it as IP, if that fails, an error will be raised. - Some use cases - - - - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` - - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` - - * `"column_regex_search"`: Optional. A dictionary mapping field names (as given per the columns parameter) to regular expression. The field is evaluated using `re.search`. E.g. to get the ASN out of `AS1234` use: `{"source.asn": "[0-9]*"}`. Make sure to properly escape any backslashes in your regular expression (See also :issue:`#1579 <1579>`). - * `"compose_fields"`: Optional, dictionary. Create fields from columns, e.g. with data like this: - - .. code-block:: csv - - # Host,Path - example.com,/foo/ - example.net,/bar/ - - using this compose_fields parameter: - - .. code-block:: json - - {"source.url": "http://{0}{1}"} - - You get: - - .. code-block:: - - http://example.com/foo/ - http://example.net/bar/ - - in the respective `source.url` fields. The value in the dictionary mapping is formatted whereas the columns are available with their index. - * `"default_url_protocol"`: For URLs you can give a default protocol which will be prepended to the data. - * `"delimiter"`: separation character of the CSV, e.g. `","` - * `"skip_header"`: Boolean or Int, skip the first N lines of the file (True -> 1, False -> 0), optional. Lines starting with `#` will be skipped additionally, make sure you do not skip more lines than needed! - * `time_format`: Optional.
If `"timestamp"`, `"windows_nt"` or `"epoch_millis"` the time will be converted first. With the default `null` fuzzy time parsing will be used. - * `"type"`: set the `classification.type` statically, optional - * `"data_type"`: sets the data of specific type, currently only `"json"` is supported value. An example - - .. code-block:: json - - { - "columns": [ "source.ip", "source.url", "extra.tags"], - "data_type": "{\"extra.tags\":\"json\"}" - } - - It will ensure `extra.tags` is treated as `json`. - * `"filter_text"`: only process the lines containing or not containing specified text, to be used in conjunction with `filter_type` - * `"filter_type"`: value can be whitelist or blacklist. If `whitelist`, only lines containing the text in `filter_text` will be processed, if `blacklist`, only lines NOT containing the text will be processed. - - To process ipset format files use - - .. code-block:: json - - { - "filter_text": "ipset add ", - "filter_type": "whitelist", - "columns": [ "__IGNORE__", "__IGNORE__", "__IGNORE__", "source.ip"] - } - - * `"type_translation"`: If the source does have a field with information for `classification.type`, but it does not correspond to IntelMQ's types, - you can map them to the correct ones. The `type_translation` field can hold a dictionary, or a string with a JSON dictionary which maps the feed's values to IntelMQ's. - Example: - - .. code-block:: json - - {"malware_download": "malware-distribution"} - - * `"columns_required"`: A list of true/false for each column. By default, it is true for every column. - - -.. 
_intelmq.bots.parsers.calidog.parser_certstream: - -Calidog Certstream -^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.calidog.parser_certstream -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` parsers data from Certificate Transparency Log - -**Description** - -For each domain in the `leaf_cert.all_domains` object one event with the domain in `source.fqdn` (and `source.ip` as fallback) is produced. -The seen-date is saved in `time.source` and the classification type is `other`. - -* **Feed parameters** (see above) - - -.. _intelmq.bots.parsers.eset.parser: - -ESET -^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.eset.parser -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Parses data from ESET ETI TAXII server - -**Description** - -Supported collections: - -* "ei.urls (json)" -* "ei.domains v2 (json)" - - -.. _intelmq.bots.parsers.cymru.parser_cap_program: - -Cymru CAP Program -^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.cymru.parser_cap_program -* `public:` no -* `cache (redis db):` none -* `description:` Parses data from Cymru's CAP program feed. - -**Description** - -There are two different feeds available: - - * `infected_$date.txt` ("old") - * `$certname_$date.txt` ("new") - -The new will replace the old at some point in time, currently you need to fetch both. The parser handles both formats. - -**Old feed** - -As little information on the format is available, the mappings might not be correct in all cases. -Some reports are not implemented at all as there is no data available to check if the parsing is correct at all. If you do get errors like `Report ... not implement` or similar please open an issue and report the (anonymized) example data. Thanks. 
- -The information about the event could be better in many cases but as Cymru does not want to be associated with the report, we can't add comments to the events in the parser, because then the source would be easily identifiable for the recipient. - - -.. _intelmq.bots.parsers.cymru.parser_full_bogons: - -Cymru Full Bogons -^^^^^^^^^^^^^^^^^ - -http://www.team-cymru.com/bogon-reference.html - -**Information** - -* `name:` intelmq.bots.parsers.cymru.parser_full_bogons -* `public:` no -* `cache (redis db):` none -* `description:` Parses data from full bogons feed. - - -.. _intelmq.bots.parsers.github_feed.parser: - -Github Feed -^^^^^^^^^^^ - -**Information** - - -* `name:` intelmq.bots.parsers.github_feed.parser -* `description:` Parses Feeds available publicly on GitHub (should receive from `github_api` collector) - - -.. _intelmq.bots.parsers.hibp.parser_callback: - -Have I Been Pwned Callback Parser -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.hibp.parser_callback -* `public:` no -* `cache (redis db):` none -* `description:` Parses data from Have I Been Pwned feed. - -**Description** - -Parses the data from a Callback of a Have I Been Pwned Enterprise Subscription. - -Parses breaches and pastes and creates one event per e-mail address. The e-mail address is stored in `source.account`. -`classification.type` is `leak` and `classification.identifier` is `breach` or `paste`. - - -.. _intelmq.bots.parsers.html_table.parser: - -HTML Table Parser -^^^^^^^^^^^^^^^^^ - -* `name:` intelmq.bots.parsers.html_table.parser -* `public:` yes -* `cache (redis db):` none -* `description:` Parses tables in HTML documents - -**Configuration parameters** - - * `"columns"`: A list of strings or a string of comma-separated values with field names. The names must match the IntelMQ Data Format field names. Empty column specifications and columns named `"__IGNORE__"` are ignored. E.g. - - ..
code-block:: json - - "columns": [ - "", - "source.fqdn", - "extra.http_host_header", - "__IGNORE__" - ], - - is equivalent to: - - .. code-block:: json - - "columns": ",source.fqdn,extra.http_host_header," - - The first and the last column are not used in this example. - It is possible to specify multiple columns using the `|` character. E.g. - - .. code-block:: json - - "columns": "source.url|source.fqdn|source.ip" - - First, bot will try to parse the value as URL, if it fails, it will try to parse it as FQDN, if that fails, it will try to parse it as IP, if that fails, an error will be raised. - Some use cases - - - - mixed data set, e.g. URL/FQDN/IP/NETMASK `"columns": "source.url|source.fqdn|source.ip|source.network"` - - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` - - * `"ignore_values"`: A list of strings or a string of comma-separated values which will not considered while assigning to the corresponding fields given in `columns`. E.g. - - .. code-block:: json - - "ignore_values": [ - "", - "unknown", - "Not listed", - ], - - is equivalent to: - - .. code-block:: json - - "ignore_values": ",unknown,Not listed," - - The following configuration will lead to assigning all values to malware.name and extra.SBL except `unknown` and `Not listed` respectively. - - .. code-block:: json - - "columns": [ - "source.url", - "malware.name", - "extra.SBL", - ], - "ignore_values": [ - "", - "unknown", - "Not listed", - ], - - Parameters **columns and ignore_values must have same length** - * `"attribute_name"`: Filtering table with table attributes, to be used in conjunction with `attribute_value`, optional. E.g. `class`, `id`, `style`. - * `"attribute_value"`: String. - To filter all tables with attribute `class='details'` use - - .. code-block:: json - - "attribute_name": "class", - "attribute_value": "details" - - * `"table_index"`: Index of the table if multiple tables present. 
If `attribute_name` and `attribute_value` are given, index according to tables remaining after filtering with table attribute. Default: `0`. - * `"split_column"`: Padded column to be split to get values, to be used in conjunction with `split_separator` and `split_index`, optional. - * `"split_separator"`: Delimiter string for padded column. - * `"split_index"`: Index of unpadded string in returned list from splitting `split_column` with `split_separator` as delimiter string. Default: `0`. - E.g. - - .. code-block:: json - - "split_column": "source.fqdn", - "split_separator": " ", - "split_index": 1, - - With above configuration, column corresponding to `source.fqdn` with value `[D] lingvaworld.ru` will be assigned as `"source.fqdn": "lingvaworld.ru"`. - * `"skip_table_head"`: Boolean, skip the first row of the table, optional. Default: `true`. - * `"default_url_protocol"`: For URLs you can give a default protocol which will be prepended to the data. Default: `"http://"`. - * `"time_format"`: Optional. If `"timestamp"`, `"windows_nt"` or `"epoch_millis"` the time will be converted first. With the default `null` fuzzy time parsing will be used. - * `"type"`: set the `classification.type` statically, optional - * `"html_parser"`: The HTML parser to use, by default "html.parser", can also be e.g. "lxml", have a look at https://www.crummy.com/software/BeautifulSoup/bs4/doc/ - - -.. _intelmq.bots.parsers.key_value.parser: - -Key-Value Parser -^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.key_value.parser -* `lookup:` no -* `public:` no -* `cache (redis db):` none -* `description:` Parses text lines in key=value format, for example FortiGate firewall logs. - -**Configuration Parameters** - -* `pair_separator`: String separating key=value pairs, default `" "` (space). -* `kv_separator`: String separating key and value, default `=`. -* `keys`: Array of string->string, names of keys to propagate mapped to IntelMQ event fields. Example: - - ..
code-block:: json - - "keys": { - "srcip": "source.ip", - "dstip": "destination.ip" - } - - The value mapped to `time.source` is parsed. If the value is numeric, it is interpreted. Otherwise, or if it fails, it is parsed fuzzy with dateutil. - If the value cannot be parsed, a warning is logged per line. -* `strip_quotes`: Boolean, remove opening and closing quotes from values, default true. - -**Parsing limitations** - -The input must not have (quoted) occurrences of the separator in the values. For example, this is not parsable (with space as separator): - -.. code-block:: - - key="long value" key2="other value" - -In firewall logs like FortiGate, this does not occur. These logs usually look like: - -.. code-block:: - - srcip=192.0.2.1 srcmac="00:00:5e:00:17:17" - - -.. _intelmq.bots.parsers.mcafee.parser_atd: - -McAfee Advanced Threat Defense File -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.mcafee.parser_atd -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` Parse IoCs from McAfee Advanced Threat Defense reports (hash, IP, URL) - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `verdict_severity`: min report severity to parse - - -.. _intelmq.bots.parsers.microsoft.parser_ctip: - -Microsoft CTIP Parser -^^^^^^^^^^^^^^^^^^^^^ - -* `name`: `intelmq.bots.parsers.microsoft.parser_ctip` -* `public`: no -* `cache (redis db)`: none -* `description`: Parses data from the Microsoft CTIP Feed - - * `overwrite`: If an existing `feed.name` should be overwritten (only relevant for the azure data source). - -**Configuration Parameters** - -* ``overwrite``: Overwrite an existing field ``feed.name`` with ``DataFeed`` of the source. - -**Description** - -Can parse the JSON format provided by the Interflow interface (lists of dictionaries) as well as the format provided by the Azure interface (one dictionary per line). -The provided data differs between the two formats/providers. 
- -The parser is capable of parsing both feeds: -- `ctip-c2` -- `ctip-infected-summary` -The feeds only differ by a few fields, not in the format. - -The feeds contain a field called `Payload` which is nearly always a base64 encoded JSON structure. -If decoding works, the contained fields are saved as `extra.payload.*`, otherwise the field is saved as `extra.payload.text`. - - -.. _intelmq.bots.parsers.misp.parser: - -MISP -^^^^ - -* `name:` intelmq.bots.parsers.misp.parser -* `public:` no -* `cache (redis db):` none -* `description:` Parses MISP events - -**Description** - -MISP events collected by the MISPCollectorBot are passed to this parser -for processing. Supported MISP event categories and attribute types are -defined in the `SUPPORTED_MISP_CATEGORIES` and `MISP_TYPE_MAPPING` class -constants. - - -.. _n6 parser bot: - -.. _intelmq.bots.parsers.n6.parser_n6stomp: - -n6 -^^ - -**Information** - -* `name`: `intelmq.bots.parsers.n6.parser_n6stomp` -* `public`: no -* `cache (redis db)`: none -* `description`: Convert n6 data into IntelMQ format. - -**Configuration Parameters** -None - -**Description** - -Test messages are ignored, this is logged with debug logging level. -Also contains a mapping for the classification (results in taxonomy, type and identifier). -The `name` field is normally used as `malware.name`, if that fails due to disallowed characters, these characters are removed and the original value is saved as `event_description.text`. This can happen for names like `"further iocs: text with invalid ’ char"`. - -If an n6 message contains multiple IP addresses, multiple events are generated, resulting in events only differing in the address information. - - -.. 
_intelmq.bots.parsers.twitter.parser: - -Twitter -^^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.twitter.parser -* `public:` no -* `cache (redis db):` none -* `description:` Extracts URLs from text, fuzzy, aimed at parsing tweets - -**Configuration Parameters** - -* `domain_whitelist`: domains to be filtered out -* `substitutions`: semicolon delimited list of even length of pairs of substitutions (for example: '[.];.;,;.' substitutes '[.]' for '.' and ',' for '.') -* `classification_type`: string with a valid classification type as defined in data format -* `default_scheme`: Default scheme for URLs if not given. See also the next section. - -**Default scheme** - -The dependency `url-normalize` changed its behavior in version 1.4.0 from using `http://` as default scheme to `https://`. Version 1.4.1 added the possibility to specify it. Thus you can only use the `default_scheme` parameter with a current version of this library >= 1.4.1, with 1.4.0 you will always get `https://` as default scheme and for older versions < 1.4.0 `http://` is used. - -This does not affect URLs which already include the scheme. - - -.. _intelmq.bots.parsers.shadowserver.parser: -.. _intelmq.bots.parsers.shadowserver.parser_json: - -Shadowserver -^^^^^^^^^^^^ - -There are two Shadowserver parsers, one for data in ``CSV`` format (``intelmq.bots.parsers.shadowserver.parser``) and one for data in ``JSON`` format (``intelmq.bots.parsers.shadowserver.parser_json``). -The latter was added in IntelMQ 2.3 and is meant to be used together with the Shadowserver API collector. - -**Information** - -* `name:` `intelmq.bots.parsers.shadowserver.parser` (for CSV data) or `intelmq.bots.parsers.shadowserver.parser_json` (for JSON data) -* `public:` yes -* `description:` Parses different reports from Shadowserver. - -**Configuration Parameters** - - * `feedname`: Optional, the name of the feed, see list below for possible values.
- * `overwrite`: If an existing `feed.name` should be overwritten. - -**How this bot works?** - -There are two possibilities for the bot to determine which feed the data belongs to in order to determine the correct mapping of the columns: - -**Automatic feed detection** - -Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. - -When processing a report, this bot takes `extra.file_name` from the report and -looks in `config.py` how the report should be parsed. - -If this lookup is not possible, and the feed name is not given as parameter, the feed cannot be parsed. - -The field `extra.file_name` has the following structure: -`%Y-%m-%d-${report_name}[-suffix].csv` where suffix can be something like `country-geo`. For example, some possible filenames are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is `${report_name}`, between the date and the suffix. -Since version 2.1.2 the date in the filename is optional, so filenames like `scan_tftp.csv` are also detected. - -**Fixed feed name** - -If the method above is not possible and for upgraded instances, the feed can be set with the `feedname` parameter. -Feed-names are derived from the subjects of the Shadowserver E-Mails. -A list of possible feeds can be found in the table below in the column "feed name". - -.. 
_shadowserver-supported-reports: - -**Supported reports** - -These are the supported feed name and their corresponding file name for automatic detection: - - ======================================= ========================= - feed name file name - ======================================= ========================= - Accessible-ADB `scan_adb` - Accessible-AFP `scan_afp` - Accessible-AMQP `scan_amqp` - Accessible-ARD `scan_ard` - Accessible-Cisco-Smart-Install `cisco_smart_install` - Accessible-CoAP `scan_coap` - Accessible-CWMP `scan_cwmp` - Accessible-MS-RDPEUDP `scan_msrdpeudp` - Accessible-FTP `scan_ftp` - Accessible-Hadoop `scan_hadoop` - Accessible-HTTP `scan_http` - Accessible-Radmin `scan_radmin` - Accessible-RDP `scan_rdp` - Accessible-Rsync `scan_rsync` - Accessible-SMB `scan_smb` - Accessible-Telnet `scan_telnet` - Accessible-Ubiquiti-Discovery-Service `scan_ubiquiti` - Accessible-VNC `scan_vnc` - Blacklisted-IP (deprecated) `blacklist` - Blocklist `blocklist` - Compromised-Website `compromised_website` - Device-Identification IPv4 / IPv6 `device_id`/`device_id6` - DNS-Open-Resolvers `scan_dns` - Honeypot-Amplification-DDoS-Events `event4_honeypot_ddos_amp` - Honeypot-Brute-Force-Events `event4_honeypot_brute_force` - Honeypot-Darknet `event4_honeypot_darknet` - Honeypot-HTTP-Scan `event4_honeypot_http_scan` - HTTP-Scanners `hp_http_scan` - ICS-Scanners `hp_ics_scan` - IP-Spoofer-Events `event4_ip_spoofer` - Microsoft-Sinkhole-Events IPv4 `event4_microsoft_sinkhole` - Microsoft-Sinkhole-Events-HTTP IPv4 `event4_microsoft_sinkhole_http` - NTP-Monitor `scan_ntpmonitor` - NTP-Version `scan_ntp` - Open-Chargen `scan_chargen` - Open-DB2-Discovery-Service `scan_db2` - Open-Elasticsearch `scan_elasticsearch` - Open-IPMI `scan_ipmi` - Open-IPP `scan_ipp` - Open-LDAP `scan_ldap` - Open-LDAP-TCP `scan_ldap_tcp` - Open-mDNS `scan_mdns` - Open-Memcached `scan_memcached` - Open-MongoDB `scan_mongodb` - Open-MQTT `scan_mqtt` - Open-MSSQL `scan_mssql` - Open-NATPMP 
`scan_nat_pmp` - Open-NetBIOS-Nameservice `scan_netbios` - Open-Netis `netis_router` - Open-Portmapper `scan_portmapper` - Open-QOTD `scan_qotd` - Open-Redis `scan_redis` - Open-SNMP `scan_snmp` - Open-SSDP `scan_ssdp` - Open-TFTP `scan_tftp` - Open-XDMCP `scan_xdmcp` - Outdated-DNSSEC-Key `outdated_dnssec_key` - Outdated-DNSSEC-Key-IPv6 `outdated_dnssec_key_v6` - Sandbox-URL `cwsandbox_url` - Sinkhole-DNS `sinkhole_dns` - Sinkhole-Events `event4_sinkhole`/`event6_sinkhole` - Sinkhole-Events IPv4 `event4_sinkhole` - Sinkhole-Events IPv6 `event6_sinkhole` - Sinkhole-HTTP-Events `event4_sinkhole_http`/`event6_sinkhole_http` - Sinkhole-HTTP-Events IPv4 `event4_sinkhole_http` - Sinkhole-HTTP-Events IPv6 `event6_sinkhole_http` - Sinkhole-Events-HTTP-Referer `event4_sinkhole_http_referer`/`event6_sinkhole_http_referer` - Sinkhole-Events-HTTP-Referer IPv4 `event4_sinkhole_http_referer` - Sinkhole-Events-HTTP-Referer IPv6 `event6_sinkhole_http_referer` - Spam-URL `spam_url` - SSL-FREAK-Vulnerable-Servers `scan_ssl_freak` - SSL-POODLE-Vulnerable-Servers `scan_ssl_poodle`/`scan6_ssl_poodle` - Vulnerable-Exchange-Server `*` `scan_exchange` - Vulnerable-ISAKMP `scan_isakmp` - Vulnerable-HTTP `scan_http` - Vulnerable-SMTP `scan_smtp_vulnerable` - ======================================= ========================= - -`*` This report can also contain data on active webshells (column `tag` is `exchange;webshell`), and are therefore not only vulnerable but also actively infected. 
- -In addition, the following legacy reports are supported: - - =========================== =================================================== ======================== - feed name successor feed name file name - =========================== =================================================== ======================== - Amplification-DDoS-Victim Honeypot-Amplification-DDoS-Events ``ddos_amplification`` - CAIDA-IP-Spoofer IP-Spoofer-Events ``caida_ip_spoofer`` - Darknet Honeypot-Darknet ``darknet`` - Drone Sinkhole-Events ``botnet_drone`` - Drone-Brute-Force Honeypot-Brute-Force-Events, Sinkhole-HTTP-Events ``drone_brute_force`` - Microsoft-Sinkhole Sinkhole-HTTP-Events ``microsoft_sinkhole`` - Sinkhole-HTTP-Drone Sinkhole-HTTP-Events ``sinkhole_http_drone`` - IPv6-Sinkhole-HTTP-Drone Sinkhole-HTTP-Events ``sinkhole6_http`` - =========================== =================================================== ======================== - -More information on these legacy reports can be found in `Changes in Sinkhole and Honeypot Report Types and Formats `_. - -**Development** - -**Structure of this Parser Bot** - -The parser consists of two files: - * ``_config.py`` - * ``parser.py`` or ``parser_json.py`` - -Both files are required for the parser to work properly. - -**Add new Feedformats** - -Add a new feed format and conversions if required to the file -``_config.py``. Don't forget to update the ``mapping`` dict. -It is required to look up the correct configuration. - -Look at the documentation in the bot's ``_config.py`` file for more information. - - -.. _intelmq.bots.parsers.shodan.parser: - -Shodan -^^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.shodan.parser -* `public:` yes -* `description:` Parses data from Shodan (search, stream etc). - -The parser is by far not complete as there are a lot of fields in a big nested structure. 
There is a minimal mode available which only parses the important/most useful fields and also saves everything in `extra.shodan` keeping the original structure. When not using the minimal mode it may be useful to ignore errors as many parsing errors can happen with the incomplete mapping. - -**Configuration Parameters** - -* `ignore_errors`: Boolean (default true) -* `minimal_mode`: Boolean (default false) - - -.. _intelmq.bots.parsers.zoneh.parser: - -ZoneH -^^^^^ - -**Information** - -* `name:` intelmq.bots.parsers.zoneh.parser -* `public:` yes -* `description:` Parses data from ZoneH. - -**Description** -This bot is designed to consume defacement reports from zone-h.org. It expects -fields normally present in CSV files distributed by email. - - -.. _expert bots: - -*********** -Expert Bots -*********** - - -.. _intelmq.bots.experts.abusix.expert: - -Abusix -^^^^^^ - -**Information** - -* `name:` intelmq.bots.experts.abusix.expert -* `lookup:` dns -* `public:` yes -* `cache (redis db):` 5 -* `description:` RIPE abuse contacts resolving through DNS TXT queries -* `notes`: https://abusix.com/contactdb.html - -**Configuration Parameters** - -* **Cache parameters** (see in section :ref:`common-parameters`) - -**Requirements** - -This bot can optionally use the python module *querycontacts* by Abusix itself: -https://pypi.org/project/querycontacts/ - -.. code-block:: bash - - pip3 install querycontacts - -If the package is not installed, our own routines are used. - -.. _intelmq.bots.experts.aggregate.expert: - -Aggregate -^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.experts.aggregate.expert -* `lookup:` no -* `public:` yes -* `cache (redis db):` 8 -* `description:` Aggregates events based upon given fields & timespan - -**Configuration Parameters** - -* **Cache parameters** (see in section :ref:`common-parameters`) - - * TTL is not used, using it would result in data loss.
-* **fields** Given fields which are used to aggregate like `classification.type, classification.identifier` -* **threshold** If the aggregated event is lower than the given threshold after the timespan, the event will get dropped. -* **timespan** Timespan to aggregate events during the given time. E.g. `1 hour` - -**Usage** - -Define specific fields to filter incoming events and aggregate them. -Also set the timespan you want the events to get aggregated. -Usage e.g. `1 hour` - -**Note** - -The "cleanup" procedure sends out the aggregated events or drops them based upon the given threshold value. -It is called on every incoming message and on the bot's initialization. -If you're potentially running on low traffic (no incoming events within the given timespan) it is recommended to reload or restart the bot -via cronjob every 30 minutes (adapt to your configured timespan). -Otherwise you might lose information. - -E.g.: - -.. code-block:: bash - - crontab -e - - 0,30 * * * * intelmqctl reload my-aggregate-bot - - -For reloading/restarting please check the :doc:`intelmqctl` documentation. - -.. _intelmq.bots.experts.asn_lookup.expert: - -ASN Lookup -^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.asn_lookup.expert` -* `lookup:` local database -* `public:` yes -* `cache (redis db):` none -* `description:` IP to ASN - -**Configuration Parameters** - -* `database`: Path to the downloaded database. - -**Requirements** - - -Install `pyasn` module - -.. code-block:: bash - - pip3 install pyasn - -**Database** - -Use this command to create/update the database and reload the bot: - -.. code-block:: bash - - intelmq.bots.experts.asn_lookup.expert --update-database - -The database is fetched from `routeviews.org `_ and licensed under the Creative Commons Attribution 4.0 International license (see the `routeviews FAQ `_). - - -.. 
_intelmq.bots.experts.csv_converter.expert: - -CSV Converter -^^^^^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.experts.csv_converter.expert` -* `lookup`: no -* `public`: yes -* `cache (redis db)`: none -* `description`: Converts an event to CSV format, saved in the `output` field. - -**Configuration Parameters** - -* `delimiter`: String, default `","` -* `fieldnames`: Comma-separated list of field names, e.g. `"time.source,classification.type,source.ip"` - -**Usage** - -To use the CSV-converted data in an output bot - for example in a file output, -use the configuration parameter `single_key` of the output bot and set it to `output`. - - -.. _intelmq.bots.experts.cymru_whois.expert: - -Cymru Whois -^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.cymru_whois.expert` -* `lookup:` Cymru DNS -* `public:` yes -* `cache (redis db):` 5 -* `description:` IP to geolocation, ASN, BGP prefix - -Public documentation: https://www.team-cymru.com/IP-ASN-mapping.html#dns - -**Configuration Parameters** - -* **Cache parameters** (see in section :ref:`common-parameters`) -* `overwrite`: Overwrite existing fields. Default: `True` if not given (for backwards compatibility, will change in version 3.0.0) - - -.. _intelmq.bots.experts.remove_affix.expert: - -RemoveAffix -^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.remove_affix.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` Cut string from string - -**Configuration Parameters** - -* `remove_prefix`: True - cut from start, False - cut from end. Default: True -* `affix`: example 'www.' -* `field`: example field 'source.fqdn' - -**Description** -Remove part of string from string, example: `www.` from domains. - - -.. _intelmq.bots.experts.domain_suffix.expert: - -Domain Suffix -^^^^^^^^^^^^^ - -This bot adds the public suffix to the event, derived from a domain.
-For information on the public suffix list see: https://publicsuffix.org/list/ -Only rules for ICANN domains are processed. The list can (and should) contain -Unicode data, punycode conversion is done during reading. - -Note that the public suffix is not the same as the top level domain (TLD). E.g. -`co.uk` is a public suffix, but the TLD is `uk`. -Privately registered suffixes (such as `blogspot.co.at`) which are part of the -public suffix list too, are ignored. - -**Information** - -* `name:` `intelmq.bots.experts.domain_suffix.expert` -* `lookup:` no -* `public:` yes -* `cache (redis db):` - -* `description:` extracts the domain suffix from the FQDN - -**Configuration Parameters** - -* `field`: either `"fqdn"` or `"reverse_dns"` -* `suffix_file`: path to the suffix file - -**Rule processing** - -A short summary of how the rules are processed: - -The simple ones: - -.. code-block:: - - com - at - gv.at - -`example.com` leads to `com`, `example.gv.at` leads to `gv.at`. - -Wildcards: - -.. code-block:: - - *.example.com - -`www.example.com` leads to `www.example.com`. - -And additionally the exceptions, together with the above wildcard rule: - -.. code-block:: - - !www.example.com - -`www.example.com` now does not lead to `www.example.com`, but to `example.com`. - - -**Database** - -Use this command to create/update the database and reload the bot: - -.. code-block:: bash - - intelmq.bots.experts.domain_suffix.expert --update-database - - -.. _intelmq.bots.experts.domain_valid.expert: - -Domain valid -^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.domain_valid.expert` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Checks if a domain is valid by performing multiple validity checks (see below). - -**Configuration Parameters** - - * `domain_field`: The name of the field to be validated.
- * `tlds_domains_list`: local file with all valid TLDs, default location ``/opt/intelmq/var/lib/bots/domain_valid/tlds-alpha-by-domain.txt`` - -**Description** - -If the field given in `domain_field` does not exist in the event, the event is dropped. -If the domain contains underscores (``_``), the event is dropped. -If the domain is not valid according to the `validators library `_, the event is dropped. -If the domain's last part (the TLD) is not in the TLD-list configured by parameter ``tlds_domains_list``, the field is dropped. -Latest TLD list: https://data.iana.org/TLD/ - - -.. _intelmq.bots.experts.deduplicator.expert: - -Deduplicator -^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.deduplicator.expert` -* `lookup:` redis cache -* `public:` yes -* `cache (redis db):` 6 -* `description:` Bot responsible for ignore duplicated messages. The bot can be configured to perform deduplication just looking to specific fields on the message. - -**Configuration Parameters** - -* **Cache parameters** (see in section :ref:`common-parameters`) -* `bypass`- true or false value to bypass the deduplicator. When set to true, messages will not be deduplicated. Default: false - -**Parameters for "fine-grained" deduplication** - -* `filter_type`: type of the filtering which can be "blacklist" or "whitelist". The filter type will be used to define how Deduplicator bot will interpret the parameter `filter_keys` in order to decide whether an event has already been seen or not, i.e., duplicated event or a completely new event. - - * "whitelist" configuration: only the keys listed in `filter_keys` will be considered to verify if an event is duplicated or not. - * "blacklist" configuration: all keys except those in `filter_keys` will be considered to verify if an event is duplicated or not. -* `filter_keys`: string with multiple keys separated by comma. 
Please note that the `time.observation` key will not be considered even if defined, because the system always ignores that key. - -When using a whitelist field pattern and a small number of fields (keys), it becomes more important that these fields exist in the events themselves. -If a field does not exist, but is part of the hashing/deduplication, this field will be ignored. -If such events should not get deduplicated, you need to filter them out before the deduplication process, e.g. using a sieve expert. -See also `this discussion thread `_ on the mailing-list. - -**Parameters Configuration Example** - -*Example 1* - -The bot with this configuration will detect duplication only based on `source.ip` and `destination.ip` keys. - -.. code-block:: yaml - - parameters: - redis_cache_db: 6 - redis_cache_host: "127.0.0.1" - redis_cache_password: null - redis_cache_port: 6379 - redis_cache_ttl: 86400 - filter_type: "whitelist" - filter_keys: "source.ip,destination.ip" - -*Example 2* - -The bot with this configuration will detect duplication based on all keys, except `source.ip` and `destination.ip` keys. - -.. code-block:: yaml - - parameters: - redis_cache_db: 6 - redis_cache_host: "127.0.0.1" - redis_cache_password: null - redis_cache_port: 6379 - redis_cache_ttl: 86400 - filter_type: "blacklist" - filter_keys: "source.ip,destination.ip" - -**Flushing the cache** - -To flush the deduplicator's cache, you can use the `redis-cli` tool. Enter the database used by the bot and submit the `flushdb` command: - -.. code-block:: bash - - redis-cli -n 6 - flushdb - - -.. 
_intelmq.bots.experts.do_portal.expert: - -DO Portal Expert Bot -^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.do_portal.expert` -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` The DO portal retrieves the contact information from a DO portal instance: http://github.com/certat/do-portal/ - -**Configuration Parameters** - -* `mode` - Either `replace` or `append` the new abuse contacts in case there are existing ones. -* `portal_url` - The URL to the portal, without the API-path. The used URL is `$portal_url + '/api/1.0/ripe/contact?cidr=%s'`. -* `portal_api_key` - The API key of the user to be used. Must have sufficient privileges. - - -.. _intelmq.bots.experts.field_reducer.expert: - -Field Reducer Bot -^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.field_reducer.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` The field reducer bot is capable of removing fields from events. - -**Configuration Parameters** - -* `type` - either `"whitelist"` or `"blacklist"` -* `keys` - Can be a JSON-list of field names (`["raw", "source.account"]`) or a string with a comma-separated list of field names (`"raw,source.account"`). - -**Whitelist** - -Only the fields in `keys` will be passed along. - -**Blacklist** - -The fields in `keys` will be removed from events. - - -.. _intelmq.bots.experts.filter.expert: - -Filter -^^^^^^ - -The filter bot is capable of filtering specific events.
- -**Information** - -* `name:` `intelmq.bots.experts.filter.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` A simple filter for messages (drop or pass) based on an exact string comparison or regular expression - -**Configuration Parameters** - -*Parameters for filtering with key/value attributes* - -* ``filter_key`` - key from data format -* ``filter_value`` - value for the key -* ``filter_action`` - action when a message matches the criteria (possible actions: keep/drop) -* ``filter_regex`` - attribute determines if the ``filter_value`` shall be treated as regular expression or not. - If this attribute is not empty (can be ``true``, ``yes`` or whatever), the bot uses python's ```re.search`` `_ function to evaluate the filter with regular expressions. - If this attribute is empty or evaluates to false, an exact string comparison is performed. A check on string *inequality* can be achieved with the usage of *Paths* described below. - -*Parameters for time based filtering* - -* `not_before` - events before this time will be dropped -* `not_after` - events after this time will be dropped - -Both parameters accept string values describing absolute or relative time: - -* absolute - - * basically anything parseable by datetime parser, e.g. 
"2015-09-12T06:22:11+00:00" - * `time.source` taken from the event will be compared to this value to decide the filter behavior - -* relative - - * accepted string formatted like this " ", where epoch could be any of following strings (could optionally end with trailing 's'): hour, day, week, month, year - * time.source taken from the event will be compared to the value (now - relative) to decide the filter behavior - -*Examples of time filter definition* - -* ```"not_before" : "2015-09-12T06:22:11+00:00"``` events older than the specified time will be dropped -* ```"not_after" : "6 months"``` just events older than 6 months will be passed through the pipeline - -**Possible paths** - - * `_default`: default path, according to the configuration - * `action_other`: Negation of the default path - * `filter_match`: For all events the filter matched on - * `filter_no_match`: For all events the filter does not match - - ======= ====== ============ ============== ============== ================= - action match `_default` `action_other` `filter_match` `filter_no_match` - ======= ====== ============ ============== ============== ================= - keep ✓ ✓ ✗ ✓ ✗ - keep ✗ ✗ ✓ ✗ ✓ - drop ✓ ✗ ✓ ✓ ✗ - drop ✗ ✓ ✗ ✗ ✓ - ======= ====== ============ ============== ============== ================= - -In `DEBUG` logging level, one can see that the message is sent to both matching paths, also if one of the paths is not configured. Of course the message is only delivered to the configured paths. - - -.. _intelmq.bots.experts.format_field.expert: - -Format Field -^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.format_field.expert` -* `lookup:` none -* `cache (redis db):` none -* `description:` String method operations on column values - -**Configuration Parameters** - -*Parameters for stripping chars* - -* `strip_columns` - A list of strings or a string of comma-separated values with field names. The names must match the IntelMQ Data Format field names. E.g. - - .. 
code-block:: json - - "columns": [ - "malware.name", - "extra.tags" - ], - - is equivalent to: - - .. code-block:: json - - "columns": "malware.name,extra.tags" - -* `strip_chars` - a set of characters to remove as leading/trailing characters(default: space) - -*Parameters for replacing chars* - -* `replace_column` - key from data format -* `old_value` - the string to search for -* `new_value` - the string to replace the old value with -* `replace_count` - number specifying how many occurrences of the old value you want to replace(default: `1`) - -*Parameters for splitting string to list of string* - -* `split_column` - key from data format -* `split_separator` - specifies the separator to use when splitting the string(default: `,`) - -Order of operation: `strip -> replace -> split`. These three methods can be combined such as first strip and then split. - - -.. _intelmq.bots.experts.generic_db_lookup.expert: - -Generic DB Lookup -^^^^^^^^^^^^^^^^^ - -This bot is capable for enriching intelmq events by lookups to a database. -Currently only PostgreSQL and SQLite are supported. - -If more than one result is returned, a ValueError is raised. - -**Information** - -* `name:` `intelmq.bots.experts.generic_db_lookup.expert` -* `lookup:` database -* `public:` yes -* `cache (redis db):` none -* `description:` This bot is capable for enriching intelmq events by lookups to a database. - -**Configuration Parameters** - -*Connection* - -* `engine`: `postgresql` or `sqlite` -* `database`: string, defaults to "intelmq", database name or the SQLite filename -* `table`: defaults to "contacts" - -*PostgreSQL specific* - -* `host`: string, defaults to "localhost" -* `password`: string -* `port`: integer, defaults to 5432 -* `sslmode`: string, defaults to "require" -* `user`: defaults to "intelmq" - -*Lookup* - -* `match_fields`: defaults to `{"source.asn": "asn"}` - -The value is a key-value mapping an arbitrary number **intelmq** field names **to table** column names. 
-The values are compared with `=` only. - -*Replace fields* - -* `overwrite`: defaults to `false`. Is applied per field -* `replace_fields`: defaults to `{"contact": "source.abuse_contact"}` - -`replace_fields` is again a key-value mapping an arbitrary number of **table** column names **to intelmq** field names - - -.. _intelmq.bots.experts.gethostbyname.expert: - -Gethostbyname -^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.gethostbyname.expert` -* `lookup:` DNS -* `public:` yes -* `cache (redis db):` none -* `description:` DNS name (FQDN) to IP - -**Configuration Parameters** - -- `fallback_to_url` If True and no `source.fqdn` present, use `source.url` instead while producing `source.ip` -- `gaierrors_to_ignore`: Optional, list (comma-separated) of gaierror codes to ignore, e.g. `-3` for EAI_AGAIN (Temporary failure in name resolution). Only accepts the integer values, not the names. -- `overwrite`: Boolean. If true, overwrite existing IP addresses. Default: False. - -**Description** - -Resolves the `source/destination.fqdn` hostname using the `gethostbyname` syscall and saves the resulting IP address as `source/destination.ip`. -The following gaierror resolution errors are ignored and treated as if the hostname cannot be resolved: - -- `-2`/`EAI_NONAME`: NAME or SERVICE is unknown -- `-4`/`EAI_FAIL`: Non-recoverable failure in name res. -- `-5`/`EAI_NODATA`: No address associated with NAME. -- `-8`/`EAI_SERVICE`: SERVICE not supported for `ai_socktype'. -- `-11`/`EAI_SYSTEM`: System error returned in `errno'. - -Other errors result in an exception if not ignored by the parameter `gaierrors_to_ignore` (see above). -All gaierrors can be found here: http://www.castaglia.org/proftpd/doc/devel-guide/src/lib/glibc-gai_strerror.c.html - - -.. 
_intelmq.bots.experts.http.expert_status: - -HTTP Status -^^^^^^^^^^^ - -Fetches the HTTP Status for a given URI - -**Information** - -* `name:` intelmq.bots.experts.http.expert_status -* `description:` The bot fetches the HTTP status for a given URL and saves it in the event. - -**Configuration Parameters** - -* `field:` The name of the field containing the URL to be checked (required). -* `success_status_codes:` A list of success status codes. If this parameter is omitted or the list is empty, successful status codes are the ones between 200 and 400. -* `overwrite:` Specifies if an existing 'status' value should be overwritten. - - -.. _intelmq.bots.experts.http.expert_content: - -HTTP Content -^^^^^^^^^^^^ - -Fetches an HTTP resource and checks if it contains a specific string. - -**Information** - -* `name:` intelmq.bots.experts.http.expert_content -* `description:` The bot fetches an HTTP resource and checks if it contains a specific string. - -**Configuration Parameters** - -* `field:` The name of the field containing the URL to be checked (defaults to `source.url`) -* `needle:` The string that the content available on URL is checked for -* `overwrite:` A boolean value that specifies if an existing 'status' value should be overwritten. - - -.. _intelmq.bots.experts.idea.expert: - -IDEA Converter -^^^^^^^^^^^^^^ - -Converts the event to IDEA format and saves it as JSON in the field `output`. All other fields are not modified. - -Documentation about IDEA: https://idea.cesnet.cz/en/index - -**Information** - -* `name:` intelmq.bots.experts.idea.expert -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` The bot does a best effort translation of events into the IDEA format. - -**Configuration Parameters** - -* `test_mode`: add `Test` category to mark all outgoing IDEA events as informal (meant to simplify setting up and debugging new IDEA producers) (default: `true`) - - -.. 
_intelmq.bots.experts.jinja.expert: - -Jinja2 Template Expert -^^^^^^^^^^^^^^^^^^^^^^ - -This bot lets you modify the content of your IntelMQ message fields using Jinja2 templates. - -Documentation about Jinja2 templating language: https://jinja.palletsprojects.com/ - -**Information** - -* `name:` intelmq.bots.experts.jinja.expert -* `description:` Modify the content of IntelMQ messages using jinja2 templates - -**Configuration Parameters** - -* `fields`: a dict containing as key the name of the field where the result of the Jinja2 template should be written to and as value either a Jinja2 template or a filepath to a Jinja2 template file (starting with ``file:///``). Because the experts decides if it is a filepath based on the value starting with ``file:///`` it is not possible to simply write values starting with ``file:///`` to fields. - The object containing the existing message will be passed to the Jinja2 template with the name ``msg``. - - .. code-block:: yaml - - fields: - output: The provider is {{ msg['feed.provider'] }}! - feed.url: "{{ msg['feed.url'] | upper }}" - extra.somejinjaoutput: file:///etc/intelmq/somejinjatemplate.j2 - - -.. _intelmq.bots.experts.lookyloo.expert: - -Lookyloo -^^^^^^^^ - -Lookyloo is a website screenshotting and analysis tool. For more information and installation instructions visit https://www.lookyloo.eu/ - -The bot sends a request for `source.url` to the configured Lookyloo instance and saves the retrieved website screenshot link in the field `screenshot_url`. Lookyloo only *queues* the website for screenshotting, therefore the screenshot may not be directly ready after the bot requested it. -The `pylookyloo` library is required for this bot. -The `http_user_agent` parameter is passed on, but not other HTTP-related parameter like proxies. - -Events without `source.url` are ignored. 
- -**Information** - -* `name:` intelmq.bots.experts.lookyloo.expert -* `description:` LookyLoo expert bot for automated website screenshots - -**Configuration Parameters** - -* `instance_url`: LookyLoo instance to connect to - - -.. _intelmq.bots.experts.maxmind_geoip.expert: - -MaxMind GeoIP -^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.experts.maxmind_geoip.expert -* `lookup:` local database -* `public:` yes -* `cache (redis db):` none -* `description:` IP to geolocation - -**Setup** - -The bot requires the MaxMind's `geoip2` Python library, version 2.2.0 has been tested. - -To download the database a free license key is required. More information can be found at https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/ - -**Configuration Parameters** - -* `database`: Path to the local database, e.g. `"/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb"` -* `overwrite`: boolean -* `use_registered`: boolean. MaxMind has two country ISO codes: One for the physical location of the address and one for the registered location. Default is `false` (backwards-compatibility). See also https://github.com/certtools/intelmq/pull/1344 for a short explanation. -* `license_key`: License key is necessary for downloading the GeoLite2 database. - -**Database** - -Use this command to create/update the database and reload the bot: - -.. code-block:: bash - - intelmq.bots.experts.maxmind_geoip.expert --update-database - - -.. _intelmq.bots.experts.misp.expert: - -MISP -^^^^ - -Queries a MISP instance for the `source.ip` and adds the MISP Attribute UUID and MISP Event ID of the newest attribute found. 
- -**Information** - -* `name:` intelmq.bots.experts.misp.expert -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` IP address to MISP attribute and event - -**Configuration Parameters** - -* `misp_key`: MISP Authkey -* `misp_url`: URL of MISP server (with trailing '/') - -Generic parameters used in this bot: - -* `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`) - - -.. _intelmq.bots.experts.mcafee.expert_mar: - -McAfee Active Response lookup -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` intelmq.bots.experts.mcafee.expert_mar -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` Queries DXL bus for hashes, IP addresses or FQDNs. - -**Configuration Parameters** - -* `dxl_config_file`: location of file containing required information to connect to DXL bus -* `lookup_type`: One of: - - `Hash`: looks up `malware.hash.md5`, `malware.hash.sha1` and `malware.hash.sha256` - - `DestSocket`: looks up `destination.ip` and `destination.port` - - `DestIP`: looks up `destination.ip` - - `DestFQDN`: looks up in `destination.fqdn` - - -.. _intelmq.bots.experts.modify.expert: - -Modify -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.modify.expert` -* `lookup:` local config -* `public:` yes -* `cache (redis db):` none -* `description:` modify expert bot allows you to change arbitrary field values of events just using a configuration file - -**Configuration Parameters** - -* `configuration_path`: filename -* `case_sensitive`: boolean, default: true -* `maximum_matches`: Maximum number of matches. Processing stops after the limit is reached. Default: no limit (`null`, `0`). -* `overwrite`: Overwrite any existing fields by matching rules. Default if the parameter is given: `true`, for backwards compatibility. Default will change to `false` in version 3.0.0. 
- -**Configuration File** - -The modify expert bot allows you to change arbitrary field values of events just using a configuration file. Thus it is possible to adapt certain values or adding new ones only by changing JSON-files without touching the code of many other bots. - -The configuration is called `modify.conf` and looks like this: - -.. code-block:: json - - [ - { - "rulename": "Standard Protocols http", - "if": { - "source.port": "^(80|443)$" - }, - "then": { - "protocol.application": "http" - } - }, - { - "rulename": "Spamhaus Cert conficker", - "if": { - "malware.name": "^conficker(ab)?$" - }, - "then": { - "classification.identifier": "conficker" - } - }, - { - "rulename": "bitdefender", - "if": { - "malware.name": "bitdefender-(.*)$" - }, - "then": { - "malware.name": "{matches[malware.name][1]}" - } - }, - { - "rulename": "urlzone", - "if": { - "malware.name": "^urlzone2?$" - }, - "then": { - "classification.identifier": "urlzone" - } - }, - { - "rulename": "default", - "if": { - "feed.name": "^Spamhaus Cert$" - }, - "then": { - "classification.identifier": "{msg[malware.name]}" - } - } - ] - -In our example above we have five groups labeled `Standard Protocols http`, -`Spamhaus Cert conficker`, `bitdefender`, `urlzone` and `default`. -All sections will be considered, in the given order (from top to bottom). - -Each rule consists of *conditions* and *actions*. -Conditions and actions are dictionaries holding the field names of events -and regular expressions to match values (selection) or set values (action). -All matching rules will be applied in the given order. -The actions are only performed if all selections apply. - -If the value for a condition is an empty string, the bot checks if the field does not exist. -This is useful to apply default values for empty fields. - - -**Actions** - -You can set the value of the field to a string literal or number. 
- -In addition you can use the `standard Python string format syntax `_ -to access the values from the processed event as `msg` and the match groups -of the conditions as `matches`, see the bitdefender example above. -Group 0 (`[0]`) contains the full matching string. See also the documentation on `re.Match.group `_. - -Note that `matches` will also contain the match groups -from the default conditions if there were any. - -**Examples** - -We have an event with `feed.name = Spamhaus Cert` and `malware.name = confickerab`. The expert loops over all sections in the file and eventually enters section `Spamhaus Cert`. First, the default condition is checked, it matches! OK, going on. Otherwise the expert would have selected a different section that has not yet been considered. Now, go through the rules, until we hit the rule `conficker`. We combine the conditions of this rule with the default conditions, and both rules match! So we can apply the action: `classification.identifier` is set to `conficker`, the trivial name. - -Assume we have an event with `feed.name = Spamhaus Cert` and `malware.name = feodo`. The default condition matches, but no others. So the default action is applied. The value for `classification.identifier` will be set to `feodo` by `{msg[malware.name]}`. - -**Types** - -If the rule is a string, a regular expression search is performed, also for numeric values (`str()` is called on them). If the rule is numeric for numeric values, a simple comparison is done. If other types are mixed, a warning will be thrown. - -For boolean values, the comparison value needs to be `true` or `false` as in JSON they are written all-lowercase. - - -.. 
_intelmq.bots.experts.national_cert_contact_certat.expert: - -National CERT contact lookup by CERT.AT -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.national_cert_contact_certat.expert` -* `lookup:` https -* `public:` yes -* `cache (redis db):` none -* `description:` https://contacts.cert.at offers an IP address to national CERT contact (and cc) mapping. See https://contacts.cert.at for more info. - -**Configuration Parameters** - -* `filter`: (true/false) act as a filter for AT. -* `overwrite_cc`: set to true if you want to overwrite any potentially existing cc fields in the event. - - -.. _intelmq.bots.experts.rdap.expert: - -RDAP -^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.rdap.expert` -* `lookup:` http/https -* `public:` yes/no -* `cache (redis db):` 5 -* `description:` Asks rdap servers for a given domain. - -**Configuration Parameters** - -* ``rdap_order``: a list of strings, default ``['abuse', 'technical']``. Search order of contacts with these roles. -* ``rdap_bootstrapped_servers``: Customized RDAP servers. Do not forget the trailing slash. For example: - -.. code-block:: bash - - { - "at": { - "url": "rdap.server.at/v1/", - "auth": { - "type": "jwt", - "token": "ey..." - } - }, - "de": "rdap.service:1337/v1/" - } - - -.. _intelmq.bots.experts.recordedfuture_iprisk.expert: - -RecordedFuture IP risk -^^^^^^^^^^^^^^^^^^^^^^ - -This bot tags events with the score found in Recorded Future's large IP risk list. - -**Information** - -* `name:` `intelmq.bots.experts.recordedfuture_iprisk.expert` -* `lookup:` local database -* `public:` no -* `cache (redis db):` none -* `description:` Record risk score associated to source and destination IP if they are present. Assigns 0 to IP addresses not in the RF list. 
- -**Configuration Parameters** - -* `database`: Location of csv file obtained from recorded future API (a script is provided to download the large IP set) -* `overwrite`: set to true if you want to overwrite any potentially existing risk score fields in the event. -* `api_token`: This needs to contain a valid API token to download the latest database data. - -**Description** - -For both `source.ip` and `destination.ip` the corresponding risk score is fetched from a local database created from Recorded Future's API. The score is recorded in `extra.rf_iprisk.source` and `extra.rf_iprisk.destination`. If a lookup for an IP fails a score of 0 is recorded. - -See https://www.recordedfuture.com/products/api/ and speak with your recorded future representative for more information. - - -The list is obtained from the Recorded Future API and needs a valid API token. -The large list contains all IPs with a risk score of 25 or more. -If an IP is not present in the database, a risk score of 0 is given. - -A script is supplied that may be run as intelmq to update the database. - -**Database** - -Use this command to create/update the database and reload the bot: - -.. code-block:: bash - - intelmq.bots.experts.recordedfuture_iprisk.expert --update-database - - -.. _intelmq.bots.experts.reverse_dns.expert: - -Reverse DNS -^^^^^^^^^^^ - -For both `source.ip` and `destination.ip` the PTR record is fetched and the first valid result is used for `source.reverse_dns`/`destination.reverse_dns`. - -**Information** - -* `name:` `intelmq.bots.experts.reverse_dns.expert` -* `lookup:` DNS -* `public:` yes -* `cache (redis db):` 8 -* `description:` IP to domain - -**Configuration Parameters** - -* **Cache parameters** (see section :ref:`common-parameters`) -* `cache_ttl_invalid_response`: The TTL for cached invalid responses. -* `overwrite`: Overwrite existing fields. Default: `True` if not given (for backwards compatibility, will change in version 3.0.0) - - -.. 
_intelmq.bots.experts.rfc1918.expert: - -RFC1918 -^^^^^^^ - -Several RFCs define ASNs, IP Addresses and Hostnames (and TLDs) reserved for *documentation*. -Events or fields of events can be dropped if they match the criteria of either being reserved for documentation (e.g. AS 64496, Domain `example.com`) -or belonging to a local area network (e.g. `192.168.0.0/24`). These checks can be applied to URLs, IP Addresses, FQDNs and ASNs. - -It is configurable if the whole event should be dropped ("policies") or just the field removed, as well as which fields should be checked. - -Sources: - -* :rfc:`1918` -* :rfc:`2606` -* :rfc:`3849` -* :rfc:`4291` -* :rfc:`5737` -* https://en.wikipedia.org/wiki/IPv4 -* https://en.wikipedia.org/wiki/Autonomous\_system\_(Internet) - -**Information** - -* `name:` `intelmq.bots.experts.rfc1918.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` removes events or single fields with invalid data - -**Configuration Parameters** - -* `fields`: string, comma-separated list of fields e.g. `destination.ip,source.asn,source.url`. Supported fields are: - - * `destination.asn` & `source.asn` - * `destination.fqdn` & `source.fqdn` - * `destination.ip` & `source.ip` - * `destination.url` & `source.url` -* `policy`: string, comma-separated list of policies, e.g. `del,drop,drop`. `drop` causes the entire event to be removed if the field matches, `del` causes only the field to be removed. - -With the example parameter values given above, this means that: - -* If a `destination.ip` value is part of a reserved network block, the field will be removed (policy "del"). -* If a `source.asn` value is in the range of reserved AS numbers, the event will be removed altogether (policy "drop"). -* If a `source.url` value contains a host with either an IP address part of a reserved network block, or a reserved domain name (or with a reserved TLD), the event will be dropped (policy "drop"). - - -.. 
_intelmq.bots.experts.ripe.expert: - -RIPE -^^^^ - -Online RIPE Abuse Contact and Geolocation Finder for IP addresses and Autonomous Systems. - -**Information** - -* `name:` `intelmq.bots.experts.ripe.expert` -* `lookup:` HTTPS API -* `public:` yes -* `cache (redis db):` 10 -* `description:` IP to abuse contact - -**Configuration Parameters** - -* **Cache parameters** (see section :ref:`common-parameters`) -* `mode`: either `append` (default) or `replace` -* `query_ripe_db_asn`: Query for ASNs at `http://rest.db.ripe.net/abuse-contact/as%s.json`, default `true` -* `query_ripe_db_ip`: Query for IPs at `http://rest.db.ripe.net/abuse-contact/%s.json`, default `true` -* `query_ripe_stat_asn`: Query for ASNs at `https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=%s`, default `true` -* `query_ripe_stat_ip`: Query for IPs at `https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=%s`, default `true` -* `query_ripe_stat_geolocation`: Query for IPs at `https://stat.ripe.net/data/maxmind-geo-lite/data.json?resource=%s`, default `true` - - -.. _intelmq.bots.experts.sieve.expert: - -Sieve -^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.sieve.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` Filtering with a sieve-based configuration language - -**Configuration Parameters** - -* `file`: Path to sieve file. Syntax can be validated with `intelmq_sieve_expert_validator`. - - -**Description** - -The sieve bot is used to filter and/or modify events based on a set of rules. The -rules are specified in an external configuration file and with a syntax *similar* -to the `Sieve language <http://sieve.info/>`_ used for mail filtering. - -Each rule defines a set of matching conditions on received events. Events can be -matched based on keys and values in the event. Conditions can be combined using -parentheses and the boolean operators ``&&`` and ``||``. 
If the processed event -matches a rule's conditions, the corresponding actions are performed. Actions -can specify whether the event should be kept or dropped in the pipeline -(filtering actions) or if keys and values should be changed (modification -actions). - -**Requirements** - -To use this bot, you need to install the required dependencies: - -.. code-block:: bash - - pip3 install -r intelmq/bots/experts/sieve/REQUIREMENTS.txt - -**Examples** - -The following excerpts illustrate some of the basic features of the sieve file -format: - -.. code-block:: - - if :exists source.fqdn { - keep // aborts processing of subsequent rules and forwards the event. - } - - - if :notexists source.abuse_contact || source.abuse_contact =~ '.*@example.com' { - drop // aborts processing of subsequent rules and drops the event. - } - - if source.ip << '192.0.0.0/24' { - add! comment = 'bogon' // sets the field comment to this value and overwrites existing values - path 'other-path' // the message is sent to the given path - } - - if classification.type :in ['phishing', 'malware-distribution'] && source.fqdn =~ '.*\.(ch|li)$' { - add! comment = 'domainabuse' - keep - } elif classification.type == 'scanner' { - add! comment = 'ignore' - drop - } else { - remove comment - } - - -**Reference** - -*Sieve File Structure* - -The sieve file contains an arbitrary number of rules of the form: - -.. code-block:: - - if EXPRESSION { - ACTIONS - } elif EXPRESSION { - ACTIONS - } else { - ACTIONS - } - - -Nested if-statements and mixed if statements and rules in the same scope are possible. - -*Expressions* - -Each rule specifies one or more expressions to match an event based on its keys -and values. Event keys are specified as strings without quotes. String values -must be enclosed in single quotes. Numeric values can be specified as integers -or floats and are unquoted. IP addresses and network ranges (IPv4 and IPv6) are -specified with quotes. 
List values for use with list/set operators are specified -as string, float, int, bool and string literals separated by commas and enclosed -in square brackets. -Expression statements can be combined and chained using -parentheses and the boolean operators ``&&`` and ``||``. -The following operators may be used to match events: - - * `:exists` and `:notexists` match if a given key exists, for example: - - ``if :exists source.fqdn { ... }`` - - * `==` and `!=` match for equality of strings, numbers, and booleans, for example: - - ``if feed.name != 'acme-security' || feed.accuracy == 100 || extra.false_positive == false { ... }`` - - * `:contains` matches on substrings. - - * `=~` matches strings based on the given regular expression. `!~` is the inverse regular expression match. - - * Numerical comparisons are evaluated with `<`, `<=`, `>`, `>=`. - - * `<<` matches if an IP address is contained in the specified network range: - - ``if source.ip << '10.0.0.0/8' { ... }`` - - * String values to match against can also be specified as lists of strings, which have separate operators. For example: - - ``if source.ip :in ['8.8.8.8', '8.8.4.4'] { ... }`` - - In this case, the event will match if it contains a key `source.ip` with - either value `8.8.8.8` or `8.8.4.4`. - - There are also `:containsany` to match at least one of a list of substrings, and `:regexin` to match at least one of - a list of regular expressions, similar to the `:contains` and `=~` operators. - - * Lists of numeric values support `:in` to check for inclusion in a list of numbers: - - ``if source.port :in [80, 443] { ... }`` - - * `:equals` tests for equality between lists, including order. Example for checking a hostname-port pair: - ``if extra.host_tuple :equals ['dns.google', 53] { ... }`` - * `:setequals` tests for set-based equality (ignoring duplicates and value order) between a list of given values. 
Example for checking for the first nameserver of two domains, regardless of the order they are given in the list: - ``if extra.hostnames :setequals ['ns1.example.com', 'ns1.example.mx'] { ... }`` - - * `:overlaps` tests if there is at least one element in common between the list specified by a key and a list of values. Example for checking if at least one of the ICS, database or vulnerable tags is given: - ``if extra.tags :overlaps ['ics', 'database', 'vulnerable'] { ... }`` - - * `:subsetof` tests if the list of values from the given key only contains values from a set of values specified as the argument. Example for checking for a host that has only ns1.example.com and/or ns2.[...] as its apparent hostname: - ``if extra.hostnames :subsetof ['ns1.example.com', 'ns2.example.com'] { ... }`` - - * `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. Example for matching hosts with at least the IoT and vulnerable tags: - ``if extra.tags :supersetof ['iot', 'vulnerable'] { ... }`` - - * `:before` tests if the date value occurred before the given time. The time might be absolute (basically anything parseable by pendulum parser, eg. “2015-09-12T06:22:11+00:00”) or relative (accepted string formatted like this “<integer> <epoch>”, where epoch could be any of following strings (could optionally end with trailing ‘s’): hour, day, week, month, year) - ``if time.observation :before '1 week' { ... }`` - - * `:after` tests if the date value occurred after the given time; see `:before` - ``if time.observation :after '2015-09-12' { ... } # happened after midnight the 12th Sep`` - - * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: - ``if extra.has_known_vulns == true { ... }`` - - * The combination of multiple expressions can be done using parentheses and boolean operators: - - ``if (source.ip == '127.0.0.1') && (comment == 'add field' || classification.taxonomy == 'vulnerable') { ... 
}`` - - * Any single expression or a parenthesised group of expressions can be negated using `!`: - - ``if ! source.ip :contains '127.0.0.' || ! ( source.ip == '172.16.0.5' && source.port == 25 ) { ... }`` - - * Note: Since 3.0.0, list-based operators are used on list values, such as `foo :in [1, 2, 3]` instead of `foo == [1, 2, 3]` - and `foo :regexin ['.mx', '.zz']` rather than `foo =~ ['.mx', '.zz']`, and similarly for `:containsany` vs `:contains`. - Besides that, `:notcontains` has been removed, with e.g. `foo :notcontains ['.mx', '.zz']` now being represented using negation - as `! foo :contains ['.mx', '.zz']`. - -*Actions* - -If part of a rule matches the given conditions, the actions enclosed in `{` and -`}` are applied. By default, all events that are matched or not matched by rules -in the sieve file will be forwarded to the next bot in the pipeline, unless the -`drop` action is applied. - - * `add` adds a key value pair to the event. It can be a string, number, or boolean. This action only applies if the key is not yet defined in the event. If the key is already defined, the action is ignored. Example: - - ``add comment = 'hello, world'`` - - Some basic mathematical expressions are possible, but currently only relative time specifications are supported. - For example: - ``add time.observation += '1 hour'`` - ``add time.observation -= '10 hours'`` - - * `add!` same as above, but will force overwrite the key in the event. - - * `update` modifies an existing value for a key. Only applies if the key is already defined. If the key is not defined in the event, this action is ignored. This supports mathematical expressions like above. Example: - - ``update feed.accuracy = 50`` - - Some basic mathematical expressions are possible, but currently only relative time specifications are supported. 
- For example: - ``update time.observation += '1 hour'`` - ``update time.observation -= '10 hours'`` - - * `remove` removes a key/value from the event. Action is ignored if the key is not defined in the event. Example: - - ``remove extra.comments`` - - * `keep` sends the message to the next bot in the pipeline (same as the default behaviour), and stops sieve file processing. - - ``keep`` - - * `path` sets the path (named queue) the message should be sent to (implicitly - or with the command `keep`). The named queue needs to be configured in the - pipeline, see the User Guide for more information. - - ``path 'named-queue'`` - - You can as well set multiple destination paths with the same syntax as for value lists: - - ``path ['one', 'two']`` - - This will result in two identical messages, one sent to the path `one` and the other sent to the path `two`. - - If the path is not configured, the error looks like: - - ``` - File "/path/to/intelmq/intelmq/lib/pipeline.py", line 353, in send - for destination_queue in self.destination_queues[path]: - KeyError: 'one' - ``` - - * `drop` marks the event to be dropped. The event will not be forwarded to the next bot in the pipeline. The sieve file processing is interrupted upon - reaching this action. No other actions may be specified besides the `drop` action within `{` and `}`. - - -*Comments* - -Comments may be used in the sieve file: all characters after `//` and until the end of the line will be ignored. - - -*Validating a sieve file* - -Use the following command to validate your sieve files: - -.. code-block:: bash - - $ intelmq.bots.experts.sieve.validator - usage: intelmq.bots.experts.sieve.validator [-h] sievefile - - Validates the syntax of sievebot files. - - positional arguments: - sievefile Sieve file - - optional arguments: - -h, --help show this help message and exit - - -.. 
_intelmq.bots.experts.splunk_saved_search.expert: - -Splunk saved search -^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.experts.splunk_saved_search.expert` -* `lookup`: splunk database -* `public`: no -* `cache (redis db)`: none -* `description`: Enrich an event from Splunk search results. - -**Configuration Parameters** - -* **HTTP parameters** (see above) -* `auth_token`: String, Splunk API authentication token -* `url`: String, base URL of the Splunk REST API -* `retry_interval`: Integer, optional, default 5, number of seconds to wait between polling for search results to be available -* `saved_search`: String, name of Splunk saved search to run -* `search_parameters`: Array of string->string, optional, default ``{}``, IntelMQ event fields containing the data to search for, mapped to parameters of the Splunk saved search. Example: - - .. code-block:: json - - "search_parameters": { - "source.ip": "ip" - } - -* `result_fields`: Array of string->string, optional, default ``{}``, Splunk search result fields mapped to IntelMQ event fields to store the results in. Example: - - .. code-block:: json - - "result_fields": { - "username": "source.account" - } - -* `not_found`: List of strings, default ``[ "warn", "send" ]``, what to do if the search returns zero results. All specified actions are performed. Valid values are: - - * `warn`: log a warning message - * `send`: send the event on unmodified - * `drop`: drop the message - - * `send` and `drop` are mutually exclusive - -* `multiple_result_handling`: List of strings, default ``[ "warn", "use_first", "send" ]``, what to do if the search returns more than one result. All specified actions are performed. 
Valid values are: - - * `limit`: limit the search so that duplicates are impossible - * `warn`: log a warning message - * `use_first`: use the first search result - * `ignore`: do not modify the event - * `send`: send the event on - * `drop`: drop the message - - * `limit` cannot be combined with any other value - * `send` and `drop` are mutually exclusive - * `ignore` and `use_first` are mutually exclusive - -* `overwrite`: Boolean or null, optional, default null, whether search results overwrite values already in the message or not. If null, attempting to add a field that already exists throws an exception. - -**Description** - -Runs a saved search in Splunk using fields in an event, adding fields from the search result into the event. - -Splunk documentation on saved searches: https://docs.splunk.com/Documentation/Splunk/latest/Report/Createandeditreports - -The saved search should take parameters according to the `search_parameters` configuration and deliver results according to `result_fields`. The examples above match a saved search of this format: - -:: - - index="dhcp" ipv4address="$ip$" | ... | fields _time username ether - -The time window used is the one saved with the search. - -Waits for Splunk to return an answer for each message, so slow searches will delay the entire botnet. If you anticipate a load of more than one search every few seconds, consider running multiple load-balanced copies of this bot. - - -.. _intelmq.bots.experts.taxonomy.expert: - -Taxonomy -^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.taxonomy.expert` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Adds the `classification.taxonomy` field according to the RSIT taxonomy. - -Please note that there is a :issue:`slight mismatch of IntelMQ's taxonomy to the upstream taxonomy <1409>`, but it should not matter here much. - -**Configuration Parameters** - -None. 
- -**Description** - -Information on the "Reference Security Incident Taxonomy" can be found here: https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force - -For brevity, "type" means `classification.type` and "taxonomy" means `classification.taxonomy`. - -- If taxonomy is missing, and type is given, the corresponding taxonomy is set. -- If neither taxonomy nor type is given, taxonomy is set to "other" and type to "unknown". -- If taxonomy is given, but type is not, type is set to "unknown". - - -.. _intelmq.bots.experts.threshold.expert: - -Threshold -^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.experts.threshold.expert` -* `lookup`: redis cache -* `public`: no -* `cache (redis db)`: 11 -* `description`: Check if the number of similar messages during a specified time interval exceeds a set value. - -**Configuration Parameters** - -* **Cache parameters** (see section :ref:`common-parameters`), especially ``redis_cache_ttl`` as number of seconds before threshold counter is reset. Used since version 3.1 (before version 3.1, `timeout` was used). -* `filter_keys`: String, comma-separated list of field names to consider or ignore when determining which messages are similar. -* `filter_type`: String, `whitelist` (consider only the fields in `filter_keys`) or `blacklist` (consider everything but the fields in `filter_keys`). -* `threshold`: Integer, number of messages required before propagating one. In forwarded messages, the threshold is saved in the message as `extra.count`. -* `add_keys`: Array of string->string, optional, fields and values to add (or update) to propagated messages. Example: - - .. code-block:: json - - "add_keys": { - "classification.type": "spam", - "comment": "Started more than 10 SMTP connections" - } - -**Limitations** - -This bot has certain limitations and is not a true threshold filter (yet). It works like this: - -1. Every incoming message is hashed according to the `filter_*` parameters. -2. 
The hash is looked up in the cache and the count is incremented by 1, and the TTL of the key is (re-)set to the timeout. -3. If the new count matches the threshold exactly, the message is forwarded. Otherwise it is dropped. - -Please note: Even if a message is sent, any further identical messages are dropped, if the time difference to the last message is less than the timeout! The counter is not reset if the threshold is reached. - - -.. _intelmq.bots.experts.tor_nodes.expert: - -Tor Nodes -^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.tor_nodes.expert` -* `lookup:` local database -* `public:` yes -* `cache (redis db):` none -* `description:` check if IP is tor node - -**Configuration Parameters** - -* `database`: Path to the database - -**Database** - -Use this command to create/update the database and reload the bot: - -.. code-block:: bash - - intelmq.bots.experts.tor_nodes.expert --update-database - -.. _intelmq.bots.experts.trusted_introducer_lookup.expert: - -Trusted Introducer Lookup Expert -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.trusted_introducer_lookup.expert` -* `lookup:` internet -* `public:` yes -* `cache (redis db):` none -* `description:` Lookups data from trusted introducer public teams list. - -**Configuration Parameters** - -* **order**: Possible values are 'domain', 'asn'. You can set multiple values, so first match wins. -* If 'domain' is set, it will lookup the `source.fqdn` field. It will go from high-order to low-order, i.e. 1337.super.example.com -> super.example.com -> example.com -> `.com` -* If 'asn' is set, it will lookup `source.asn`. - -After a match, the abuse contact will be fetched from the trusted introducer teams list and will be stored in the event as `source.abuse_contact`. -If there is no match, the event will not be enriched and will be sent to the next configured step. - - -.. 
_intelmq.bots.experts.tuency.expert: - -Tuency -^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.tuency.expert` -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` Queries the `IntelMQ API `_ of a `Tuency Contact Database `_ instance. - -**Configuration Parameters** - -- `url`: Tuency instance URL. Without the API path. -- `authentication_token`: The Bearer authentication token. Without the ``Bearer`` prefix. -- `overwrite`: Boolean, if existing data in ``source.abuse_contact`` should be overwritten. Default: true - -**Description** - -*tuency* is a contact management database addressing the needs of CERTs. -Users of *tuency* can configure contact addresses and delivery settings for IP objects (addresses, netblocks), Autonomous Systems, and (sub-)domains. -This expert queries the information for ``source.ip`` and ``source.fqdn`` using the following other fields: - -- ``classification.taxonomy`` -- ``classification.type`` -- ``feed.provider`` -- ``feed.name`` - -These fields therefore need to exist, otherwise the message is skipped. - -The API parameter "feed_status" is currently set to "production" constantly, until IntelMQ supports this field. - -The API answer is processed as following. For the notification interval: - -- If *suppress* is true, then ``extra.notify`` is set to false. -- Otherwise: - - - If the interval is *immediate*, then ``extra.ttl`` is set to 0. - - Otherwise the interval is converted into seconds and saved in ``extra.ttl``. - -For the contact lookup: -For both fields *ip* and *domain*, the *destinations* objects are iterated and its *email* fields concatenated to a comma-separated list in ``source.abuse_contact``. - -The IntelMQ fields used by this bot may change in the next IntelMQ release, as soon as better suited fields are available. - - -.. 
_intelmq.bots.experts.truncate_by_delimiter.expert: - -Truncate By Delimiter -^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.truncate_by_delimiter.expert` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Cut string if length is bigger than maximum length - -**Configuration Parameters** - -* `delimiter`: The delimiter to be used for truncating, for example ``.`` or ``;`` -* `max_length`: The maximum string length. -* `field`: The field to be truncated, e.g. ``source.fqdn`` - -The given field is truncated step-by-step using the delimiter from the beginning, until the field is shorter than `max_length`. - -Example: Cut through a long domain with a dot. The string is truncated until the domain does not exceed the configured maximum length. - -- input domain (e.g. ``source.fqdn``): ``www.subdomain.web.secondsubomain.test.domain.com`` -- delimiter: ``.`` -- ``max_length``: 20 -- Resulting value ``test.domain.com`` (length: 15 characters) - -.. _intelmq.bots.experts.url.expert: - -URL -^^^ - -This bot extracts additional information from `source.url` and `destination.url` fields. It can fill the following fields: - -* `source.fqdn` -* `source.ip` -* `source.port` -* `source.urlpath` -* `source.account` -* `destination.fqdn` -* `destination.ip` -* `destination.port` -* `destination.urlpath` -* `destination.account` -* `protocol.application` -* `protocol.transport` - -**Information** - -* `name:` `intelmq.bots.experts.url.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` extract additional information from the URL - -**Configuration Parameters** - -* `overwrite`: boolean, replace existing fields? -* `skip_fields`: list of fields to not extract from the URL - - - -.. _intelmq.bots.experts.url2fqdn.expert: - -Url2FQDN -^^^^^^^^ - -This bot is deprecated and will be removed in version 4.0. Use 'URL Expert' bot instead. 
- -This bot extracts the Host from the `source.url` and `destination.url` fields and -writes it to `source.fqdn` or `destination.fqdn` if it is a hostname, or -`source.ip` or `destination.ip` if it is an IP address. - -**Information** - -* `name:` `intelmq.bots.experts.url2fqdn.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` writes domain name from URL to FQDN or IP address - -**Configuration Parameters** - -* `overwrite`: boolean, replace existing FQDN / IP address? - - -.. _intelmq.bots.experts.uwhoisd.expert: - -uWhoisd -^^^^^^^ - -`uWhoisd <https://github.com/Lookyloo/uwhoisd>`_ is a universal Whois server that supports -caching and stores whois entries for historical purposes. - -The bot sends a request for `source.url`, `source.fqdn`, `source.ip` or `source.asn` -to the configured uWhoisd instance and saves the retrieved whois entry: - -* If both `source.url` and `source.fqdn` are present, it will only do a request for `source.fqdn`, - as the hostname of `source.url` should be the same as `source.fqdn`. - The whois entry will be saved in `extra.whois.fqdn`. -* If `source.ip` is present, the whois entry will be saved in `extra.whois.ip`. -* If `source.asn` is present, the whois entry will be saved in `extra.whois.asn`. - -Events without `source.url`, `source.fqdn`, `source.ip`, or `source.asn`, are ignored. - -**Note**: requesting a whois entry for a fully qualified domain name (FQDN) only works if the request -only contains the domain. uWhoisd will automatically strip the subdomain part if it is present in the request. 
- -Example: `https://www.theguardian.co.uk` - -* TLD: `co.uk` (uWhoisd uses the `Mozilla public suffix list <https://publicsuffix.org/list/>`_ as a reference) -* Domain: `theguardian.co.uk` -* Subdomain: `www` - -The whois request will be for `theguardian.co.uk` - -**Information** - -* `name:` `intelmq.bots.experts.uwhoisd.expert` -* `description:` uWhoisd is a universal Whois server - -**Configuration Parameters** - -* `server`: IP or hostname to connect to (default: localhost) -* `port`: Port to connect to (default: 4243) - - -.. _intelmq.bots.experts.wait.expert: - -Wait -^^^^ - -**Information** - -* `name:` `intelmq.bots.experts.wait.expert` -* `lookup:` none -* `public:` yes -* `cache (redis db):` none -* `description:` Waits for some time or until a queue size is lower than a given number. - -**Configuration Parameters** - -* `queue_db`: Database number of the database, default `2`. Converted to integer. -* `queue_host`: Host of the database, default `localhost`. -* `queue_name`: Name of the queue to be watched, default `null`. This is not the name of a bot but the queue's name. -* `queue_password`: Password for the database, default `None`. -* `queue_polling_interval`: Interval to poll the list length in seconds. Converted to float. -* `queue_port`: Port of the database, default `6379`. Converted to integer. -* `queue_size`: Maximum size of the queue, default `0`. Compared by <=. Converted to integer. -* `sleep_time`: Time to sleep before sending the event. - -Only one of the two modes is possible. -If a queue name is given, the queue mode is active. If the sleep_time is a number, sleep mode is active. -Otherwise the dummy mode is active, the events are just passed without an additional delay. - -Note that SIGHUPs and reloads interrupt the sleeping. - -.. _output bots: - -*********** -Output Bots -*********** - - -.. 
_intelmq.bots.outputs.amqptopic.output: - -AMQP Topic -^^^^^^^^^^ - -Sends data to an AMQP Server -See https://www.rabbitmq.com/tutorials/amqp-concepts.html for more details on amqp topic exchange. - -Requires the `pika python library `_. - -**Information** - -* `name`: `intelmq.bots.outputs.amqptopic.output` -* `lookup`: to the amqp server -* `public`: yes -* `cache`: no -* `description`: Sends the event to a specified topic of an AMQP server - -**Configuration parameters** - -* connection_attempts : The number of connection attempts to defined server, defaults to 3 -* connection_heartbeat : Heartbeat to server, in seconds, defaults to 3600 -* connection_host : Name/IP for the AMQP server, defaults to 127.0.0.1 -* connection_port : Port for the AMQP server, defaults to 5672 -* connection_vhost : Virtual host to connect, on an http(s) connection would be http:/IP/ -* content_type : Content type to deliver to AMQP server, currently only supports "application/json" -* delivery_mode : 1 - Non-persistent, 2 - Persistent. On persistent mode, messages are delivered to 'durable' queues and will be saved to disk. -* exchange_durable : If set to True, the exchange will survive broker restart, otherwise will be a transient exchange. -* exchange_name : The name of the exchange to use -* exchange_type : Type of the exchange, e.g. `topic`, `fanout` etc. 
-* keep_raw_field : If set to True, the message 'raw' field will be sent -* password : Password for authentication on your AMQP server -* require_confirmation : If set to True, an exception will be raised if a confirmation error is received -* routing_key : The routing key for your amqptopic -* `single_key` : Only send the field instead of the full event (expecting a field name as string) -* username : Username for authentication on your AMQP server -* `use_ssl` : Use ssl for the connection, make sure to also set the correct port, usually 5671 (`true`/`false`) -* message_hierarchical_output: Convert the message to hierarchical JSON, default: false -* message_with_type : Include the type in the sent message, default: false -* message_jsondict_as_string: Convert fields of type JSONDict (extra) to string, default: false - -If no authentication should be used, leave username or password empty or `null`. - -**Examples of usage** - -* Useful to send events to a RabbitMQ exchange topic to be further processed in other platforms. - -**Confirmation** - -If routing key or exchange name are invalid or non-existent, the message is -accepted by the server but we receive no confirmation. -If parameter require_confirmation is True and no confirmation is received, an -error is raised. - -**Common errors** - -*Unroutable messages / Undefined destination queue* - -The destination exchange and queue need to exist beforehand, -with your preferred settings (e.g. durable, `lazy queue `_). -If the error message says that the message is "unroutable", the queue doesn't exist. - - -.. 
_intelmq.bots.outputs.bro_file.output: - -Bro file -^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.outputs.bro_file.output` -* `lookup`: no -* `public`: yes -* `cache`: no -* `description`: BRO (zeek) file output - -**Description** -File example: -``` -#fields indicator indicator_type meta.desc meta.cif_confidence meta.source -xxx.xxx.xxx.xxx Intel::ADDR phishing 100 MISP XXX -www.testdomain.com Intel::DOMAIN apt 85 CERT -``` - -.. _intelmq.bots.outputs.cif3.output: - -CIF3 API -^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.cif3.output` -* `lookup:` no -* `public:` no -* `cache (redis db):` none -* `description:` Connect to a CIFv3 instance and add new indicator if not there already. - -The cifsdk library >= 3.0.0rc4,<4.0.0 is required, see -`REQUIREMENTS.txt `_. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `add_feed_provider_as_tag`: boolean (use `false` when in doubt) -* `cif3_additional_tags`: list of tags to set on submitted indicator(s) -* `cif3_feed_confidence`: float, used when mapping a feed's confidence fails or - if static confidence param is true -* `cif3_static_confidence`: bool, when true it always sends the `cif3_feed_confidence` value - as confidence rather than dynamically interpret feed value (use false when in doubt) -* `cif3_token`: str, API key for accessing CIF -* `cif3_url`: str, URL of the CIFv3 instance -* `fireball`: int, used to batch events before submitting to a CIFv3 instance - (default is 500 per batch, use 0 to disable batch and send each event as received) -* `http_verify_cert`: bool, used to tell whether the CIFv3 instance cert should be verified - (default true, but can be set to false if using a local test instance) - -By default, CIFv3 does an upsert check and will only insert entirely new indicators. Otherwise, -upsert matches will have their count increased by 1. By default, the CIF3 output bot will batch indicators -up to 500 at a time prior to doing a single bulk send. 
If the output bot doesn't receive a full 500 -indicators within 5 seconds of the first received indicator, it will send what it has so far. - -CIFv3 should be able to process indicators as fast as IntelMQ can -send them. - -(More details can be found in the docstring of `output.py `_.) - -.. _intelmq.bots.outputs.elasticsearch.output: - -Elasticsearch Output Bot -^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name`: `intelmq.bots.outputs.elasticsearch.output` -* `lookup`: yes -* `public`: yes -* `cache`: no -* `description`: Output Bot that sends events to Elasticsearch - -Only ElasticSearch version 7 is supported. - -It is also possible to feed data into ElasticSearch using ELK-Stack via Redis and Logstash, see :doc:`ELK-Stack` for more information. This method supports various different versions of ElasticSearch. - -**Configuration parameters** - -* `elastic_host`: Name/IP for the Elasticsearch server, defaults to 127.0.0.1 -* `elastic_port`: Port for the Elasticsearch server, defaults to 9200 -* `elastic_index`: Index for the Elasticsearch output, defaults to intelmq -* `rotate_index`: If set, will index events using the date information associated with the event. - - Options: 'never', 'daily', 'weekly', 'monthly', 'yearly'. Using 'intelmq' as the elastic_index, the following are examples of the generated index names: - - .. code-block:: - - 'never' --> intelmq - 'daily' --> intelmq-2018-02-02 - 'weekly' --> intelmq-2018-42 - 'monthly' --> intelmq-2018-02 - 'yearly' --> intelmq-2018 - -* `http_username`: HTTP basic authentication username -* `http_password`: HTTP basic authentication password -* `use_ssl`: Whether to use SSL/TLS when connecting to Elasticsearch. Default: False -* `http_verify_cert`: Whether to require verification of the server's certificate. 
Default: False -* `ssl_ca_certificate`: An optional path to a certificate bundle to use for verifying the server -* `ssl_show_warnings`: Whether to show warnings if the server's certificate cannot be verified. Default: True -* `replacement_char`: If set, dots ('.') in field names will be replaced with this character prior to indexing. This is for backward compatibility with ES 2.X. Default: null. Recommended for ES2.X: '_' -* `flatten_fields`: In ES, some queries and aggregations work better if the fields are flat and not JSON. Here you can provide a list of fields to convert. - - Can be a list of strings (fieldnames) or a string with field names separated by a comma (,). E.g. `extra,field2` or `['extra', 'field2']` - Default: ['extra'] - -See `contrib/elasticsearch/elasticmapper` for a utility for creating Elasticsearch mappings and templates. - -If using `rotate_index`, the resulting index name will be of the form [elastic_index]-[event date]. -To query all intelmq indices at once, use an alias (https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html), or a multi-index query. - -The data in ES can be retrieved with the HTTP-Interface: - -.. code-block:: bash - - > curl -XGET 'http://localhost:9200/intelmq/events/_search?pretty=True' - - -.. _intelmq.bots.outputs.file.output: - -File -^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.file.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` output messages (reports or events) to file - -Multithreading is disabled for this bot, as this would lead to corrupted files. - -**Configuration Parameters** - -* `encoding_errors_mode`: By default `'strict'`, see for more details and options: https://docs.python.org/3/library/functions.html#open For example with `'backslashreplace'` all characters which cannot be properly encoded will be written escaped with backslashes. -* `file`: file path of output file. 
Missing directories will be created if possible with the mode 755. -* `format_filename`: Boolean if the filename should be formatted (default: `false`). -* `hierarchical_output`: If true, the resulting dictionary will be hierarchical (field names split by dot). -* `single_key`: if `none`, the whole event is saved (default); otherwise the bot saves only contents of the specified key. In case of `raw` the data is base64 decoded. - -**Filename formatting** - -The filename can be formatted using Python's string formatting functions if `format_filename` is set. See https://docs.python.org/3/library/string.html#formatstrings - -For example: - * The filename `.../{event[source.abuse_contact]}.txt` will be (for example) `.../abuse@example.com.txt`. - * `.../{event[time.source]:%Y-%m-%d}` results in the date of the event used as filename. - -If the field used in the format string is not defined, `None` will be used as fallback. - - -.. _intelmq.bots.outputs.files.output: - -Files -^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.files.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` saving of messages as separate files - -**Configuration Parameters** - -* `dir`: output directory (default `/opt/intelmq/var/lib/bots/files-output/incoming`) -* `tmp`: temporary directory (must reside on the same filesystem as `dir`) (default: `/opt/intelmq/var/lib/bots/files-output/tmp`) -* `suffix`: extension of created files (default `.json`) -* `hierarchical_output`: if `true`, use nested dictionaries; if `false`, use flat structure with dot separated keys (default) -* `single_key`: if `none`, the whole event is saved (default); otherwise the bot saves only contents of the specified key - - -.. 
_intelmq.bots.outputs.mcafee.output_esm_ip: - -McAfee Enterprise Security Manager -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.mcafee.output_esm_ip` -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` Writes information out to McAfee ESM watchlist - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `esm_ip`: IP address of ESM instance -* `esm_user`: username of user entitled to write to watchlist -* `esm_pw`: password of user -* `esm_watchlist`: name of the watchlist to write to -* `field`: name of the IntelMQ field to be written to ESM - - -.. _intelmq.bots.outputs.misp.output_feed: - -MISP Feed -^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.misp.output_feed` -* `lookup:` no -* `public:` no -* `cache (redis db):` none -* `description:` Create a directory layout in the MISP Feed format - -The PyMISP library >= 2.4.119.1 is required, see `REQUIREMENTS.txt `_. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `misp_org_name`: Org name which creates the event, string -* `misp_org_uuid`: Org UUID which creates the event, string -* `output_dir`: Output directory path, e.g. `/opt/intelmq/var/lib/bots/mispfeed-output`. Will be created if it does not exist and possible. -* `interval_event`: The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 hour", string. - -**Usage in MISP** - -Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server. See `the MISP documentation on Feeds `_ for more information - - -.. _intelmq.bots.outputs.misp.output_api: - -MISP API -^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.misp.output_api` -* `lookup:` no -* `public:` no -* `cache (redis db):` none -* `description:` Connect to a MISP instance and add event as MISPObject if not there already. 
- -The PyMISP library >= 2.4.120 is required, see -`REQUIREMENTS.txt `_. - -**Configuration Parameters** - -* **Feed parameters** (see above) -* `add_feed_provider_as_tag`: boolean (use `true` when in doubt) -* `add_feed_name_as_tag`: boolean (use `true` when in doubt) -* `misp_additional_correlation_fields`: list of fields for which the correlation flags will be enabled (in addition to those which are in significant_fields) -* `misp_additional_tags`: list of tags to set; these tags are not searched for when looking for duplicates -* `misp_key`: string, API key for accessing MISP -* `misp_publish`: boolean, if a new MISP event should be set to "publish". - - Expert setting as MISP may really make it "public"! - (Use `false` when in doubt.) -* `misp_tag_for_bot`: string, used to mark MISP events -* `misp_to_ids_fields`: list of fields for which the `to_ids` flags will be set -* `misp_url`: string, URL of the MISP server -* `significant_fields`: list of intelmq field names - -The `significant_fields` values -will be searched for in all MISP attribute values -and if all values are found in the same MISP event, no new MISP event -will be created. -Instead, if the existing MISP events have the same feed.provider -and match closely, their timestamp will be updated. - -If a new MISP event is inserted the `significant_fields` and the -`misp_additional_correlation_fields` will be the attributes -where correlation is enabled. - -Make sure to build the IntelMQ Botnet in a way the rate of incoming -events is what MISP can handle, as IntelMQ can process events much faster -than MISP (which is by design as MISP is for manual handling). -Also remove the fields of the IntelMQ events with an expert bot -that you do not want to be inserted into MISP. - -(More details can be found in the docstring of `output_api.py `_.) - - -.. _intelmq.bots.outputs.mongodb.output: - -MongoDB -^^^^^^^ - -Saves events in a MongoDB either as hierarchical structure or flat with full key names. 
`time.observation` and `time.source` are saved as datetime objects, not as ISO formatted string. - -**Information** - -* `name:` `intelmq.bots.outputs.mongodb.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` MongoDB is the bot responsible to send events to a MongoDB database - -**Configuration Parameters** - -* `collection`: MongoDB collection -* `database`: MongoDB database -* `db_user` : Database user that should be used if you enabled authentication -* `db_pass` : Password associated to `db_user` -* `host`: MongoDB host (FQDN or IP) -* `port`: MongoDB port, default: 27017 -* `hierarchical_output`: Boolean (default true) as MongoDB does not allow saving keys with dots, we split the dictionary in sub-dictionaries. -* `replacement_char`: String (default `'_'`) used as replacement character for the dots in key names if hierarchical output is not used. - -**Installation Requirements** - -.. code-block:: bash - - pip3 install pymongo>=2.7.1 - -The bot has been tested with pymongo versions 2.7.1, 3.4 and 3.10.1 (server versions 2.6.10 and 3.6.8). - - -.. _intelmq.bots.outputs.redis.output: - -Redis -^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.redis.output` -* `lookup:` to the Redis server -* `public:` yes -* `cache (redis db):` none -* `description:` Output Bot that sends events to a remote Redis server/queue. 
- -**Configuration Parameters** - -* `redis_db`: remote server database, e.g.: 2 -* `redis_password`: remote server password -* `redis_queue`: remote server list (queue), e.g.: "remote-server-queue" -* `redis_server_ip`: remote server IP address, e.g.: 127.0.0.1 -* `redis_server_port`: remote server Port, e.g.: 6379 -* `redis_timeout`: Connection timeout, in milliseconds, e.g.: 50000 -* `hierarchical_output`: whether output should be sent in hierarchical JSON format (default: false) -* `with_type`: Send the `__type` field (default: true) - -**Examples of usage** - -* Can be used to send events to be processed in another system. E.g.: send events to Logstash. -* In a multi tenant installation can be used to send events to external/remote IntelMQ instance. Any expert bot queue can receive the events. -* In a complex configuration can be used to create logical sets in IntelMQ-Manager. - - -.. _intelmq.bots.outputs.rt.output: - -Request Tracker -^^^^^^^^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.rt.output` -* `lookup:` to the Request Tracker instance -* `public:` yes -* `cache (redis db):` none -* `description:` Output Bot that creates Request Tracker tickets from events. - -**Description** - -The bot creates tickets in Request Tracker and uses event fields for the ticket body text. The bot follows the workflow of the RTIR: - -- create ticket in Incidents queue (or any other queue) - - - all event fields are included in the ticket body, - - event attributes are assigned to tickets' CFs according to the attribute mapping, - - ticket taxonomy can be assigned according to the CF mapping. 
If you use a taxonomy different from `ENISA RSIT `_, consider using some extra attribute field and do value mapping with modify or sieve bot, - -- create linked ticket in Investigations queue, if these conditions are met - - - if first ticket destination was Incidents queue, - - if source.abuse_contact is specified, - - if description text is specified in the field appointed by configuration, - -- RT/RTIR is supposed to do relevant notifications by script working on condition "On Create", -- configuration option investigation_fields specifies which event fields have to be included in the investigation, -- Resolve Incident ticket, according to configuration (Investigation ticket status should depend on RT script configuration), - -Take extra caution not to flood your ticketing system with an enormous amount of tickets. Add extra filtering for that to pass only critical events to the RT, and/or deduplicate events. - -**Configuration Parameters** - -- `rt_uri`, `rt_user`, `rt_password`, `verify_cert`: RT API endpoint connection details, string. -- `queue`: ticket destination queue. If set to 'Incidents', 'Investigations' ticket will be created if create_investigation is set to true, string. -- `CF_mapping`: mapping attributes to ticket CFs, dictionary. E.g. `{"event_description.text":"Description","source.ip":"IP","extra.classification.type":"Incident Type","classification.taxonomy":"Classification"}` -- `final_status`: the final status for the created ticket, string. E.g. `resolved` if you want to resolve the created ticket. The linked Investigation ticket will be resolved automatically by RTIR scripts. -- `create_investigation`: if an Investigation ticket should be created (in case of RTIR workflow). `true` or `false`, boolean. -- `investigation_fields`: attributes to include into investigation ticket, comma-separated string. E.g. 
`time.source,source.ip,source.port,source.fqdn,source.url,classification.taxonomy,classification.type,classification.identifier,event_description.url,event_description.text,malware.name,protocol.application,protocol.transport`. -- `description_attr`: which event attribute contains the text message being sent to the recipient, string. If it is not specified or not found in the event, the Investigation ticket is not going to be created. Example: `extra.message.text`. - - -.. _intelmq.bots.outputs.restapi.output: - -REST API -^^^^^^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.restapi.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` REST API is the bot responsible to send events to a REST API listener through POST - -**Configuration Parameters** - -* `auth_token`: the user name / HTTP header key -* `auth_token_name`: the password / HTTP header value -* `auth_type`: one of: `"http_basic_auth"`, `"http_header"` -* `hierarchical_output`: boolean -* `host`: destination URL -* `use_json`: boolean - - -.. _intelmq.bots.outputs.rpz_file.output: - -RPZ -^^^^^^^^ - -The DNS RPZ functionality acts as a "DNS firewall". The bot generates a blocklist. - -**Information** - -* `name:` `intelmq.bots.outputs.rpz_file.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Generate RPZ file - -**Configuration Parameters** - -* `cname`: example rpz.yourdomain.eu -* `organization_name`: Your organisation name -* `rpz_domain`: Information website about RPZ -* `hostmaster_rpz_domain`: Technical website -* `rpz_email`: Contact email -* `ttl`: Time to live -* `ncachttl`: DNS negative cache -* `serial`: Time stamp or another numbering -* `refresh`: Refresh time -* `retry`: Retry time -* `expire`: Expiration time -* `test_domain`: For test domain, it's added in the first rpz file (after header) - -File example: -``` -$TTL 3600 -@ SOA rpz.yourdomain.eu. hostmaster.rpz.yourdomain.eu. 2105260601 60 60 432000 60 -NS localhost. 
-; -; yourdomain.eu. CERT.XX Response Policy Zones (RPZ) -; Last updated: 2021-05-26 06:01:41 (UTC) -; -; Terms Of Use: https://rpz.yourdomain.eu -; For questions please contact rpz [at] yourdomain.eu -; -*.maliciousdomain.com CNAME rpz.yourdomain.eu. -*.secondmaliciousdomain.com CNAME rpz.yourdomain.eu. -``` - -**Description** - -The prime motivation for creating this feature was to protect users from badness on the Internet related to known-malicious global identifiers such as host names, domain names, IP addresses, or nameservers. -More information: https://dnsrpz.info - - -.. _intelmq.bots.outputs.smtp_batch.output: - -SMTP Batch Output Bot - -Aggregate events by e-mail addresses in the `source.abuse_contact` field and batch send them at once as a zipped CSV file attachment in a GPG signed message. - -**Information** - -* `name:` intelmq.bots.outputs.smtp_batch.output -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Sends events collected over a period of time via SMTP in a GPG signed messages - -**Configuration Parameters** - -* `alternative_mails`: string or null. Path to CSV in the form `original@email.com,alternative@email.com`. - - Needed when some of the recipients ask you to forward their e-mails to another address. -* `attachment_name`: string. Attachment file name for the outgoing messages. May contain date formatting like this `%Y-%m-%d`. Example: "events_%Y-%m-%d" will appear as "events_2022-12-01.zip". -* `bcc`: list or null. A list of e-mails to be put in the `Bcc` field for every mail. -* `email_from`: string. Sender's e-mail of the outgoing messages. -* `gpg_key`: string or null. The Key or the fingerprint of a GPG key stored in ~/.gnupg keyring folder. -* `gpg_pass`: string or null. Password for the GPG key if needed. -* `mail_template`: string. Path to the file containing the body of the mail for the outgoing messages. -* `ignore_older_than_days`: int or null, default 0. 
If 1..n skip all events with time.observation older than 1..n day; 0 disabled (allow all). - - If your queue gets stuck for a reason, you do not want to send old (and probably already solved) events. -* `limit_results`: int or null. Intended as a debugging option, allows loading just the first N e-mails from the queue. -* `redis_cache_db`: int. Redis database used for event aggregation. As the databases < 10 are reserved for the IntelMQ core, a bigger number is recommended. -* `redis_cache_host`: string -* `redis_cache_port`: int -* `redis_cache_ttl`: int. Recommended 1728000 for 20 days. -* `smtp_server`: mixed. SMTP server information and credentials. - - See SMTP parameter of https://github.com/CZ-NIC/envelope#sending - - Examples: "mailer", `{"host": "mailer", "port": 587, "user": "john", "password": "123"}`, `["mailer", 587, "john", "password"]` -* `subject`: string. Subject for the outgoing messages. May contain date formatting like this `%Y-%m-%d`. Example: "IntelMQ weekly warning (%d.%m.%Y)". -* `testing_to`: string or null. Tester's e-mail. - -When the bot is run normally by IntelMQ, it just aggregates the events for later use into a custom Redis database. -If run through CLI (by a cron or manually), it shows e-mail messages that are ready to be sent and lets you send them to the tester's e-mail OR to abuse contact e-mails. -E-mails are sent in a zipped CSV file, delimited by a comma, while keeping strings in double quotes. -Note: The field "raw" gets base64 decoded if possible. Bytes `\n` and `\r` are replaced with "\n" and "\r" strings in order to guarantee best CSV files readability both in Microsoft Office and LibreOffice. (A multiline string may be stored in "raw" which completely confuses Microsoft Excel.) 
- -Launch it like that: -` cli [--tester tester's email]` -Ex: -`intelmq.bots.outputs.smtp_batch.output smtp_batch-output-cz --cli --tester your-email@example.com` - -CLI flags: -``` - -h, --help show this help message and exit - --cli initiate CLI interface - --tester TESTING_TO tester's e-mail - --ignore-older-than-days IGNORE_OLDER_THAN_DAYS - 1..n skip all events with time.observation older than 1..n day; 0 disabled (allow all) - --gpg-key GPG_KEY fingerprint of gpg key to be used - --limit-results LIMIT_RESULTS - Just send first N mails. - --send Sends now, without dialog. -``` - -You can schedule the batch sending easily with a cron script, I.E. put this into `crontab -e` of the `intelmq` user: - -``` -# Send the e-mails every day at 6 AM -0 6 * * * /usr/local/bin/intelmq.bots.outputs.smtp_batch.output smtp_batch-output-cz cli --ignore-older-than-days 4 --send > /tmp/intelmq-send.log -``` - -.. _intelmq.bots.outputs.smtp.output: - -SMTP Output Bot -^^^^^^^^^^^^^^^ - -Sends a MIME Multipart message containing the text and the event as CSV for every single event. - -**Information** - -* `name:` `intelmq.bots.outputs.smtp.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Sends events via SMTP - -**Configuration Parameters** - -* `fieldnames`: a list of field names to be included in the email, comma separated string or list of strings. If empty, no attachment is sent - this can be useful if the actual data is already in the body (parameter ``text``) or the ``subject``. -* `mail_from`: string. Supports formatting, see below -* `mail_to`: string of email addresses, comma separated. Supports formatting, see below -* `smtp_host`: string -* `smtp_password`: string or null, Password for authentication on your SMTP server -* `smtp_port`: port -* `smtp_username`: string or null, Username for authentication on your SMTP server -* `ssl`: boolean -* `starttls`: boolean -* `subject`: string. 
Supports formatting, see below -* `text`: string or null. Supports formatting, see below - -For several strings you can use values from the event using the `standard Python string format syntax `_. -Access the event's values with `{ev[source.ip]}` and similar. Any non-existent fields will result in `None`. -For example, to set the recipient(s) to the value given in the event's `source.abuse_contact` field, use this as `mail_to` parameter: `{ev[source.abuse_contact]}` - -Authentication is optional. If both username and password are given, these -mechanisms are tried: CRAM-MD5, PLAIN, and LOGIN. - -Client certificates are not supported. If `http_verify_cert` is true, TLS certificates are checked. - - -.. _intelmq.bots.outputs.sql.output: - -SQL -^^^ - -**Information** - -* `name:` `intelmq.bots.outputs.sql.output` -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` SQL is the bot responsible to send events to a PostgreSQL, SQLite, or MSSQL Database, e.g. the IntelMQ :doc:`eventdb` -* `notes`: When activating autocommit, transactions are not used: http://initd.org/psycopg/docs/connection.html#connection.autocommit - -**Configuration Parameters** - -The parameters marked with 'PostgreSQL' will be sent to libpq via psycopg2. Check the `libpq parameter documentation `_ for the versions you are using. - -* `autocommit`: `psycopg's autocommit mode `_, optional, default True -* `connect_timeout`: Database connect_timeout, optional, default 5 seconds -* `engine`: 'postgresql', 'sqlite', or 'mssql' -* `database`: Database or SQLite file -* `host`: Database host -* `jsondict_as_string`: save JSONDict fields as JSON string, boolean. Default: true (like in versions before 1.1) -* `port`: Database port -* `user`: Database user -* `password`: Database password -* `sslmode`: Database sslmode, can be `'disable'`, `'allow'`, `'prefer'` (default), `'require'`, `'verify-ca'` or `'verify-full'`. 
See postgresql docs: https://www.postgresql.org/docs/current/static/libpq-connect.html#libpq-connect-sslmode -* `table`: name of the database table into which events are to be inserted -* `fields`: list of fields to read from the event. If None, read all fields -* `reconnect_delay`: number of seconds to wait before reconnecting in case of an error -* `fail_on_errors`: If any error should cause the bot to fail (raise an exception) or otherwise rollback. If false (default), the bot eventually waits and re-try (e.g. re-connect) etc. to solve the issue. If true, the bot raises an exception and - depending on the IntelMQ error handling configuration - stops. - -PostgreSQL -~~~~~~~~~~ - -You have two basic choices to run PostgreSQL: - -1. on the same machine as intelmq, then you could use Unix sockets if available on your platform -2. on a different machine. In which case you would need to use a TCP connection and make sure you give the right connection parameters to each psql or client call. - -Make sure to consult your PostgreSQL documentation -about how to allow network connections and authentication in case 2. - -**PostgreSQL Version** - -Any supported version of PostgreSQL should work (v>=9.2 as of Oct 2016) `[1] `_. - -If you use PostgreSQL server v >= 9.4, it gives you the possibility -to use the time-zone `formatting string `_ "OF" for date-times -and the `GiST index for the CIDR type `_. This may be useful depending on how -you plan to use the events that this bot writes into the database. - -**How to install** - -Use `intelmq_psql_initdb` to create initial SQL statements -from `harmonization.conf`. The script will create the required table layout -and save it as `/tmp/initdb.sql` - -You need a PostgreSQL database-user to own the result database. -The recommendation is to use the name `intelmq`. -There may already be such a user for the PostgreSQL database-cluster -to be used by other bots. (For example from setting up -the expert/certbund_contact bot.) 
- -Therefore if still necessary: create the database-user -as postgresql superuser, which usually is done via the system user `postgres`: - -.. code-block:: bash - - createuser --no-superuser --no-createrole --no-createdb --encrypted --pwprompt intelmq - -Create the new database: - -.. code-block:: bash - - createdb --encoding='utf-8' --owner=intelmq intelmq-events - -(The encoding parameter should ensure the right encoding on platform -where this is not the default.) - -Now initialize it as database-user `intelmq` (in this example -a network connection to localhost is used, so you would get to test -if the user `intelmq` can authenticate): - -.. code-block:: bash - - psql -h localhost intelmq-events intelmq .read /tmp/initdb.sql - -Then, set the `database` parameter to the `your-db.db` file path. - -.. _stomp output bot: - -.. _intelmq.bots.outputs.stomp.output: - -**MSSQL** - -For MSSQL support, the library `pymssql>=2.2` is required. - -STOMP -^^^^^ - -**Information** - -* `name`: intelmq.bots.outputs.stomp.output -* `lookup`: yes -* `public`: yes -* `cache (redis db)`: none -* `description`: This collector will push data to any STOMP stream. STOMP stands for Streaming Text Oriented Messaging Protocol. See: https://en.wikipedia.org/wiki/Streaming_Text_Oriented_Messaging_Protocol - -**Requirements** -: - -Install the stomp.py library, e.g. `apt install python3-stomp.py` or `pip install stomp.py`. - -You need a CA certificate, client certificate and key file from the organization / server you are connecting to. -Also you will need a so called "exchange point". - -**Configuration Parameters** - -* `exchange`: STOMP *destination* to push at, e.g. 
"/exchange/_push" -* `heartbeat`: default: 60000 -* `message_hierarchical_output`: Boolean, default: false -* `message_jsondict_as_string`: Boolean, default: false -* `message_with_type`: Boolean, default: false -* `port`: Integer, default: 61614 -* `server`: Host or IP address of the STOMP server -* `single_key`: Boolean or string (field name), default: false -* `ssl_ca_certificate`: path to CA file -* `auth_by_ssl_client_certificate`: Boolean, default: true (note: set to false for new *n6* auth) -* `ssl_client_certificate`: path to client cert file, used only if `auth_by_ssl_client_certificate` is true -* `ssl_client_certificate_key`: path to client cert key file, used only if `auth_by_ssl_client_certificate` is true -* `username`: STOMP *login* (e.g., *n6* user login), used only if `auth_by_ssl_client_certificate` is false -* `password`: STOMP *passcode* (e.g., *n6* user API key), used only if `auth_by_ssl_client_certificate` is false - - -.. _intelmq.bots.outputs.tcp.output: - -TCP -^^^ - -**Information** - -* `name:` intelmq.bots.outputs.tcp.output -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` TCP is the bot responsible to send events to a TCP port (Splunk, another IntelMQ, etc..). - -Multihreading is disabled for this bot. - -**Configuration Parameters** - -* `counterpart_is_intelmq`: Boolean. If you are sending to an IntelMQ TCP collector, set this to True, otherwise e.g. with filebeat, set it to false. -* `ip`: IP of destination server -* `hierarchical_output`: true for a nested JSON, false for a flat JSON (when sending to a TCP collector). -* `port`: port of destination server -* `separator`: separator of messages, e.g. "\n", optional. When sending to a TCP collector, parameter shouldn't be present. - In that case, the output waits every message is acknowledged by "Ok" message the TCP collector bot implements. 
- -**Sending to an IntelMQ TCP collector** - -If you intend to link two IntelMQ instances via TCP, set the parameter `counterpart_is_intelmq` to true. The bot then awaits an "Ok" message to be received after each message is sent. -The TCP collector just sends "Ok" after every message it gets. - - -.. _intelmq.bots.outputs.templated_smtp.output: - -Templated SMTP -^^^^^^^^^^^^^^ - -Sends a MIME Multipart message built from an event and static text using Jinja2 templates. - -**Information** - -* `name:` intelmq.bots.outputs.templated_smtp.output -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Sends events via SMTP - -**Requirements** - -Install the required `jinja2` library: - -.. code-block:: bash - - pip3 install -r intelmq/bots/collectors/templated_smtp/REQUIREMENTS.txt - -**Configuration Parameters** - -Parameters: - -* `attachments`: list of objects with structure:: - - - content-type: string, templated, content-type to use. - text: string, templated, attachment text. - name: string, templated, filename of attachment. - -* `body`: string, optional, templated, body text. The default body template prints every field in the event except 'raw', in undefined order, one field per line, as "field: value". - -* `mail_from`: string, templated, sender address. - -* `mail_to`: string, templated, recipient addresses, comma-separated. - -* `smtp_host`: string, optional, default "localhost", hostname of SMTP server. - -* `smtp_password`: string, default null, password (if any) for authenticated SMTP. - -* `smtp_port`: integer, default 25, TCP port to connect to. - -* `smtp_username`: string, default null, username (if any) for authenticated SMTP. - -* `tls`: boolean, default false, whether to use SMTPS. If true, also set smtp_port to the SMTPS port. - -* `starttls`: boolean, default true, whether to use opportunistic STARTTLS over SMTP. - -* `subject`: string, optional, default "IntelMQ event", templated, e-mail subject line.
- -* `verify_cert`: boolean, default true, whether to verify the server certificate in STARTTLS or SMTPS. - -Authentication is attempted only if both username and password are specified. - -Templates are in Jinja2 format with the event provided in the variable "event". E.g.:: - - mail_to: "{{ event['source.abuse_contact'] }}" - -See the Jinja2 documentation at https://jinja.palletsprojects.com/ . - -As an extension to the Jinja2 environment, the function "from_json" is -available for parsing JSON strings into Python structures. This is -useful if you want to handle complicated structures in the "output" -field of an event. In that case, you would start your template with a -line like:: - - {%- set output = from_json(event['output']) %} - -and can then use "output" as a regular Python object in the rest of -the template. - -Attachments are template strings, especially useful for sending -structured data. E.g. to send a JSON document including "malware.name" -and all other fields starting with "source.":: - - attachments: - - content-type: application/json - text: | - { - "malware": "{{ event['malware.name'] }}", - {%- set comma = joiner(", ") %} - {%- for key in event %} - {%- if key.startswith('source.') %} - {{ comma() }}"{{ key }}": "{{ event[key] }}" - {%- endif %} - {%- endfor %} - } - name: report.json - -You are responsible for making sure that the text produced by the -template is valid according to the content-type. 
- -If you are migrating from the SMTP output bot that produced CSV format -attachments, use the following configuration to produce a matching -format:: - - attachments: - - content-type: text/csv - text: | - {%- set fields = ["classification.taxonomy", "classification.type", "classification.identifier", "source.ip", "source.asn", "source.port"] %} - {%- set sep = joiner(";") %} - {%- for field in fields %}{{ sep() }}{{ field }}{%- endfor %} - {% set sep = joiner(";") %} - {%- for field in fields %}{{ sep() }}{{ event[field] }}{%- endfor %} - name: event.csv - - -.. _intelmq.bots.outputs.touch.output: - -Touch -^^^^^ - -**Information** - -* `name:` intelmq.bots.outputs.touch.output -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Touches a file for every event received. - -**Configuration Parameters** - -* `path`: Path to the file to touch. - - -.. _intelmq.bots.outputs.udp.output: - -UDP -^^^ - -**Information** - -* `name:` intelmq.bots.outputs.udp.output -* `lookup:` no -* `public:` yes -* `cache (redis db):` none -* `description:` Output Bot that sends events to a remote UDP server. - -Multithreading is disabled for this bot. - -**Configuration Parameters** - -* `field_delimiter`: If the format is 'delimited' this will be added between fields. String, default: `"|"` -* `format`: Can be `'json'` or `'delimited'`. The JSON format outputs the event 'as-is'. Delimited will deconstruct the event and print each field:value separated by the field delimiter. See examples below. -* `header`: Header text to be sent in the UDP datagram, string. -* `keep_raw_field`: boolean, default: false -* `udp_host`: Destination server's host name or IP address -* `udp_port`: Destination port - -**Examples of usage** - -Consider the following event: - -..
code-block:: json - - {"raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware-distribution"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} - -With the following Parameters: - -* field_delimiter : | -* format : json -* Header : header example -* keep_raw_field : true -* ip : 127.0.0.1 -* port : 514 - -Resulting line in syslog: - -.. code-block:: - - Apr 29 11:01:29 header example {"raw": "MjAxNi8wNC8yNV8xMTozOSxzY2hpenppbm8ub21hcmF0aG9uLmNvbS9na0NDSnVUSE0vRFBlQ1pFay9XdFZOSERLbC1tWFllRk5Iai8sODUuMjUuMTYwLjExNCxzdGF0aWMtaXAtODUtMjUtMTYwLTExNC5pbmFkZHIuaXAtcG9vbC5jb20uLEFuZ2xlciBFSywtLDg5NzI=", "source": {"asn": 8972, "ip": "85.25.160.114", "url": "http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/", "reverse_dns": "static-ip-85-25-160-114.inaddr.ip-pool.com"}, "classification": {"type": "malware-distribution"}, "event_description": {"text": "Angler EK"}, "feed": {"url": "http://www.malwaredomainlist.com/updatescsv.php", "name": "Malware Domain List", "accuracy": 100.0}, "time": {"observation": "2016-04-29T10:59:34+00:00", "source": "2016-04-25T11:39:00+00:00"}} - -With the following Parameters: - -* field_delimiter : | -* format : delimited -* Header : IntelMQ-event -* keep_raw_field : false -* ip : 127.0.0.1 -* port : 514 - -Resulting line in syslog: - -.. 
code-block:: - - Apr 29 11:17:47 localhost IntelMQ-event|source.ip: 85.25.160.114|time.source:2016-04-25T11:39:00+00:00|feed.url:http://www.malwaredomainlist.com/updatescsv.php|time.observation:2016-04-29T11:17:44+00:00|source.reverse_dns:static-ip-85-25-160-114.inaddr.ip-pool.com|feed.name:Malware Domain List|event_description.text:Angler EK|source.url:http://schizzino.omarathon.com/gkCCJuTHM/DPeCZEk/WtVNHDKl-mXYeFNHj/|source.asn:8972|classification.type:malware-distribution|feed.accuracy:100.0 diff --git a/docs/user/configuration-management.rst b/docs/user/configuration-management.rst deleted file mode 100644 index 509df9476..000000000 --- a/docs/user/configuration-management.rst +++ /dev/null @@ -1,683 +0,0 @@ -.. - SPDX-FileCopyrightText: 2015 Aaron Kaplan , 2015-2021 Sebastian Wagner, 2020-2021 Birger Schacht - SPDX-License-Identifier: AGPL-3.0-or-later - -############################ -Configuration and Management -############################ - -.. contents:: - -For installation instructions, see :doc:`installation`. -For upgrade instructions, see :doc:`upgrade`. - - -****************** -Configure services -****************** -You need to enable and start Redis if not already done. Using systemd it can be done with: - -.. code-block:: bash - - systemctl enable redis.service - systemctl start redis.service - -.. _configuration: - -************* -Configuration -************* - -.. _configuration-paths: - -/opt and LSB paths -================== - -If you installed the packages, standard Linux paths (LSB paths) are used: ``/var/log/intelmq/``, ``/etc/intelmq/``, ``/var/lib/intelmq/``, ``/var/run/intelmq/``. -Otherwise, the configuration directory is ``/opt/intelmq/etc/``. Using the environment variable ``INTELMQ_ROOT_DIR`` allows setting any arbitrary root directory. - -You can switch this by setting the environment variables ``INTELMQ_PATHS_NO_OPT`` and ``INTELMQ_PATHS_OPT``, respectively. 
-* When installing the Python packages, you can set ``INTELMQ_PATHS_NO_OPT`` to something non-empty to use LSB-paths. -* When installing the deb/rpm packages, you can set ``INTELMQ_PATHS_OPT`` to something non-empty to use ``/opt/intelmq/`` paths, or a path set with ``INTELMQ_ROOT_DIR``. - -The environment variable ``ROOT_DIR`` is meant to set an alternative root directory instead of `/`. This is primarily meant for package build environments and is analogous to setuptools' ``--root`` parameter. Thus it is only used in LSB-mode. - -Overview -======== - -The main configuration file is formatted in the YAML format since IntelMQ 3.0 (before it was JSON, which had some downsides). -However, comments in YAML are currently not preserved by IntelMQ (known bug `#2003 `_). -For new installations a default setup with some examples is provided by the `intelmqsetup` tool. If this is not the case, make sure the program was run (see :doc:`installation` instructions). - - -* ``runtime.yaml``: Configuration for the individual bots. See :doc:`bots` for more details. -* ``harmonization.conf``: Configuration of the internal data format, see :doc:`/dev/data-format` and :doc:`/dev/harmonization-fields`. - -To configure a new bot, you need to define and configure it in ``runtime.yaml``. -You can base your configuration on the output of ``intelmqctl list bots`` and the :doc:`feeds` documentation page. -Use the IntelMQ Manager mentioned above to generate the configuration files if unsure. - -In the shipped examples 4 collectors and parsers, 6 common experts and one output are configured. The default collector and the parser handle data from malware domain list, the file output bot writes all data to ``/opt/intelmq/var/lib/bots/file-output/events.txt``/``/var/lib/intelmq/bots/file-output/events.txt``.
- -********************************* -Systemwide Configuration (global) -********************************* - -All bots inherit the global configuration parameters in the ``runtime.yaml`` and they can overwrite them using the same parameters in their individual configuration in the ``runtime.yaml`` file. - -.. _configuration-logging: - -Logging -======= - -The logging can be configured with the following parameters: - -* ``logging_handler``: Can be one of ``"file"`` or ``"syslog"``. -* ``logging_level``: Defines the system-wide log level that will be used by all bots and the intelmqctl tool. Possible values are: ``"CRITICAL"``, ``"ERROR"``, ``"WARNING"``, ``"INFO"`` and ``"DEBUG"``. -* ``logging_path``: If ``logging_handler`` is ``file``. Defines the system-wide log-folder that will be used by all bots and the intelmqctl tool. Default value: ``/opt/intelmq/var/log/`` or ``/var/log/intelmq/`` respectively. -* ``logging_syslog``: If ``logging_handler`` is ``syslog``. Either a list with hostname and UDP port of syslog service, e.g. ``["localhost", 514]`` or a device name/path, e.g. the default ``"/var/log"``. - -We recommend ``logging_level`` ``WARNING`` for production environments and ``INFO`` if you want more details. In any case, watch your free disk space! - -Log rotation ------------- - -To rotate the logs, you can use the standard Linux-tool logrotate. -An example logrotate configuration is given in ``contrib/logrotate/`` and delivered with all deb/rpm-packages. -When not using logrotate, IntelMQ can rotate the logs itself, which is not enabled by default! You need to set both values. - -* ``logging_max_size``: Maximum number of bytes to be stored in one logfile before the file is rotated (default: 0, equivalent to unset). -* ``logging_max_copies``: Maximum number of logfiles to keep (default: unset). Compression is not supported. - -Some information can as well be found in Python's documentation on the used `RotatingFileHandler `_.
- -Error Handling -============== - -* **error_log_message** - in case of an error, this option will allow the bot to write the message (report or event) to the log file. Use the following values: - * **true/false** - write or not write message to the log file - -* **error_log_exception** - in case of an error, this option will allow the bot to write the error exception to the log file. Use the following values: - * **true/false** - write or not write exception to the log file - -* **error_procedure** - in case of an error, this option defines the procedure that the bot will adopt. Use the following values: - - * **stop** - stop bot after retrying X times (as defined in ``error_max_retries``) with a delay between retries (as defined in ``error_retry_delay``). If the bot reaches the ``error_max_retries`` value, it will remove the message from the pipeline and stop. If the option ``error_dump_message`` is also enabled, the bot will dump the removed message to its dump file (to be found in var/log). - - * **pass** - will skip this message and will process the next message after retrying X times, removing the current message from pipeline. If the option ``error_dump_message`` is also enabled, then the bot will dump the removed message to its dump file. After max retries are reached, the rate limit is applied (e.g. a collector bot fetching an unavailable resource does not try forever). - -* **error_max_retries** - in case of an error, the bot will try to re-start processing the current message X times as defined by this option. int value. - -* **error_retry_delay** - defines the number of seconds to wait between subsequent re-tries in case of an error. int value. - -* **error_dump_message** - specifies if the bot will write queued up messages to its dump file (use intelmqdump to re-insert the message).
- * **true/false** - write or not write message to the dump file - -If the path ``_on_error`` exists for a bot, the message is also sent to this queue, instead of (only) dumping the file if configured to do so. - -Miscellaneous -============= - -* **load_balance** - this option allows you to choose the behavior of the queue. Use the following values: - * **true** - splits the messages into several queues without duplication - * **false** - duplicates the messages into each queue - * When using AMQP as message broker, take a look at the :ref:`multithreading` section and the ``instances_threads`` parameter. - -* **rate_limit** - time interval (in seconds) between messages processing. int value. - -* **ssl_ca_certificate** - trusted CA certificate for IMAP connections (supported by some bots). - -* **source_pipeline_broker** & **destination_pipeline_broker** - select which broker IntelMQ should use. There are two options - * **redis** (default) - Please note that persistence has to be `manually activated `_. - * **amqp** - The AMQP pipeline is currently beta but there are no known issues. A popular AMQP broker is `RabbitMQ `_. See :ref:`aqmp pipeline broker` for more details. - - * As these parameters can be set per bot, this allows usage of different broker systems and hosts, as well as switching between them on the same IntelMQ instance. - -* **source_pipeline_host** - broker IP, FQDN or Unix socket that the bot will use to connect and receive messages. - -* **source_pipeline_port** - broker port that the bot will use to connect and receive messages. Can be empty for Unix socket. - -* **source_pipeline_password** - broker password that the bot will use to connect and receive messages. Can be null for unprotected broker. - -* **source_pipeline_db** - broker database that the bot will use to connect and receive messages (requirement from redis broker). 
- -* **destination_pipeline_host** - broker IP, FQDN or Unix socket that the bot will use to connect and send messages. - -* **destination_pipeline_port** - broker port that the bot will use to connect and send messages. Can be empty for Unix socket. - -* **destination_pipeline_password** - broker password that the bot will use to connect and send messages. Can be null for unprotected broker. - -* **destination_pipeline_db** - broker database that the bot will use to connect and send messages (requirement from redis broker). - -* **http_proxy** - HTTP proxy that the bot will use when performing HTTP requests (e.g. bots/collectors/collector_http.py). The value must follow :rfc:`1738`. - -* **https_proxy** - HTTPS proxy that the bot will use when performing secure HTTPS requests (e.g. bots/collectors/collector_http.py). - -* **http_user_agent** - user-agent string that the bot will use when performing HTTP/HTTPS requests (e.g. bots/collectors/collector_http.py). - -* **http_verify_cert** - defines if the bot will verify SSL certificates when performing HTTPS requests (e.g. bots/collectors/collector_http.py). - * **true/false** - verify or not verify SSL certificates - - -Using supervisor as process manager (Beta) ------------------------------------------- - -First of all: Do not use it in production environments yet! It has not been tested thoroughly yet. - -`Supervisor `_ is a process manager written in Python. The main advantage is that it takes care of processes, so if a bot process exits with a failure (exit code different than 0), supervisor tries to run it again. Another advantage is that it does not require writing PID files. - -This was tested on Ubuntu 18.04. - -Install supervisor. ``supervisor_twiddler`` is an extension for supervisor that makes it possible to create processes dynamically. (Ubuntu ``supervisor`` package is currently based on Python 2, so ``supervisor_twiddler`` must be installed with Python 2 ``pip``.) - -..
code-block:: bash - - apt install supervisor python-pip - pip install supervisor_twiddler - - -Create default config ``/etc/supervisor/conf.d/intelmq.conf`` and restart ``supervisor`` service: - -.. code-block:: ini - - [rpcinterface:twiddler] - supervisor.rpcinterface_factory=supervisor_twiddler.rpcinterface:make_twiddler_rpcinterface - - [group:intelmq] - -Change IntelMQ process manager in the *global* configuration: - -.. code-block:: yaml - - process_manager: supervisor - -After this it is possible to manage bots like before with ``intelmqctl`` command. - - -.. _runtime-configuration: - -********************* -Runtime Configuration -********************* - -This configuration is used by each bot to load its specific (runtime) parameters. The IntelMQ Manager can generate this configuration for you. You may edit it manually as well. Be sure to re-load the bot (see the :doc:`intelmqctl`). - -**Template:** - -.. code-block:: yaml - - : - group: - name: - module: - description: - parameters: - : - : - : - -**Example:** - -.. code-block:: yaml - - blocklistde-apache-collector: - group: Collector - name: Blocklist.de Apache List - module: intelmq.bots.collectors.http.collector_http - description: Blocklist.de Apache Collector fetches all IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. - parameters: - http_url: https://lists.blocklist.de/lists/apache.txt - name: Blocklist.de Apache - rate_limit: 3600 - -More examples can be found in the ``intelmq/etc/runtime.yaml`` file. See :doc:`bots` for more details. - -By default, all of the bots are started when you start the whole botnet, however there is a possibility to *disable* a bot. This means that the bot will not start every time you start the botnet, but you can start and stop the bot if you specify the bot explicitly. To disable a bot, add the following to your ``runtime.yaml``: ``"enabled": false``. For example: - -.. 
code-block:: yaml - - blocklistde-apache-collector: - group: Collector - name: Blocklist.de Apache List - module: intelmq.bots.collectors.http.collector_http - description: Blocklist.de Apache Collector fetches all IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. - enabled: false - parameters: - http_url: https://lists.blocklist.de/lists/apache.txt - name: Blocklist.de Apache - rate_limit: 3600 - -Pipeline Configuration -====================== - -The pipeline configuration defines how the data is exchanged between the bots. For each bot, it defines the source queue (there is always only one) and one or multiple destination queues. This section shows the possibilities and definition as well as examples. The configuration of the pipeline can be done by the |intelmq-manager-github-link| with no need to intervene manually. It is recommended to use this tool as it guarantees that the configuration is correct. The configuration of the pipelines is done in the ``runtime.yaml`` as part of the individual bots settings. - -Source queue ------------- - -This setting is **optional**, by default, the source queue is the bot ID plus "-queue" appended. -For example, if the bot ID is ``example-bot``, the source queue name is ``example-bot-queue``. - -.. code-block:: yaml - - source-queue: example-bot-queue - -For collectors, this field does not exist, as they fetch the data from outside the IntelMQ system by definition. - -Destination queues ------------------- - -Destination queues are defined using a dictionary with a name as key and a list of queue-identifiers as the value. - -.. code-block:: yaml - - destination-queues: - _default: - - - - - _on_error: - - - - - other-path: - - - - - -In this case, the bot will be able to send the message to one of the defined paths. The path ``"_default"`` is used if none is specified by the bot itself.
-If an error occurs during processing and the optional path ``"_on_error"`` is specified, the message will be sent to the pipelines given as on-error. -Other destination queues can be explicitly addressed by the bots, e.g. bots with filtering capabilities. Some expert bots are capable of sending messages to paths, this feature is explained in their documentation, e.g. the :ref:`intelmq.bots.experts.filter.expert` expert and the :ref:`intelmq.bots.experts.sieve.expert` expert. -The named queues need to be explicitly addressed by the bot (e.g. filtering) or the core (``_on_error``) to be used. Setting arbitrary paths has no effect. - -.. _aqmp pipeline broker: - -AMQP (Beta) ------------ - -Starting with IntelMQ 1.2 the AMQP protocol is supported as message queue. -To use it, install a broker, for example RabbitMQ. -The configuration and the differences are outlined here. -Keep in mind that it is slower, but has better monitoring capabilities and is more stable. -The AMQP support is considered beta, so small problems might occur. So far, only RabbitMQ as broker has been tested. - -You can change the broker for single bots (set the parameters in the runtime configuration per bot) or for the whole botnet (using the global configuration). - -You need to set the parameter ``source_pipeline_broker``/``destination_pipeline_broker`` to ``amqp``. There are more parameters available: - -* ``destination_pipeline_broker``: ``"amqp"`` -* ``destination_pipeline_host`` (default: ``'127.0.0.1'``) -* ``destination_pipeline_port`` (default: 5672) -* ``destination_pipeline_username`` -* ``destination_pipeline_password`` -* ``destination_pipeline_socket_timeout`` (default: no timeout) -* ``destination_pipeline_amqp_exchange``: Only change/set this if you know what you are doing. If set, the destination queues are not declared as queues, but used as routing key. (default: ``''``).
-* ``destination_pipeline_amqp_virtual_host`` (default: ``'/'``) -* ``source_pipeline_host`` (default: ``'127.0.0.1'``) -* ``source_pipeline_port`` (default: 5672) -* ``source_pipeline_username`` -* ``source_pipeline_password`` -* ``source_pipeline_socket_timeout`` (default: no timeout) -* ``source_pipeline_amqp_exchange``: Only change/set this if you know what you are doing. If set, the destination queues are not declared as queues, but used as routing key. (default: ``''``). -* ``source_pipeline_amqp_virtual_host`` (default: ``'/'``) -* ``intelmqctl_rabbitmq_monitoring_url`` string, see below (default: ``"http://{host}:15672"``) - -For getting the queue sizes, ``intelmqctl`` needs to connect to the monitoring interface of RabbitMQ. If the monitoring interface is not available under ``http://{host}:15672`` you can set it manually using the parameter ``intelmqctl_rabbitmq_monitoring_url``. -In RabbitMQ's default configuration you may not need to provide a user account, as by default the administrator (``guest``:``guest``) allows full access from localhost. If you create a separate user account, make sure to add the tag "monitoring" to it, otherwise IntelMQ can't fetch the queue sizes. - -.. figure:: /_static/rabbitmq-user-monitoring.png - :alt: RabbitMQ User Account Monitoring Tag - -Setting the statistics (and cache) parameters is necessary when the local redis is running under a non-default host/port. If this is the case, you can set them explicitly: - -* ``statistics_database``: ``3`` -* ``statistics_host``: ``"127.0.0.1"`` -* ``statistics_password``: ``null`` -* ``statistics_port``: ``6379`` - -.. _multithreading: - -Multithreading (Beta) -===================== - -First of all: Do not use it in production environments yet! There are a few bugs, see below. - -Since IntelMQ 2.0 it is possible to provide the following parameter: - -* ``instances_threads`` - -Set it to a non-zero integer, then this number of worker threads will be spawned.
-This is useful if bots often wait for system resources or if network-based lookups are a bottleneck. - -However, there are currently a few caveats: - -* This is not possible for all bots, there are some exceptions (collectors and some outputs), see the :doc:`FAQ` for some reasons. -* Only use it with the AMQP pipeline, as with Redis, messages may get duplicated because there's only one internal queue. -* In the logs, you can see the main thread initializing first, then all of the threads which log with the name ``[bot-id].[thread-id]``. - -*************************** -Harmonization Configuration -*************************** - -This configuration is used to specify the fields for all message types. The harmonization library will load this configuration to check, during the message processing, if the values are compliant to the "harmonization" format. Usually, this configuration doesn't need any change. It is mostly maintained by the intelmq maintainers. - -**Template:** - -.. code-block:: json - - { - "": { - "": { - "description": "", - "type": "" - }, - "": { - "description": "", - "type": "" - } - }, - } - -**Example:** - -.. code-block:: json - - { - "event": { - "destination.asn": { - "description": "The autonomous system number from which originated the connection.", - "type": "Integer" - }, - "destination.geolocation.cc": { - "description": "Country-Code according to ISO3166-1 alpha-2 for the destination IP.", - "regex": "^[a-zA-Z0-9]{2}$", - "type": "String" - }, - }, - } - -More examples can be found in the ``intelmq/etc/harmonization.conf`` file. - - -********* -Utilities -********* - -Management -========== - -IntelMQ has a modular structure consisting of bots. There are four types of bots: - -* :ref:`collector bots` retrieve data from internal or external sources, the output are *reports* consisting of many individual data sets / log lines.
-* :ref:`parser bots` parse the (report) data by splitting it into individual *events* (log lines) and giving them a defined structure, see also :doc:`/dev/data-format` for the list of fields an event may be split up into. -* :ref:`expert bots` enrich the existing events by e.g. lookup up information such as DNS reverse records, geographic location information (country code) or abuse contacts for an IP address or domain name. -* :ref:`output bots` write events to files, databases, (REST)-APIs or any other data sink that you might want to write to. - -Each bot has one source queue (except collectors) and can have multiple -destination queues (except outputs). But multiple bots can write to the same pipeline (queue), resulting in multiple inputs for the next bot. - -Every bot runs in a separate process. A bot is identifiable by a *bot id*. - -Currently only one instance (i.e. *with the same bot id*) of a bot can run at the same time. Concepts for multiprocessing are being discussed, see this issue: :issue:`Multiprocessing per queue is not supported #186 <186>`. -Currently you can run multiple processes of the same bot (with *different bot ids*) in parallel. - -Example: multiple gethostbyname bots (with different bot ids) may run in parallel, with the same input queue and sending to the same output queue. Note that the bot providing the input queue **must** have the ``load_balance`` option set to ``true``. - -Web interface: IntelMQ Manager -============================== - -IntelMQ has a tool called IntelMQ Manager that gives users an easy way to configure all pipelines with bots that your team needs. For beginners, it's recommended to use the IntelMQ Manager to become acquainted with the functionalities and concepts. The IntelMQ Manager offers some of the possibilities of the intelmqctl tool and has a graphical interface for runtime and pipeline configurations. - -See the |intelmq-manager-github-link| repository. 
- -Command-line interface: intelmqctl -================================== - -**Syntax** see ``intelmqctl -h`` - -* Starting a bot: ``intelmqctl start bot-id`` -* Stopping a bot: ``intelmqctl stop bot-id`` -* Reloading a bot: ``intelmqctl reload bot-id`` -* Restarting a bot: ``intelmqctl restart bot-id`` -* Get status of a bot: ``intelmqctl status bot-id`` - -* Run a bot directly for debugging purpose and temporarily leverage the logging level to DEBUG: ``intelmqctl run bot-id`` -* Get a pdb (or ipdb if installed) live console. ``intelmqctl run bot-id console`` -* See the message that waits in the input queue. ``intelmqctl run bot-id message get`` -* See additional help for further explanation. ``intelmqctl run bot-id --help`` - -* Starting the botnet (all bots): ``intelmqctl start`` -* Starting a group of bots: ``intelmqctl start --group experts`` - -* Get a list of all configured bots: ``intelmqctl list bots`` -* Get a list of all queues: ``intelmqctl list queues`` - If -q is given, only queues with more than one item are listed. -* Get a list of all queues and status of the bots: ``intelmqctl list queues-and-status`` - -* Clear a queue: ``intelmqctl clear queue-id`` -* Get logs of a bot: ``intelmqctl log bot-id number-of-lines log-level`` - Reads the last lines from bot log. - Log level should be one of DEBUG, INFO, ERROR or CRITICAL. - Default is INFO. Number of lines defaults to 10, -1 gives all. Result - can be longer due to our logging format! - -* Upgrade from a previous version: ``intelmqctl upgrade-config`` - Make a backup of your configuration first, also including bot's configuration files. - - -Botnet Concept --------------- - -The "botnet" represents all currently configured bots which are explicitly enabled. It is, in essence, the graph of the bots which are connected together via their input source queues and destination queues. - -To get an overview which bots are running, use ``intelmqctl status`` or use the IntelMQ Manager. 
Set ``"enabled": true`` in the runtime configuration to add a bot to the botnet. By default, bots will be configured as ``"enabled": true``. See :doc:`bots` for more details on configuration. - -Disabled bots can still be started explicitly using ``intelmqctl start ``, but will remain in the state ``disabled`` if stopped (and not be implicitly enabled by the ``start`` command). They are not started by ``intelmqctl start`` in analogy to the behavior of widely used initialization systems. - - -Scheduled Run Mode ------------------- - -In many cases, it is useful to schedule a bot at a specific time (i.e. via cron(1)), for example to collect information from a website every day at midnight. To do this, set ``run_mode`` to ``scheduled`` in the ``runtime.yaml`` for the bot. Check out the following example: - -.. code-block:: yaml - - blocklistde-apache-collector: - name: Generic URL Fetcher - group: Collector - module: intelmq.bots.collectors.http.collector_http - description: All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. - enabled: false - run_mode: scheduled - parameters: - feed: Blocklist.de Apache - provider: Blocklist.de - http_url: https://lists.blocklist.de/lists/apache.txt - ssl_client_certificate: null - -You can schedule the bot with a crontab-entry like this: - -.. code-block:: cron - - 0 0 * * * intelmqctl start blocklistde-apache-collector - -Bots configured as ``scheduled`` will exit after the first successful run. -Setting ``enabled`` to ``false`` will cause the bot to not start with ``intelmqctl start``, but only with an explicit start, in this example ``intelmqctl start blocklistde-apache-collector``. - - -Continuous Run Mode -------------------- - -Most of the cases, bots will need to be configured as ``continuous`` run mode (the default) in order to have them always running and processing events. 
Usually, the types of bots that will require the continuous mode will be Parsers, Experts and Outputs. To do this, set ``run_mode`` to ``continuous`` in the ``runtime.yaml`` for the bot. Check the following example: - -.. code-block:: yaml - - blocklistde-apache-parser: - name: Blocklist.de Parser - group: Parser - module: intelmq.bots.parsers.blocklistde.parser - description: Blocklist.DE Parser is the bot responsible to parse the report and sanitize the information. - enabled: false - run_mode: continuous - parameters: ... - -You can now start the bot using the following command: - -.. code-block:: bash - - intelmqctl start blocklistde-apache-parser - -Bots configured as ``continuous`` will never exit except if there is an error and the error handling configuration requires the bot to exit. See the Error Handling section for more details. - - -Reloading ---------- - -Whilst restart is a mere stop & start, performing ``intelmqctl reload `` will not stop the bot, permitting it to keep the state: the same common behavior as for (Linux) daemons. It will initialize again (including reading all configuration again) after the current action is finished. Also, the rate limit/sleep is continued (with the *new* time) and not interrupted like with the restart command. So if you have a collector with a rate limit of 24 h, the reload does not trigger a new fetching of the source at the time of the reload, but just 24 h after the last run – with the new configuration. -Which state the bots are keeping depends on the bots of course. - -Forcing reset pipeline and cache (be careful) ---------------------------------------------- - -If you are using the default broker (Redis), in some test situations you may need to quickly clear all pipelines and caches. Use the following procedure: - -.. 
code-block:: bash - - redis-cli FLUSHDB - redis-cli FLUSHALL - -************** -Error Handling -************** - -Tool: intelmqdump -================= - -When bots are failing due to bad input data or programming errors, they can dump the problematic message to a file along with a traceback, if configured accordingly. These dumps are saved at in the logging directory as ``[botid].dump`` as JSON files. IntelMQ comes with an inspection and reinjection tool, called ``intelmqdump``. It is an interactive tool to show all dumped files and the number of dumps per file. Choose a file by bot-id or listed numeric id. You can then choose to delete single entries from the file with ``e 1,3,4``, show a message in more readable format with ``s 1`` (prints the raw-message, can be long!), recover some messages and put them back in the pipeline for the bot by ``a`` or ``r 0,4,5``. Or delete the file with all dumped messages using ``d``. - -.. code-block:: bash - - intelmqdump -h - usage: - intelmqdump [botid] - intelmqdump [-h|--help] - - intelmqdump can inspect dumped messages, show, delete or reinject them into - the pipeline. It's an interactive tool, directly start it to get a list of - available dumps or call it with a known bot id as parameter. - - positional arguments: - botid botid to inspect dumps of - - optional arguments: - -h, --help show this help message and exit - --truncate TRUNCATE, -t TRUNCATE - Truncate raw-data with more characters than given. 0 for no truncating. Default: 1000. - - Interactive actions after a file has been selected: - - r, Recover by IDs - > r id{,id} [queue name] - > r 3,4,6 - > r 3,7,90 modify-expert-queue - The messages identified by a consecutive numbering will be stored in the - original queue or the given one and removed from the file. - - a, Recover all - > a [queue name] - > a - > a modify-expert-queue - All messages in the opened file will be recovered to the stored or given - queue and removed from the file. 
- - d, Delete entries by IDs - > d id{,id} - > d 3,5 - The entries will be deleted from the dump file. - - d, Delete file - > d - Delete the opened file as a whole. - - s, Show by IDs - > s id{,id} - > s 0,4,5 - Show the selected IP in a readable format. It's still a raw format from - repr, but with newlines for message and traceback. - - e, Edit by ID - > e id - > e 0 - > e 1,2 - Opens an editor (by calling `sensible-editor`) on the message. The modified message is then saved in the dump. - - q, Quit - > q - - $ intelmqdump - id: name (bot id) content - 0: alienvault-otx-parser 1 dumps - 1: cymru-whois-expert 8 dumps - 2: deduplicator-expert 2 dumps - 3: dragon-research-group-ssh-parser 2 dumps - 4: file-output2 1 dumps - 5: fraunhofer-dga-parser 1 dumps - 6: spamhaus-cert-parser 4 dumps - 7: test-bot 2 dumps - Which dump file to process (id or name)? 3 - Processing dragon-research-group-ssh-parser: 2 dumps - 0: 2015-09-03T13:13:22.159014 InvalidValue: invalid value u'NA' () for key u'source.asn' - 1: 2015-09-01T14:40:20.973743 InvalidValue: invalid value u'NA' () for key u'source.asn' - (r)ecover by ids, recover (a)ll, delete (e)ntries, (d)elete file, (s)how by ids, (q)uit, edit id (v)? d - Deleted file /opt/intelmq/var/log/dragon-research-group-ssh-parser.dump - -Bots and the intelmqdump tool use file locks to prevent writing to already opened files. Bots are trying to lock the file for up to 60 seconds if the dump file is locked already by another process (intelmqdump) and then give up. Intelmqdump does not wait and instead only shows an error message. - -By default, the ``show`` command truncates the ``raw`` field of messages at 1000 characters to change this limit or disable truncating at all (value 0), use the ``--truncate`` parameter. - -*************** -Monitoring Logs -*************** - -All bots and ``intelmqctl`` log to ``/opt/intelmq/var/log/``/``var/log/intelmq/`` (depending on your installation). 
In case of failures, messages are dumped to the same directory with the file ending ``.dump``. - -.. code-block:: bash - - tail -f /opt/intelmq/var/log/*.log - tail -f /var/log/intelmq/*.log - -********* -Uninstall -********* - -If you installed intelmq with native packages: Use the package management tool to remove the package ``intelmq``. These tools do not remove configuration by default. - -If you installed manually via pip (note that this also deletes all configuration and possibly data): - -.. code-block:: bash - - pip3 uninstall intelmq - rm -r /opt/intelmq - -************************************* -Integration with ticket systems, etc. -************************************* - -First of all, IntelMQ is a message (event) processing system: it collects feeds, processes them, enriches them, filters them and then stores them somewhere or sends them to another system. It does this in a composable, data flow oriented fashion, based on single events. There are no aggregation or grouping features. Now, if you want to integrate IntelMQ with your ticket system or some other system, you need to send its output to somewhere where your ticket system or other services can pick up IntelMQ's data. This could be a database, splunk, or you could send your events directly via email to a ticket system. - -Different users came up with different solutions for this, each of them fitting their own organisation. Hence these solutions are not part of the core IntelMQ repository. - * CERT.at uses a postgresql DB (sql output bot) and has a small tool ``intelmqcli`` which fetches the events in the postgresql DB which are marked as "new" and will group them and send them out via the RT ticket system. - * Others, including BSI, use a tool called ``intelmq-mailgen``. It sends E-Mails to the recipients, optionally PGP-signed with defined text-templates, CSV formatted attachments with grouped events and generated ticket numbers. 
- -The following lists external github repositories which you might consult for examples on how to integrate IntelMQ into your workflow: - - * `certat repository `_ - * `Intevation's Mailgen `_ - -If you came up with another solution for integration, we'd like to hear from you! Please reach out to us on the |intelmq-users-list-link|. - -************************** -Frequently Asked Questions -************************** - -Consult the :doc:`FAQ` if you encountered any problems. - - -********************** -Additional Information -********************** - -Bash Completion -=============== - -To enable bash completion on ``intelmqctl`` and ``intelmqdump`` in order to help you run the commands in an easy manner, follow the installation process `here `_. diff --git a/docs/user/event.md b/docs/user/event.md new file mode 100644 index 000000000..796440c63 --- /dev/null +++ b/docs/user/event.md @@ -0,0 +1,646 @@ + + +# Event + +An event represents an individual piece of data processed by IntelMQ. It uses the JSON format. + +Example Event: + +```json +{ + "source.geolocation.cc": "JO", + "malware.name": "qakbot", + "source.ip": "82.212.115.188", + "source.asn": 47887, + "classification.type": "c2-server", + "extra.status": "offline", + "source.port": 443, + "classification.taxonomy": "malicious-code", + "source.geolocation.latitude": 31.9522, + "feed.accuracy": 100, + "extra.last_online": "2023-02-16", + "time.observation": "2023-02-16T09:55:12+00:00", + "source.geolocation.city": "amman", + "source.network": "82.212.115.0/24", + "time.source": "2023-02-15T14:19:09+00:00", + "source.as_name": "NEU-AS", + "source.geolocation.longitude": 35.939, + "feed.name": "abusech-feodo-c2-tracker" + } +``` + +## Minimum Requirements + +Below, we have enumerated the minimum recommended requirements for an actionable abuse event. These keys should be +present for the abuse report to make sense for the end recipient.
Please note that if you choose to anonymize your +sources, you can substitute **feed.name** with **feed.code**. At least one of the fields **ip**, **fqdn**, **url** or **account** should be present. All the rest of the keys are optional. This list of required fields is *not* enforced by IntelMQ. + +| Field | Terminology | +| ----------------------- | ----------- | +| feed.name | Should | +| classification.type | Should | +| classification.taxonomy | Should | +| time.source | Should | +| time.observation | Should | +| source.ip | Should\* | +| source.fqdn | Should\* | +| source.url | Should\* | +| source.account | Should\* | + +\* at least one of them + +## Classification + +IntelMQ classifies events using three labels: `classification.taxonomy`, `classification.type` and `classification.identifier`. This tuple of three values can be used for deduplication of events and describes what happened. + +The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following classification scheme loosely follows the [Reference Security Incident Taxonomy (RSIT)](https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force/): + +| Classification Taxonomy | Classification Type | Description | +|----------------------|----------------------|----------------------| +| abusive-content | harmful-speech | Discreditation or discrimination of somebody, cyber stalking, racism or threats against one or more individuals. | +| abusive-content | spam | Or 'Unsolicited Bulk Email', this means that the recipient has not granted verifiable permission for the message to be sent and that the message is sent as part of a larger collection of messages, all having a functionally comparable content. | +| abusive-content | violence | Child pornography, glorification of violence, etc. | +| availability | ddos | Distributed Denial of Service attack, e.g. SYN-Flood or UDP-based reflection/amplification attacks. 
| +| availability | dos | Denial of Service attack, e.g. sending specially crafted requests to a web application which causes the application to crash or slow down. | +| availability | misconfiguration | Software misconfiguration resulting in service availability issues, e.g. DNS server with outdated DNSSEC Root Zone KSK. | +| availability | outage | Outage caused e.g. by air condition failure or natural disaster. | +| availability | sabotage | Physical sabotage, e.g cutting wires or malicious arson. | +| fraud | copyright | Offering or Installing copies of unlicensed commercial software or other copyright protected materials (Warez). | +| fraud | masquerade | Type of attack in which one entity illegitimately impersonates the identity of another in order to benefit from it. | +| fraud | phishing | Masquerading as another entity in order to persuade the user to reveal private credentials. | +| fraud | unauthorized-use-of-resources | Using resources for unauthorized purposes including profit-making ventures, e.g. the use of e-mail to participate in illegal profit chain letters or pyramid schemes. | +| information-content-security | data-leak | Leaked confidential information like credentials or personal data. | +| information-content-security | data-loss | Loss of data, e.g. caused by harddisk failure or physical theft. | +| information-content-security | unauthorised-information-access | Unauthorized access to information, e.g. by abusing stolen login credentials for a system or application, intercepting traffic or gaining access to physical documents. | +| information-content-security | unauthorised-information-modification | Unauthorised modification of information, e.g. by an attacker abusing stolen login credentials for a system or application or a ransomware encrypting data. | +| information-gathering | scanner | Attacks that send requests to a system to discover weaknesses. 
This also includes testing processes to gather information on hosts, services and accounts. Examples: fingerd, DNS querying, ICMP, SMTP (EXPN, RCPT, \...), port scanning. | +| information-gathering | sniffing | Observing and recording of network traffic (wiretapping). | +| information-gathering | social-engineering | Gathering information from a human being in a non-technical way (e.g. lies, tricks, bribes, or threats). This IOC refers to a resource, which has been observed to perform brute-force attacks over a given application protocol. | +| intrusion-attempts | brute-force | Multiple login attempts (Guessing/cracking of passwords, brute force). | +| intrusion-attempts | exploit | An attack using an unknown exploit. | +| intrusion-attempts | ids-alert | IOCs based on a sensor network. This is a generic IOC denomination, should it be difficult to reliably denote the exact type of activity involved for example due to an anecdotal nature of the rule that triggered the alert. | +| intrusions | application-compromise| Compromise of an application by exploiting (un)known software vulnerabilities, e.g. SQL injection. | +| intrusions | burglary | Physical intrusion, e.g. into corporate building or data center. | +| intrusions | privileged-account-compromise | Compromise of a system where the attacker gained administrative privileges. | +| intrusions | system-compromise | Compromise of a system, e.g. unauthorised logins or commands. This includes compromising attempts on honeypot systems. | +| intrusions | unprivileged-account-compromise | Compromise of a system using an unprivileged (user/service) account. | +| malicious-code | c2-server | This is a command and control server in charge of a given number of botnet drones. | +| malicious-code | infected-system | This is a compromised machine, which has been observed to make a connection to a command and control server. 
| +| malicious-code | malware-configuration | This is a resource which updates botnet drones with a new configuration. | +| malicious-code | malware-distribution | URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. | +| other | blacklist | Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. Not in RSIT. | +| other | dga-domain | DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in RSIT. | +| other | other | All incidents which don't fit in one of the given categories should be put into this class. | +| other | malware | An IoC referring to a malware (sample) itself. Not in RSIT. | +| other | proxy | This refers to the use of proxies from inside your network. Not in RSIT. | +| test | test | Meant for testing. Not in RSIT. | +| other | tor | This IOC refers to incidents related to TOR network infrastructure. Not in RSIT. | +| other | undetermined | The categorisation of the incident is unknown/undetermined. | +| vulnerable | ddos-amplifier | Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled. | +| vulnerable | information-disclosure | Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis. | +| vulnerable | potentially-unwanted-accessible | Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC. 
| +| vulnerable | vulnerable-system | A system which is vulnerable to certain attacks. Example: misconfigured client proxy settings (example: WPAD), outdated operating system version, etc. | +| vulnerable | weak-crypto | Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks. | + +## Meaning of source and destination identities + +Meaning of source and destination identities for each `classification.type` can be different. Usually the main information is in the `source.*` fields. + +The `classification.identifier` is often a normalized malware name, grouping many variants or the affected network protocol. + +Examples of the meaning of the *source* and *destination* fields for various `classification.type` and possible identifiers are shown here. + +| Classification Type | Source | Destination | Possible Identifiers | +| --------------------- | -------------------------------------- | -------------------- | ------------------------------------ | +| blacklist | blacklisted device | | | +| brute-force | attacker | target | | +| c2-server | (sinkholed) c&c server | | zeus, palevo, feodo | +| ddos | attacker | target | | +| dga-domain | infected device | | | +| dropzone | server hosting stolen data | | | +| exploit | hosting server | | | +| ids-alert | triggering device | | | +| infected-system | infected device | contacted c&c server | | +| malware | infected device | | zeus, palevo, feodo | +| malware-configuration | infected device | | | +| malware-distribution | server hosting malware | | | +| phishing | phishing website | | | +| proxy | server allowing policy/security bypass | | | +| scanner | scanning device | scanned device | http, modbus, wordpress | +| spam | infected device | targeted server | | +| system-compromise | server | | | +| vulnerable-system | vulnerable device | | heartbleed, openresolver, snmp, wpad | + +Examples: + +- If an event describes IP address that connects to a zeus command and control 
server, it's about the infected device. Therefore the `classification.taxonomy` is `malicious-code`, `classification.type` is `infected-system` and the `classification.identifier` is `zeus`. + +- If an event describes an IP address where a command and control server is running, the event's +`classification.type` is `c2-server`. The `malware.name` can have the full name, e.g. `zeus_p2p`. + +## Additional Information + +Information that does not fit into any of the event fields should be placed in the `extra` namespace. Therefore the keys must be prefixed with the `extra.` string. There are no other rules on key names and values for additional information. + +## Fields Reference + +Here you can find detailed information about all the possible fields used in an event. + + +### `classification.identifier`
+ +**Type:** [String](#string) + +The lowercase identifier defines the actual software or service (e.g. ``heartbleed`` or ``ntp_version``) or standardized malware name (e.g. ``zeus``). Note that you MAY overwrite this field during processing for your individual setup. This field is not standardized across IntelMQ setups/users. + +### `classification.taxonomy`
+ +**Type:** [ClassificationTaxonomy](#classificationtaxonomy) + +We recognize the need for the CSIRT teams to apply a static (incident) taxonomy to abuse data. With this goal in mind the type IOC will serve as a basis for this activity. Each value of the dynamic type mapping translates to an element in the static taxonomy. The European CSIRT teams for example have decided to apply the eCSIRT.net incident classification. The value of the taxonomy key is thus a derivative of the dynamic type above. For more information, check the ENISA taxonomies. + +### `classification.type`
+ +**Type:** [ClassificationType](#classificationtype) + +The abuse type IOC is one of the most crucial pieces of information for any given abuse event. The main idea of dynamic typing is to keep our ontology flexible, since we need to evolve with the evolving threatscape of abuse data. In contrast with the static taxonomy below, the dynamic typing is used to perform business decisions in the abuse handling pipeline. Furthermore, the value data set should be kept as minimal as possible to avoid *type explosion*, which in turn dilutes the business value of the dynamic typing. In general, we normally have two types of abuse type IOC: ones referring to a compromised resource or ones referring to pieces of the criminal infrastructure, such as command and control servers, for example. + +### `comment`
+ +**Type:** [String](#string) + +Free text commentary about the abuse event inserted by an analyst. + +### `destination.abuse_contact`
+ +**Type:** [LowercaseString](#lowercasestring) + +Abuse contact for destination address. A comma separated list. + +### `destination.account`
+ +**Type:** [String](#string) + +An account name or email address, which has been identified to relate to the destination of an abuse event. + +### `destination.allocated`
+ +**Type:** [DateTime](#datetime) + +Allocation date corresponding to BGP prefix. + +### `destination.as_name`
+ +**Type:** [String](#string) + +The autonomous system name to which the connection headed. + +### `destination.asn`
+ +**Type:** [ASN](#asn) + +The autonomous system number to which the connection headed. + +### `destination.domain_suffix`
+ +**Type:** [FQDN](#fqdn) + +The suffix of the domain from the public suffix list. + +### `destination.fqdn`
+ +**Type:** [FQDN](#fqdn) + +A DNS name related to the host to which the connection headed. DNS allows even binary data in DNS, so we have to allow everything. A final point is stripped, string is converted to lower case characters. + +### `destination.geolocation.cc`
+ +**Type:** [UppercaseString](#uppercasestring) + +Country-Code according to ISO3166-1 alpha-2 for the destination IP. + +### `destination.geolocation.city`
+ +**Type:** [String](#string) + +Some geolocation services refer to city-level geolocation. + +### `destination.geolocation.country`
+ +**Type:** [String](#string) + +The country name derived from the ISO3166 country code (assigned to cc field). + +### `destination.geolocation.latitude`
+ +**Type:** [Float](#float) + +Latitude coordinates derived from a geolocation service, such as MaxMind geoip db. + +### `destination.geolocation.longitude`
+ +**Type:** [Float](#float) + +Longitude coordinates derived from a geolocation service, such as MaxMind geoip db. + +### `destination.geolocation.region`
+ +**Type:** [String](#string) + +Some geolocation services refer to region-level geolocation. + +### `destination.geolocation.state`
+ +**Type:** [String](#string) + +Some geolocation services refer to state-level geolocation. + +### `destination.ip`
+ +**Type:** [IPAddress](#ipaddress) + +The IP which is the target of the observed connections. + +### `destination.local_hostname`
+ +**Type:** [String](#string) + +Some sources report an internal hostname within a NAT related to the name configured for a compromised system + +### `destination.local_ip`
+ +**Type:** [IPAddress](#ipaddress) + +Some sources report an internal (NATed) IP address related to a compromised system. N.B. RFC1918 IPs are OK here. + +### `destination.network`
+ +**Type:** [IPNetwork](#ipnetwork) + +CIDR for an autonomous system. Also known as BGP prefix. If multiple values are possible, select the most specific. + +### `destination.port`
+ +**Type:** [Integer](#integer) + +The port to which the connection headed. + +### `destination.registry`
+ +**Type:** [Registry](#registry) + +The IP registry a given ip address is allocated by. + +### `destination.reverse_dns`
+ +**Type:** [FQDN](#fqdn) + +Reverse DNS name acquired through a reverse DNS query on an IP address. N.B. Record types other than PTR records may also appear in the reverse DNS tree. Furthermore, unfortunately, there is no rule prohibiting people from writing anything in a PTR record. Even JavaScript will work. A final point is stripped, string is converted to lower case characters. + +### `destination.tor_node`
+ +**Type:** [Boolean](#boolean) + +If the destination IP was a known tor node. + +### `destination.url`
+ +**Type:** [URL](#url) + +A URL denotes an IOC, which refers to a malicious resource, whose interpretation is defined by the abuse type. A URL with the abuse type phishing refers to a phishing resource. + +### `destination.urlpath`
+ +**Type:** [String](#string) + +The path portion of an HTTP or related network request. + +### `event_description.target`
+ +**Type:** [String](#string) + +Some sources denominate the target (organization) of an attack. + +### `event_description.text`
+ +**Type:** [String](#string) + +A free-form textual description of an abuse event. + +### `event_description.url`
+ +**Type:** [URL](#url) + +A description URL is a link to a further description of the abuse event in question. + +### `event_hash`
+ +**Type:** [UppercaseString](#uppercasestring) + +Computed event hash with specific keys and values that identify a unique event. At present, the hash should default to using the SHA1 function. Please note that for an event hash to be able to match more than one event (deduplication) the receiver of an event should calculate it based on a minimal set of keys and values present in the event. Using for example the observation time in the calculation will most likely render the checksum useless for deduplication purposes. + +### `extra`
+ +**Type:** [JSONDict](#jsondict) + +All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, etc. **Note**: this is only intended for mapping any fields which can not map naturally into the data harmonization. It is not intended for extending the data harmonization with your own fields. + +### `feed.accuracy`
+ +**Type:** [Accuracy](#accuracy) + +A float between 0 and 100 that represents how accurate the data in the feed is + +### `feed.code`
+ +**Type:** [String](#string) + +Code name for the feed, e.g. DFGS, HSDAG etc. + +### `feed.documentation`
+ +**Type:** [String](#string) + +A URL or hint where to find the documentation of this feed. + +### `feed.name`
+ +**Type:** [String](#string) + +Name for the feed, usually found in collector bot configuration. + +### `feed.provider`
+ +**Type:** [String](#string) + +Name for the provider of the feed, usually found in collector bot configuration. + +### `feed.url`
+ +**Type:** [URL](#url) + +The URL of a given abuse feed, where applicable + +### `malware.hash.md5`
+ +**Type:** [String](#string) + +A string depicting an MD5 checksum for a file, be it a malware sample for example. + +### `malware.hash.sha1`
+ +**Type:** [String](#string) + +A string depicting a SHA1 checksum for a file, be it a malware sample for example. + +### `malware.hash.sha256`
+ +**Type:** [String](#string) + +A string depicting a SHA256 checksum for a file, be it a malware sample for example. + +### `malware.name`
+ +**Type:** [LowercaseString](#lowercasestring) + +The malware name in lower case. + +### `malware.version`
+ +**Type:** [String](#string) + +A version string for an identified artifact generation, e.g. a crime-ware kit. + +### `misp.attribute_uuid`
+ +**Type:** [LowercaseString](#lowercasestring) + +MISP - Malware Information Sharing Platform & Threat Sharing UUID of an attribute. + +### `misp.event_uuid`
+ +**Type:** [LowercaseString](#lowercasestring) + +MISP - Malware Information Sharing Platform & Threat Sharing UUID. + +### `output`
+ +**Type:** [JSON](#json) + +Event data converted into foreign format, intended to be exported by output plugin. + +### `protocol.application`
+ +**Type:** [LowercaseString](#lowercasestring) + +e.g. vnc, ssh, sip, irc, http or smtp. + +### `protocol.transport`
+ +**Type:** [LowercaseString](#lowercasestring) + +e.g. tcp, udp, icmp. + +### `raw`
+ +**Type:** [Base64](#base64) + +The original line of the event, encoded in base64. + +### `rtir_id`
+ +**Type:** [Integer](#integer) + +Request Tracker Incident Response ticket id. + +### `screenshot_url`
+ +**Type:** [URL](#url) + +Some sources may report URLs related to an image generated of a resource without any metadata, or a URL pointing to a resource which has been rendered into a webshot, e.g. a PNG image and the relevant metadata related to its retrieval/generation. + +### `source.abuse_contact`
+ +**Type:** [LowercaseString](#lowercasestring) + +Abuse contact for source address. A comma separated list. + +### `source.account`
+ +**Type:** [String](#string) + +An account name or email address, which has been identified to relate to the source of an abuse event. + +### `source.allocated`
+ +**Type:** [DateTime](#datetime) + +Allocation date corresponding to BGP prefix. + +### `source.as_name`
+ +**Type:** [String](#string) + +The autonomous system name from which the connection originated. + +### `source.asn`
+ +**Type:** [ASN](#asn) + +The autonomous system number from which the connection originated. + +### `source.domain_suffix`
+ +**Type:** [FQDN](#fqdn) + +The suffix of the domain from the public suffix list. + +### `source.fqdn`
+ +**Type:** [FQDN](#fqdn) + +A DNS name related to the host from which the connection originated. DNS allows even binary data in DNS, so we have to allow everything. A final point is stripped, string is converted to lower case characters. + +### `source.geolocation.cc`
+ +**Type:** [UppercaseString](#uppercasestring) + +Country-Code according to ISO3166-1 alpha-2 for the source IP. + +### `source.geolocation.city`
+ +**Type:** [String](#string) + +Some geolocation services refer to city-level geolocation. + +### `source.geolocation.country`
+ +**Type:** [String](#string) + +The country name derived from the ISO3166 country code (assigned to cc field). + +### `source.geolocation.cymru_cc`
+ +**Type:** [UppercaseString](#uppercasestring) + +The country code denoted for the ip by the Team Cymru asn to ip mapping service. + +### `source.geolocation.geoip_cc`
+ +**Type:** [UppercaseString](#uppercasestring) + +MaxMind Country Code (ISO3166-1 alpha-2). + +### `source.geolocation.latitude`
+ +**Type:** [Float](#float) + +Latitude coordinates derived from a geolocation service, such as MaxMind geoip db. + +### `source.geolocation.longitude`
+ +**Type:** [Float](#float) + +Longitude coordinates derived from a geolocation service, such as MaxMind geoip db. + +### `source.geolocation.region`
+ +**Type:** [String](#string) + +Some geolocation services refer to region-level geolocation. + +### `source.geolocation.state`
+ +**Type:** [String](#string) + +Some geolocation services refer to state-level geolocation. + +### `source.ip`
+ +**Type:** [IPAddress](#ipaddress) + +The ip observed to initiate the connection. + +### `source.local_hostname`
+ +**Type:** [String](#string) + +Some sources report an internal hostname within a NAT related to the name configured for a compromised system. + +### `source.local_ip`
+ +**Type:** [IPAddress](#ipaddress) + +Some sources report an internal (NATed) IP address related to a compromised system. N.B. RFC1918 IPs are OK here. + +### `source.network`
+ +**Type:** [IPNetwork](#ipnetwork) + +CIDR for an autonomous system. Also known as BGP prefix. If multiple values are possible, select the most specific. + +### `source.port`
+ +**Type:** [Integer](#integer) + +The port from which the connection originated. + +### `source.registry`
+ +**Type:** [Registry](#registry) + +The IP registry a given ip address is allocated by. + +### `source.reverse_dns`
+ +**Type:** [FQDN](#fqdn) + +Reverse DNS name acquired through a reverse DNS query on an IP address. N.B. Record types other than PTR records may also appear in the reverse DNS tree. Furthermore, unfortunately, there is no rule prohibiting people from writing anything in a PTR record. Even JavaScript will work. A final point is stripped, string is converted to lower case characters. + +### `source.tor_node`
+ +**Type:** [Boolean](#boolean) + +If the source IP was a known tor node. + +### `source.url`
+ +**Type:** [URL](#url) + +A URL denotes an IOC, which refers to a malicious resource, whose interpretation is defined by the abuse type. A URL with the abuse type phishing refers to a phishing resource. + +### `source.urlpath`
+ +**Type:** [String](#string) + +The path portion of an HTTP or related network request. + +### `status`
+ +**Type:** [String](#string) + +Status of the malicious resource (phishing, dropzone, etc), e.g. online, offline. + +### `time.observation`
+ +**Type:** [DateTime](#datetime) + +The time the collector of the local instance processed (observed) the event. + +### `time.source`
+ +**Type:** [DateTime](#datetime) + +The time of occurrence of the event as reported by the feed (source). + +### `tlp`
+ +**Type:** [TLP](#tlp) + +Traffic Light Protocol level of the event. + diff --git a/docs/user/eventdb.rst b/docs/user/eventdb.rst deleted file mode 100644 index b3704cd0a..000000000 --- a/docs/user/eventdb.rst +++ /dev/null @@ -1,176 +0,0 @@ -.. - SPDX-FileCopyrightText: 2021 Birger Schacht, Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -======= -EventDB -======= - -The EventDB is not a software itself. - -The EventDB is a database (usually `PostgreSQL `_) that gets filled with data -from IntelMQ using the :ref:`intelmq.bots.outputs.sql.output` Output Bot. Its core is the `events` -table with the structure corresponding to the :doc:`IntelMQ Data Format `. -Having the table created is required to use the EventDB. - -.. _intelmq_psql_initdb: - ------------------------ -intelmq_psql_initdb ------------------------ - -IntelMQ comes with the ``intelmq_psql_initdb`` command line tool designed to help with creating the -EventDB. It creates in the first line: - -- A ``CREATE TABLE events`` statement with all valid IntelMQ fields as columns and correct types -- Several indexes as examples for a good read & search performance - -Having an `events` table as outlined in the SQL file, IntelMQ's :ref:`intelmq.bots.outputs.sql.output` -Output Bot can write all received events into this database table. - -In addition, the script supports some additional features supporting use cases described later in -this document: - -- ``--partition-key`` - for generating schema aligned with :ref:`TimescaleDB ` - or partitioned tables, -- ``--separate-raws`` - for generating views and triggers needed to :ref:`eventdb_raws_table` - (works also together with adjustments for partitioning). - -For a full list of supported parameters, call the script help using ``-h`` parameter. - -All elements of the generated SQL file can be adapted and extended before running the SQL file against -a database, especially the indexes. Please review the generated script before applying. 
- -Be aware that if you create tables using another DB user that is used later by the output bot, you may -need to adjust ownership or privileges in the database. If you have problems with database permissions, -refer to `PostgreSQL documentation `. - ------------------ -EventDB Utilities ------------------ - -Some scripts related to the EventDB are located in the `contrib/eventdb `_ folder in the IntelMQ git repository. - -Apply Malware Name Mapping --------------------------- - -The `apply_mapping_eventdb.py` script applies the malware name mapping to the EventDB. -Source and destination columns can be given, also a local file. If no local file is present, the mapping can be downloaded on demand. -It queries the database for all distinct malware names with the taxonomy "malicious-code" and sets another column to the malware family name. - - -Apply Domain Suffix -------------------- - -The `apply_domain_suffix.py` script writes the public domain suffix to the `source.domain_suffix` / `destination.domain_suffix` columns, extracted from `source.fqdn` / `destination.fqdn`. - -Usage -^^^^^ - -The Python scripts can connect to a PostgreSQL server with an `eventdb` database and an `events` table. The command line arguments interface for both scripts are the same. -See `--help` for more information: - -.. code-block:: bash - - apply_mapping_eventdb.py -h - apply_domain_suffix.py -h - - -PostgreSQL trigger ------------------- - -PostgreSQL trigger is a trigger keeping track of the oldest inserted/updated "time.source" data. This can be useful to (re-)generate statistics or aggregation data. - - -The SQL script can be executed in the database directly. - ------------------- -EventDB Statistics ------------------- - -The EventDB provides a great base for statistical analysis of the data. - -The `eventdb-stats repository `_ contains a Python script that generates an HTML file and includes the `Plotly JavaScript Open Source Graphing Library `_. 
-By modifying the configuration file it is possible to configure various queries that are then displayed using graphs: - -.. image:: /_static/eventdb_stats.png - :alt: EventDB Statistics Example - - -.. _timescaledb: - -------------------------------- -Using EventDB with Timescale DB -------------------------------- - -`Timescale DB `_ is a PostgreSQL extension to add time-series support, which is quite handy as you don't have to learn other syntaxes as you already know. You can use the SQL Queries as before, the extension will handle the rest. -To see all limitations, please check the `Timescale DB Documentation `_. - -What is time-series? --------------------- - -Time-series has been invented as traditional database design like relational or nosql are not made for time-based data. -A big benefit of time-series instead of other database designs over a time-based search pattern is the performance. -As IntelMQ uses data based upon time, this design is awesome & will give you a performance boost. - -How to choose the time column? ------------------------------- - -To utilize the time-series, choose a column containing the right time. This is then -used by you for manual queries and graphs, and also by the database itself for organizing the data. - -The :doc:`IntelMQ Data Format ` has two fields that can be used for this: -``time.source`` or ``time.observation``. Depending on your needs (tracking when the event occurred or when -it was detected, if different), choose one of them. - -You can use the :ref:`intelmq_psql_initdb` tool to generate SQL schema valid for TimescaleDB by passing -the partitioning key: - -.. code-block:: bash - - intelmq_psql_initdb --partition-key "time.source" - -How to setup? -------------- - -Thanks to TimescaleDB, it's very easy to setup. - -1. Choose your preferred `Timescale DB `_ environment & follow the installation instructions. -2. Now lets create a `hypertable `_, which is the timescale DB time-series structure. 
``SELECT create_hypertable('', 'time.source');``. -3. Now our hypertable is setup & timescaleDB takes care of the rest. You can perform queries as usual, for further information please check `Timescale DB Documentation `_. - -How to upgrade from my existing database? ------------------------------------------ - -To update your existing database to use this awesome time-series feature, just follow the ``How to setup`` instruction. -You can perform the ``hypertable`` command even on already existing databases. **BUT** there are `some limitations `_ from timescaleDB. - - -.. _eventdb_raws_table: - ----------------------------------------------------------- -Separating raw values in PostgreSQL using view and trigger ----------------------------------------------------------- - -In order to reduce the row size in the events table, the `raw` column's data can be separated from the other columns. -While the raw-data is about 30-50% of the data row's size, it is not used in most database queries, as it serves only a backup functionality. -Other possibilities to reduce or getting rid of this field are described in the FAQ, section :ref:`faq-remove-raw-data`. - -The steps described here are best performed before the `events` table is filled with data, but can as well be done with existing data. - -The approach requires four steps: - -1. An existing `events` table, see the first section of this document. -2. Deleting or renaming the `raw` column of the `events` table. -3. Creating a table `raws` which holds only the `raw` field of the events and linking both tables using the `event_id`. -4. Creating the view `v_events` which joins the tables `events` and `raws`. -5. Creating the function `process_v_events_insert` and `INSERT` trigger `tr_events`. - -The last steps brings us several advantages: - -- All `INSERT` statements can contain all data, including the `raw` field. -- No code changes are needed in the IntelMQ output bot or your own scripts. A migration is seamless. 
-- PostgreSQL itself ensures that the data of both tables is consistent and linked correctly. - -The complete SQL script can be generated using :ref:`intelmq_psql_initdb`. -It does *not* cover step 2 to avoid accidental data loss - you need to do this step manually. diff --git a/docs/user/feeds.md b/docs/user/feeds.md new file mode 100644 index 000000000..6eb231666 --- /dev/null +++ b/docs/user/feeds.md @@ -0,0 +1,3061 @@ + + +# Feeds + +The available feeds are grouped by the provider of the feeds. +For each feed the collector and parser that can be used is documented as well as any feed-specific parameters. +To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then rebuild the documentation. + +## Abuse.ch + +### Feodo Tracker + +List of botnet Command & Control servers (C&Cs) tracked by Feodo Tracker, associated with Dridex and Emotet (aka Heodo). + +**Public:** yes + +**Revision:** 2022-11-15 + +**Documentation:** + +**Additional Information:** The data in the column Last Online is used for `time.source` if available, with 00:00 as time. Otherwise first_seen is used as `time.source`. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://feodotracker.abuse.ch/downloads/ipblocklist.json + name: Feodo Tracker + provider: Abuse.ch + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.abusech.parser_feodotracker +``` + +--- + + +### URLhaus + +URLhaus is a project from abuse.ch with the goal of sharing malicious URLs that are being used for malware distribution. URLhaus offers a country, ASN (AS number) and Top Level Domain (TLD) feed for network operators / Internet Service Providers (ISPs), Computer Emergency Response Teams (CERTs) and domain registries. 
+ +**Public:** yes + +**Revision:** 2020-07-07 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://urlhaus.abuse.ch/feeds/tld//, https://urlhaus.abuse.ch/feeds/country//, or https://urlhaus.abuse.ch/feeds/asn// + name: URLhaus + provider: Abuse.ch + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.parser_csv +parameters: + columns: ["time.source", "source.url", "status", "classification.type|__IGNORE__", "source.fqdn|__IGNORE__", "source.ip", "source.asn", "source.geolocation.cc"] + default_url_protocol: http:// + delimiter: , + skip_header: False + type_translation: [{"malware_download": "malware-distribution"}] +``` + +--- + + +## AlienVault + +### OTX + +AlienVault OTX Collector is the bot responsible to get the report through the API. Report could vary according to subscriptions. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.alienvault_otx.collector +parameters: + api_key: {{ your API key }} + name: OTX + provider: AlienVault +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.alienvault.parser_otx +``` + +--- + + +### Reputation List + +List of malicious IPs. + +**Public:** yes + +**Revision:** 2018-01-20 + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://reputation.alienvault.com/reputation.data + name: Reputation List + provider: AlienVault + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.alienvault.parser +``` + +--- + + +## AnubisNetworks + +### Cyberfeed Stream + +Fetches and parsers the Cyberfeed data stream. 
+ +**Public:** no + +**Revision:** 2020-06-15 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http_stream +parameters: + http_url: https://prod.cyberfeed.net/stream?key={{ your API key }} + name: Cyberfeed Stream + provider: AnubisNetworks + strip_lines: true +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.anubisnetworks.parser +parameters: + use_malware_familiy_as_classification_identifier: True +``` + +--- + + +## Bambenek + +### C2 Domains + +Master Feed of known, active and non-sinkholed C&Cs domain names. Requires access credentials. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** License: https://osint.bambenekconsulting.com/license.txt + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_password: __PASSWORD__ + http_url: https://faf.bambenekconsulting.com/feeds/c2-dommasterlist.txt + http_username: __USERNAME__ + name: C2 Domains + provider: Bambenek + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.bambenek.parser +``` + +--- + + +### C2 IPs + +Master Feed of known, active and non-sinkholed C&Cs IP addresses. Requires access credentials. 
+ +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** License: https://osint.bambenekconsulting.com/license.txt + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_password: __PASSWORD__ + http_url: https://faf.bambenekconsulting.com/feeds/c2-ipmasterlist.txt + http_username: __USERNAME__ + name: C2 IPs + provider: Bambenek + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.bambenek.parser +``` + +--- + + +### DGA Domains + +Domain feed of known DGA domains from -2 to +3 days + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** License: https://osint.bambenekconsulting.com/license.txt + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://faf.bambenekconsulting.com/feeds/dga-feed.txt + name: DGA Domains + provider: Bambenek + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.bambenek.parser +``` + +--- + + +## Benkow + +### Malware Panels Tracker + +Benkow Panels tracker is a list of fresh panel from various malware. 
The feed is available on the webpage: http://benkow.cc/passwords.php + +**Public:** yes + +**Revision:** 2022-11-16 + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://benkow.cc/export.php + name: Malware Panels Tracker + provider: Benkow +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.parser_csv +parameters: + columns: ["__IGNORE__", "malware.name", "source.url", "source.fqdn|source.ip", "time.source"] + columns_required: [false, true, true, false, true] + defaults_fields: {'classification.type': 'c2-server'} + delimiter: ; + skip_header: True +``` + +--- + + +## Blocklist.de + +### Apache + +Blocklist.DE Apache Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/apache.txt + name: Apache + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### Bots + +Blocklist.DE Bots Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks attacks on the RFI-Attacks, REG-Bots, IRC-Bots or BadBots (BadBots = he has posted a Spam-Comment on a open Forum or Wiki). 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/bots.txt + name: Bots + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### Brute-force Logins + +Blocklist.DE Brute-force Login Collector is the bot responsible to get the report from source of information. All IPs which attacks Joomlas, Wordpress and other Web-Logins with Brute-Force Logins. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/bruteforcelogin.txt + name: Brute-force Logins + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### FTP + +Blocklist.DE FTP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours for attacks on the Service FTP. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/ftp.txt + name: FTP + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### IMAP + +Blocklist.DE IMAP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours for attacks on the service like IMAP, SASL, POP3, etc. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/imap.txt + name: IMAP + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### IRC Bots + +No description provided by feed provider. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/ircbot.txt + name: IRC Bots + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### Mail + +Blocklist.DE Mail Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service Mail, Postfix. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/mail.txt + name: Mail + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### SIP + +Blocklist.DE SIP Collector is the bot responsible to get the report from source of information. All IP addresses that tried to login in a SIP-, VOIP- or Asterisk-Server and are included in the IPs-List from http://www.infiltrated.net/ (Twitter). 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/sip.txt + name: SIP + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### SSH + +Blocklist.DE SSH Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service SSH. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/ssh.txt + name: SSH + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +### Strong IPs + +Blocklist.DE Strong IPs Collector is the bot responsible to get the report from source of information. All IPs which are older then 2 month and have more then 5.000 attacks. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.blocklist.de/lists/strongips.txt + name: Strong IPs + provider: Blocklist.de + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blocklistde.parser +``` + +--- + + +## Blueliv + +### CrimeServer + +Blueliv Crimeserver Collector is the bot responsible to get the report through the API. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** The service uses a different API for free users and paying subscribers. In 'CrimeServer' feed the difference lies in the data points present in the feed. 
The non-free API available from Blueliv contains, for this specific feed, following extra fields not present in the free API; "_id" - Internal unique ID "subType" - Subtype of the Crime Server "countryName" - Country name where the Crime Server is located, in English "city" - City where the Crime Server is located "domain" - Domain of the Crime Server "host" - Host of the Crime Server "createdAt" - Date when the Crime Server was added to Blueliv CrimeServer database "asnCidr" - Range of IPs that belong to an ISP (registered via Autonomous System Number (ASN)) "asnId" - Identifier of an ISP registered via ASN "asnDesc" Description of the ISP registered via ASN + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.blueliv.collector_crimeserver +parameters: + api_key: __APIKEY__ + name: CrimeServer + provider: Blueliv + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.blueliv.parser_crimeserver +``` + +--- + + +## CERT-Bund + +### CB-Report Malware infections via IMAP + +CERT-Bund sends reports for the malware-infected hosts. + +**Public:** no + +**Revision:** 2020-08-20 + +**Additional Information:** Traffic from malware related hosts contacting command-and-control servers is caught and sent to national CERT teams. There are two e-mail feeds with identical CSV structure -- one reports on general malware infections, the other on the Avalanche botnet. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.mail.collector_mail_attach +parameters: + attach_regex: events.csv + extract_files: False + folder: INBOX + mail_host: __HOST__ + mail_password: __PASSWORD__ + mail_ssl: True + mail_user: __USERNAME__ + name: CB-Report Malware infections via IMAP + provider: CERT-Bund + rate_limit: 86400 + subject_regex: ^\\[CB-Report#.* Malware infections (\\(Avalanche\\) )?in country +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.parser_csv +parameters: + columns: ["source.asn", "source.ip", "time.source", "classification.type", "malware.name", "source.port", "destination.ip", "destination.port", "destination.fqdn", "protocol.transport"] + default_url_protocol: http:// + defaults_fields: {'classification.type': 'infected-system'} + delimiter: , + skip_header: True + time_format: from_format|%Y-%m-%d %H:%M:%S +``` + +--- + + +## CERT.PL + +### N6 Stomp Stream + +N6 Collector - CERT.pl's N6 Collector - N6 feed via STOMP interface. Note that rate_limit does not apply for this bot as it is waiting for messages on a stream. + +**Public:** no + +**Revision:** 2023-09-23 + +**Documentation:** + +**Additional Information:** Contact cert.pl to get access to the feed. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.stomp.collector +parameters: + auth_by_ssl_client_certificate: False + exchange: {insert your exchange point as given by CERT.pl} + name: N6 Stomp Stream + password: {insert n6 user's API key} + port: 61614 + provider: CERT.PL + server: n6stream.cert.pl + ssl_ca_certificate: {insert path to CA file for CERT.pl's n6} + username: {insert n6 user's login} +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.n6.parser_n6stomp +``` + +--- + + +## CINS Army + +### CINS Army List + +The CINS Army (CIArmy.com) list is a subset of the CINS Active Threat Intelligence ruleset, and consists of IP addresses that meet one of two basic criteria: 1) The IP's recent Rogue Packet score factor is very poor, or 2) The IP has tripped a designated number of 'trusted' alerts across a given number of our Sentinels deployed around the world. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://cinsscore.com/list/ci-badguys.txt + name: CINS Army List + provider: CINS Army + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.ci_army.parser +``` + +--- + + +## CZ.NIC + +### HaaS + +SSH attackers against HaaS (Honeypot as a Service) provided by CZ.NIC, z.s.p.o. The dump is published once a day. 
+ +**Public:** yes + +**Revision:** 2020-07-22 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + extract_files: True + http_url: https://haas.nic.cz/stats/export/{time[%Y/%m/%Y-%m-%d]}.json.gz + http_url_formatting: {'days': -1} + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cznic.parser_haas +``` + +--- + + +### Proki + +Aggregation of various sources on malicious IP addresses (malware spreaders or C&C servers). + +**Public:** no + +**Revision:** 2020-08-17 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://proki.csirt.cz/api/1/__APIKEY__/data/day/{time[%Y/%m/%d]} + http_url_formatting: {'days': -1} + name: Proki + provider: CZ.NIC + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cznic.parser_proki +``` + +--- + + +## Calidog + +### CertStream + +HTTP Websocket Stream from certstream.calidog.io providing data from Certificate Transparency Logs. + +**Public:** yes + +**Revision:** 2018-06-15 + +**Documentation:** + +**Additional Information:** Be aware that this feed provides a lot of data and may overload your system quickly. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.calidog.collector_certstream +parameters: + name: CertStream + provider: Calidog +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.calidog.parser_certstream +``` + +--- + + +## CleanMX + +### Phishing + +In order to download the CleanMX feed you need to use a custom user agent and register that user agent. 
+ +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_timeout_sec: 120 + http_url: http://support.clean-mx.de/clean-mx/xmlphishing?response=alive&domain= + http_user_agent: {{ your user agent }} + name: Phishing + provider: CleanMX + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cleanmx.parser +``` + +--- + + +### Virus + +In order to download the CleanMX feed you need to use a custom user agent and register that user agent. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_timeout_sec: 120 + http_url: http://support.clean-mx.de/clean-mx/xmlviruses?response=alive&domain= + http_user_agent: {{ your user agent }} + name: Virus + provider: CleanMX + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cleanmx.parser +``` + +--- + + +## CyberCrime Tracker + +### Latest + +C2 servers + +**Public:** yes + +**Revision:** 2019-03-19 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://cybercrime-tracker.net/index.php + name: Latest + provider: CyberCrime Tracker + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.html_table.parser +parameters: + columns: ["time.source", "source.url", "source.ip", "malware.name", "__IGNORE__"] + default_url_protocol: http:// + defaults_fields: {'classification.type': 'c2-server'} + skip_table_head: True +``` + +--- + + +## DShield + +### AS Details + +No description provided by feed provider. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dshield.org/asdetailsascii.html?as={{ AS Number }} + name: AS Details + provider: DShield + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dshield.parser_asn +``` + +--- + + +### Block + +This list summarizes the top 20 attacking class C (/24) subnets over the last three days. The number of 'attacks' indicates the number of targets reporting scans from this subnet. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.dshield.org/block.txt + name: Block + provider: DShield + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dshield.parser_block +``` + +--- + + +## Danger Rulez + +### Bruteforce Blocker + +Its main purpose is to block SSH bruteforce attacks via firewall. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://danger.rulez.sk/projects/bruteforceblocker/blist.php + name: Bruteforce Blocker + provider: Danger Rulez + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.danger_rulez.parser +``` + +--- + + +## Dataplane + +### DNS Recursion Desired + +Entries consist of fields with identifying characteristics of a source IP address that has been seen performing a DNS recursion desired query to a remote host. This report lists hosts that are suspicious of more than just port scanning. The host may be DNS server cataloging or searching for hosts to use for DNS-based DDoS amplification. 
+ +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/dnsrd.txt + name: DNS Recursion Desired + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### DNS Recursion Desired ANY + +Entries consist of fields with identifying characteristics of a source IP address that has been seen performing a DNS recursion desired IN ANY query to a remote host. This report lists hosts that are suspicious of more than just port scanning. The host may be DNS server cataloging or searching for hosts to use for DNS-based DDoS amplification. + +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/dnsrdany.txt + name: DNS Recursion Desired ANY + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### DNS Version + +Entries consist of fields with identifying characteristics of a source IP address that has been seen performing a DNS CH TXT version.bind query to a remote host. This report lists hosts that are suspicious of more than just port scanning. The host may be DNS server cataloging or searching for vulnerable DNS servers. 
+ +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/dnsversion.txt + name: DNS Version + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### Protocol 41 + +Entries consist of fields with identifying characteristics of a host that has been detected to offer open IPv6 over IPv4 tunneling. This could allow for the host to be used as a public proxy against IPv6 hosts. + +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/proto41.txt + name: Protocol 41 + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SIP Query + +Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP OPTIONS query to a remote host. This report lists hosts that are suspicious of more than just port scanning. The hosts may be SIP server cataloging or conducting various forms of telephony abuse. Report is updated hourly. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/sipquery.txt + name: SIP Query + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SIP Registration + +Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP REGISTER operation to a remote host. 
This report lists hosts that are suspicious of more than just port scanning. The hosts may be SIP client cataloging or conducting various forms of telephony abuse. Report is updated hourly. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/sipregistration.txt + name: SIP Registration + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SMTP Data + +Entries consist of fields with identifying characteristics of a host that has been seen initiating a SMTP DATA operation to a remote host. The source report lists hosts that are suspicious of more than just port scanning. The host may be SMTP server cataloging or conducting various forms of email abuse. + +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/smtpdata.txt + name: SMTP Data + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SMTP Greet + +Entries consist of fields with identifying characteristics of a host that has been seen initiating a SMTP HELO/EHLO operation to a remote host. The source report lists hosts that are suspicious of more than just port scanning. The host may be SMTP server cataloging or conducting various forms of email abuse. 
+ +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/smtpgreet.txt + name: SMTP Greet + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SSH Client Connection + +Entries below consist of fields with identifying characteristics of a source IP address that has been seen initiating an SSH connection to a remote host. This report lists hosts that are suspicious of more than just port scanning. The hosts may be SSH server cataloging or conducting authentication attack attempts. Report is updated hourly. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/sshclient.txt + name: SSH Client Connection + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### SSH Password Authentication + +Entries below consist of fields with identifying characteristics of a source IP address that has been seen attempting to remotely login to a host using SSH password authentication. The report lists hosts that are highly suspicious and are likely conducting malicious SSH password authentication attacks. Report is updated hourly. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/sshpwauth.txt + name: SSH Password Authentication + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### Telnet Login + +Entries consist of fields with identifying characteristics of a host that has been seen initiating a telnet connection to a remote host. The source report lists hosts that are suspicious of more than just port scanning. The host may be telnet server cataloging or conducting authentication attack attempts. + +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/telnetlogin.txt + name: Telnet Login + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +### VNC/RFB Login + +Entries consist of fields with identifying characteristics of a host that has been seen initiating a VNC remote buffer session to a remote host. The source report lists hosts that are suspicious of more than just port scanning. The host may be VNC/RFB server cataloging or conducting authentication attack attempts. + +**Public:** yes + +**Revision:** 2021-09-09 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://dataplane.org/vncrfb.txt + name: VNC/RFB Login + provider: Dataplane + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dataplane.parser +``` + +--- + + +## ESET + +### ETI Domains + +Domain data from ESET's TAXII API. 
+ +**Public:** no + +**Revision:** 2020-06-30 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.eset.collector +parameters: + collection: ei.domains v2 (json) + endpoint: eti.eset.com + password: + time_delta: 3600 + username: +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.eset.parser +``` + +--- + + +### ETI URLs + +URL data from ESET's TAXII API. + +**Public:** no + +**Revision:** 2020-06-30 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.eset.collector +parameters: + collection: ei.urls (json) + endpoint: eti.eset.com + password: + time_delta: 3600 + username: +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.eset.parser +``` + +--- + + +## Fireeye + +### Malware Analysis System + +Process data from Fireeye mail and file analysis appliances. SHA1 and MD5 malware hashes are extracted and if there is network communication, also URLs and domains. + +**Public:** no + +**Revision:** 2021-05-03 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.fireeye.collector_mas +parameters: + host: + http_password: + http_username: + request_duration: +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.fireeye.parser +``` + +--- + + +## Fraunhofer + +### DGA Archive + +Fraunhofer DGA collector fetches data from Fraunhofer's domain generation archive. 
+ +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_password: {{ your password }} + http_url: https://dgarchive.caad.fkie.fraunhofer.de/today + http_username: {{ your username }} + name: DGA Archive + provider: Fraunhofer + rate_limit: 10800 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.fraunhofer.parser_dga +``` + +--- + + +## Have I Been Pwned + +### Enterprise Callback + +With the Enterprise Subscription of 'Have I Been Pwned' you are able to provide a callback URL and any new leak data is submitted to it. It is recommended to put a webserver with Authorization check, TLS etc. in front of the API collector. + +**Public:** no + +**Revision:** 2019-09-11 + +**Documentation:** + +**Additional Information:** A minimal nginx configuration could look like: +``` +server { + listen 443 ssl http2; + server_name [your host name]; + client_max_body_size 50M; + + ssl_certificate [path to your certificate]; + ssl_certificate_key [path to your key]; + + location /[your private url] { + if ($http_authorization != '[your private password]') { + return 403; + } + proxy_pass http://localhost:5001/intelmq/push; + proxy_read_timeout 30; + proxy_connect_timeout 30; + } +} +``` + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.api.collector_api +parameters: + name: Enterprise Callback + port: 5001 + provider: Have I Been Pwned +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.hibp.parser_callback +``` + +--- + + +## MalwarePatrol + +### DansGuardian + +Malware block list with URLs + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://lists.malwarepatrol.net/cgi/getfile?receipt={{ your API key }}&product=8&list=dansguardian + name: 
DansGuardian + provider: MalwarePatrol + rate_limit: 180000 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.malwarepatrol.parser_dansguardian +``` + +--- + + +## MalwareURL + +### Latest malicious activity + +Latest malicious domains/IPs. + +**Public:** yes + +**Revision:** 2018-02-05 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.malwareurl.com/ + name: Latest malicious activity + provider: MalwareURL + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.malwareurl.parser +``` + +--- + + +## McAfee Advanced Threat Defense + +### Sandbox Reports + +Processes reports from McAfee's sandboxing solution via the openDXL API. + +**Public:** no + +**Revision:** 2018-07-05 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.opendxl.collector +parameters: + dxl_config_file: {{ location of dxl configuration file }} + dxl_topic: /mcafee/event/atd/file/report +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.mcafee.parser_atd +parameters: + verdict_severity: 4 +``` + +--- + + +## Microsoft + +### BingMURLs via Interflow + +Collects Malicious URLs detected by Bing from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP). + +**Public:** no + +**Revision:** 2018-05-29 + +**Documentation:** + +**Additional Information:** Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.microsoft.collector_interflow +parameters: + api_key: {{ your API key }} + file_match: ^bingmurls_ + http_timeout_sec: 300 + name: BingMURLs via Interflow + not_older_than: 2 days + provider: Microsoft + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.microsoft.parser_bingmurls +``` + +--- + + +### CTIP C2 via Azure + +Collects the CTIP C2 feed from a shared Azure Storage. The feed is available via Microsoft’s Government Security Program (GSP). + +**Public:** no + +**Revision:** 2020-05-29 + +**Documentation:** + +**Additional Information:** The cache is needed for memorizing which files have already been processed, the TTL should be higher than the oldest file available in the storage (currently the last three days are available). The connection string contains endpoint as well as authentication information. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.microsoft.collector_azure +parameters: + connection_string: {{ your connection string }} + container_name: ctip-c2 + name: CTIP C2 via Azure + provider: Microsoft + rate_limit: 3600 + redis_cache_db: 5 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 864000 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.microsoft.parser_ctip +``` + +--- + + +### CTIP Infected via Azure + +Collects the CTIP (Sinkhole data) from a shared Azure Storage. The feed is available via Microsoft’s Government Security Program (GSP). + +**Public:** no + +**Revision:** 2022-06-01 + +**Documentation:** + +**Additional Information:** The cache is needed for memorizing which files have already been processed, the TTL should be higher than the oldest file available in the storage (currently the last three days are available). The connection string contains endpoint as well as authentication information. 
As many IPs occur very often in the data, you may want to use a deduplicator specifically for the feed. More information about the feed can be found on www.dcuctip.com after login with your GSP account. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.microsoft.collector_azure +parameters: + connection_string: {{ your connection string }} + container_name: ctip-infected-summary + name: CTIP Infected via Azure + provider: Microsoft + rate_limit: 3600 + redis_cache_db: 5 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 864000 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.microsoft.parser_ctip +``` + +--- + + +### CTIP Infected via Interflow + +Collects the CTIP Infected feed (Sinkhole data for your country) files from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP). + +**Public:** no + +**Revision:** 2018-03-06 + +**Documentation:** + +**Additional Information:** Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. As many IPs occur very often in the data, you may want to use a deduplicator specifically for the feed. More information about the feed can be found on www.dcuctip.com after login with your GSP account. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.microsoft.collector_interflow +parameters: + api_key: {{ your API key }} + file_match: ^ctip_ + http_timeout_sec: 300 + name: CTIP Infected via Interflow + not_older_than: 2 days + provider: Microsoft + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.microsoft.parser_ctip +``` + +--- + + +## Netlab 360 + +### DGA + +This feed lists DGA family, Domain, Start and end of valid time (UTC) of a number of DGA families. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://data.netlab.360.com/feeds/dga/dga.txt + name: DGA + provider: Netlab 360 + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.netlab_360.parser +``` + +--- + + +### Hajime Scanner + +This feed lists IP addresses of the known Hajime bot network. The IP data are obtained by joining the DHT network and interacting with the Hajime nodes. + +**Public:** yes + +**Revision:** 2019-08-01 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://data.netlab.360.com/feeds/hajime-scanner/bot.list + name: Hajime Scanner + provider: Netlab 360 + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.netlab_360.parser +``` + +--- + + +### Magnitude EK + +This feed lists FQDN and possibly the URL used by Magnitude Exploit Kit. Information also includes the IP address used for the domain and last time seen. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://data.netlab.360.com/feeds/ek/magnitude.txt + name: Magnitude EK + provider: Netlab 360 + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.netlab_360.parser +``` + +--- + + +## OpenPhish + +### Premium Feed + +OpenPhish is a fully automated self-contained platform for phishing intelligence. It identifies phishing sites and performs intelligence analysis in real time without human intervention and without using any external resources, such as blacklists. 
+ +**Public:** no + +**Revision:** 2018-02-06 + +**Documentation:** + +**Additional Information:** Discounts available for Government and National CERTs as well as for Nonprofit and Not-for-Profit organizations. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_password: {{ your password }} + http_url: https://openphish.com/prvt-intell/ + http_username: {{ your username }} + name: Premium Feed + provider: OpenPhish + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.openphish.parser_commercial +``` + +--- + + +### Public feed + +OpenPhish is a fully automated self-contained platform for phishing intelligence. It identifies phishing sites and performs intelligence analysis in real time without human intervention and without using any external resources, such as blacklists. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.openphish.com/feed.txt + name: Public feed + provider: OpenPhish + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.openphish.parser +``` + +--- + + +## PhishTank + +### Online + +PhishTank is a collaborative clearing house for data and information about phishing on the Internet. + +**Public:** no + +**Revision:** 2022-11-21 + +**Documentation:** + +**Additional Information:** Updated hourly as per the documentation. Download is possible without an API key, but limited to a few downloads per day. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + extract_files: True + http_url: https://data.phishtank.com/data/{{ your API key }}/online-valid.json.gz + name: Online + provider: PhishTank + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.phishtank.parser +``` + +--- + + +## PrecisionSec + +### Agent Tesla + +Agent Tesla IoCs, URLs where the malware is hosted. + +**Public:** yes + +**Revision:** 2019-04-02 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://precisionsec.com/threat-intelligence-feeds/agent-tesla/ + name: Agent Tesla + provider: PrecisionSec + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.html_table.parser +parameters: + columns: ["source.ip|source.url", "time.source"] + default_url_protocol: http:// + defaults_fields: {'classification.type': 'malware-distribution'} + skip_table_head: True +``` + +--- + + +## Shadowserver + +### Via API + +Shadowserver sends out a variety of reports to subscribers, see documentation. + +**Public:** no + +**Revision:** 2020-01-08 + +**Documentation:** + +**Additional Information:** This configuration fetches user-configurable reports from the Shadowserver Reports API. For a list of reports, have a look at the Shadowserver collector and parser documentation. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.shadowserver.collector_reports_api +parameters: + api_key: + country: + rate_limit: 86400 + redis_cache_db: 12 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 864000 + secret: + types: +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.shadowserver.parser_json +``` + +--- + + +### Via IMAP + +Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports). + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** The configuration retrieves the data from the attachments of e-mails via IMAP. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.mail.collector_mail_attach +parameters: + attach_regex: csv.zip + extract_files: True + folder: INBOX + mail_host: __HOST__ + mail_password: __PASSWORD__ + mail_ssl: True + mail_user: __USERNAME__ + name: Via IMAP + provider: Shadowserver + rate_limit: 86400 + subject_regex: __REGEX__ +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.shadowserver.parser +``` + +--- + + +### Via Request Tracker + +Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports). + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** The configuration retrieves the data from an RT/RTIR ticketing instance via the attachment or a download. 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.rt.collector_rt +parameters: + attachment_regex: \\.csv\\.zip$ + extract_attachment: True + extract_download: False + http_password: {{ your HTTP Authentication password or null }} + http_username: {{ your HTTP Authentication username or null }} + password: __PASSWORD__ + provider: Shadowserver + rate_limit: 3600 + search_not_older_than: {{ relative time or null }} + search_owner: nobody + search_queue: Incident Reports + search_requestor: autoreports@shadowserver.org + search_status: new + search_subject_like: \[__COUNTRY__\] Shadowserver __COUNTRY__ + set_status: open + take_ticket: True + uri: http://localhost/rt/REST/1.0 + url_regex: https://dl.shadowserver.org/[a-zA-Z0-9?_-]* + user: __USERNAME__ +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.shadowserver.parser +``` + +--- + + +## Shodan + +### Country Stream + +Collects the Shodan stream for one or multiple countries from the Shodan API. + +**Public:** no + +**Revision:** 2021-03-22 + +**Documentation:** + +**Additional Information:** A Shodan account with streaming permissions is needed. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.shodan.collector_stream +parameters: + api_key: + countries: + error_retry_delay: 0 + name: Country Stream + provider: Shodan +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.shodan.parser +parameters: + error_retry_delay: 0 + ignore_errors: False + minimal_mode: False +``` + +--- + + +## Spamhaus + +### ASN Drop + +ASN-DROP contains a list of Autonomous System Numbers controlled by spammers or cyber criminals, as well as "hijacked" ASNs. ASN-DROP can be used to filter BGP routes which are being used for malicious purposes. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.spamhaus.org/drop/asndrop.txt + name: ASN Drop + provider: Spamhaus + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.spamhaus.parser_drop +``` + +--- + + +### CERT + +Spamhaus CERT Insight Portal. Access limited to CERTs and CSIRTs with national or regional responsibility. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: {{ your CERT portal URL }} + name: CERT + provider: Spamhaus + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.spamhaus.parser_cert +``` + +--- + + +### Drop + +The DROP list will not include any IP address space under the control of any legitimate network - even if being used by "the spammers from hell". DROP will only include netblocks allocated directly by an established Regional Internet Registry (RIR) or National Internet Registry (NIR) such as ARIN, RIPE, AFRINIC, APNIC, LACNIC or KRNIC or direct RIR allocations. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.spamhaus.org/drop/drop.txt + name: Drop + provider: Spamhaus + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.spamhaus.parser_drop +``` + +--- + + +### Dropv6 + +The DROPv6 list includes IPv6 ranges allocated to spammers or cyber criminals. DROPv6 will only include IPv6 netblocks allocated directly by an established Regional Internet Registry (RIR) or National Internet Registry (NIR) such as ARIN, RIPE, AFRINIC, APNIC, LACNIC or KRNIC or direct RIR allocations. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.spamhaus.org/drop/dropv6.txt + name: Dropv6 + provider: Spamhaus + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.spamhaus.parser_drop +``` + +--- + + +### EDrop + +EDROP is an extension of the DROP list that includes sub-allocated netblocks controlled by spammers or cyber criminals. EDROP is meant to be used in addition to the direct allocations on the DROP list. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.spamhaus.org/drop/edrop.txt + name: EDrop + provider: Spamhaus + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.spamhaus.parser_drop +``` + +--- + + +## Strangereal Intel + +### DailyIOC + +Daily IOC from tweets and articles + +**Public:** yes + +**Revision:** 2019-12-05 + +**Documentation:** + +**Additional Information:** The collector's `extra_fields` parameter may be any of the fields from the GitHub contents API response. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.github_api.collector_github_contents_api +parameters: + personal_access_token: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token + regex: .*.json + repository: StrangerealIntel/DailyIOC +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.github_feed +``` + +--- + + +## Sucuri + +### Hidden IFrames + +Latest hidden iframes identified on compromised web sites. 
+ +**Public:** yes + +**Revision:** 2018-01-28 + +**Documentation:** + +**Additional Information:** Please note that the parser only extracts the hidden iframes and the conditional redirects, not the encoded javascript. + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://labs.sucuri.net/?malware + name: Hidden IFrames + provider: Sucuri + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.sucuri.parser +``` + +--- + + +## Surbl + +### Malicious Domains + +Detected malicious domains. Note that Sponsored Datafeed Service (SDS) access to the SURBL data via rsync has to be opened up for your IP address. + +**Public:** no + +**Revision:** 2018-09-04 + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.rsync.collector_rsync +parameters: + file: wild.surbl.org.rbldnsd + rsync_path: blacksync.prolocation.net::surbl-wild/ +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.surbl.parser +``` + +--- + + +## Team Cymru + +### CAP + +Team Cymru provides daily lists of compromised or abused devices for the ASNs and/or netblocks within a CSIRT's jurisdiction. This includes such information as bot infected hosts, command and control systems, open resolvers, malware urls, phishing urls, and brute force attacks. + +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + +**Additional Information:** "Two feed types are offered: + * The new https://www.cymru.com/$certname/$certname_{time[%Y%m%d]}.txt + * and the old https://www.cymru.com/$certname/infected_{time[%Y%m%d]}.txt + Both formats are supported by the parser and the new one is recommended. + As of 2019-09-12 the old format will be retired soon." 
+ + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_password: {{ your password }} + http_url: https://www.cymru.com/$certname/$certname_{time[%Y%m%d]}.txt + http_url_formatting: True + http_username: {{ your username }} + name: CAP + provider: Team Cymru + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cymru.parser_cap_program +``` + +--- + + +### Full Bogons IPv4 + +Fullbogons are a larger set which also includes IP space that has been allocated to an RIR, but not assigned by that RIR to an actual ISP or other end-user. IANA maintains a convenient IPv4 summary page listing allocated and reserved netblocks, and each RIR maintains a list of all prefixes that they have assigned to end-users. Our bogon reference pages include additional links and resources to assist those who wish to properly filter bogon prefixes within their networks. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt + name: Full Bogons IPv4 + provider: Team Cymru + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cymru.parser_full_bogons +``` + +--- + + +### Full Bogons IPv6 + +Fullbogons are a larger set which also includes IP space that has been allocated to an RIR, but not assigned by that RIR to an actual ISP or other end-user. IANA maintains a convenient IPv4 summary page listing allocated and reserved netblocks, and each RIR maintains a list of all prefixes that they have assigned to end-users. Our bogon reference pages include additional links and resources to assist those who wish to properly filter bogon prefixes within their networks. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt + name: Full Bogons IPv6 + provider: Team Cymru + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.cymru.parser_full_bogons +``` + +--- + + +## Threatminer + +### Recent domains + +Latest malicious domains. + +**Public:** yes + +**Revision:** 2018-02-06 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.threatminer.org/ + name: Recent domains + provider: Threatminer + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.threatminer.parser +``` + +--- + + +## Turris + +### Greylist + +The data are processed and classified every week and behaviour of IP addresses that accessed a larger number of Turris routers is evaluated. The result is a list of addresses that have tried to obtain information about services on the router or tried to gain access to them. The list also contains a list of tags for each address which indicate what behaviour of the address was observed. + +**Public:** yes + +**Revision:** 2023-06-13 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://view.sentinel.turris.cz/greylist-data/greylist-latest.csv + name: Greylist + provider: Turris + rate_limit: 43200 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.turris.parser +``` + +--- + + +### Greylist with PGP signature verification + +The data are processed and classified every week and behaviour of +IP addresses that accessed a larger number of Turris routers is evaluated. 
+The result is a list of addresses that have tried to obtain information about +services on the router or tried to gain access to them. The list also +contains a list of tags for each address which +indicate what behaviour of the address was observed. + +The Turris Greylist feed provides PGP signatures for the provided files. +You will need to import the public PGP key from the linked documentation +page, currently available at +https://pgp.mit.edu/pks/lookup?op=vindex&search=0x10876666 +or from below. +See the URL Fetcher Collector documentation for more information on +PGP signature verification. + +PGP Public key: + +.. code-block:: + + -----BEGIN PGP PUBLIC KEY BLOCK----- + Version: SKS 1.1.6 + Comment: Hostname: pgp.mit.edu + + mQINBFRl7D8BEADaRFoDa/+r27Gtqrdn8sZL4aSYTU4Q3gDr3TfigK8H26Un/Y79a/DUL1o0 + o8SRae3uwVcjJDHZ6KDnxThbqF7URfpuCcCYxOs8p/eu3dSueqEGTODHWF4ChIh2japJDc4t + 3FQHbIh2e3GHotVqJGhvxMmWqBFoZ/mlWvhjs99FFBZ87qbUNk7l1UAGEXeWeECgz9nGox40 + 3YpCgEsnJJsKC53y5LD/wBf4z+z0GsLg2GMRejmPRgrkSE/d9VjF/+niifAj2ZVFoINSVjjI + 8wQFc8qLiExdzwLdgc+ggdzk5scY3ugI5IBt1zflxMIOG4BxKj/5IWsnhKMG2NLVGUYOODoG + pKhcY0gCHypw1bmkp2m+BDVyg4KM2fFPgQ554DAX3xdukMCzzZyBxR3UdT4dN7xRVhpph3Y2 + Amh1E/dpde9uwKFk1oRHkRZ3UT1XtpbXtFNY0wCiGXPt6KznJAJcomYFkeLHjJo3nMK0hISV + GSNetVLfNWlTkeo93E1innbSaDEN70H4jPivjdVjSrLtIGfr2IudUJI84dGmvMxssWuM2qdg + FSzoTHw9UE9KT3SltKPS+F7u9x3h1J492YaVDncATRjPZUBDhbvo6Pcezhup7XTnI3gbRQc2 + oEUDb933nwuobHm3VsUcf9686v6j8TYehsbjk+zdA4BoS/IdCwARAQABtC5UdXJyaXMgR3Jl + eWxpc3QgR2VuZXJhdG9yIDxncmV5bGlzdEB0dXJyaXMuY3o+iQI4BBMBAgAiBQJUZew/AhsD + BgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRDAQrU3EIdmZoH4D/9Jo6j9RZxCAPTaQ9WZ + WOdb1Eqd/206bObEX+xJAago+8vuy+waatHYBM9/+yxh0SIg2g5whd6J7A++7ePpt5XzX6hq + bzdG8qGtsCRu+CpDJ40UwHep79Ck6O/A9KbZcZW1z/DhbYT3z/ZVWALy4RtgmyC67Vr+j/C7 + KNQ529bs3kP9AzvEIeBC4wdKl8dUSuZIPFbgf565zRNKLtHVgVhiuDPcxKmBEl4/PLYF30a9 + 5Tgp8/PNa2qp1DV/EZjcsxvSRIZB3InGBvdKdSzvs4N/wLnKWedj1GGm7tJhSkJa4MLBSOIx + 
yamhTS/3A5Cd1qoDhLkp7DGVXSdgEtpoZDC0jR7nTS6pXojcgQaF7SfJ3cjZaLI5rjsx0YLk + G4PzonQKCAAQG1G9haCDniD8NrrkZ3eFiafoKEECRFETIG0BJHjPdSWcK9jtNCupBYb7JCiz + Q0hwLh2wrw/wCutQezD8XfsBFFIQC18TsJAVgdHLZnGYkd5dIbV/1scOcm52w6EGIeMBBYlB + J2+JNukH5sJDA6zAXNl2I1H1eZsP4+FSNIfB6LdovHVPAjn7qXCw3+IonnQK8+g8YJkbbhKJ + sPejfg+ndpe5u0zX+GvQCFBFu03muANA0Y/OOeGIQwU93d/akN0P1SRfq+bDXnkRIJQOD6XV + 0ZPKVXlNOjy/z2iN2A== + =wjkM + -----END PGP PUBLIC KEY BLOCK----- + + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://www.turris.cz/greylist-data/greylist-latest.csv + name: Greylist + provider: Turris + rate_limit: 43200 + signature_url: https://www.turris.cz/greylist-data/greylist-latest.csv.asc + verify_pgp_signatures: True +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.turris.parser +``` + +--- + + +## University of Toulouse + +### Blacklist + +Various blacklist feeds + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + extract_files: true + http_url: https://dsi.ut-capitole.fr/blacklists/download/{collection name}.tar.gz + name: Blacklist + provider: University of Toulouse + rate_limit: 43200 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.parser_csv +parameters: + columns: {depends on a collection} + defaults_fields: {'classification.type': '{depends on a collection}'} + delimiter: false +``` + +--- + + +## VXVault + +### URLs + +This feed provides IP addresses hosting Malware. 
+ +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://vxvault.net/URL_List.php + name: URLs + provider: VXVault + rate_limit: 3600 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.vxvault.parser +``` + +--- + + +## ViriBack + +### C2 Tracker + +Latest detected C2 servers. + +**Public:** yes + +**Revision:** 2022-11-15 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://tracker.viriback.com/dump.php + name: C2 Tracker + provider: ViriBack + rate_limit: 86400 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.csv_parser +parameters: + columns: ["malware.name", "source.url", "source.ip", "time.source"] + defaults_fields: {'classification.type': 'malware-distribution'} + skip_header: True +``` + +--- + + +## WebInspektor + +### Unsafe sites + +Latest detected unsafe sites. + +**Public:** yes + +**Revision:** 2018-03-09 + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: https://app.webinspector.com/public/recent_detections/ + name: Unsafe sites + provider: WebInspektor + rate_limit: 60 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.webinspektor.parser +``` + +--- + + +## ZoneH + +### Defacements + +all the information contained in Zone-H's cybercrime archive were either collected online from public sources or directly notified anonymously to us. 
+ +**Public:** no + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.mail.collector_mail_attach +parameters: + attach_regex: csv + extract_files: False + folder: INBOX + mail_host: __HOST__ + mail_password: __PASSWORD__ + mail_ssl: True + mail_user: __USERNAME__ + name: Defacements + provider: ZoneH + rate_limit: 3600 + sent_from: datazh@zone-h.org + subject_regex: Report +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.zoneh.parser +``` + +--- + + +## cAPTure + +### Ponmocup Domains CIF Format + +List of ponmocup malware redirection domains and infected web-servers from cAPTure. See also http://security-research.dyndns.org/pub/botnet-links.htm and http://c-apt-ure.blogspot.com/search/label/ponmocup The data in the CIF format is not equal to the Shadowserver CSV format. Reasons are unknown. + +**Public:** yes + +**Revision:** 2018-01-20 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://security-research.dyndns.org/pub/malware-feeds/ponmocup-infected-domains-CIF-latest.txt + name: Infected Domains + provider: cAPTure + rate_limit: 10800 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.dyn.parser +``` + +--- + + +### Ponmocup Domains Shadowserver Format + +List of ponmocup malware redirection domains and infected web-servers from cAPTure. See also http://security-research.dyndns.org/pub/botnet-links.htm and http://c-apt-ure.blogspot.com/search/label/ponmocup The data in the Shadowserver CSV format is not equal to the CIF format. Reasons are unknown. 
+ +**Public:** yes + +**Revision:** 2020-07-08 + +**Documentation:** + + +**Collector configuration** + +```yaml +module: intelmq.bots.collectors.http.collector_http +parameters: + http_url: http://security-research.dyndns.org/pub/malware-feeds/ponmocup-infected-domains-shadowserver.csv + name: Infected Domains + provider: cAPTure + rate_limit: 10800 +``` + +**Parser configuration** + +```yaml +module: intelmq.bots.parsers.generic.parser_csv +parameters: + columns: ["time.source", "source.ip", "source.fqdn", "source.urlpath", "source.port", "protocol.application", "extra.tag", "extra.redirect_target", "extra.category"] + compose_fields: {'source.url': 'http://{0}{1}'} + defaults_fields: {'classification.type': 'malware-distribution'} + delimiter: , + skip_header: True +``` + +--- + + diff --git a/docs/user/hardware-requirements.rst b/docs/user/hardware-requirements.rst deleted file mode 100644 index fb2fff42e..000000000 --- a/docs/user/hardware-requirements.rst +++ /dev/null @@ -1,82 +0,0 @@ -.. - SPDX-FileCopyrightText: 2021 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -##################### -Hardware Requirements -##################### - -Do you ask yourself how much RAM do you need to give your new IntelMQ virtual machine? - -The honest answer is simple and pointless: It depends ;) - -.. contents:: - -**************************************** -IntelMQ and the messaging queue (broker) -**************************************** - -IntelMQ uses a messaging queue to move the messages between the bots. -All bot instances can only process one message at a time, therefore all other messages need to wait in the queue. -As not all bots are equally fast, the messages will naturally "queue up" before the slower ones. -Further, parsers produce many events with just one message (the report) as input. - -The following estimations assume Redis as messaging broker which is the default for IntelMQ. 
-When RabbitMQ is used, the required resources will differ, and RabbitMQ can handle system overload and therefore a shortage of memory. - -As Redis stores all data in memory, the data which is processed at any point in time must fit there, including overheads. Please note that IntelMQ does *neither store nor cache* any input data. These estimates therefore only relate to the processing step, not the storage. - -For a minimal system, these requirements suffice: - -- 4 GB of RAM -- 2 CPUs -- 10 GB disk size - -**Depending on your data input, you will need the twentiethfold of the input data size as memory for processing.** - -When using `Redis persistence `_, you will additionally need twice as much memory for Redis. - -Disk space -========== - -Disk space is only relevant if you save your data to a file, which is not recommended for production setups, and only useful for testing and evaluation. - -Do not forget to rotate your logs or use syslog, especially if you use the logging level "DEBUG". -logrotate is in use by default for all installation with deb/rpm packages. When other means of installation are used (pip, manual), configure log rotation manually. See :ref:`configuration-logging`. - -Background on memory -==================== -For experimentation, we used multiple Shadowserver Poodle reports for demonstration purpose, totaling in 120 MB of data. All numbers are estimates and are rounded. -In memory, the report data requires 160 MB. After parsing, the memory usage increases to 850 MB in total, as every data line is stored as JSON, with additional information plus the original data encoded in Base 64. -The further processing steps depend on the configuration, but you can estimate that caches (for lookups and deduplication) and other added information cause an additional size increase of about 2x. -Once a dataset finished processing in IntelMQ, it is no longer stored in memory. Therefore, the memory is only needed to catch high load. 
- -The above numbers result in a factor of 14 for input data size vs. memory required by Redis. Assuming some overhead and memory for the bots' processes, a factor of 20 seems sensible. - -To reduce the amount of required memory and disk size, you can optionally remove the `raw` data field, see :ref:`faq-remove-raw-data` in the FAQ. - -********************* -Additional components -********************* - -If some of the `optional` components of the :doc:`ecosystem` are in use, they can add additional hardware requirements. - -Those components do not add relevant requirements: - -- IntelMQ API: It is just an API for `intelmqctl`. -- IntelMQ Manager: Only contains static files served by the webserver. -- IntelMQ Webinput CSV: Just a webinterface to insert data. Requires the amount of processed data to fit in memory, see above. -- Stats Portal: The aggregation step and Graphana require some resources, but no exact numbers are known. -- Malware Name Mapping -- Docker: The docker layer adds only minimal hardware requirements. - -EventDB -======= - -When storing data in databases (such as MongoDB, PostgreSQL, ElasticSearch), it is recommended to do this on separate machines for operational reasons. -Using a different machine results in a separation of stream processing to data storage and allows for a specialized system optimization for both use-cases. - -IntelMQ cb mailgen -============================= - -While the Fody backend and frontend do not have significant requirements, the `RIPE import tool of the certbund-contact `_ requires about 8 GB of memory as of March 2021. diff --git a/docs/user/installation.rst b/docs/user/installation.rst deleted file mode 100644 index 0c026cb78..000000000 --- a/docs/user/installation.rst +++ /dev/null @@ -1,215 +0,0 @@ -.. - SPDX-FileCopyrightText: 2017-2023 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -Installation -============ - -.. 
contents:: - -Please report any errors an suggest improvements at `IntelMQ Issues `_. Thanks! - -For upgrade instructions, see :doc:`upgrade`. -For testing pre-releases see also :ref:`testing`. - -Following any one of the installation methods will setup the IntelMQ base. -Some bots may have additional special dependencies which are mentioned in their :doc:`own documentation `. - -The following installation methods are available: - -* native `.deb`/`.rpm` packages -* Docker, with and without docker-compose -* Python package from PyPI -* From the git-repository, see :ref:`development environment` - - -Base Requirements ------------------ - -The following instructions assume the following requirements. Python versions >= 3.7 are supported. - -Supported and recommended operating systems are: - -* Debian Debian 11 Bullseye, Debian 12 Bookworm -* openSUSE Tumbleweed, Leap 15.5 -* Ubuntu: 20.04 Focal, 22.04 Jammy -* For the Docker-installation: Docker Engine: 18.x and higher - -Other distributions which are (most probably) supported include AlmaLinux, CentOS, Fedora, FreeBSD 12, RHEL and RockyLinux. - -A short guide on hardware requirements can be found on the page :doc:`hardware-requirements`. - - -Native deb/rpm packages ------------------------ - -These are the operating systems which are currently supported by packages: - -* **Debian 11** Bullseye -* **openSUSE Tumbleweed** -* **Ubuntu 20.04** Focal Fossa - - * Enable the universe repositories by appending ``universe`` in ``/etc/apt/sources.list`` to ``deb http://[...].archive.ubuntu.com/ubuntu/ focal main`` first. - - * `intelmq-api` is only available with hug-based version 3.0.1, not the latest fastapi-based 3.1.0). - -Get the installation instructions for your operating system here: `Installation Native Packages `_. -The instructions show how to add the repository and install the `intelmq` package. You can also install the `intelmq-manager` package to get the `Web-Frontend IntelMQ Manager `_. - -.. 
_installation_docker: - -Docker ------- - -Attention: Currently you can't manage your botnet via :doc:`intelmqctl`. You need to use `IntelMQ-Manager `_ currently! - -The latest IntelMQ image is hosted on `Docker Hub `_ and the image build instructions are in our `intelmq-docker repository `. - -Follow `Docker Install `_ and -`Docker-Compose Install `_ instructions. - -Before you start using docker-compose or any docker related tools, make sure docker is running: - -.. code-block:: bash - - # To start the docker daemon - systemctl start docker.service - # To enable the docker daemon for the future - systemctl enable docker.service - -Now we can download IntelMQ and start the containers. -Navigate to your preferred installation directory and run the following commands: - -.. code-block:: bash - - git clone https://github.com/certat/intelmq-docker.git --recursive - cd intelmq-docker - sudo docker-compose pull - sudo docker-compose up - -Your installation should be successful now. You're now able to visit ``http://127.0.0.1:1337/`` to access the intelmq-manager. -You have to login with the username ``intelmq`` and the password ``intelmq``, if you want to change the username or password, -you can do this by adding the environment variables ``INTELMQ_API_USER`` for the username and ``INTELMQ_API_PASS`` for the -password. - -NOTE: If you get an `Permission denied`, you should use ``chown -R $USER:$USER example_config``. - - -With pip from PyPI ------------------- - -Requirements -^^^^^^^^^^^^ - -Ubuntu / Debian - -.. code-block:: bash - - apt install python3-pip python3-dnspython python3-psutil python3-redis python3-requests python3-termstyle python3-tz python3-dateutil redis-server bash-completion jq - # optional dependencies - apt install python3-pymongo python3-psycopg2 - -CentOS 7 / RHEL 7: - -.. 
code-block:: bash - - yum install epel-release - yum install python36 python36-dns python36-requests python3-setuptools redis bash-completion jq - yum install gcc gcc-c++ python36-devel - # optional dependencies - yum install python3-psycopg2 - -.. note:: - - We no longer support already end-of-life Python 3.6, which is the last Python version officially - packaged for CentOS Linux 7. You can either use alternative Python source, or stay on the IntelMQ - 3.0.2. - -CentOS 8: - -.. code-block:: bash - - dnf install epel-release - dnf install python3-dateutil python3-dns python3-pip python3-psutil python3-redis python3-requests redis bash-completion jq - # optional dependencies - dnf install python3-psycopg2 python3-pymongo - -openSUSE: - -.. code-block:: bash - - zypper install python3-dateutil python3-dnspython python3-psutil python3-redis python3-requests python3-python-termstyle redis bash-completion jq - # optional dependencies - zypper in python3-psycopg2 python3-pymongo - -Installation -^^^^^^^^^^^^ - -The base directory is ``/opt/intelmq/``, if the environment variable ``INTELMQ_ROOT_DIR`` is not set to something else, see :ref:`configuration-paths` for more information. - -.. code-block:: bash - - sudo -i - pip3 install intelmq - useradd -d /opt/intelmq -U -s /bin/bash intelmq - sudo intelmqsetup - -`intelmqsetup` will create all necessary directories, provides a default configuration for new setups. See the :ref:`configuration` for more information on them and how to influence them. - - -Docker without docker-compose ------------------------------ - -If not already installed, please install `Docker `_. - -Navigate to your preferred installation directory and run ``git clone https://github.com/certat/intelmq-docker.git --recursive``. - -You need to prepare some volumes & configs. Edit the left-side after -v, to change paths. - -Change ``redis_host`` to a running redis-instance. Docker will resolve it automatically. 
-All containers are connected using `Docker Networks `_. - -In order to work with your current infrastructure, you need to specify some environment variables - -.. code-block:: bash - - sudo docker pull redis:latest - - sudo docker pull certat/intelmq-full:latest - - sudo docker pull certat/intelmq-nginx:latest - - sudo docker network create intelmq-internal - - sudo docker run -v ~/intelmq/example_config/redis/redis.conf:/redis.conf \ - --network intelmq-internal \ - --name redis \ - redis:latest - - sudo docker run --network intelmq-internal \ - --name nginx \ - certat/intelmq-nginx:latest - - sudo docker run -e INTELMQ_IS_DOCKER="true" \ - -e INTELMQ_SOURCE_PIPELINE_BROKER: "redis" \ - -e INTELMQ_PIPELINE_BROKER: "redis" \ - -e INTELMQ_DESTIONATION_PIPELINE_BROKER: "redis" \ - -e INTELMQ_PIPELINE_HOST: redis \ - -e INTELMQ_SOURCE_PIPELINE_HOST: redis \ - -e INTELMQ_DESTINATION_PIPELINE_HOST: redis \ - -e INTELMQ_REDIS_CACHE_HOST: redis \ - -v $(pwd)/example_config/intelmq/etc/:/etc/intelmq/etc/ \ - -v $(pwd)/example_config/intelmq-api/config.json:/etc/intelmq/api-config.json \ - -v $(pwd)/intelmq_logs:/etc/intelmq/var/log \ - -v $(pwd)/intelmq_output:/etc/intelmq/var/lib/bots \ - -v ~/intelmq/lib:/etc/intelmq/var/lib \ - --network intelmq-internal \ - --name intelmq \ - certat/intelmq-full:latest - -If you want to use another username and password for the intelmq-manager / api login, additionally add two new environment variables. - -.. code-block:: bash - - -e INTELMQ_API_USER: "your username" - -e INTELMQ_API_PASS: "your password" diff --git a/docs/user/intelmq-api.rst b/docs/user/intelmq-api.rst deleted file mode 100644 index f319414e7..000000000 --- a/docs/user/intelmq-api.rst +++ /dev/null @@ -1,337 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Birger Schacht, Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -########### -IntelMQ API -########### - -`intelmq-api` is a `FastAPI `_ based API for the `IntelMQ `_ project. - -.. 
contents:: - -********************************** -Installing and running intelmq-api -********************************** - -`intelmq-api` requires the IntelMQ package to be installed on the system (it uses ``intelmqctl`` to control the botnet). - -You can install the ``intelmq-api`` package using your preferred system package installation mechanism or using the ``pip`` Python package installer. -We provide packages for the `intelmq-api` for the same operating systems as we do for the `intelmq` package itself. -For the list of supported distributions, please see the intelmq :doc:`installation` page. - -Our repository page gives `installation instructions for various operating systems `_. -No additional set-up steps are needed if you use these packages. - -The `intelmq-api` provides the route ``/v1/api`` for managing the IntelMQ installation. - -For development purposes and testing, you can also run `intelmq-api` using development script -from this `repository `_: - -.. code-block:: bash - - ./scripts/run_dev.sh - -The API is then served on ``127.0.0.1:8000/v1/api``, and the interactive documentation on ``127.0.0.1:8000/docs``. -Please refer to the repository README for more development tips. - -Installation using pip -^^^^^^^^^^^^^^^^^^^^^^ - -.. note:: - - Native system packages (DEB, RPM) should automatically prepare steps described in this section. If - you wish to base on package from pip, you may need to do them manually, as described below. - -To configure your system to serve the API for not-development purposes, you need to prepare a -configuration for IntelMQ API, a position config for the IntelMQ Manager as well as a web application -server and a (reverse)proxy. For all those steps we have prepared example configuration, intended -to work with `Gunicorn `_ as the web server and `Apache 2 `_ -as the proxy. 
- -The `intelmq-api` package ships the following example files: - - ``${PREFIX}/etc/intelmq/api-config.json`` - the API configuration, - - ``${PREFIX}/etc/intelmq/manager/positions.conf`` - positions configuration for the manager, - - ``${PREFIX}/etc/intelmq/api-apache.conf`` - a virtualhost configuration file for Apache 2, - - ``${PREFIX}/etc/intelmq/api-sudoers.conf`` - a sudoers configuration file, - - ``${PREFIX}/etc/intelmq/intelmq-api.service`` - a systemd service unit configuration for Gunicorn, - - ``${PREFIX}/etc/intelmq/intelmq-api.socket`` - a systemd socket unit configuration. - -The value of ``${PREFIX}`` depends on your environment and is something like ``/usr/local/lib/pythonX.Y/dist-packages/`` (where ``X.Y`` is your Python version). - -.. note:: - - All configuration files have example paths to the IntelMQ API package. During the installation - please ensure to update them with the right value, as the ``${PREFIX}``. - -Installing packages -~~~~~~~~~~~~~~~~~~~ - -Let's start with installing the IntelMQ API package: - -.. code-block:: bash - - pip install intelmq-api - -You need to install Gunicorn and Apache2 on your own, e.g., using apt: - -.. code-block:: bash - - apt install gunicorn apache2 - -Then, if you didn't use it before, ensure to enable the ``proxy_http`` module for Apache: - -.. code-block:: bash - - a2enmod proxy_http - -Configuring Apache -~~~~~~~~~~~~~~~~~~ - -The file ``${PREFIX}/etc/intelmq/api-apache.conf`` needs to be placed in the correct place for your Apache 2 installation. - - On Debian and Ubuntu, move the file to ``/etc/apache2/conf-available.d/api-apache.conf`` and then execute ``a2enconf api-apache``. - - On CentOS, RHEL and Fedora, move the file to ``/etc/httpd/conf.d/``. - - On openSUSE, move the file to ``/etc/apache2/conf.d/``. - -Don't forget to reload the Apache2 afterwards. - -Configuring Systemd services -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - - This step could be also done by calling the script: - - .. 
code-block:: bash - - intelmq-api-setup-systemd - -The systemd configuration files (``intelmq-api.service`` and ``intelmq-api.socket``) are responsible -for instructing systemd daemon to start and keep running Gunicorn (that serves the API), and -forwarding requests between proxy and the Gunicorn instance. - -- Files ``${PREFIX}/etc/intelmq/intelmq-api.service`` and ``${PREFIX}/etc/intelmq/intelmq-api.socket`` - should be placed in ``/lib/systemd/system/`` directory. Then adapt the webserver username in - ``intelmq-api.service``. - -After moving files, you can enable the service by executing ``systemctl enable intelmq-api`` to -start it on the system startup. - -Setup API configuration files -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- The file ``${PREFIX}/etc/intelmq/api-config.json`` needs to be moved to ``/etc/intelmq/api-config.json``. -- The file ``${PREFIX}/etc/intelmq/manager/positions.conf`` needs to be moved to ``/etc/intelmq/manager/positions.conf``. -- Last but not least move the file ``${PREFIX}/etc/intelmq/api-sudoers.conf`` to ``/etc/sudoers.d/01_intelmq-api`` and adapt the webserver username in this file. Set the file permissions to ``0o440``. - -Afterwards, continue with the section Permissions below. When you finish the configuration, -you can start the service using ``systemctl start intelmq-api``. You may need to restart the service -after any configuration change. - -Next steps -~~~~~~~~~~ - -The example Apache2 and Gunicorn configurations serve the IntelMQ API under ``/intelmq`` prefix, -what means that at this moment you should be able to get, e.g., the API documentation under -``/intelmq/docs`` etc. - -Now, you should continue with the API configuration and creating users. If you didn't do it before, -it's also time to configure IntelMQ itself. - -IntelMQ 2.3.1 comes with a tool ``intelmqsetup`` which helps with performing some steps automatically. -Please note that the tool is still under development and may not detect all situations correctly. 
-Please report us any bugs you are observing. The tool is idempotent, you can execute it multiple times. - -*********************** -Configuring intelmq-api -*********************** - -Depending on your setup, you might have to install ``sudo`` to make it possible for the ``intelmq-api`` to run the ``intelmq`` command as the user-account usually used to run ``intelmq`` (which is also often called ``intelmq``). - -``intelmq-api`` is configured using a configuration file in ``json`` format. -``intelmq-api`` tries to load the configuration file from ``/etc/intelmq/api-config.json`` and ``${PREFIX}/etc/intelmq/api-config.json``, but you can override the path setting the environment variable ``INTELMQ_API_CONFIG``. -(When using Gunicorn and systemd service, you can do this by modifying the ``intelmq-api.service`` configuration file shipped with ``intelmq-api``, the file contains an example) - -When running the API using development mode, you can set the environment variable like this: - -.. code-block:: bash - - INTELMQ_API_CONFIG=/etc/intelmq/api-config.json ./scripts/run_dev.sh - - -The default configuration which is shipped with the packages is also listed here for reference: - -.. code-block:: json - - { - "intelmq_ctl_cmd": ["sudo", "-u", "intelmq", "intelmqctl"], - "allowed_path": "/opt/intelmq/var/lib/bots/", - "session_store": "/etc/intelmq/api-session.sqlite", - "session_duration": 86400, - "allow_origins": ["*"] - } - - -On Debian based systems, the default path for the ``session_store`` is ``/var/lib/dbconfig-common/sqlite3/intelmq-api/intelmqapi`` because the Debian package uses the Debian packaging tools to manage the database file. - -The following configuration options are available: - -* ``intelmq_ctl_cmd``: Your ``intelmqctl`` command. 
If this is not set in a configuration file the default is used, which is ``["sudo", "-u", "intelmq", "/usr/local/bin/intelmqctl"]`` - The option ``"intelmq_ctl_cmd"`` is a list of strings so that we can avoid shell-injection vulnerabilities because no shell is involved when running the command. - This means that if the command you want to use needs parameters, they have to be separate strings. -* ``allowed_path``: intelmq-api can grant **read-only** access to specific files - this setting defines the path those files can reside in. -* ``session_store``: this is an optional path to a sqlite database, which is used for session storage and authentication. If it is not set (which is the default), no authentication is used! -* ``session_duration``: the maximal duration of a session, it's 86400 seconds by default -* ``allow_origins``: a list of origins the responses of the API can be shared with. Allows every origin by default. - -Permissions -^^^^^^^^^^^ - -``intelmq-api`` tries to write a couple of configuration files in the ``${PREFIX}/etc/intelmq`` directory - this is only possible if you set the permissions accordingly, given that ``intelmq-api`` runs under a different user. -The user the API run as also needs write access to the folder the ``session_store`` is located in; otherwise there will be an error accessing the session data. -If you're using the default Apache 2 setup, you might want to set the group of the files to ``www-data`` and give it write permissions (``chmod -R g+w ``). -In addition to that, the ``intelmq-manager`` tries to store the bot positions via the API into the file ``${PREFIX}/etc/intelmq/manager/positions.conf``. -You should therefore create the folder ``${PREFIX}/etc/intelmq/manager`` and the file ``positions.conf`` in it. - -************* -Adding a user -************* - -If you enable the ``session_store`` you will have to create user accounts to be able to access the API functionality. You can do this using ``intelmq-api-adduser``: - -.. 
code-block:: bash - - intelmq-api-adduser --user --password - -***************** -A note on SELinux -***************** - -On systems with SELinux enabled, the API will fail to call intelmqctl. -Therefore, SELinux needs to be disabled: - -.. code-block:: bash - - setenforce 0 - -We welcome contributions to provide SELinux policies. - -******************* -Usage from programs -******************* - -The IntelMQ API can also be used from programs, not just browsers. -To do so, first send a POST-Request with JSON-formatted data to http://localhost/intelmq/v1/api/login/ - -.. code-block:: json - - { - "username": "$your_username", - "password": "$your_password" - } - -With valid credentials, the JSON-formatted response contains the ``login_token``. -This token can be used like an API key in the Authorization header for the next API calls: - -.. code-block:: bash - - Authorization: $login_token - -Here is a full example using *curl*: - -.. code-block:: bash - - > curl --location --request POST "http://localhost/intelmq/v1/api/login/"\ - --header "Content-Type: application/x-www-form-urlencoded"\ - --data-urlencode "username=$username"\ - --data-urlencode "password=$password" - {"login_token":"68b329da9893e34099c7d8ad5cb9c940","username":"$username"} - > curl --location "http://localhost/intelmq/v1/api/version"\ - --header "Authorization: 68b329da9893e34099c7d8ad5cb9c940" - {"intelmq":"3.0.0rc1","intelmq-manager":"2.3.1"} - - -The same approach also works for *Ansible*, as you can see here: - -1. https://github.com/schacht-certat/intelmq-vagrant/blob/7082719609c0aafc9324942a8775cf2f8813703d/ansible/tasks/api/00_registerauth.yml#L1-L9 -2. 
https://github.com/schacht-certat/intelmq-vagrant/blob/7082719609c0aafc9324942a8775cf2f8813703d/ansible/tasks/api/02_queuestatus.yml#L1-L5 - -***************************** -Frequent operational problems -***************************** - -IntelMQCtlError -^^^^^^^^^^^^^^^ - -If the command is not configured correctly, you'll see exceptions on startup like this: - -.. code-block:: bash - - intelmq_manager.runctl.IntelMQCtlError: - -This means the intelmqctl command could not be executed as a subprocess. -The ```` should indicate why. - -Access Denied / Authentication Required "Please provide valid Token verification credentials" -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you see the IntelMQ Manager interface and menu, but the API calls to the back-end querying configuration and status of IntelMQ fail with "Access Denied" or "Authentication Required: Please provide valid Token verification credentials" errors, you are maybe not logged in while the API requires authentication. - -By default, the API requires authentication. Create user accounts and login with them, or - if you have other protection means in place - deactivate the authentication requirement by removing or renaming the `session_store` parameter in the configuration. - -Internal Server Error -^^^^^^^^^^^^^^^^^^^^^ - -There can be various reasons for internal server errors. You need to look at the error log of your web server, for example ``/var/log/apache2/error.log`` or ``/var/log/httpd/error_log`` for Apache 2. It could be that the sudo-setup is not functional, the configuration file or session database file can not be read or written or other errors in regard to the execution of the API program. - -Can I just install it from the deb/rpm packages while installing IntelMQ from a different source? 
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Yes, you can install the API and the Manager from the deb/rpm repositories, and install your IntelMQ from a somewhere else, e.g. a local repository. -However, knowledge about Python and system administration experience is recommended if you do so. - -The packages install IntelMQ to ``/usr/lib/python3*/site-packages/intelmq/``. -Installing with ``pip`` results in ``/usr/local/lib/python3*/site-packages/intelmq/`` (and some other accompanying resources) which overrides the installation in ``/usr/lib/``. -You probably need to adapt the configuration parameter ``intelmq_ctl_cmd`` to the ``/usr/local/bin/intelmqctl`` executable and some other tweaks. - -sqlite3.OperationalError: attempt to write a readonly database -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -SQLite does not only need write access to the database itself, but also the folder the database file is located in. Please check that the webserver has `write` permissions to the folder -the session file is located in. - -sqlite3.OperationalError: unable to open database file -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Please check the ``session_store`` in ``api-config.json`` and ensure the path is correct - the -directory exists and application can write to it. - -Gunicorn returns ``ModuleNotFoundError: No module named 'uvicorn'``, but Uvicorn is installed -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Most probably one of them (Gunicorn and Uvicorn) were installed using different method (e.g. one -from native system package, other from pip). Try to install both from one source. You may need -to eventually update the Gunicorn executable path in `intelmq-api.service`. - -Can I use other web servers or proxy? 
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Yes, the proposed setup with Gunicorn and Apache 2 is just one of many possibilities. You can -refer to the `FastAPI documentation `_ for another -examples. - -How to debug API running as system service? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you experience any issues with the API, please first check the logs provided in journal: - -.. code-block:: bash - - journalctl -u intelmq-api - - -************ -Getting help -************ - -You can use the `IntelMQ users mailing lists `_ and `GitHub issues `_ for getting help and getting in touch with other users and developers. See also the :doc:`introduction` page. diff --git a/docs/user/intelmq-manager.rst b/docs/user/intelmq-manager.rst deleted file mode 100644 index cd4431112..000000000 --- a/docs/user/intelmq-manager.rst +++ /dev/null @@ -1,192 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Birger Schacht - SPDX-License-Identifier: AGPL-3.0-or-later - -############### -IntelMQ Manager -############### - -**IntelMQ Manager** is a graphical interface to manage configurations for IntelMQ. -Its goal is to provide an intuitive tool to allow non-programmers to specify the data flow in IntelMQ. - -.. contents:: - -************ -Installation -************ - -To use the `intelmq-manager` webinterface, a working `intelmq` installation which provides access to the :doc:`intelmq-api` is required. -Please refer to the IntelMQ :doc:`installation` page. - -`intelmq-manager` can be installed with different methods. Use the same one as you did for IntelMQ itself and the IntelMQ API. - -Native Packages -=============== - -As the repositories are already set-up on your system, you can simply install the package ``intelmq-manager``. - -Our repository page gives `installation instructions for various operating systems `_. -No additional set-up steps are needed. 
- -The webserver configuration (which is also shown below) for Apache will be automatically installed and the HTML files are stored under ``/usr/share/intelmq-manager/html``. -The webinterface is then available at ``http://localhost/intelmq-manager``. - -Docker -====== - -The IntelMQ Manager is included in our Docker-images. See the section :ref:`installation_docker` in our installation guide. - - -Installation using pip -====================== - - -For installation via pip, the situation is more complex. -The intelmq-manager package does not contain ready-to-use files, they need to be built locally. -First, lets install the Manager itself: - -.. code-block:: - - pip3 install intelmq-manager - -If your system uses wheel-packages, not the source distribution, you can use the ``intelmqsetup`` tool. -``intelmqsetup`` which performs these set-up steps automatically but it may not detect all situations correctly. -If it finds `intelmq-manager` installed, calls its build routine is called. -The files are placed in ``/usr/share/intelmq_manager/html``, where the default Apache configuration expect it. - -If your system used the dist-package or if you are using a local source, the tool may not do all required steps. -To call the build routine manually, use ``intelmq-manager-build --output-dir your/preferred/output/directory/``. - -`intelmq-manager` ships with a default configuration for the Apache webserver (``manager-apache.conf``): - -.. code-block:: - - Alias /intelmq-manager /usr/share/intelmq_manager/html/ - - - - Header set Content-Security-Policy "script-src 'self'" - Header set X-Content-Security-Policy "script-src 'self'" - - - -This file needs to be placed in the correct place for your Apache 2 installation. - -- On Debian and Ubuntu, the file needs to be placed at ``/etc/apache2/conf-available.d/manager-apache.conf`` and then execute ``a2enconf manager-apache``. 
-- On CentOS, RHEL and Fedora, the file needs to be placed at ``/etc/httpd/conf.d/`` and reload the webserver. -- On openSUSE, the file needs to be placed at ``/etc/apache2/conf.d/`` and reload the webserver. - -*********************** -Security considerations -*********************** - -Never ever run intelmq-manager on a public webserver without SSL and proper authentication! - -The way the current version is written, anyone can send a POST request and change intelmq's configuration files via sending HTTP POST requests. -Intelmq-manager will reject non JSON data but nevertheless, we don't want anyone to be able to reconfigure an intelmq installation. - -Therefore you will need authentication and SSL. Authentication can be handled by the :doc:`intelmq-api`. -Please refer to its documentation on how to enable authentication and setup accounts. - -Never ever allow unencrypted, unauthenticated access to intelmq-manager! - -************* -Configuration -************* - -In the file ``/usr/share/intelmq-manager/html/js/vars.js`` set ``ROOT`` to the URL of your ``intelmq-api`` installation- by default that's on the same host as ``intelmq-manager``. - -CSP Headers -=========== - -It is recommended to set these two headers for all requests: - -.. code-block:: - - Content-Security-Policy: script-src 'self' - X-Content-Security-Policy: script-src 'self' - -*********** -Screenshots -*********** - -Pipeline -======== - -This interface lets you visually configure the whole IntelMQ pipeline and the parameters of every single bot. -You will be able to see the pipeline in a graph-like visualisation similar to the following screenshot (click to enlarge): - -.. image:: /_static/intelmq-manager/configuration.png - :alt: Main Interface - -Bots Configuration -================== - -When you add a node or edit one you'll be presented with a form with the available parameters for a bot. There you can easily change the parameters as shown in the screenshot: - -.. 
image:: /_static/intelmq-manager/configuration2.png - :alt: Parameter editing - -After editing the bots' configuration and pipeline, simply click "Save Configuration" to automatically write the changes to the correct files. The configurations are now ready to be deployed. - -**Note well**: if you do not press "Save Configuration" your changes will be lost whenever you reload the web page or move between different tabs within the IntelMQ manager page. - - -Botnet Management -================= - -When you save a configuration you can go to the 'Management' section to see what bots are running and start/stop the entire botnet, or a single bot. - -.. image:: /_static/intelmq-manager/management.png - :alt: Botnet Management - -Botnet Monitoring -================= - -You can also monitor the logs of individual bots or see the status of the queues for the entire system or for single bots. - -In this next example we can see the number of queued messages for all the queues in the system. - -.. image:: /_static/intelmq-manager/monitor.png - :alt: Botnet Monitor - -The following example we can see the status information of a single bot. Namely, the number of queued messages in the queues that are related to that bot and also the last 20 log lines of that single bot. - -.. image:: /_static/intelmq-manager/monitor2.png - :alt: Bot Monitor - -***** -Usage -***** - -Keyboard Shortcuts -================== - -Any underscored letter denotes access key shortcut. The needed shortcut-keyboard is different per Browser: - -* Firefox: Alt + Shift + letter -* Chrome & Chromium: Alt + letter - -Configuration Paths -=================== - -The IntelMQ Manager queries the configuration file paths and directory names from ``intelmqctl`` and therefore any global environment variables (if set) are effective in the Manager too. -The interface for this query is ``intelmqctl debug --get-paths``, the result is also shown in the ``/about.html`` page of your IntelMQ Manager installation. 
- -For more information on the ability to adapt paths, have a look at the :ref:`configuration` section. - -Configuration page -================== - -Named queues / paths -^^^^^^^^^^^^^^^^^^^^ - -With IntelMQ Manager you can set the name of certain paths by double-clicking on the line which connects two bots: - -.. image:: /_static/intelmq-manager/configuration-path-form.png - :alt: Enter path - -The name is then displayed along the edge: - -.. image:: /_static/intelmq-manager/configuration-path-set.png - :alt: Show path name diff --git a/docs/user/intelmqctl.rst b/docs/user/intelmqctl.rst deleted file mode 100644 index 8cf5b8947..000000000 --- a/docs/user/intelmqctl.rst +++ /dev/null @@ -1,481 +0,0 @@ -.. - SPDX-FileCopyrightText: 2017 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -======================== -intelmqctl documentation -======================== - -.. contents:: - ------------- -Introduction ------------- - -intelmqctl is the main tool to handle a intelmq installation. -It handles the bots themselves and has some tools to handle the installation. - ------------ -Output type ------------ - -intelmqctl can be used as command line tool, as library and as tool by other programs. -If called directly, it will print all output to the console (stderr). -If used as python library, the python types themselves are returned. -The third option is to use machine-readable JSON as output (used by other managing tools). - ----------------------- -Manage individual bots ----------------------- - -As all init systems, intelmqctl has the methods start, stop, restart, reload and status. - -start -===== - -This will start the bot with the ID `file-output`. A file with it's PID will be created in `/opt/intelmq/var/run/[bot-id].pid`. - -.. code-block:: bash - - > intelmqctl start file-output - Starting file-output... - file-output is running. - -If the bot is already running, it won't be started again: - -.. 
code-block:: bash - - > intelmqctl start file-output - file-output is running. - -stop -==== - -If the PID file does exist, a SIGINT will be sent to the process. After 0.25s we check if the process is running. If not, the PID file will be removed. - -.. code-block:: bash - - > intelmqctl stop file-output - Stopping file-output... - file-output is stopped. - -If there's no running bot, there's nothing to do. - -.. code-block:: bash - - > intelmqctl stop file-output - file-output was NOT RUNNING. - -If the bot did not stop in 0.25s, intelmqctl will say it's still running: - -.. code-block:: bash - - > intelmqctl stop file-output - file-output is still running - -status -====== - -Checks for the PID file and if the process with the given PID is alive. If the PID file exists, but the process does not exist, it will be removed. - -.. code-block:: bash - - > intelmqctl status file-output - file-output is stopped. - > intelmqctl start file-output - Starting file-output... - file-output is running. - > intelmqctl status file-output - file-output is running. - -restart -======= - -The same as stop and start consecutively. - -.. code-block:: bash - - > intelmqctl restart file-output - Stopping file-output... - file-output is stopped. - Starting file-output... - file-output is running. - -reload -====== - -Sends a SIGHUP to the bot, which will then reload the configuration. - -.. code-block:: bash - - > intelmqctl reload file-output - Reloading file-output ... - file-output is running. - -If the bot is not running, we can't reload it: - -.. code-block:: bash - - > intelmqctl reload file-output - file-output was NOT RUNNING. - -run -=== - -Run a bot directly for debugging purpose. - -If launched with no arguments, the bot will call its init method and start processing messages as usual – but you see everything happens. - -.. code-block:: bash - - > intelmqctl run file-output - file-output: RestAPIOutputBot initialized with id file-output and version 3.5.2 as process 12345. 
- file-output: Bot is starting. - file-output: Loading source pipeline and queue 'file-output-queue'. - file-output: Connected to source queue. - file-output: No destination queues to load. - file-output: Bot initialization completed. - file-output: Waiting for incoming message. - -Should you get lost any time, just use the **--help** after any argument for further explanation. - -.. code-block:: bash - - > intelmqctl run file-output --help - -Note that if another instance of the bot is running, only warning will be displayed. - -.. code-block:: bash - - > intelmqctl run file-output - Main instance of the bot is running in the background. You may want to launch: intelmqctl stop file-output - -You can set the log level with the `-l` flag, e.g. `-l DEBUG`. For the 'console' subcommand, 'DEBUG' is the default. - -console -------- - -If launched with **console** argument, you get a ```pdb``` live console; or ```ipdb``` or ```pudb``` consoles if they were previously installed (I.E. ```pip3 install ipdb --user```). - -.. code-block:: bash - - > intelmqctl run file-output console - *** Using console ipdb. Please use 'self' to access to the bot instance properties. *** - ipdb> self. ... - -You may specify the desired console in the next argument. - -.. code-block:: bash - - > intelmqctl run file-output console pudb - -message -------- - -Operate directly with the input / output pipelines. - -If **get** is the parameter, you see the message that waits in the input (source or internal) queue. If the argument is **pop**, the message gets popped as well. - -.. code-block:: bash - - > intelmqctl run file-output message get - file-output: Waiting for a message to get... 
- { - "classification.type": "c&c", - "feed.url": "https://example.com", - "raw": "1233", - "source.ip": "1.2.3.4", - "time.observation": "2017-05-17T22:00:33+00:00", - "time.source": "2017-05-17T22:00:32+00:00" - } - -To send directly to the bot's output queue, just as it was sent by ```self.send_message()``` in bot's ```process()``` method, use the **send** argument. -In our case of ```file-output```, it has no destination queue so that nothing happens. - -.. code-block:: bash - - > intelmqctl run file-output message send '{"time.observation": "2017-05-17T22:00:33+00:00", "time.source": "2017-05-17T22:00:32+00:00"}' - file-output: Bot has no destination queues. - -Note, if you would like to know possible parameters of the message, put a wrong one – you will be prompted if you want to list all the current bot harmonization. - -process -------- - -With no other arguments, bot\'s ```process()``` method will be run one time. - -.. code-block:: bash - - > intelmqctl run file-output process - file-output: Bot is starting. - file-output: Bot initialization completed. - file-output: Processing... - file-output: Waiting for incoming message. - file-output: Received message {'raw': '1234'}. - -If run with **--dryrun|-d** flag, the message gets never really popped out from the source or internal pipeline, nor sent to the output pipeline. -Plus, you receive a note about the exact moment the message would get sent, or acknowledged. If the message would be sent to a non-default path, the name of this path is printed on the console. - -.. code-block:: bash - - > intelmqctl run file-output process -d - file-output: * Dryrun only, no message will be really sent through. - ... - file-output: DRYRUN: Message would be acknowledged now! - -You may trick the bot to process a JSON instead of the Message in its pipeline with **--msg|-m** flag. - -.. 
code-block:: bash - - > intelmqctl run file-output process -m '{"source.ip":"1.2.3.4"}' - file-output: * Message from cli will be used when processing. - ... - -If you wish to display the processed message as well, you the **--show-sent|-s** flag. Then, if sent through (either with `--dryrun` or without), the message gets displayed as well. - - -disable -======= - -Sets the `enabled` flag in the runtime configuration of the bot to `false`. -By default, all bots are enabled. - -Example output: - -.. code-block:: bash - - > intelmqctl status file-output - file-output is stopped. - > intelmqctl disable file-output - > intelmqctl status file-output - file-output is disabled. - -enable -====== - -Sets the `enabled` flag in the runtime configuration of the bot to `true`. - -Example output: - -.. code-block:: bash - - > intelmqctl status file-output - file-output is disabled. - > intelmqctl enable file-output - > intelmqctl status file-output - file-output is stopped. - ------------------ -Manage the botnet ------------------ - -In IntelMQ, the botnet is the set of all currently configured and enabled bots. -All configured bots have their configuration in ``runtime.yaml``. -By default, all bots are enabled. To disable a bot set `enabled` to `false`. -Also see :doc:`bots` and :ref:`runtime-configuration`. - -If not bot id is given, the command applies to all bots / the botnet. -All commands except the start action are applied to all bots. -But only enabled bots are started. - -In the examples below, a very minimal botnet is used. - -start -===== - -The start action applies to all bots which are enabled. - -.. code-block:: bash - - > intelmqctl start - Starting abusech-domain-parser... - abusech-domain-parser is running. - Starting abusech-feodo-domains-collector... - abusech-feodo-domains-collector is running. - Starting deduplicator-expert... - deduplicator-expert is running. - file-output is disabled. - Botnet is running. 
- -As we can file-output is disabled and thus has not been started. You can always explicitly start disabled bots. - -stop -==== - -The stop action applies to all bots. Assume that all bots have been running: - -.. code-block:: bash - - > intelmqctl stop - Stopping Botnet... - Stopping abusech-domain-parser... - abusech-domain-parser is stopped. - Stopping abusech-feodo-domains-collector... - abusech-feodo-domains-collector is stopped. - Stopping deduplicator-expert... - deduplicator-expert is stopped. - Stopping file-output... - file-output is stopped. - Botnet is stopped. - -status -====== - -With this command we can see the status of all configured bots. Here, the botnet was started beforehand: - -.. code-block:: bash - - > intelmqctl status - abusech-domain-parser is running. - abusech-feodo-domains-collector is running. - deduplicator-expert is running. - file-output is disabled. - -And if the disabled bot has also been started: - -.. code-block:: bash - - > intelmqctl status - abusech-domain-parser is running. - abusech-feodo-domains-collector is running. - deduplicator-expert is running. - file-output is running. - -If the botnet is stopped, the output looks like this: - -.. code-block:: bash - - > intelmqctl status - abusech-domain-parser is stopped. - abusech-feodo-domains-collector is stopped. - deduplicator-expert is stopped. - file-output is disabled. - -restart -======= - -The same as start and stop consecutively. - -reload -====== - -The same as reload of every bot. - -enable / disable -================ - -The sub commands `enable` and `disable` set the corresponding flags in ``runtime.yaml``. - -.. code-block:: bash - - > intelmqctl status - file-output is stopped. - malware-domain-list-collector is stopped. - malware-domain-list-parser is stopped. - > intelmqctl disable file-output - > intelmqctl status - file-output is disabled. - malware-domain-list-collector is stopped. - malware-domain-list-parser is stopped. 
- > intelmqctl enable file-output - > intelmqctl status - file-output is stopped. - malware-domain-list-collector is stopped. - malware-domain-list-parser is stopped. - ---------- -List bots ---------- - -`intelmqctl list bots` does list all configured bots and their description. - ------------ -List queues ------------ - -`intelmqctl list queues` shows all queues which are currently in use according to the configuration and how much events are in it: - -.. code-block:: bash - - > intelmqctl list queues - abusech-domain-parser-queue - 0 - abusech-domain-parser-queue-internal - 0 - deduplicator-expert-queue - 0 - deduplicator-expert-queue-internal - 0 - file-output-queue - 234 - file-output-queue-internal - 0 - -Use the `-q` or `--quiet` flag to only show non-empty queues: - -.. code-block:: bash - - > intelmqctl list queues -q - file-output-queue - 234 - -The `--sum` or `--count` flag will show the sum of events on all queues: - -.. code-block:: bash - - > intelmqctl list queues --sum - 42 - ---- -Log ---- - -intelmqctl can show the last log lines for a bot, filtered by the log level. - -See the help page for more information. - ------ -Check ------ - -This command will do various sanity checks on the installation and especially the configuration. - - -.. _orphan-queues: - -Orphaned Queues -=============== - -The `intelmqctl check` tool can search for orphaned queues. "Orphaned queues" are queues that have been used in the past and are no longer in use. For example you had a bot which you removed or renamed afterwards, but there were still messages in it's source queue. The source queue won't be renamed automatically and is now disconnected. As this queue is no longer configured, it won't show up in the list of IntelMQ's queues too. In case you are using redis as message broker, you can use the `redis-cli` tool to examine or remove these queues: - -.. 
code-block:: bash - - redis-cli -n 2 - keys * # lists all existing non-empty queues - llen [queue-name] # shows the length of the queue [queue-name] - lindex [queue-name] [index] # show the [index]'s message of the queue [queue-name] - del [queue-name] # remove the queue [queue-name] - -To ignore certain queues in this check, you can set the parameter `intelmqctl_check_orphaned_queues_ignore` in the *defaults* configuration file. For example: - -.. code-block:: json - - "intelmqctl_check_orphaned_queues_ignore": ["Taichung-Parser"], - ---------------------- -Configuration upgrade ---------------------- - -The `intelmqctl upgrade-config` function upgrade, upgrade the configuration from previous versions to the current one. -It keeps track of previously installed versions and the result of all "upgrade functions" in the "state file", locate in the `$var_state_path/state.json` (`/opt/intelmq/var/lib/state.json` or `/var/lib/intelmq/state.json`). - -This function has been introduced in version 2.0.1. - -It makes backups itself for all changed files before every run. Backups are overridden if they already exists. So make sure to always have a backup of your configuration just in case. - ---------- -Exit code ---------- - -In case of errors, unsuccessful operations, the exit code is higher than 0. -For example, when running `intelmqctl start` and one enabled bot is not running, the exit code is 1. -The same is valid for e.g. `intelmqctl status`, which can be used for monitoring, and all other operations. - ------------- -Known issues ------------- - -The currently implemented process managing using PID files is very erroneous. diff --git a/docs/user/intro.md b/docs/user/intro.md new file mode 100644 index 000000000..5a8e994c4 --- /dev/null +++ b/docs/user/intro.md @@ -0,0 +1,50 @@ + + +# Intro + +The User Guide provides information on how to use installed IntelMQ and its components.
Let's start with a basic not-so-technical description of how IntelMQ works and the used terminology: + +- It consists of small (python) programs called **bots**. +- Bots communicate with each other (using something called message broker) by passing so-called **events** (JSON objects). +- An example event can look like this: + +```json +{ + "source.geolocation.cc": "JO", + "malware.name": "qakbot", + "source.ip": "82.212.115.188", + "source.asn": 47887, + "classification.type": "c2-server", + "extra.status": "offline", + "source.port": 443, + "classification.taxonomy": "malicious-code", + "source.geolocation.latitude": 31.9522, + "feed.accuracy": 100, + "extra.last_online": "2023-02-16", + "time.observation": "2023-02-16T09:55:12+00:00", + "source.geolocation.city": "amman", + "source.network": "82.212.115.0/24", + "time.source": "2023-02-15T14:19:09+00:00", + "source.as_name": "NEU-AS", + "source.geolocation.longitude": 35.939, + "feed.name": "abusech-feodo-c2-tracker" + } +``` + +- Bots are divided into the following groups: + + - **Collectors** - bots that collect data from sources such as website, mailbox, api, etc. + - **Parsers** - bots that split and parse collected data into individual events. + - **Experts** - bots that can do additional processing of events such as enriching, filtering, etc. + - **Outputs** - bots that can output events to files, databases, etc. + +- Data sources supported by IntelMQ are called **feeds**. + - IntelMQ provides recommended configuration of collector and parser bot combinations for selected feeds. +- The collection of all configured bots and their communication paths is called **pipeline** (or **botnet**). +- Individual bots as well as the complete pipeline can be configured, managed and monitored via: + - Web interface called **IntelMQ Manager** (best suited for regular users). + - Command line tool called **intelmqctl** (best suited for administrators).
+ - REST API provided by the **IntelMQ API** extension (best suited for other programs). \ No newline at end of file diff --git a/docs/user/introduction.rst b/docs/user/introduction.rst deleted file mode 100644 index 44ddc5144..000000000 --- a/docs/user/introduction.rst +++ /dev/null @@ -1,86 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Birger Schacht - SPDX-License-Identifier: AGPL-3.0-or-later - -############ -Introduction -############ - -***** -About -***** - -**IntelMQ** is a solution for IT security teams (CERTs & CSIRTs, SOCs abuse -departments, etc.) for collecting and processing security feeds (such as -log files) using a message queuing protocol. It's a community driven -initiative called **IHAP** (Incident Handling Automation Project) which -was conceptually designed by European CERTs/CSIRTs during several -InfoSec events. Its main goal is to give to incident responders an easy -way to collect & process threat intelligence thus improving the incident -handling processes of CERTs. - -**Incident Handling Automation Project** - -- **URL:** -- **Mailing-list:** - -Several pieces of software are evolved around IntelMQ. For an overview, -look at the :doc:`universe`. - -IntelMQ can be used for -- automated incident handling -- situational awareness -- automated notifications -- as data collector for other tools -- etc. 
- -IntelMQ's design was influenced by -`AbuseHelper `__ however it was -re-written from scratch and aims at: - -- Reducing the complexity of system administration -- Reducing the complexity of writing new bots for new data feeds -- Reducing the probability of events lost in all process with - persistence functionality (even system crash) -- Use and improve the existing Data Harmonization Ontology -- Use JSON format for all messages -- Provide easy way to store data into Log Collectors like - ElasticSearch, Splunk, databases (such as PostgreSQL) -- Provide easy way to create your own black-lists -- Provide easy communication with other systems via HTTP RESTful API - -It follows the following basic meta-guidelines: - -- Don't break simplicity - KISS -- Keep it open source - forever -- Strive for perfection while keeping a deadline -- Reduce complexity/avoid feature bloat -- Embrace unit testing -- Code readability: test with inexperienced programmers -- Communicate clearly - -***** -Usage -***** - -Various approaches of installing `intelmq` are described in :doc:`installation`. - -The :doc:`configuration-management` gives an overview how a `intelmq` installation is set up and how to configure and maintain the setup. -There is also a list of available :doc:`feeds` as well as a detailed description of the different :doc:`bots` intelmq brings with it. - -If you know additional feeds and how to parse them, please contribute your code or your configuration (by issues or the mailing lists). - -If you need help, read here about your options: :doc:`support`. - -IntelMQ Manager -=============== - -Check out `this graphical tool `_ to easily manage an IntelMQ system. 
- -********** -Contribute -********** - -- Subscribe to the |intelmq-developers-list-link| -- Via `GitHub issues `_ -- Via `Pull requests `_ (please have a look at the :doc:`/dev/guide` first) diff --git a/docs/user/manager.md b/docs/user/manager.md new file mode 100644 index 000000000..feb28da62 --- /dev/null +++ b/docs/user/manager.md @@ -0,0 +1,70 @@ + + +# Using IntelMQ Manager +**IntelMQ Manager** is a graphical interface to manage configurations for IntelMQ. Its goal is to provide an intuitive tool to allow non-programmers to specify the data flow in IntelMQ. + +## Configuration Pages + +### Pipeline + +This interface lets you visually configure the whole IntelMQ pipeline +and the parameters of every single bot. You will be able to see the +pipeline in a graph-like visualisation similar to the following +screenshot (click to enlarge): + +![Main Interface](../static/images/intelmq-manager/configuration.png) + +#### Named queues / paths + +With IntelMQ Manager you can set the name of certain paths by double-clicking on the line which connects two bots: + +![Enter path](../static/images/intelmq-manager/configuration-path-form.png) + +The name is then displayed along the edge: + +![Show path name](../static/images/intelmq-manager/configuration-path-set.png) + +### Bots Configuration + +When you add a node or edit one you will be presented with a form with the available parameters for the bot. There you can easily change the parameters as shown in the screenshot: + +![Parameter editing](../static/images/intelmq-manager/configuration2.png) + +After editing the bot's configuration and pipeline, simply click **Save Configuration** to automatically write the changes to the correct files. The configurations are now ready to be deployed. + +!!! warning + Without saving the configuration your changes will be lost whenever you reload the web page or move between different tabs within the IntelMQ manager page.
+ +### Botnet Management + +When you save a configuration you can go to the **Management** section to see what bots are running and start/stop the entire botnet, or a single bot. + +![Botnet Management](../static/images/intelmq-manager/management.png) + +### Botnet Monitoring + +You can also monitor the logs of individual bots or see the status of +the queues for the entire system or for single bots. + +In this next example we can see the number of queued messages for all +the queues in the system. + +![Botnet Monitor](../static/images/intelmq-manager/monitor.png) + +In the following example we can see the status information of a single bot. +Namely, the number of queued messages in the queues that are related to +that bot and also the last 20 log lines of that single bot. + +![Bot Monitor](../static/images/intelmq-manager/monitor2.png) + + +## Keyboard Shortcuts + +Any underscored letter denotes an access key shortcut. The needed +keyboard shortcut is different per browser: + +- Firefox: ++ctrl+alt++ + Letter +- Chrome & Chromium: ++alt++ + Letter \ No newline at end of file diff --git a/docs/user/n6-integrations.rst b/docs/user/n6-integrations.rst deleted file mode 100644 index 681678224..000000000 --- a/docs/user/n6-integrations.rst +++ /dev/null @@ -1,76 +0,0 @@ -.. - SPDX-FileCopyrightText: 2020-2021 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -IntelMQ - n6 Integration -======================== - -n6 is an Open Source Tool with very similar aims as IntelMQ: processing and distributing IoC data. -The use-cases, architecture and features differ and both tools have non-overlapping strengths. -n6 is maintained and developed by `CERT.pl `_. - -Information about n6 can be found here: - -- Website: `cert.pl/en/n6 `_ -- Source Code: `github.com/CERT-Polska/n6 `_ -- n6 documentation: `n6.readthedocs.io `_ - -.. image:: /_static/n6/n6-schemat2.png - :alt: n6 schema - -..
image:: /_static/n6/data-flow.png - :alt: n6 data flow - -Data format -------------------------------- - -The internal data representation differs between IntelMQ and n6, so any data exchange between the systems requires a format conversion. -For example, in n6 one message can contain multiple IP addresses, but IntelMQ is intentionally restricted to one IP address per message. -Therefore, one n6 event results in *one or more* IntelMQ events. -Because of this, and some other naming differences and ambiguities, the format conversion is *not* bidirectional. - -Data exchange interface -------------------------------- - -n6 offers a STOMP interface via the RabbitMQ broker, which can be used for both sending and receiving data. -IntelMQ offers both a STOMP collector bot for receiving data from n6, as well as a STOMP output bot for sending data to n6 instances. - -- :ref:`IntelMQ's Stomp collector bot ` -- :ref:`IntelMQ's n6 parser bot ` -- :ref:`IntelMQ's Stomp output bot ` - -Data conversion -------------------------------- - -IntelMQ can parse n6 data using the n6 parser and n6 can parse IntelMQ data using the Intelmq2n6 parser. - -- :ref:`IntelMQ's n6 parser bot ` - -Complete example ----------------- - -Data flow n6 to IntelMQ -^^^^^^^^^^^^^^^^^^^^^^^ - -.. image:: /_static/n6/n6-to-intelmq.png - :alt: dataflow from n6 to IntelMQ - -Data flow IntelMQ to n6 -^^^^^^^^^^^^^^^^^^^^^^^ - -.. image:: /_static/n6/intelmq-to-n6.png - :alt: dataflow from IntelMQ to n6 - -CERT.pl Data feed -^^^^^^^^^^^^^^^^^ - -CERT.pl offers data feed available to their partners through the STOMP interface. -Our feeds documentation contains details how it can be enabled in IntelMQ: `CERT.pl n6 STOMP stream `_ - - -Webinput CSV -------------------------------- - -The IntelMQ Webinput CSV software can also be used together with n6. 
-The documentation on this component can be found in the software's repository: -https://github.com/certat/intelmq-webinput-csv/blob/master/docs/webinput-n6.md diff --git a/docs/user/organization.rst b/docs/user/organization.rst deleted file mode 100644 index 419f22f8a..000000000 --- a/docs/user/organization.rst +++ /dev/null @@ -1,55 +0,0 @@ -.. - SPDX-FileCopyrightText: 2022 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -IntelMQ Organizational Structure -################################ - -.. contents:: - -The central IntelMQ components are maintained by multiple people and organizations in the IntelMQ community. -Please note that some components of the :doc:`universe` can have a different project governance, but all are part of the IntelMQ universe and community. - -IntelMQ Enhancement Proposals (IEP) -*********************************** - -Major changes, including architecture, strategy and the internal data format, require so-called IEPs, IntelMQ Enhancement Proposals. -Their name is based on the famous `"PEPs" of Python `_. - -IEPs are collected in the separate `iep repository `_. - -Code-Reviews and Merging ------------------------- - -Every line of code checked in for the IntelMQ Core, is checked by at least one trusted developer (excluding the author of the changes) of the IntelMQ community. -Afterwards, the code can be merged. 
Currently, these three contributors, have the permission to push and merging code to IntelMQ Core, Manager and API: - * Aaron Kaplan (`aaronkaplan `_) - * Sebastian Wagner (`sebix `_) - * Sebastian Waldbauer (`waldbauer-certat `_) - -Additionally, these people significantly contributed to IntelMQ: - * Bernhard Reiter - * Birger Schacht - * Edvard Rejthar - * Filip Pokorný - * Karl-Johan Karlsson - * Marius Karotkis - * Marius Urkus - * Mikk Margus Möll - * navtej - * Pavel Kácha - * Robert Šefr - * Tomas Bellus - * Zach Stone - -Short history -------------- - -The idea and overall concept of an free, simple and extendible software for automated incident handling was born at an meeting of several European CSIRTs in Heraklion, Greece, in 2014. -Following the event, `Tomás Lima "SYNchroACK" `_ (working at CERT.pt back then) created IntelMQ from scratch. IntelMQ was born on June 24th, 2014. -A major support came from CERT.pt at this early stage. -Aaron Kaplan (CERT.at until 2020) engaged in the long-term advancement and from 2015 on, CERT.at took the burden of the maintenance and development (Sebastian Wagner 2015-2021 at CERT.at). -From 2016 onward, CERT.at started projects, initiated and lead by Aaron Kaplan, receiving CEFF-funding from the European Union to support IntelMQ's development. -IntelMQ became a software component of the EU-funded MeliCERTes framework for CSIRTs. - -In 2020, IntelMQ's organizational structure and architectural development gained new thrive by the newly founded Board and the start of the IEP process, creating more structure and more transparency in the IntelMQ community's decisions. diff --git a/docs/user/support.rst b/docs/user/support.rst deleted file mode 100644 index 0b18ddf7a..000000000 --- a/docs/user/support.rst +++ /dev/null @@ -1,64 +0,0 @@ -.. 
- SPDX-FileCopyrightText: 2022 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -############### -Getting support -############### - -In case you are lost, you need assistance or something is not discussed in this guide, you can ask the community for help. - -.. contents:: - -General tips -************ - -To be most efficient in seeking help, please describe your problem or question with all necessary information, for example: - * Name and version of the operating system - * Way of installation (deb/rpm packages, PyPI, local git repository) - * Used bots and configuration - * Logs of bots or terminal output - * Any other useful messages, screenshots - -Mailing list -************ - -The most traditional way is to ask your question, make a proposal or discuss a topic on the |intelmq-users-list-link|. -You need to subscribe to the mailing list before posting, but the archive is publicly available: `IntelMQ-Users Archive `_. - -GitHub -****** - -To report bugs, `GitHub issues `_ are the ideal place to do so. -Every IntelMQ component has it's own repository on GitHub, with a separate Issue tracker. - -GitHub also offers a `discussion platform `_. - -To participate on GitHub, you first need to create an account on the platform. - -Assistance -********** - -If your organisation is a member of the `CSIRTs Network `_, you are eligible for support in the `MeliCERTes project `_. -You can also ask on |intelmq-users-list-link| for individual support, some members offer support, including, but not limited to: - -* `Aaron Kaplan `_ (founder of IntelMQ) -* `Institute for Common Good Technology `_ (chairmen Sebastian Wager is a IntelMQ maintainer and developer) -* `Intevation GmbH `_ (Develops and maintains several IntelMQ components) - -########### -Development -########### - -Mailing list -************ - -There is a separate mailing list for developers to discuss development topics: |intelmq-developers-list-link| -The `IntelMQ-Dev Archive `_ is public as well. 
- -Please also read the :doc:`/dev/guide`. - -GitHub -****** - -The ideal way to propose changes and additions to IntelMQ is to open a `Pull Request `_ on GitHub. diff --git a/docs/user/universe.rst b/docs/user/universe.rst deleted file mode 100644 index 7d7b6ad1b..000000000 --- a/docs/user/universe.rst +++ /dev/null @@ -1,156 +0,0 @@ -.. - SPDX-FileCopyrightText: 2019-2022 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -IntelMQ Universe -================= - -.. contents:: - -IntelMQ is more than a the core library itself and many programs are developed around in the IntelMQ universe. -This document provides an overview of the ecosystem and all related tools. If you think something is missing, please let us know! - -Unless otherwise stated, the products are maintained by the IntelMQ community. - -IntelMQ Core ------------- - -This is IntelMQ itself, as it is available on `GitHub `_. - -The Core includes all the components required for processing data feeds. -This includes the bots, configuration, pipeline, the internal data format, management tools etc. - -IntelMQ Manager ---------------- - -The Manager is the most known software and can be seen as the face of IntelMQ. -This software provides a graphical user interface to the management tool `intelmqctl`. - -→ `Repository: IntelMQ Manager `_ - -.. image:: /_static/intelmq-manager/landing_page.png - :alt: IntelMQ Manager Landing page - -IntelMQ Webinput CSV --------------------- - -A web-based interface to ingest CSV data into IntelMQ with on-line validation and live feedback. - -This interface allows inserting "one-shot" data feeds into IntelMQ without the need to configure bots in IntelMQ. - -Developed and maintained by `CERT.at `_. - -→ `Repository: intelmq-webinput-csv `_ - -.. 
image:: https://raw.githubusercontent.com/certat/intelmq-webinput-csv/c20413a401c2077140dd17fb7651db1132fde648/docs/images/screenshot.png - :alt: IntelMQ Webinput CSV Preview page - -IntelMQ Mailgen ------------------- - -A solution allowing an IntelMQ setup with a complex contact database, -managed by a web interface and sending out aggregated email reports. -In different words: To send grouped notifications to network owners using SMTP. - -Developed and maintained by `Intevation `_, initially funded by `BSI `_. - -It consists of these three components, which can also be used on their own. - -IntelMQ CertBUND Contact -^^^^^^^^^^^^^^^^^^^^^^^^ - -The certbund-contact consists of two IntelMQ expert bots, which fetch and process the information from the contact database, and scripts to import RIPE data into the contact database. -Based on user-defined rules, the experts determine to which contact the event is to be sent to, and which e-mail template and attachment format to use. - -→ `Repository: intelmq-certbund-contact `_ - -IntelMQ Fody -^^^^^^^^^^^^ - -Fody is a web based interface for Mailgen. -It allows to read and edit contacts, query sent mails (tickets) and call up data from the :doc:`eventdb`. - -It can also be used to just query the :doc:`eventdb` without using Mailgen. - -.. image:: https://raw.githubusercontent.com/Intevation/intelmq-fody/6e41b836d0a2c350a5f2c5c95a4b3be4d3f46027/docs/images/landing_page.png - :alt: IntelMQ Fody Dashboard - -→ `Repository: intelmq-fody `_ - -→ `Repository: intelmq-fody-backend `_ - -intelmq-mailgen -^^^^^^^^^^^^^^^ - -Sends emails with grouped event data to the contacts determined by the certbund-contact. -Mails can be encrypted with PGP. 
- -→ `Repository: intelmq-mailgen `_ - - -"Constituency Portal" tuency ----------------------------- - -A web application helping CERTs to enable members of their constituency -to self-administrate how they get warnings related to their network objects -(IP addresses, IP ranges, autonomous systems, domains). -*tuency* is developed by `Intevation `_ for -`CERT.at `_. - -If features organizational hierarchies, contact roles, self-administration -and network objects per organization (Autonomous systems, network ranges, -(sub-)domains, RIPE organization handles). A network object claiming and -approval process prevents abuse. -An hierarchical rule-system on the network objects allow fine-grained settings. -The tagging system for contacts and organization complement the -contact-management features of the portal. -Authentication is based on keycloak, which enables the re-use of the user -accounts in the portal. -The integrated API enables IntelMQ to query the portal for the right abuse -contact and notification settings with the -:ref:`intelmq.bots.experts.tuency.expert` expert. - -.. image:: https://gitlab.com/intevation/tuency/tuency/-/raw/64b95ec0/docs/images/netobjects.png - :alt: Tuency Netobjects Overview - -→ `Repository: tuency `_ - - -"Constituency Portal" do-portal (not developed any further) ------------------------------------------------------------ - -*Note:* The *do-portal* is deprecated and succeeded by *tuency*. - -A contact portal with organizational hierarchies, role functionality and network objects based on RIPE, allows self-administration by the contacts. -Can be queried from IntelMQ and integrates the stats-portal. - -Originally developed by `CERT-EU `_, then adapted by `CERT.at `_. - -→ `Repository: do-portal `_ - -Stats Portal ------------- - -A Grafana-based statistics portal for the :doc:`eventdb`. Can be integrated into do-portal. -It uses aggregated data to serve statistical data quickly. - -.. 
image:: https://raw.githubusercontent.com/certtools/stats-portal/38515266aabdf661a0b4becd8e921b03f32429fa/architecture-overview-stats-portal-screen.png - :alt: Stats Portal Architecture - -→ `Repository: stats-portal `_ - -Malware Name Mapping --------------------- - -A mapping for malware names of different feeds with different names to a common family name. - -→ `Repository: malware_name_mapping `_ - -IntelMQ-Docker --------------- - -A repository with tools for IntelMQ docker instance. - -Developed and maintained by `CERT.at `_. - -→ `Repository: intelmq-docker `_ diff --git a/docs/user/upgrade.rst b/docs/user/upgrade.rst deleted file mode 100644 index ede4bd447..000000000 --- a/docs/user/upgrade.rst +++ /dev/null @@ -1,131 +0,0 @@ -.. - SPDX-FileCopyrightText: 2017 Sebastian Wagner - SPDX-License-Identifier: AGPL-3.0-or-later - -Upgrade instructions -==================== - -.. contents:: - -For installation instructions, see :doc:`installation`. - -Read NEWS.md ------------- - -Read the `NEWS.md `_ file to look for things you need to have a look at. - -Stop IntelMQ and create a Backup --------------------------------- - -* Make sure that your IntelMQ system is completely stopped: `intelmqctl stop` -* Create a backup of IntelMQ Home directory, which includes all configurations. They are not overwritten, but backups are always nice to have! - -.. code-block:: bash - - sudo cp -R /opt/intelmq /opt/intelmq-backup - -Upgrade IntelMQ ---------------- - -Before upgrading, check that your setup is clean and there are no events in the queues: - -.. code-block:: bash - - intelmqctl check - intelmqctl list queues -q - -The upgrade depends on how you installed IntelMQ. - -Packages -^^^^^^^^ - -Use your systems package management. - -Docker (beta) -^^^^^^^^^^^^^ - -You can check out all current versions on our `DockerHub `_. - -.. 
code-block:: bash - - docker pull certat/intelmq-full:latest - - docker pull certat/intelmq-nginx:latest - -Alternatively you can use `docker-compose`: - -.. code-block:: bash - - docker-compose pull - -You can check the current versions from intelmq & intelmq-manager & intelmq-api via git commit ref. - -The Version format for each included item is `key=value` and they are saparated via `,`. I. e. `IntelMQ=ab12cd34f, IntelMQ-API=xy65z23`. - -.. code-block:: bash - - docker inspect --format '{{ index .Config.Labels "org.opencontainers.image.version" }}' intelmq-full:latest - -Now restart your container, if you're using docker-compose you simply write: - -.. code-block:: bash - - docker-compose down - -If you dont use docker-compose, you can restart a single container using: - -.. code-block:: bash - - docker ps | grep certat - - docker stop CONTAINER_ID - -PyPi -^^^^ - -.. code-block:: bash - - pip install -U --no-deps intelmq - sudo intelmqsetup - -Using `--no-deps` will not upgrade dependencies, which would probably overwrite the system's libraries. -Remove this option to also upgrade dependencies. - -Local repository -^^^^^^^^^^^^^^^^ - -If you have an editable installation, refer to the instructions in the :doc:`/dev/guide`. - -Update the repository depending on your setup (e.g. `git pull origin master`). - -And run the installation again: - -.. code-block:: bash - - pip install . - sudo intelmqsetup - -For editable installations (development only), run `pip install -e .` instead. - -Upgrade configuration and check the installation ------------------------------------------------- - -Go through `NEWS.md `_ and apply necessary adaptions to your setup. -If you have adapted IntelMQ's code, also read the `CHANGELOG.md `_. - -Check your installation and configuration to detect any problems: - -.. code-block:: bash - - intelmqctl upgrade-config - intelmqctl check - -``intelmqctl upgrade-config`` supports upgrades from one IntelMQ version to the succeeding. 
-If you skip one or more IntelMQ versions, some automatic upgrades *may not* work and manual intervention *may* be necessary. - -Start IntelMQ -------------- - -.. code-block:: bash - - intelmqctl start diff --git a/intelmq/etc/feeds.yaml b/intelmq/etc/feeds.yaml index f79de2dfb..d8817f847 100644 --- a/intelmq/etc/feeds.yaml +++ b/intelmq/etc/feeds.yaml @@ -159,8 +159,8 @@ providers: module: intelmq.bots.collectors.http.collector_http parameters: http_url: https://openphish.com/prvt-intell/ - http_password: "{{ your password}}" - http_username: "{{ your username}}" + http_password: "{{ your password }}" + http_username: "{{ your username }}" rate_limit: 86400 name: __FEED__ provider: __PROVIDER__ @@ -228,8 +228,7 @@ providers: Abuse.ch: Feodo Tracker: description: 'List of botnet Command & Control servers (C&Cs) tracked by Feodo Tracker, associated with Dridex and Emotet (aka Heodo).' - additional_information: https://feodotracker.abuse.ch/ - The data in the column Last Online is used for `time.source` if available, with 00:00 as time. Otherwise first seen is used as `time.source`. + additional_information: The data in the column Last Online is used for `time.source` if available, with 00:00 as time. Otherwise first_seen is used as `time.source`. bots: collector: module: intelmq.bots.collectors.http.collector_http @@ -265,7 +264,8 @@ providers: parameters: skip_header: false default_url_protocol: http:// - type_translation: '{"malware_download": "malware-distribution"}' + type_translation: + - malware_download: malware-distribution delimiter: "," columns: - time.source @@ -288,16 +288,16 @@ providers: feed the difference lies in the data points present in the feed. 
The non-free API available from Blueliv contains, for this specific feed, following extra fields not present in the free API; - "_id" - Internal unique ID - "subType" - Subtype of the Crime Server - "countryName" - Country name where the Crime Server is located, in English - "city" - City where the Crime Server is located - "domain" - Domain of the Crime Server - "host" - Host of the Crime Server - "createdAt" - Date when the Crime Server was added to Blueliv CrimeServer database - "asnCidr" - Range of IPs that belong to an ISP (registered via Autonomous System Number (ASN)) - "asnId" - Identifier of an ISP registered via ASN - "asnDesc" Description of the ISP registered via ASN + "_id" - Internal unique ID + "subType" - Subtype of the Crime Server + "countryName" - Country name where the Crime Server is located, in English + "city" - City where the Crime Server is located + "domain" - Domain of the Crime Server + "host" - Host of the Crime Server + "createdAt" - Date when the Crime Server was added to Blueliv CrimeServer database + "asnCidr" - Range of IPs that belong to an ISP (registered via Autonomous System Number (ASN)) + "asnId" - Identifier of an ISP registered via ASN + "asnDesc" Description of the ISP registered via ASN bots: collector: module: intelmq.bots.collectors.blueliv.collector_crimeserver @@ -328,10 +328,10 @@ providers: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_password: "{{your password}}" + http_password: "{{ your password }}" http_url_formatting: true http_url: https://www.cymru.com/$certname/$certname_{time[%Y%m%d]}.txt - http_username: "{{your login}}" + http_username: "{{ your username }}" rate_limit: 86400 name: __FEED__ provider: __PROVIDER__ @@ -669,7 +669,7 @@ providers: services on the router or tried to gain access to them. The list also contains a list of tags for each address which indicate what behaviour of the address was observed. 
- + The Turris Greylist feed provides PGP signatures for the provided files. You will need to import the public PGP key from the linked documentation page, currently available at @@ -677,15 +677,15 @@ providers: or from below. See the URL Fetcher Collector documentation for more information on PGP signature verification. - + PGP Public key: - + .. code-block:: - + -----BEGIN PGP PUBLIC KEY BLOCK----- Version: SKS 1.1.6 Comment: Hostname: pgp.mit.edu - + mQINBFRl7D8BEADaRFoDa/+r27Gtqrdn8sZL4aSYTU4Q3gDr3TfigK8H26Un/Y79a/DUL1o0 o8SRae3uwVcjJDHZ6KDnxThbqF7URfpuCcCYxOs8p/eu3dSueqEGTODHWF4ChIh2japJDc4t 3FQHbIh2e3GHotVqJGhvxMmWqBFoZ/mlWvhjs99FFBZ87qbUNk7l1UAGEXeWeECgz9nGox40 @@ -1373,7 +1373,7 @@ providers: - extra.tag - extra.redirect_target - extra.category - compose_fields: {"source.url": "http://{0}{1}"} + compose_fields: { "source.url": "http://{0}{1}" } skip_header: true delimiter: "," defaults_fields: @@ -1381,6 +1381,7 @@ providers: revision: 2020-07-08 documentation: http://security-research.dyndns.org/pub/malware-feeds/ public: true + DShield: Block: description: This list summarizes the top 20 attacking class C (/24) subnets over the last three days. 
The number of 'attacks' indicates the number of @@ -1524,8 +1525,8 @@ providers: module: intelmq.bots.collectors.http.collector_http parameters: http_url: https://dgarchive.caad.fkie.fraunhofer.de/today - http_password: "{{ your password}}" - http_username: "{{ your username}}" + http_password: "{{ your password }}" + http_username: "{{ your username }}" rate_limit: 10800 name: __FEED__ provider: __PROVIDER__ @@ -1561,7 +1562,7 @@ providers: collector: module: intelmq.bots.collectors.microsoft.collector_interflow parameters: - api_key: "{{your API key}}" + api_key: "{{ your API key }}" file_match: "^bingmurls_" not_older_than: "2 days" rate_limit: 3600 @@ -1581,7 +1582,7 @@ providers: collector: module: intelmq.bots.collectors.microsoft.collector_interflow parameters: - api_key: "{{your API key}}" + api_key: "{{ your API key }}" file_match: "^ctip_" not_older_than: "2 days" rate_limit: 3600 @@ -1601,7 +1602,7 @@ providers: collector: module: intelmq.bots.collectors.microsoft.collector_azure parameters: - connection_string: "{{your connection string}}" + connection_string: "{{ your connection string }}" container_name: "ctip-infected-summary" name: __FEED__ provider: __PROVIDER__ @@ -1623,7 +1624,7 @@ providers: collector: module: intelmq.bots.collectors.microsoft.collector_azure parameters: - connection_string: "{{your connection string}}" + connection_string: "{{ your connection string }}" container_name: "ctip-c2" name: __FEED__ provider: __PROVIDER__ @@ -1680,7 +1681,7 @@ providers: collector: module: intelmq.bots.collectors.opendxl.collector parameters: - dxl_config_file: "{{location of dxl configuration file}}" + dxl_config_file: "{{ location of dxl configuration file }}" dxl_topic: "/mcafee/event/atd/file/report" parser: module: intelmq.bots.parsers.mcafee.parser_atd @@ -1704,7 +1705,12 @@ providers: parser: module: intelmq.bots.parsers.html_table.parser parameters: - columns: ["time.source", "source.url", "source.ip", "malware.name", "__IGNORE__"] + columns: 
+ - "time.source" + - "source.url" + - "source.ip" + - "malware.name" + - "__IGNORE__" skip_table_head: true default_url_protocol: http:// defaults_fields: @@ -1727,7 +1733,9 @@ providers: parser: module: intelmq.bots.parsers.html_table.parser parameters: - columns: ["source.ip|source.url", "time.source"] + columns: + - "source.ip|source.url" + - "time.source" skip_table_head: true default_url_protocol: http:// defaults_fields: @@ -1740,25 +1748,25 @@ providers: description: With the Enterprise Subscription of 'Have I Been Pwned' you are able to provide a callback URL and any new leak data is submitted to it. It is recommended to put a webserver with Authorization check, TLS etc. in front of the API collector. additional_information: | A minimal nginx configuration could look like: - .. code-block:: + ``` + server { + listen 443 ssl http2; + server_name [your host name]; + client_max_body_size 50M; + + ssl_certificate [path to your certificate]; + ssl_certificate_key [path to your key]; - server { - listen 443 ssl http2; - server_name [your host name]; - client_max_body_size 50M; - - ssl_certificate [path to your key]; - ssl_certificate_key [path to your certificate]; - - location /[your private url] { - if ($http_authorization != '[your private password]') { - return 403; - } - proxy_pass http://localhost:5001/intelmq/push; - proxy_read_timeout 30; - proxy_connect_timeout 30; - } - } + location /[your private url] { + if ($http_authorization != '[your private password]') { + return 403; + } + proxy_pass http://localhost:5001/intelmq/push; + proxy_read_timeout 30; + proxy_connect_timeout 30; + } + } + ``` bots: collector: module: intelmq.bots.collectors.api.collector_api diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index 0114c906d..130acbaba 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -249,18 +249,19 @@ class ClassificationType(String): with extensions.
These old values are automatically mapped to the new ones: - 'botnet drone' -> 'infected-system' - 'ids alert' -> 'ids-alert' - 'c&c' -> 'c2-server' - 'c2server' -> 'c2-server' - 'infected system' -> 'infected-system' - 'malware configuration' -> 'malware-configuration' - 'Unauthorised-information-access' -> 'unauthorised-information-access' - 'leak' -> 'data-leak' - 'vulnerable client' -> 'vulnerable-system' - 'vulnerable service' -> 'vulnerable-system' - 'ransomware' -> 'infected-system' - 'unknown' -> 'undetermined' + + - 'botnet drone' -> 'infected-system' + - 'ids alert' -> 'ids-alert' + - 'c&c' -> 'c2-server' + - 'c2server' -> 'c2-server' + - 'infected system' -> 'infected-system' + - 'malware configuration' -> 'malware-configuration' + - 'Unauthorised-information-access' -> 'unauthorised-information-access' + - 'leak' -> 'data-leak' + - 'vulnerable client' -> 'vulnerable-system' + - 'vulnerable service' -> 'vulnerable-system' + - 'ransomware' -> 'infected-system' + - 'unknown' -> 'undetermined' These values changed their taxonomy: 'malware': In terms of the taxonomy 'malicious-code' they can be either 'infected-system' or 'malware-distribution' diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..f434a7d64 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,147 @@ +site_name: IntelMQ +site_url: https://intelmq.org +site_author: Gethvi +site_description: | + Isn't it sad to see all public and private feeds available and not use them + properly because it's too much "monkey" work? Thanks to IntelMQ you can + collect and process all of them automatically and without effort. 
+ +repo_name: certtools/intelmq +repo_url: https://github.com/certtools/intelmq + +edit_uri: edit/develop/docs +site_dir: docs_build + +theme: + name: material + logo: static/images/Logo_Intel_MQ.svg + palette: + # Palette toggle for light mode + - scheme: default + primary: white + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - scheme: slate + primary: white + toggle: + icon: material/brightness-4 + name: Switch to light mode + + features: + - content.code.copy # copy code button + - navigation.tracking + - search.highlight + - search.share + - search.suggest + +plugins: + - minify: + minify_html: true + - mkdocstrings: + enabled: false + custom_templates: templates + default_handler: python + handlers: + python: + options: + show_source: false + + - search: + lang: en + + - glightbox # enlarging images + + - redirects: + redirect_maps: # TODO add other redirects from old docs + 'en/latest/dev/data-format.html': 'dev/data-format.md' + +extra: + version: + provider: mike + default: latest + social: + - icon: fontawesome/brands/github + link: https://github.com/certtools/intelmq + - icon: fontawesome/brands/python + link: https://pypi.org/project/intelmq/ + - icon: fontawesome/brands/docker + link: https://hub.docker.com/u/certat + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + use_pygments: true # highlight code during build time, not in javascript + linenums: false # enable line numbering + linenums_style: pymdownx-inline # how lines are numbered + - pymdownx.inlinehilite + - pymdownx.snippets + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.keys # keyboard keys + - tables + - footnotes + +nav: + - Introduction: 'index.md' + - Overview: 'overview.md' + - Tutorials: + - Using IntelMQ Manager: 'tutorials/intelmq-manager.md' + - User Guide: + - Intro: 'user/intro.md' + - Event: 'user/event.md' + - Bots: 'user/bots.md' + - Feeds: 'user/feeds.md' + - Abuse 
Contacts: 'user/abuse-contacts.md' + - Manager: 'user/manager.md' + - API: 'user/api.md' + + - Administrator Guide: + - Intro: 'admin/intro.md' + - Installation: + - Linux Package: 'admin/installation/linux-packages.md' + - PyPI: 'admin/installation/pypi.md' + - DockerHub: 'admin/installation/dockerhub.md' + - Source Repository: 'admin/installation/source-repository.md' + - Upgrade: 'admin/upgrade.md' + - Hardware Requirements: 'admin/hardware-requirements.md' + - Configuration: + - IntelMQ: 'admin/configuration/intelmq.md' + - IntelMQ API: 'admin/configuration/intelmq-api.md' + - IntelMQ Manager: 'admin/configuration/intelmq-manager.md' + - Management: + - IntelMQ: 'admin/management/intelmq.md' + - IntelMQ API: 'admin/management/intelmq-api.md' + - Database: + - PostgreSQL: 'admin/database/postgresql.md' + - Elasticsearch: 'admin/database/elasticsearch.md' + - Splunk: 'admin/database/splunk.md' + - Utilities: + - Bash Completion: 'admin/utilities/bash-completion.md' + - Integrations: + - MISP: 'admin/integrations/misp.md' + - N6: 'admin/integrations/n6.md' + - CIFv3: 'admin/integrations/cifv3.md' + - Beta Features: 'admin/beta-features.md' + - Common Problems: 'admin/common-problems.md' + - FAQ: 'admin/faq.md' + + - Developer Guide: + - Intro: 'dev/intro.md' + - Guidelines: 'dev/guidelines.md' + - Structure: 'dev/structure.md' + - Environment: 'dev/environment.md' + - Data Format: 'dev/data-format.md' + - Adding Feeds: 'dev/adding-feeds.md' + - Bot Development: 'dev/bot-development.md' + - Testing: 'dev/testing.md' + - Documentation: 'dev/documentation.md' + - Release: 'dev/release.md' + + - Changelog: 'changelog.md' + - Security: 'security.md' + - Community: 'community.md' + - Help: 'help.md' \ No newline at end of file diff --git a/scripts/generate-event-docs.py b/scripts/generate-event-docs.py new file mode 100644 index 000000000..8570460ad --- /dev/null +++ b/scripts/generate-event-docs.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: 
2020 Sebastian Wagner, 2023 Filip Pokorný +# SPDX-License-Identifier: AGPL-3.0-or-later + +# This script generates the "event.md" documentation page. + +import codecs +import json +import os.path + +from ruamel.yaml import YAML + +BASEDIR = os.path.join(os.path.dirname(__file__), '../') + +yaml = YAML(typ="safe", pure=True) + +HEADER = """\ + + +# Event + +An event represents an individual piece of data processed by IntelMQ. It uses JSON format. + +Example Event: + +```json +{ + "source.geolocation.cc": "JO", + "malware.name": "qakbot", + "source.ip": "82.212.115.188", + "source.asn": 47887, + "classification.type": "c2-server", + "extra.status": "offline", + "source.port": 443, + "classification.taxonomy": "malicious-code", + "source.geolocation.latitude": 31.9522, + "feed.accuracy": 100, + "extra.last_online": "2023-02-16", + "time.observation": "2023-02-16T09:55:12+00:00", + "source.geolocation.city": "amman", + "source.network": "82.212.115.0/24", + "time.source": "2023-02-15T14:19:09+00:00", + "source.as_name": "NEU-AS", + "source.geolocation.longitude": 35.939, + "feed.name": "abusech-feodo-c2-tracker" + } +``` + +## Minimum Requirements + +Below, we have enumerated the minimum recommended requirements for an actionable abuse event. These keys should be +present for the abuse report to make sense for the end recipient. Please note that if you choose to anonymize your +sources, you can substitute **feed.name** with **feed.code**. At least one of the fields **ip**, **fqdn**, **url** or **account** should be present. All the rest of the keys are optional. This list of required fields is *not* enforced by IntelMQ.
+ +| Field | Terminology | +| ----------------------- | ----------- | +| feed.name | Should | +| classification.type | Should | +| classification.taxonomy | Should | +| time.source | Should | +| time.observation | Should | +| source.ip | Should\* | +| source.fqdn | Should\* | +| source.url | Should\* | +| source.account | Should\* | + +\* at least one of them + +## Classification + +IntelMQ classifies events using three labels: `classification.taxonomy`, `classification.type` and `classification.identifier`. This tuple of three values can be used for deduplication of events and describes what happened. + +The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following classification scheme loosely follows the [Reference Security Incident Taxonomy (RSIT)](https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force/): + +| Classification Taxonomy | Classification Type | Description | +|----------------------|----------------------|----------------------| +| abusive-content | harmful-speech | Discreditation or discrimination of somebody, cyber stalking, racism or threats against one or more individuals. | +| abusive-content | spam | Or 'Unsolicited Bulk Email', this means that the recipient has not granted verifiable permission for the message to be sent and that the message is sent as part of a larger collection of messages, all having a functionally comparable content. | +| abusive-content | violence | Child pornography, glorification of violence, etc. | +| availability | ddos | Distributed Denial of Service attack, e.g. SYN-Flood or UDP-based reflection/amplification attacks. | +| availability | dos | Denial of Service attack, e.g. sending specially crafted requests to a web application which causes the application to crash or slow down. | +| availability | misconfiguration | Software misconfiguration resulting in service availability issues, e.g. DNS server with outdated DNSSEC Root Zone KSK. 
| +| availability | outage | Outage caused e.g. by air condition failure or natural disaster. | +| availability | sabotage | Physical sabotage, e.g cutting wires or malicious arson. | +| fraud | copyright | Offering or Installing copies of unlicensed commercial software or other copyright protected materials (Warez). | +| fraud | masquerade | Type of attack in which one entity illegitimately impersonates the identity of another in order to benefit from it. | +| fraud | phishing | Masquerading as another entity in order to persuade the user to reveal private credentials. | +| fraud | unauthorized-use-of-resources | Using resources for unauthorized purposes including profit-making ventures, e.g. the use of e-mail to participate in illegal profit chain letters or pyramid schemes. | +| information-content-security | data-leak | Leaked confidential information like credentials or personal data. | +| information-content-security | data-loss | Loss of data, e.g. caused by harddisk failure or physical theft. | +| information-content-security | unauthorised-information-access | Unauthorized access to information, e.g. by abusing stolen login credentials for a system or application, intercepting traffic or gaining access to physical documents. | +| information-content-security | unauthorised-information-modification | Unauthorised modification of information, e.g. by an attacker abusing stolen login credentials for a system or application or a ransomware encrypting data. | +| information-gathering | scanner | Attacks that send requests to a system to discover weaknesses. This also includes testing processes to gather information on hosts, services and accounts. Examples: fingerd, DNS querying, ICMP, SMTP (EXPN, RCPT, \...), port scanning. | +| information-gathering | sniffing | Observing and recording of network traffic (wiretapping). | +| information-gathering | social-engineering | Gathering information from a human being in a non-technical way (e.g. 
lies, tricks, bribes, or threats). This IOC refers to a resource, which has been observed to perform brute-force attacks over a given application protocol. | +| intrusion-attempts | brute-force | Multiple login attempts (Guessing/cracking of passwords, brute force). | +| intrusion-attempts | exploit | An attack using an unknown exploit. | +| intrusion-attempts | ids-alert | IOCs based on a sensor network. This is a generic IOC denomination, should it be difficult to reliably denote the exact type of activity involved for example due to an anecdotal nature of the rule that triggered the alert. | +| intrusions | application-compromise| Compromise of an application by exploiting (un)known software vulnerabilities, e.g. SQL injection. | +| intrusions | burglary | Physical intrusion, e.g. into corporate building or data center. | +| intrusions | privileged-account-compromise | Compromise of a system where the attacker gained administrative privileges. | +| intrusions | system-compromise | Compromise of a system, e.g. unauthorised logins or commands. This includes compromising attempts on honeypot systems. | +| intrusions | unprivileged-account-compromise | Compromise of a system using an unprivileged (user/service) account. | +| malicious-code | c2-server | This is a command and control server in charge of a given number of botnet drones. | +| malicious-code | infected-system | This is a compromised machine, which has been observed to make a connection to a command and control server. | +| malicious-code | malware-configuration | This is a resource which updates botnet drones with a new configuration. | +| malicious-code | malware-distribution | URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. | +| other | blacklist | Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. 
The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. Not in RSIT. | +| other | dga-domain | DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in RSIT. | +| other | other | All incidents which don't fit in one of the given categories should be put into this class. | +| other | malware | An IoC referring to a malware (sample) itself. Not in RSIT. | +| other | proxy | This refers to the use of proxies from inside your network. Not in RSIT. | +| test | test | Meant for testing. Not in RSIT. | +| other | tor | This IOC refers to incidents related to TOR network infrastructure. Not in RSIT. | +| other | undetermined | The categorisation of the incident is unknown/undetermined. | +| vulnerable | ddos-amplifier | Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled. | +| vulnerable | information-disclosure | Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis. | +| vulnerable | potentially-unwanted-accessible | Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC. | +| vulnerable | vulnerable-system | A system which is vulnerable to certain attacks. Example: misconfigured client proxy settings (example: WPAD), outdated operating system version, etc. | +| vulnerable | weak-crypto | Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks. | + +## Meaning of source and destination identities + +Meaning of source and destination identities for each `classification.type` can be different. 
Usually the main information is in the `source.*` fields. + +The `classification.identifier` is often a normalized malware name, grouping many variants or the affected network protocol. + +Examples of the meaning of the *source* and *destination* fields for various `classification.type` and possible identifiers are shown here. + +| Classification Type | Source | Destination | Possible Identifiers | +| --------------------- | -------------------------------------- | -------------------- | ------------------------------------ | +| blacklist | blacklisted device | | | +| brute-force | attacker | target | | +| c2-server | (sinkholed) c&c server | | zeus, palevo, feodo | +| ddos | attacker | target | | +| dga-domain | infected device | | | +| dropzone | server hosting stolen data | | | +| exploit | hosting server | | | +| ids-alert | triggering device | | | +| infected-system | infected device | contacted c&c server | | +| malware | infected device | | zeus, palevo, feodo | +| malware-configuration | infected device | | | +| malware-distribution | server hosting malware | | | +| phishing | phishing website | | | +| proxy | server allowing policy/security bypass | | | +| scanner | scanning device | scanned device | http, modbus, wordpress | +| spam | infected device | targeted server | | +| system-compromise | server | | | +| vulnerable-system | vulnerable device | | heartbleed, openresolver, snmp, wpad | + +Examples: + +- If an event describes an IP address that connects to a zeus command and control server, it's about the infected device. Therefore the `classification.taxonomy` is `malicious-code`, `classification.type` is `infected-system` and the `classification.identifier` is `zeus`. + +- If an event describes an IP address where a command and control server is running, the event's +`classification.type` is `c2-server`. The `malware.name` can have the full name, e.g. `zeus_p2p`.
+ +## Additional Information + +Information that does not fit into any of the event fields should be placed in the `extra` namespace. Therefore the keys must be prefixed with the `extra.` string. There are no other rules on key names and values for additional information. + +## Fields Reference + +Here you can find detailed information about all the possible fields used in an event. + + +""" + + +def info(key, value=""): + return f"**{key.title()}:** {str(value).strip()}\n\n" + + +def main(): + output = HEADER + + with codecs.open(os.path.join(BASEDIR, 'intelmq/etc/harmonization.conf'), encoding='utf-8') as f: + harmonization = json.load(f)['event'] + + for key, value in sorted(harmonization.items()): + # output += '| {:32} | {:27} | {} |\n'.format(key, + # f"[{value['type']}](#{value['type'].lower()})", + # value['description']) + output += f"""### `{key}`
\n\n""" + output += f"**Type:** [{value['type']}](#{value['type'].lower()})\n\n" + output += value['description'] + output += "\n\n" + + return output + + +if __name__ == '__main__': # pragma: no cover + + with codecs.open(os.path.join(BASEDIR, 'docs/user/event.md'), 'w', encoding='utf-8') as f: + f.write(main()) diff --git a/scripts/generate-feeds-docs.py b/scripts/generate-feeds-docs.py new file mode 100644 index 000000000..74f8eb279 --- /dev/null +++ b/scripts/generate-feeds-docs.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: 2020 Sebastian Wagner, 2023 Filip Pokorný +# SPDX-License-Identifier: AGPL-3.0-or-later + +# This script generates the "feeds.md" documentation page. + +import codecs +import json +import os.path + +from ruamel.yaml import YAML + +BASEDIR = os.path.join(os.path.dirname(__file__), '../') +yaml = YAML(typ="safe", pure=True) + +HEADER = """\ + + +# Feeds + +The available feeds are grouped by the provider of the feeds. +For each feed the collector and parser that can be used is documented as well as any feed-specific parameters. +To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then rebuild the documentation. 
+ +""" + + +def info(key, value=""): + return f"**{key.title()}:** {str(value).strip()}\n\n" + + +def main(): + with codecs.open(os.path.join(BASEDIR, 'intelmq/etc/feeds.yaml'), encoding='utf-8') as fhandle: + config = yaml.load(fhandle.read()) + + output = HEADER + + for provider, feeds in sorted(config['providers'].items(), key=lambda x: x[0]): + + output += f"## {provider}\n\n" + + for feed_name, feed_info in sorted(feeds.items(), key=lambda x: x[0]): + + output += f"### {feed_name}\n\n" + + output += feed_info['description'] + output += '\n\n' + + output += info("public", "yes") if feed_info.get('public') else info("public", "no") + output += info("revision", feed_info['revision']) + + if feed_info.get('documentation') is not None: + output += info("documentation", f"<{feed_info['documentation']}>") + + if feed_info.get('additional_information') is not None: + output += info("additional information", feed_info['additional_information']) + + output += '\n' + + for bot, bot_info in sorted(feed_info['bots'].items(), key=lambda x: x[0]): + + output += f"**{bot.title()} configuration**\n\n" + + output += "```yaml\n" + output += f"module: {bot_info['module']}\n" + + if bot_info.get('parameters'): + output += "parameters:\n" + for key, value in sorted(bot_info['parameters'].items(), key=lambda x: x[0]): + + if value == "__FEED__": + value = feed_name + + if value == "__PROVIDER__": + value = provider + + # format non-empty lists with double-quotes + # single quotes are not conform JSON and not correctly detected/transformed by the manager + if isinstance(value, (list, tuple)) and value: + value = json.dumps(value) + + output += f" {key}: {value}\n" + + output += "```\n\n" + + output += "---\n\n\n" + + return output + + +if __name__ == '__main__': + with codecs.open(os.path.join(BASEDIR, 'docs/user/feeds.md'), 'w', encoding='utf-8') as f: + f.write(main()) diff --git a/setup.py b/setup.py index b19b2f8f5..a19539167 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ 
module = '.'.join(file.with_suffix('').parts) BOTS.append('{0} = {0}:BOT.run'.format(module)) -with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as handle: +with open(os.path.join(os.path.dirname(__file__), 'README.md')) as handle: README = handle.read() setup(