From 74f1856eac103ec51894025a270ffc4c9f452c6d Mon Sep 17 00:00:00 2001 From: Vincent Li Date: Mon, 6 Jan 2025 21:09:25 -0800 Subject: [PATCH] xdp-tailcall: add DNS XDP program Add the DNS XDP program as a tail-called program. Signed-off-by: Vincent Li --- xdp-tailcall/Makefile | 2 +- xdp-tailcall/xdp_dns.c | 121 ++++++++++++ xdp-tailcall/xdp_dns.h | 91 +++++++++ xdp-tailcall/xdp_dns_log.c | 110 +++++++++++ xdp-tailcall/xdp_tailcall.bpf.c | 318 ++++++++++++++++++++++++++++++++ 5 files changed, 641 insertions(+), 1 deletion(-) create mode 100644 xdp-tailcall/xdp_dns.c create mode 100644 xdp-tailcall/xdp_dns.h create mode 100644 xdp-tailcall/xdp_dns_log.c diff --git a/xdp-tailcall/Makefile b/xdp-tailcall/Makefile index 03a25f6e..206a59b3 100644 --- a/xdp-tailcall/Makefile +++ b/xdp-tailcall/Makefile @@ -2,7 +2,7 @@ XDP_TARGETS := xdp_tailcall.bpf BPF_SKEL_TARGETS := $(XDP_TARGETS) -USER_TARGETS := xdp_sni xdp_sni_log +USER_TARGETS := xdp_sni xdp_sni_log xdp_dns xdp_dns_log LIB_DIR = ../lib diff --git a/xdp-tailcall/xdp_dns.c b/xdp-tailcall/xdp_dns.c new file mode 100644 index 00000000..6bac48ff --- /dev/null +++ b/xdp-tailcall/xdp_dns.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2024, BPFire. All rights reserved. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include +#include + +#define MAX_DOMAIN_SIZE 63 // Maximum encoded domain length in bytes; domain_key.data below is one byte larger + +struct domain_key { + struct bpf_lpm_trie_key lpm_key; + char data[MAX_DOMAIN_SIZE + 1]; +}; + +// Encode a dotted domain name as length-prefixed labels followed by a zero byte. NOTE(review): writes to 'encoded' are not bounded; main() passes the 64-byte dkey.data, so a long argv[3] can overrun it. +static void encode_domain(const char *domain, char *encoded) +{ + const char *ptr = domain; + char *enc_ptr = encoded; + size_t label_len; + + while (*ptr) { + // Find the length of the current label + label_len = strcspn(ptr, "."); + if (label_len > 0) { + // Set the length of the label + *enc_ptr++ = (char)label_len; + // Copy the label itself + memcpy(enc_ptr, ptr, label_len); + enc_ptr += label_len; + } + // Move to the next label + ptr += label_len; + if (*ptr == '.') { + ptr++; // Skip the dot + } + } + // Append a zero-length label to mark the end of the domain name + *enc_ptr++ = 0; +} + +static void reverse_string(char *str) /* Reverse str in place; the length comes from strlen(), so it stops at the first zero byte */ +{ + int len = strlen(str); + for (int i = 0; i < len / 2; i++) { + char temp = str[i]; + str[i] = str[len - i - 1]; + str[len - i - 1] = temp; + } +} + +int main(int argc, char *argv[]) +{ + int map_fd; + struct domain_key dkey = { 0 }; + __u8 value = 1; + + // Require exactly three arguments: argv[1] map path, argv[2] add or delete, argv[3] domain + if (argc != 4) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + // Encode the domain name with label lengths, then reverse the bytes (presumably so domain suffixes become LPM prefixes; confirm against the BPF side) + encode_domain(argv[3], dkey.data); + reverse_string(dkey.data); + + // Set the LPM trie key prefix length in bits (encoded bytes times 8) + dkey.lpm_key.prefixlen = strlen(dkey.data) * 8; + + // Open the BPF map + const char *map_path = argv[1]; + map_fd = bpf_obj_get(map_path); + if (map_fd < 0) { + fprintf(stderr, "Failed to open map at %s: %s\n", map_path, strerror(errno)); + return 1; + } + + // Add or delete the domain based on argv[2] + if (strcmp(argv[2], "add") == 0) { + // Update the map with the encoded domain name + if (bpf_map_update_elem(map_fd, &dkey, &value, BPF_ANY) != 0) { + fprintf(stderr, "Failed to add domain to map: %s\n",
strerror(errno)); + return 1; + } + printf("Domain %s added to denylist\n", argv[3]); + } else if (strcmp(argv[2], "delete") == 0) { + // Remove the domain from the map + if (bpf_map_delete_elem(map_fd, &dkey) != 0) { + fprintf(stderr, + "Failed to remove domain from map: %s\n", + strerror(errno)); + return 1; + } + printf("Domain %s removed from denylist\n", argv[3]); + } else { + fprintf(stderr, "Invalid command: %s. Use 'add' or 'delete'.\n", + argv[2]); + return 1; + } + + return 0; +} diff --git a/xdp-tailcall/xdp_dns.h b/xdp-tailcall/xdp_dns.h new file mode 100644 index 00000000..006fbe6b --- /dev/null +++ b/xdp-tailcall/xdp_dns.h @@ -0,0 +1,91 @@ +#define DNS_PORT 53 +#define RR_TYPE_OPT 41 + +#define FRAME_SIZE 1000000000 + +/* + * Fixed DNS message header: id, flags, and four 16-bit counts + */ +struct dnshdr { + __u16 id; + union { + struct { + __u8 rd : 1; + __u8 tc : 1; + __u8 aa : 1; + __u8 opcode : 4; + __u8 qr : 1; + + __u8 rcode : 4; + __u8 cd : 1; + __u8 ad : 1; + __u8 z : 1; + __u8 ra : 1; + } as_bits_and_pieces; + __u16 as_value; + } flags; + __u16 qdcount; + __u16 ancount; + __u16 nscount; + __u16 arcount; +}; + +struct dns_qrr { + __u16 qtype; + __u16 qclass; +}; + +struct dns_rr { + __u16 type; + __u16 class; + __u32 ttl; + __u16 rdata_len; +} __attribute__((packed)); + +struct option { + __u16 code; + __u16 len; + __u8 data[]; +} __attribute__((packed)); + +/* + * Update *csum in place for a 16-bit value that changes from old_val to new_val (carries are folded back into 16 bits) + */ +static __always_inline +void update_checksum(__u16 *csum, __u16 old_val, __u16 new_val) +{ + __u32 new_csum_value; + __u32 new_csum_comp; + __u32 undo; + + undo = ~((__u32)*csum) + ~((__u32)old_val); + new_csum_value = undo + (undo < ~((__u32)old_val)) + (__u32)new_val; + new_csum_comp = new_csum_value + (new_csum_value < ((__u32)new_val)); + new_csum_comp = (new_csum_comp & 0xFFFF) + (new_csum_comp >> 16); + new_csum_comp = (new_csum_comp & 0xFFFF) + (new_csum_comp >> 16); + *csum = (__u16)~new_csum_comp; +} + + +//TCP + +#define NSEC_PER_SEC 1000000000L + +#define ETH_ALEN 6 +#define
ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define IP_DF 0x4000 +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + diff --git a/xdp-tailcall/xdp_dns_log.c b/xdp-tailcall/xdp_dns_log.c new file mode 100644 index 00000000..0940fefe --- /dev/null +++ b/xdp-tailcall/xdp_dns_log.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024, BPFire. All rights reserved. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include + +#define MAX_DOMAIN_SIZE 63 + +struct qname_event { + __u8 len; + __u32 src_ip; // IPv4 address + char qname[MAX_DOMAIN_SIZE + 1]; +}; + +// Convert the first len bytes of a length-prefixed DNS name into dotted text in output. NOTE(review): label_len is not checked against len or against the size of output, so a label cut short by truncation can be read and written past both buffers. +void dns_label_to_dot_notation(char *dns_name, char *output, size_t len) +{ + size_t pos = 0, out_pos = 0; + + while (pos < len) { + __u8 label_len = dns_name[pos]; + if (label_len == 0) + break; // End of domain name + + if (out_pos != 0) { + output[out_pos++] = '.'; // Add a dot between labels + } + + // Copy the label + for (int i = 1; i <= label_len; i++) { + output[out_pos++] = dns_name[pos + i]; + } + + pos += label_len + 1; // Move to the next label + } + + output[out_pos] = '\0'; // Null-terminate the result +} + +// Ring buffer callback (signature as expected by ring_buffer__new): formats one qname_event and writes it to syslog +int handle_event(void *ctx __attribute__((unused)), void *data, + size_t data_sz __attribute__((unused))) +{ + struct qname_event *event = (struct qname_event *)data; + + char src_ip_str[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &event->src_ip, src_ip_str, sizeof(src_ip_str)); + + char domain_str[MAX_DOMAIN_SIZE] = { 0 }; /* 63 bytes, one less than event->qname */ + dns_label_to_dot_notation(event->qname, domain_str, event->len); + + syslog(LOG_INFO, "Received qname: %s from source IP: %s", domain_str, + src_ip_str); + + return 0; // Return 0 to indicate success +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + const char *ringbuf_path = argv[1]; + struct ring_buffer *rb; + int ringbuf_fd; + + openlog("qname_logger", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); + + // Open the ring buffer + ringbuf_fd = bpf_obj_get(ringbuf_path); + if (ringbuf_fd < 0) { + perror("Failed to open ring buffer"); + return 1; + } + + // Set up ring buffer polling with the corrected function signature + rb = ring_buffer__new(ringbuf_fd, handle_event, NULL, NULL); + if (!rb) { + perror("Failed
to create ring buffer"); + return 1; + } + + // Poll forever. NOTE(review): the return value of ring_buffer__poll() is ignored and the loop has no exit, so the cleanup after it is never reached + while (1) { + ring_buffer__poll(rb, -1); // Block indefinitely + } + + ring_buffer__free(rb); + closelog(); + return 0; +} diff --git a/xdp-tailcall/xdp_tailcall.bpf.c b/xdp-tailcall/xdp_tailcall.bpf.c index df560d8e..a3484da0 100644 --- a/xdp-tailcall/xdp_tailcall.bpf.c +++ b/xdp-tailcall/xdp_tailcall.bpf.c @@ -27,11 +27,207 @@ #include #include +#include +#include +#include +#include +#include "bpf/compiler.h" +#include "xdp_dns.h" +/* with vmlinux.h, define here to avoid the undefined error */ +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ + +// do not use libc includes because this causes clang +// to include 32bit headers on 64bit ( only ) systems. +#define memcpy __builtin_memcpy + #define SERVER_NAME_EXTENSION 0 #define MAX_DOMAIN_SIZE 63 // Program identifiers for the array map #define PROG_SNI_INDEX 0 +#define PROG_DNS_INDEX 1 + +struct meta_data { + __u16 eth_proto; + __u16 ip_pos; + __u16 opt_pos; + __u16 unused; +}; + +/* Define the LPM Trie Map for domain names */ +struct domain_key { + struct bpf_lpm_trie_key lpm_key; + char data[MAX_DOMAIN_SIZE + 1]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct domain_key); + __type(value, __u8); + __uint(max_entries, 10000); + __uint(pinning, LIBBPF_PIN_BY_NAME); + __uint(map_flags, BPF_F_NO_PREALLOC); +} domain_denylist SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); // 4KB buffer + __uint(pinning, LIBBPF_PIN_BY_NAME); +} dns_ringbuf SEC(".maps"); + +struct qname_event { + __u8 len; + __u32 src_ip; // Store IPv4 address + char qname[MAX_DOMAIN_SIZE + 1]; +}; + +/* + * Store the VLAN header + */ +struct vlanhdr { + __u16 tci; + __u16 encap_proto; +}; + +/* + * Parse cursor: pos is the current position, end is the end of packet data (both set in cursor_init) + */ +struct cursor { + void *pos; + void *end; +}; + +static __always_inline void
cursor_init(struct cursor *c, struct xdp_md *ctx) +{ + c->end = (void *)(long)ctx->data_end; + c->pos = (void *)(long)ctx->data; +} + +#define PARSE_FUNC_DECLARATION(STRUCT) \ + static __always_inline struct STRUCT *parse_##STRUCT(struct cursor *c) \ + { \ + struct STRUCT *ret = c->pos; \ + if (c->pos + sizeof(struct STRUCT) > c->end) \ + return 0; \ + c->pos += sizeof(struct STRUCT); \ + return ret; \ + } + +PARSE_FUNC_DECLARATION(ethhdr) +PARSE_FUNC_DECLARATION(vlanhdr) +PARSE_FUNC_DECLARATION(iphdr) +PARSE_FUNC_DECLARATION(udphdr) +PARSE_FUNC_DECLARATION(dnshdr) + +static __always_inline struct ethhdr *parse_eth(struct cursor *c, + __u16 *eth_proto) +{ /* Parses the Ethernet header and up to two VLAN tags; stores the resulting EtherType in *eth_proto and returns 0 if the packet is too short */ + struct ethhdr *eth; + + if (!(eth = parse_ethhdr(c))) + return 0; + + *eth_proto = eth->h_proto; + if (*eth_proto == __bpf_htons(ETH_P_8021Q) || + *eth_proto == __bpf_htons(ETH_P_8021AD)) { + struct vlanhdr *vlan; + + if (!(vlan = parse_vlanhdr(c))) + return 0; + + *eth_proto = vlan->encap_proto; + if (*eth_proto == __bpf_htons(ETH_P_8021Q) || + *eth_proto == __bpf_htons(ETH_P_8021AD)) { + if (!(vlan = parse_vlanhdr(c))) + return 0; + + *eth_proto = vlan->encap_proto; + } + } + return eth; +} + +static __always_inline char *parse_dname(struct cursor *c) +{ /* Walks the labels of a DNS name starting at c->pos and advances the cursor past it; returns the start of the name, or 0 when the name is malformed or runs past c->end */ + __u8 *dname = c->pos; + __u8 i; + + for (i = 0; i < 128; i++) { /* Maximum 128 labels */ + __u8 o; + + // Check bounds before accessing the next byte + if (c->pos + 1 > c->end) + return 0; + + o = *(__u8 *)c->pos; + + // Check for DNS name compression + if ((o & 0xC0) == 0xC0) { + // If the current label is compressed, skip the next 2 bytes + if (c->pos + 2 > + c->end) // Ensure we have 2 bytes to skip + return 0; + + c->pos += 2; + return (char *)dname; // Return the parsed domain name + } else if (o > 63 || c->pos + o + 1 > c->end) { + // Label is invalid or out of bounds + return 0; + } + + // Move the cursor by label length and its leading length byte + c->pos += o + 1; + + // End of domain name (null label length) + if (o == 0) + return (char *)dname; +
} + + // If we exit the loop without finding a terminating label, return NULL + return 0; +} + +static __always_inline void *custom_memcpy(void *dest, const void *src, + __u8 len) +{ + __u8 i; + + // Perform the copy byte-by-byte to satisfy the BPF verifier + for (i = 0; i < len; i++) { + *((__u8 *)dest + i) = *((__u8 *)src + i); + } + + return dest; +} + +// Count the bytes of str up to the first zero byte, capped at MAX_DOMAIN_SIZE and at the packet end c->end +static __always_inline __u8 custom_strlen(const char *str, struct cursor *c) +{ + __u8 len = 0; + +// Loop through the string, never examining more than MAX_DOMAIN_SIZE bytes +#pragma unroll + for (int i = 0; i < MAX_DOMAIN_SIZE; i++) { + if (str + i >= + c->end) // Check if we are at or beyond the end of the packet + break; + if (str[i] == '\0') + break; + len++; + } + + return len; +} + +static __always_inline void reverse_string(char *str, __u8 len) +{ + for (int i = 0; i < len / 2; i++) { + char temp = str[i]; + str[i] = str[len - 1 - i]; + str[len - 1 - i] = temp; + } +} struct { __uint(type, BPF_MAP_TYPE_HASH); // Hash map for SNI denylist @@ -270,6 +466,116 @@ int xdp_tls_sni(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp") +int xdp_dns_denylist(struct xdp_md *ctx) +{ + struct meta_data *md = (void *)(long)ctx->data_meta; + struct cursor c; + struct ethhdr *eth; + struct iphdr *ipv4; + struct udphdr *udp; + struct dnshdr *dns; + char *qname; + __u8 len = 0; + + struct domain_key dkey = { 0 }; // LPM trie key + + if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct meta_data))) + return XDP_PASS; + + cursor_init(&c, ctx); + md = (void *)(long)ctx->data_meta; + if ((void *)(md + 1) > c.pos) + return XDP_PASS; + + if (!(eth = parse_eth(&c, &md->eth_proto))) + return XDP_PASS; + md->ip_pos = c.pos - (void *)eth; + + if (md->eth_proto == __bpf_htons(ETH_P_IP)) { + if (!(ipv4 = parse_iphdr(&c))) + return XDP_PASS; /* Truncated IPv4 header. NOTE(review): parse_iphdr() advances by sizeof(struct iphdr) only, so IP options (ihl above 5) are not skipped */ + switch (ipv4->protocol) { + case IPPROTO_UDP: + if (!(udp = parse_udphdr(&c)) || + !(udp->dest == __bpf_htons(DNS_PORT)) || + !(dns = parse_dnshdr(&c))) + return
XDP_PASS; /* Not DNS */ + + if (dns->flags.as_bits_and_pieces.qr || + dns->qdcount != __bpf_htons(1) || dns->ancount || + dns->nscount || dns->arcount > __bpf_htons(2)) + return XDP_ABORTED; // Return FORMERR? NOTE(review): arcount is compared while still in network byte order; confirm this is intended + + qname = parse_dname(&c); + if (!qname) { + return XDP_ABORTED; // Return FORMERR? + } + + len = custom_strlen(qname, &c); + //bpf_printk("qname %s len %d ipid %d from %pI4", qname, len, ipv4->id, &ipv4->saddr); + + //avoid R2 offset is outside of the packet error + if (qname + len > c.end) + return XDP_ABORTED; // Return FORMERR? + + int copy_len = len < MAX_DOMAIN_SIZE ? len : + MAX_DOMAIN_SIZE; + + // Allocate a buffer from the ring buffer + struct qname_event *event = bpf_ringbuf_reserve( + &dns_ringbuf, sizeof(*event), 0); + + // Log debug info about event reservation + //log_debug_info(ctx, qname, event, len, ipv4->saddr); + + if (!event) + return XDP_PASS; // Ring buffer full: the packet is passed. NOTE(review): this return also skips the denylist lookup below + + // Set event fields + event->len = copy_len; + event->src_ip = + ipv4->saddr; // Extract source IP address + custom_memcpy(event->qname, qname, copy_len); + event->qname[copy_len] = + '\0'; // Ensure null termination + + // Submit the event + bpf_ringbuf_submit(event, 0); + + custom_memcpy(dkey.data, qname, copy_len); + dkey.data[MAX_DOMAIN_SIZE] = + '\0'; // Ensure null-termination + reverse_string(dkey.data, copy_len); + + // Set the LPM key prefix length (the length of the domain name string) + dkey.lpm_key.prefixlen = + copy_len * 8; // Prefix length in bits + + //bpf_printk("domain_key %s copy_len is %d from %pI4", dkey.data, copy_len, &ipv4->saddr); + + if (bpf_map_lookup_elem(&domain_denylist, &dkey)) { + bpf_printk( + "Domain %s found in denylist, dropping packet\n", + dkey.data); + return XDP_DROP; + } + +/* + __u8 value = 1; + if (bpf_map_update_elem(&domain_denylist, &dkey, &value, BPF_ANY) < 0) { + bpf_printk("Domain %s not updated in denylist\n", dkey.data); + } else { + bpf_printk("Domain %s updated in denylist\n", dkey.data); + } +*/ + + break; + } + } +
return XDP_PASS; +} + struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __uint(max_entries, 3); @@ -280,6 +586,7 @@ struct { } tail_call_array SEC(".maps") = { .values = { [PROG_SNI_INDEX] = (void *)&xdp_tls_sni, + [PROG_DNS_INDEX] = (void *)&xdp_dns_denylist, }, }; @@ -313,6 +620,17 @@ int xdp_tailcall(struct xdp_md *ctx) { bpf_tail_call(ctx, &tail_call_array, PROG_SNI_INDEX); return XDP_ABORTED; // Should not reach here if tail call succeeds } + } else if (ip->protocol == IPPROTO_UDP) { + struct udphdr *udp = (void *)(ip + 1); + if ((void *)(udp + 1) > data_end) + return XDP_PASS; + + // Check if destination port is 53 (DNS) + if (udp->dest == bpf_htons(53)) { + // Tail call the DNS program + bpf_tail_call(ctx, &tail_call_array, PROG_DNS_INDEX); + return XDP_ABORTED; // Should not reach here if tail call succeeds + } } return XDP_PASS;