From ea44e2825658be9b3bc01add0ab80841234e156f Mon Sep 17 00:00:00 2001
From: Tom Alexander
Date: Fri, 2 Jan 2026 23:28:29 -0500
Subject: [PATCH] Add a custom nftables firewall config.

---

Reviewer notes (placed after the three-dash line, so not part of the commit message):

    Overview
      * default.nix registers an nftables table named "my-fw" (family "inet")
        whose content is read verbatim from ./files/my-fw.nft.
      * NOTE(review): whether networking.nftables.enable is set is not visible
        in this hunk -- confirm against the rest of default.nix.
      * NOTE(review): leading whitespace in this copy of the patch was
        reconstructed; verify the context lines against the tree before applying.

    Sets defined in my-fw.nft
      * internal-iface       ifname set: "lxc*", "cilium_net".
      * node-cidr-ipv4/6     10.215.1.0/24 and 2620:11f:7001:7:ffff:ffff:0ad7:0100/120.
      * service-cidr-ipv4/6  10.197.0.0/16 and fd00:3e42:e349::/112.
                             Defined, but no rule in this file references them.
      * pod-cidr-ipv4/6      10.200.0.0/16 and 2620:11f:7001:7:ffff:eeee::/96.
      * public-ports         tcp 22 and tcp 6443, matched for any source address.
      * node-to-node-ports   tcp 2379-2380, 4240, 10250, 7946 and udp 7946.
                             The wireguard, nodeport and vxlan entries are
                             commented out, so those ports are not opened.

    Chains defined in my-fw.nft
      * rpfilter (prerouting, priority mangle + 10, policy drop):
        accepts DHCPv4 client/server traffic, accepts packets that pass the
        "fib saddr . mark . iif check exists" test, then jumps to rpfilter-allow,
        which accepts pod-CIDR sources arriving on an internal-iface member.
        Anything left is logged (pkttype host only) and hits the drop policy.
      * input (policy drop):
        accepts "lo"; the ct state vmap drops invalid, accepts
        established/related and sends new/untracked packets to input-allow.
        input-allow accepts ICMP echo-request, ICMPv6 except nd-redirect and
        type 139, UDP port 546 to fe80::/64, the public-ports set, and the
        node-to-node-ports set when both source and destination are in the
        node CIDR. Packets not accepted there are logged and rejected
        (TCP gets a reset).
        NOTE(review): both log rules are unconditional on the verdict, so an
        unaccepted TCP SYN addressed to this host presumably produces both a
        "refused connection: " and a "refused packet: " entry -- confirm this
        is intended.
      * forward (policy drop):
        same ct state vmap, dispatching to forward-allow, which accepts ICMPv6
        except router-renumbering and type 139, connections with ct status dnat,
        any pod-CIDR source, and node-CIDR source to pod-CIDR destination.
        Packets not accepted there are logged with "blocked forwarding packet: "
        and hit the drop policy.

 nix/kubernetes/roles/firewall/default.nix     |   5 +
 nix/kubernetes/roles/firewall/files/my-fw.nft | 140 ++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 nix/kubernetes/roles/firewall/files/my-fw.nft

diff --git a/nix/kubernetes/roles/firewall/default.nix b/nix/kubernetes/roles/firewall/default.nix
index 771aa1d9..b983de65 100644
--- a/nix/kubernetes/roles/firewall/default.nix
+++ b/nix/kubernetes/roles/firewall/default.nix
@@ -52,5 +52,10 @@
 
     # Check logs for blocked connections:
     # journalctl -k or dmesg
+
+    networking.nftables.tables."my-fw" = {
+      family = "inet";
+      content = (builtins.readFile ./files/my-fw.nft);
+    };
   };
 }
diff --git a/nix/kubernetes/roles/firewall/files/my-fw.nft b/nix/kubernetes/roles/firewall/files/my-fw.nft
new file mode 100644
index 00000000..c434f6f8
--- /dev/null
+++ b/nix/kubernetes/roles/firewall/files/my-fw.nft
@@ -0,0 +1,140 @@
+set internal-iface {
+  type ifname
+  # Tell the kernel this set may include ranges, so it can pick the best datastructure.
+  flags interval
+  elements = {
+    "lxc*",
+    "cilium_net"
+  }
+}
+
+set node-cidr-ipv4 {
+  type ipv4_addr
+  flags constant, interval
+  elements = { 10.215.1.0/24 }
+}
+
+set node-cidr-ipv6 {
+  type ipv6_addr
+  flags constant, interval
+  elements = { 2620:11f:7001:7:ffff:ffff:0ad7:0100/120 }
+}
+
+set service-cidr-ipv4 {
+  type ipv4_addr
+  flags constant, interval
+  elements = { 10.197.0.0/16 }
+}
+
+set service-cidr-ipv6 {
+  type ipv6_addr
+  flags constant, interval
+  elements = { fd00:3e42:e349::/112 }
+}
+
+set pod-cidr-ipv4 {
+  type ipv4_addr
+  flags constant, interval
+  elements = { 10.200.0.0/16 }
+}
+
+set pod-cidr-ipv6 {
+  type ipv6_addr
+  flags constant, interval
+  elements = { 2620:11f:7001:7:ffff:eeee::/96 }
+}
+
+set public-ports {
+  # These are open to all IP addresses
+  type inet_proto . inet_service
+  flags constant
+  elements = {
+    tcp . 22, # ssh
+    tcp . 6443 # kubernetes API server
+  }
+}
+
+set node-to-node-ports {
+  # Ports open for nodes sending packets to nodes
+  type inet_proto . inet_service
+  flags constant, interval
+  elements = {
+    tcp . 2379-2380, # etcd
+    tcp . 4240, # cilium health monitoring
+    tcp . 10250, # kubelet API
+    # udp . 51871, # cilium wireguard
+    # tcp . 30000-32767, # nodeport range
+    # udp . 30000-32767, # nodeport range
+    # udp . 8472, # cilium vxlan
+    tcp . 7946, # MetalLB memberlist
+    udp . 7946 # MetalLB memberlist
+  }
+}
+
+chain rpfilter {
+  type filter hook prerouting priority mangle + 10; policy drop;
+  meta nfproto ipv4 udp sport . udp dport { 68 . 67, 67 . 68 } accept comment "DHCPv4 client/server"
+  # Reverse path forwarding filter. Check that a route exists back to the source address on the interface which received the packet. If the packet came from the wrong interface, then the packet is likely spoofed.
+  fib saddr . mark . iif check exists accept
+  jump rpfilter-allow
+  meta pkttype host log prefix "Failed rpfilter: " level info
+}
+
+chain rpfilter-allow {
+  # Allow packets on internal interfaces from pods
+  meta iifname @internal-iface ip saddr @pod-cidr-ipv4 accept
+  meta iifname @internal-iface ip6 saddr @pod-cidr-ipv6 accept
+}
+
+
+chain input {
+  type filter hook input priority filter; policy drop;
+  iifname "lo" accept comment "trusted interfaces"
+  # Drop invalid connections, accept packets for established or related connections and send packets for new or untracked connections to the input-allow chain
+  ct state vmap { invalid : drop, established : accept, related : accept, new : jump input-allow, untracked : jump input-allow }
+  # If the packet is a new connection and reaches this point, then we are going to reject it. So log that rejection.
+  tcp flags & (fin | syn | rst | ack) == syn log prefix "refused connection: " level info
+  # Log rejected packets destined for this machine (as opposed to packets being routed or broadcast packets)
+  meta pkttype host log prefix "refused packet: " level info
+  # When rejecting packets, send a TCP Reset (RST) instead of simply dropping the packet.
+  meta l4proto tcp reject with tcp reset
+  # Reject any packets that make it here.
+  reject
+}
+
+chain input-allow {
+  # Allow pings.
+  icmp type echo-request accept comment "allow ping"
+  icmpv6 type != { nd-redirect, 139 } accept comment "Accept all ICMPv6 messages except redirects and node information queries (type 139). See RFC 4890, section 4.4."
+  ip6 daddr fe80::/64 udp dport 546 accept comment "DHCPv6 client"
+
+  # Allow public ports
+  meta l4proto . th dport @public-ports accept
+
+  # Allow node to node
+  ip saddr @node-cidr-ipv4 ip daddr @node-cidr-ipv4 meta l4proto . th dport @node-to-node-ports accept
+  ip6 saddr @node-cidr-ipv6 ip6 daddr @node-cidr-ipv6 meta l4proto . th dport @node-to-node-ports accept
+}
+
+chain forward {
+  type filter hook forward priority filter; policy drop;
+  # Drop invalid connections, accept packets for established or related connections and send packets for new or untracked connections to the forward-allow chain
+  ct state vmap { invalid : drop, established : accept, related : accept, new : jump forward-allow, untracked : jump forward-allow }
+
+  log prefix "blocked forwarding packet: " level info
+}
+
+chain forward-allow {
+  icmpv6 type != { router-renumbering, 139 } accept comment "Accept all ICMPv6 messages except renumbering and node information queries (type 139). See RFC 4890, section 4.3."
+
+  # When connection tracking (ct) shows the status as destination nat (dnat) then accept the packet.
+  ct status dnat accept comment "allow port forward"
+
+  # Allow packets from pods
+  ip saddr @pod-cidr-ipv4 accept
+  ip6 saddr @pod-cidr-ipv6 accept
+
+  # Allow node-to-pod
+  ip saddr @node-cidr-ipv4 ip daddr @pod-cidr-ipv4 accept
+  ip6 saddr @node-cidr-ipv6 ip6 daddr @pod-cidr-ipv6 accept
+}