From 61e2f2877db8deb161d5584dfa6be8b3c4bb757b Mon Sep 17 00:00:00 2001 From: Yang Song Date: Tue, 13 Jan 2026 04:10:18 +0800 Subject: [PATCH] [posix] handle infra interface index change (#12059) This change updates the netlink message handling for the infra link on the POSIX platform to be more robust and efficient for `RTM_NEWLINK` and `RTM_DELLINK` messages. The logic for handling infra interface index changes is: 1. On `RTM_DELLINK`, the code verifies that `ifinfo->ifi_index` matches the current infra interface index `mInfraIfIndex`. 2. On `RTM_NEWLINK`, it identifies the infra interface by comparing the name returned by `if_indextoname(ifinfo->ifi_index)` with the configured infra interface name, and updates `mInfraIfIndex`. --- .github/workflows/posix.yml | 23 ++++ script/check-infra-if-index-changed | 173 ++++++++++++++++++++++++++++ src/posix/platform/infra_if.cpp | 94 +++++++++++---- src/posix/platform/infra_if.hpp | 1 + 4 files changed, 271 insertions(+), 20 deletions(-) create mode 100755 script/check-infra-if-index-changed diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 80015b30e..6ca5f4b7b 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -236,6 +236,29 @@ jobs: path: tmp/coverage.info retention-days: 1 + infra-if-index-changed-linux: + name: infra-if-index-changed-linux + runs-on: ubuntu-22.04 + steps: + - name: Harden Runner + uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 + with: + egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs + + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + submodules: recursive + - name: Bootstrap + run: | + sudo apt-get update + sudo apt-get --no-install-recommends install -y net-tools ninja-build + - name: Build + run: | + script/check-infra-if-index-changed build + - name: Run + run: | + script/check-infra-if-index-changed check + pty-macos: name: pty-macos OT_DAEMON=${{ matrix.OT_DAEMON }} runs-on: macos-14 diff --git a/script/check-infra-if-index-changed 
b/script/check-infra-if-index-changed new file mode 100755 index 000000000..d72ff7879 --- /dev/null +++ b/script/check-infra-if-index-changed @@ -0,0 +1,173 @@ +#!/bin/bash +# +# Copyright (c) 2025, The OpenThread Authors. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +set -euxo pipefail + +die() +{ + echo " *** ERROR: " "$*" + exit 1 +} + +at_exit() +{ + EXIT_CODE=$? 
+ + sudo killall ot-daemon || true + + sudo ip link del veth-a || true + sudo ip link del veth-b || true + + exit $EXIT_CODE +} + +wait_for() +{ + local command="$1" + local expect="$2" + + local count=15 + while [ ${count} -ne 0 ]; do + count=$((count - 1)) + eval sudo "${OT_CTL_PATH}" "${command}" | grep -q "${expect}" && return 0 + sleep 1 + done + + return 1 +} + +wait_for_onlinkprefix() +{ + local count=30 + + while [ "$count" -gt 0 ]; do + onlinkprefix=$(sudo "${OT_CTL_PATH}" br onlinkprefix favored) + onlinkprefix_addr=${onlinkprefix%%::*} + if [ -n "$onlinkprefix_addr" ]; then + if sudo ip addr show dev veth-a | grep -q "${onlinkprefix_addr}"; then + return 0 + fi + fi + sleep 1 + count=$((count - 1)) + done + + echo "Timed out waiting for onlink prefix on veth-a" + return 1 +} + +setup_veth() +{ + sudo ip link add veth-a type veth peer name veth-b + sudo ip link set veth-a up + sudo ip link set veth-b up + sudo sysctl -w net.ipv6.conf.veth-a.accept_ra=2 + sudo sysctl -w net.ipv6.conf.veth-a.accept_ra_rt_info_max_plen=128 +} + +do_build() +{ + ./script/cmake-build simulation + ./script/cmake-build posix -DOT_BORDER_ROUTING=ON -DOT_PLATFORM_NETIF=1 -DOT_PLATFORM_UDP=1 -DOT_UDP_FORWARD=0 -DOT_POSIX_MAX_POWER_TABLE=1 -DOT_DAEMON=ON +} + +do_check() +{ + trap at_exit INT TERM EXIT + + export OT_CTL_PATH="$PWD/build/posix/src/posix/ot-ctl" + + # Setup virtual ethernet pair + sudo ip link del veth-a || true + sudo ip link del veth-b || true + + setup_veth + + RADIO_NCP_PATH="$PWD/build/simulation/examples/apps/ncp/ot-rcp" + RADIO_URL="spinel+hdlc+forkpty://${RADIO_NCP_PATH}?forkpty-arg=2" + + sudo -E "$PWD/build/posix/src/posix/ot-daemon" -d7 -v -I wpan0 -B veth-a "${RADIO_URL}" & + wait_for "state" "Done" || die "failed to start daemon" + + # verify this reset and factoryreset end immediately + sudo "${OT_CTL_PATH}" reset + wait_for "state" "disabled" || die "daemon did not enter disabled state after reset" + + sudo "${OT_CTL_PATH}" factoryreset + wait_for 
"state" "disabled" || die "daemon did not enter disabled state after factoryreset" + + # Bring up Thread network + sudo "${OT_CTL_PATH}" dataset init new || die "failed to init new dataset" + sudo "${OT_CTL_PATH}" dataset commit active || die "failed to commit active dataset" + sudo "${OT_CTL_PATH}" ifconfig up || die "failed to bring ifconfig up" + sudo "${OT_CTL_PATH}" thread start || die "failed to start thread" + + wait_for "state" "leader" || die "did not become leader" + wait_for "br state" "running" || die "failed to start br" + + # Simulate infra interface index change + sudo ip link del veth-a + wait_for "br state" "stopped" || die "failed to stop br" + + setup_veth + + # Verify border routing is still functional by checking for a published prefix + # The border router should re-detect the interface and re-establish routing. + # A key indicator of this is the successful publication of the onlink prefix. + wait_for_onlinkprefix + + echo "Test passed: OTBR correctly handled infra interface index change." +} + +main() +{ + if [[ $# == 0 ]]; then + do_build + do_check + return 0 + fi + + while [[ $# != 0 ]]; do + case $1 in + build) + do_build + ;; + check) + do_check + ;; + *) + echo "Unknown action: $1" + return 1 + ;; + esac + shift + done +} + +main "$@" diff --git a/src/posix/platform/infra_if.cpp b/src/posix/platform/infra_if.cpp index 514328f52..abe975824 100644 --- a/src/posix/platform/infra_if.cpp +++ b/src/posix/platform/infra_if.cpp @@ -305,7 +305,7 @@ exit: bool InfraNetif::IsRunning(void) const { return mInfraIfIndex - ? (if_nametoindex(mInfraIfName) == mInfraIfIndex && (GetFlags() & IFF_RUNNING) && HasLinkLocalAddress()) + ? 
(if_nametoindex(mInfraIfName) == mInfraIfIndex && HasLinkLocalAddress() && (GetFlags() & IFF_RUNNING)) : false; } @@ -572,6 +572,74 @@ exit: #ifdef __linux__ +void InfraNetif::ProcessNetLinkMessage(const struct nlmsghdr *aNetlinkMessage) +{ + switch (aNetlinkMessage->nlmsg_type) + { + case RTM_DELADDR: + case RTM_NEWADDR: + { + const struct ifaddrmsg *ifaddr = reinterpret_cast(NLMSG_DATA(aNetlinkMessage)); + + VerifyOrExit(ifaddr->ifa_index == mInfraIfIndex); + + // Address added/removed on current interface. This might indicate link local address is added/removed. We + // need to check and update its running state. +#if OPENTHREAD_CONFIG_BORDER_ROUTING_ENABLE + SuccessOrDie(otPlatInfraIfStateChanged(gInstance, mInfraIfIndex, IsRunning())); +#endif + break; + } + case RTM_DELLINK: + { + const struct ifinfomsg *ifinfo = reinterpret_cast(NLMSG_DATA(aNetlinkMessage)); + + VerifyOrExit(ifinfo->ifi_index == static_cast(mInfraIfIndex)); + + // The current interface is deleted. We must update its running state to false. +#if OPENTHREAD_CONFIG_BORDER_ROUTING_ENABLE + SuccessOrDie(otPlatInfraIfStateChanged(gInstance, mInfraIfIndex, /* aIsRunning */ false)); +#endif + + mInfraIfIndex = 0; + break; + } + case RTM_NEWLINK: + { + const struct ifinfomsg *ifinfo = reinterpret_cast(NLMSG_DATA(aNetlinkMessage)); + + // The interface is re-created: + // 1. If the interface index stays the same, we simply check and update the running state. + // 2. If the interface is re-created with a different index, we need to re-initialize the Border Routing state + // with the new index. 
+ char ifname[IF_NAMESIZE] = {}; + + VerifyOrExit(if_indextoname(ifinfo->ifi_index, ifname) != nullptr && strcmp(ifname, mInfraIfName) == 0); + + if (ifinfo->ifi_index != static_cast(mInfraIfIndex)) + { + LogInfo("The infra interface index changed from %u to %d", mInfraIfIndex, ifinfo->ifi_index); + mInfraIfIndex = static_cast(ifinfo->ifi_index); +#if OPENTHREAD_CONFIG_BORDER_ROUTING_ENABLE + SuccessOrDie(otBorderRoutingInit(gInstance, mInfraIfIndex, IsRunning())); +#endif + } + else + { +#if OPENTHREAD_CONFIG_BORDER_ROUTING_ENABLE + SuccessOrDie(otPlatInfraIfStateChanged(gInstance, mInfraIfIndex, IsRunning())); +#endif + } + break; + } + default: + break; + } + +exit: + return; +} + void InfraNetif::ReceiveNetLinkMessage(void) { const size_t kMaxNetLinkBufSize = 8192; @@ -592,29 +660,15 @@ void InfraNetif::ReceiveNetLinkMessage(void) for (struct nlmsghdr *header = &msgBuffer.mHeader; NLMSG_OK(header, static_cast(len)); header = NLMSG_NEXT(header, len)) { - switch (header->nlmsg_type) + if (header->nlmsg_type == NLMSG_ERROR) { - // There are no effective netlink message types to get us notified - // of interface RUNNING state changes. But addresses events are - // usually associated with interface state changes. 
- case RTM_NEWADDR: - case RTM_DELADDR: - case RTM_NEWLINK: - case RTM_DELLINK: -#if OPENTHREAD_CONFIG_BORDER_ROUTING_ENABLE - SuccessOrDie(otPlatInfraIfStateChanged(gInstance, mInfraIfIndex, otSysInfraIfIsRunning())); -#endif - break; - case NLMSG_ERROR: - { - struct nlmsgerr *errMsg = reinterpret_cast(NLMSG_DATA(header)); + const struct nlmsgerr *errMsg = reinterpret_cast(NLMSG_DATA(header)); - OT_UNUSED_VARIABLE(errMsg); LogWarn("netlink NLMSG_ERROR response: seq=%u, error=%d", header->nlmsg_seq, errMsg->error); - break; } - default: - break; + else + { + ProcessNetLinkMessage(header); } } diff --git a/src/posix/platform/infra_if.hpp b/src/posix/platform/infra_if.hpp index e0c58ad5b..02be4cd0a 100644 --- a/src/posix/platform/infra_if.hpp +++ b/src/posix/platform/infra_if.hpp @@ -228,6 +228,7 @@ private: bool HasLinkLocalAddress(void) const; #ifdef __linux__ + void ProcessNetLinkMessage(const struct nlmsghdr *aNetlinkMessage); void ReceiveNetLinkMessage(void); #endif