Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions packages/pytcp/pytcp/protocols/tcp/session/tcp__session.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ def __init__(
# See 'state/tcp__state__window.py'.
self._win: WindowState = WindowState()
self._win.rcv_mss = self._egress_interface_mtu() - self._ip_tcp_overhead
# Linux 'net.ipv4.tcp_rmem' (max) — operator ceiling on the receive
# window this session advertises. Seeded once at creation; the default
# preserves the historical 65535-byte cap.
self._win.rcv_wnd_max = tcp__constants.TCP__RCV_WND_MAX

# RFC 4821 / RFC 8899 per-session PLPMTUD adapter.
# Wraps a PmtuSearch engine bound to the remote
Expand Down Expand Up @@ -631,18 +635,32 @@ def _mss_ceiling(self) -> int:
or below the link ceiling so probes have somewhere to
climb to.

Finally the 'tcp.snd_mss_max' operator cap (0 = uncapped)
is applied last so it bounds the send-side MSS regardless of
probing state, WITHOUT touching 'rcv_mss' — the advertised
receive MSS stays at the interface ceiling so a large MTU can
still invite large inbound segments while output stays small.

Reference: RFC 4821 §3 (Probing without ICMP).
Reference: Linux 'tcp_mtu_probing=2' MSS-ceiling semantics.
"""

iface_ceiling = self._egress_interface_mtu() - self._ip_tcp_overhead
if not self._plpmtud_probing_enabled:
return iface_ceiling
base_mss: int = sysctl_iface.get_for_iface(
"tcp.base_mss",
if self._plpmtud_probing_enabled:
base_mss: int = sysctl_iface.get_for_iface(
"tcp.base_mss",
self._egress_interface_name(),
)
ceiling = min(base_mss - self._ip_tcp_overhead, iface_ceiling)
else:
ceiling = iface_ceiling
snd_mss_max: int = sysctl_iface.get_for_iface(
"tcp.snd_mss_max",
self._egress_interface_name(),
)
return min(base_mss - self._ip_tcp_overhead, iface_ceiling)
if snd_mss_max:
ceiling = min(ceiling, snd_mss_max)
return ceiling

def _arm_timer(self, name: str, delay_ms: int, /) -> None:
"""
Expand Down
67 changes: 67 additions & 0 deletions packages/pytcp/pytcp/protocols/tcp/tcp__constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@
# arithmetic-friendly.
TCP__TS_RECENT__OUTDATED_THRESHOLD_MS = 24 * 86_400 * 1_000

# Upper bound on the receive window a session advertises (the analogue of the
# max slot of Linux 'net.ipv4.tcp_rmem'). Inbound bulk throughput is limited to
# window / RTT, so on paths with a high bandwidth-delay product (fast links,
# tunnels) the historical 65535-byte ceiling holds the transfer far under the
# link rate; a larger ceiling lets the peer keep a whole BDP in flight. Values
# well above 64 KiB are representable because PyTCP negotiates RFC 7323 window
# scaling (WSCALE 7). The default stays at 65535 so existing behaviour is
# unchanged; operators raise it to suit the deployment's BDP.
TCP__RCV_WND_MAX = 65_535

# Per-interface conf-plane policy storage. 'dict[str, int]' keyed by
# interface name with a mandatory '"default"' template slot — the
# operator addresses a specific interface ('tcp.<ifname>.<field>') or
Expand Down Expand Up @@ -151,6 +160,18 @@
# block.
TCP__MTU_PROBING: dict[str, int] = {"default": 0}

# Cap on the send-side MSS, i.e. on the largest segment this stack will EMIT.
# It is independent of the receive MSS advertised to the peer. The default, 0,
# disables the cap: 'snd_mss' climbs to 'interface_mtu - overhead' exactly as
# before. Any non-zero value limits outgoing segments WITHOUT shrinking the
# advertised MSS option, so a large interface MTU keeps inviting large inbound
# segments (fast download) while host->peer output stays small. The use case
# is an overlay/tunnel where the host->peer path MTU is below the local
# interface MTU and the narrow hop sits behind a relay that emits no ICMP PTB,
# which classical PMTUD cannot discover. Scoped per interface like
# 'tcp.base_mss'; the floor of 88 (Linux 'TCP_MIN_MSS') mirrors the base-MSS
# knob, and 0 is reserved to mean "off".
TCP__SND_MSS_MAX: dict[str, int] = {"default": 0}


# Sysctl registration. Every constant above is a policy knob,
# operator-tunable at boot via 'stack.init(sysctls={"tcp....": ...})'
Expand Down Expand Up @@ -208,6 +229,26 @@ def validator(value: Any) -> None:
return validator


def _is_zero_or_int_at_least(name: str, *, low: int) -> Any:
"""
Build a validator that accepts 0 (a documented "disabled" sentinel)
or any integer ≥ 'low' — used for opt-in cap knobs whose floor matches
a hard limit (e.g. 'tcp.snd_mss_max' off-or-≥-TCP_MIN_MSS).
"""

def validator(value: Any) -> None:
"""
Raise 'ValueError' unless 'value' is 0 or an int ≥ low.
"""

if isinstance(value, bool) or not isinstance(value, int) or (value != 0 and value < low):
raise ValueError(
f"sysctl {name!r} must be 0 (disabled) or an int ≥ {low}; got {value!r}",
)

return validator


register(
key="tcp.rto.initial_ms",
module_name=__name__,
Expand Down Expand Up @@ -288,6 +329,18 @@ def validator(value: Any) -> None:
validator=is_positive_int("tcp.ts_recent.outdated_threshold_ms"),
description="RFC 7323 §5.5 outdated-timestamps threshold in milliseconds (~24 days).",
)
# Expose the advertised receive-window ceiling as the 'tcp.rcv_wnd_max' sysctl,
# backed by the module-level 'TCP__RCV_WND_MAX' constant.
register(
    key="tcp.rcv_wnd_max",
    module_name=__name__,
    attr="TCP__RCV_WND_MAX",
    default=TCP__RCV_WND_MAX,
    validator=is_positive_int("tcp.rcv_wnd_max"),
    description=(
        "Linux 'net.ipv4.tcp_rmem' (max) — ceiling on the advertised receive window (default 65535). "
        "Raise for high bandwidth-delay-product paths; "
        "WSCALE lets it exceed 64 KiB on the wire."
    ),
)
register(
key="tcp.base_mss",
module_name=__name__,
Expand Down Expand Up @@ -338,6 +391,20 @@ def _tcp_mtu_probing_validator(value: object) -> None:
),
interface_scope=True,
)
# Expose the send-side MSS cap as the per-interface 'tcp.snd_mss_max' sysctl;
# the registered default is the '"default"' slot of 'TCP__SND_MSS_MAX'.
register(
    key="tcp.snd_mss_max",
    module_name=__name__,
    attr="TCP__SND_MSS_MAX",
    default=TCP__SND_MSS_MAX["default"],
    validator=_is_zero_or_int_at_least("tcp.snd_mss_max", low=88),
    description=(
        "Ceiling on the send-side MSS (largest segment emitted), "
        "applied independently of the advertised receive MSS. "
        "0=uncapped (default); a non-zero value bounds host->peer output "
        "for tunnels whose path MTU is below the interface MTU "
        "(floor 88 = Linux TCP_MIN_MSS)."
    ),
    interface_scope=True,
)


def _finalize__persist_max_ge_rto_initial() -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
################################################################################
## ##
## PyTCP - Python TCP/IP stack ##
## Copyright (C) 2020-present Sebastian Majewski ##
## ##
## This program is free software: you can redistribute it and/or modify ##
## it under the terms of the GNU General Public License as published by ##
## the Free Software Foundation, either version 3 of the License, or ##
## (at your option) any later version. ##
## ##
## This program is distributed in the hope that it will be useful, ##
## but WITHOUT ANY WARRANTY; without even the implied warranty of ##
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ##
## GNU General Public License for more details. ##
## ##
## You should have received a copy of the GNU General Public License ##
## along with this program. If not, see <https://www.gnu.org/licenses/>. ##
## ##
## Author's email: ccie18643@gmail.com ##
## Github repository: https://github.com/ccie18643/PyTCP ##
## ##
################################################################################


"""
This module contains the session-level behaviour pins for the two TCP
throughput sysctls: 'tcp.rcv_wnd_max' (the advertised receive-window
ceiling, seeded into 'WindowState.rcv_wnd_max' at session creation) and
'tcp.snd_mss_max' (the send-side MSS cap applied in '_mss_ceiling()'
independently of the advertised receive MSS).

The registration / validator / override-round-trip pins live in
'test__tcp__sysctls.py'; this file pins that a live session actually
honours the knobs.

pytcp/tests/integration/protocols/tcp/test__tcp__session__throughput_knobs.py

ver 3.0.7
"""

from typing import override

from net_addr import Ip4Address
from pytcp import stack
from pytcp.protocols.tcp.session import TcpSession
from pytcp.socket import AddressFamily
from pytcp.socket.tcp__socket import TcpSocket
from pytcp.stack import sysctl as sysctl_module
from pytcp.tests.lib.network_testcase import (
HOST_A__IP4_ADDRESS,
STACK__IP4_HOST,
)
from pytcp.tests.lib.tcp_testcase import TcpTestCase

STACK__IP: Ip4Address = STACK__IP4_HOST.address
STACK__PORT: int = 12345
PEER__IP: Ip4Address = HOST_A__IP4_ADDRESS
PEER__PORT: int = 80
LOCAL__ISS: int = 0x0000_1000


class _ThroughputKnobFixture(TcpTestCase):
    """
    Common base for the throughput-knob tests. Every test ends by
    putting all sysctl slots back to their registered defaults, so an
    override made by one test is never visible to the next one.
    """

    @override
    def tearDown(self) -> None:
        """
        Reset the sysctl registry to its defaults, then run the base
        class teardown.
        """

        sysctl_module.reset_to_defaults()
        super().tearDown()

    def _make_session(self) -> TcpSession:
        """
        Create an IPv4 session towards PEER that has not been started.
        No handshake takes place, which is enough to inspect the window
        state set up by '__init__' and to call '_mss_ceiling()'.
        """

        self._force_iss(LOCAL__ISS)

        # Socket pre-populated with the same 4-tuple the session uses.
        tcp_socket = TcpSocket(family=AddressFamily.INET4)
        tcp_socket._local_ip_address, tcp_socket._local_port = STACK__IP, STACK__PORT
        tcp_socket._remote_ip_address, tcp_socket._remote_port = PEER__IP, PEER__PORT

        new_session = TcpSession(
            local_ip_address=STACK__IP,
            local_port=STACK__PORT,
            remote_ip_address=PEER__IP,
            remote_port=PEER__PORT,
            socket=tcp_socket,
        )

        # Link socket and session, then publish the socket to the stack.
        tcp_socket._tcp_session = new_session
        stack.sockets[tcp_socket.socket_id] = tcp_socket

        return new_session


class TestTcpRcvWndMax(_ThroughputKnobFixture):
    """
    Tests for how 'tcp.rcv_wnd_max' is seeded into a new session.
    """

    def test__tcp__rcv_wnd_max__default_seeds_65535(self) -> None:
        """
        Ensure that, with no override, a new session takes its
        'WindowState.rcv_wnd_max' from the registered default and so
        keeps the historical 65535-byte advertised-window ceiling.

        Reference: Linux net.ipv4.tcp_rmem (receive-window max).
        """

        seeded_ceiling = self._make_session()._win.rcv_wnd_max

        self.assertEqual(
            seeded_ceiling,
            65535,
            msg="Default 'tcp.rcv_wnd_max' must seed the session window ceiling at 65535.",
        )

    def test__tcp__rcv_wnd_max__override_seeds_session(self) -> None:
        """
        Ensure a session created after 'tcp.rcv_wnd_max' was raised
        carries the raised value as its per-session ceiling, i.e. the
        session reads the live sysctl rather than a stale default.

        Reference: Linux net.ipv4.tcp_rmem (receive-window max).
        """

        raised_ceiling = 4 * 1024 * 1024
        sysctl_module.set("tcp.rcv_wnd_max", raised_ceiling)

        seeded_ceiling = self._make_session()._win.rcv_wnd_max

        self.assertEqual(
            seeded_ceiling,
            raised_ceiling,
            msg="A session must seed 'rcv_wnd_max' from the live 'tcp.rcv_wnd_max' value.",
        )


class TestTcpSndMssMax(_ThroughputKnobFixture):
    """
    Tests for the 'tcp.snd_mss_max' cap applied by '_mss_ceiling()'.
    """

    def test__tcp__snd_mss_max__default_uncapped(self) -> None:
        """
        Ensure the cap does nothing at its default of 0: the send-side
        MSS ceiling equals the interface ceiling
        ('interface_mtu - overhead').

        Reference: PyTCP test infrastructure (no RFC clause).
        """

        sess = self._make_session()
        link_ceiling = sess._egress_interface_mtu() - sess._ip_tcp_overhead

        self.assertEqual(
            sess._mss_ceiling(),
            link_ceiling,
            msg="With the cap disabled, '_mss_ceiling()' must equal 'interface_mtu - overhead'.",
        )

    def test__tcp__snd_mss_max__caps_send_ceiling(self) -> None:
        """
        Ensure a non-zero 'tcp.snd_mss_max' pulls '_mss_ceiling()' down
        to the configured value, while the advertised receive MSS
        ('rcv_mss') remains at the interface ceiling — inbound segments
        may stay large even though outbound ones are kept small.

        Reference: PyTCP test infrastructure (no RFC clause).
        """

        send_cap = 576
        sysctl_module.set("tcp.default.snd_mss_max", send_cap)

        sess = self._make_session()
        link_ceiling = sess._egress_interface_mtu() - sess._ip_tcp_overhead

        self.assertEqual(
            sess._mss_ceiling(),
            send_cap,
            msg="A non-zero 'tcp.snd_mss_max' must cap the send-side MSS ceiling.",
        )
        self.assertEqual(
            sess._win.rcv_mss,
            link_ceiling,
            msg="'tcp.snd_mss_max' must NOT lower the advertised receive MSS.",
        )

    def test__tcp__snd_mss_max__cap_above_interface_is_inert(self) -> None:
        """
        Ensure a 'tcp.snd_mss_max' above the interface ceiling changes
        nothing: '_mss_ceiling()' stays at the interface ceiling, since
        the cap can only lower the send MSS and never raise it.

        Reference: PyTCP test infrastructure (no RFC clause).
        """

        # A first session is built only to learn the interface ceiling,
        # which the oversized cap below is derived from.
        probe = self._make_session()
        link_ceiling = probe._egress_interface_mtu() - probe._ip_tcp_overhead

        sysctl_module.set("tcp.default.snd_mss_max", link_ceiling + 1000)

        sess = self._make_session()

        self.assertEqual(
            sess._mss_ceiling(),
            link_ceiling,
            msg="A cap above the interface ceiling must be inert.",
        )
Loading