Skip to content

Commit 4c4c869

Browse files
authored
[Core] Add jitter to token refresh intervals (#43720)
Reduce thundering herds of refresh requests by adding some jitter. Signed-off-by: Paul Van Eck <paulvaneck@microsoft.com>
1 parent aa65d3c commit 4c4c869

5 files changed

Lines changed: 256 additions & 25 deletions

File tree

sdk/core/azure-core/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
### Other Changes
1414

15+
- Added jitter to token refresh timing in `BearerTokenCredentialPolicy` and `AsyncBearerTokenCredentialPolicy` to prevent simultaneous token refresh attempts across multiple processes. This helps mitigate the thundering herd problem during token refresh operations. #43720
16+
1517
## 1.38.2 (2026-02-18)
1618

1719
### Bugs Fixed

sdk/core/azure-core/azure/core/pipeline/policies/_authentication.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# -------------------------------------------------------------------------
66
import time
77
import base64
8+
import random
89
from typing import TYPE_CHECKING, Optional, TypeVar, MutableMapping, Any, Union, cast
910

1011
from azure.core.credentials import (
@@ -36,6 +37,38 @@
3637
HTTPResponseType = TypeVar("HTTPResponseType", HttpResponse, LegacyHttpResponse)
3738
HTTPRequestType = TypeVar("HTTPRequestType", HttpRequest, LegacyHttpRequest)
3839

40+
DEFAULT_REFRESH_WINDOW_SECONDS = 300 # 5 minutes
41+
MAX_REFRESH_JITTER_SECONDS = 60 # 1 minute
42+
43+
44+
def _should_refresh_token(token: Optional[Union["AccessToken", "AccessTokenInfo"]], refresh_jitter: int) -> bool:
    """Decide whether the cached token has to be replaced with a newly requested one.

    A new token is needed when there is no token, when the token has already expired, when its
    ``refresh_on`` time plus the jitter has passed, or - for tokens without a ``refresh_on`` value -
    when the time left until expiry falls inside the default refresh window shortened by the jitter.

    :param token: The current token or None if no token exists
    :type token: Optional[Union[~azure.core.credentials.AccessToken, ~azure.core.credentials.AccessTokenInfo]]
    :param int refresh_jitter: The number of seconds by which the refresh is postponed
    :return: True if a new token is needed, False otherwise
    :rtype: bool
    """
    if not token:
        return True

    current_time = time.time()
    if token.expires_on <= current_time:
        return True

    suggested_refresh = getattr(token, "refresh_on", None)
    if not suggested_refresh:
        # No refresh hint on the token: fall back to the default window before expiry. Subtracting the
        # jitter shrinks that window, which postpones the refresh and spreads the load.
        seconds_left = token.expires_on - current_time
        return seconds_left < (DEFAULT_REFRESH_WINDOW_SECONDS - refresh_jitter)

    # Postpone the suggested refresh time by the jitter, but never beyond the expiration itself.
    # The cap is only a safeguard, since refresh_on is typically well before expires_on.
    jittered_refresh = min(suggested_refresh + refresh_jitter, token.expires_on)
    return jittered_refresh <= current_time
71+
3972

4073
# pylint:disable=too-few-public-methods
4174
class _BearerTokenCredentialPolicyBase:
@@ -54,6 +87,7 @@ def __init__(self, credential: TokenProvider, *scopes: str, **kwargs: Any) -> No
5487
self._credential = credential
5588
self._token: Optional[Union["AccessToken", "AccessTokenInfo"]] = None
5689
self._enable_cae: bool = kwargs.get("enable_cae", False)
90+
self._refresh_jitter = 0
5791

5892
@staticmethod
5993
def _enforce_https(request: PipelineRequest[HTTPRequestType]) -> None:
@@ -82,9 +116,7 @@ def _update_headers(headers: MutableMapping[str, str], token: str) -> None:
82116

83117
@property
84118
def _need_new_token(self) -> bool:
85-
now = time.time()
86-
refresh_on = getattr(self._token, "refresh_on", None)
87-
return not self._token or (refresh_on and refresh_on <= now) or self._token.expires_on - now < 300
119+
return _should_refresh_token(self._token, self._refresh_jitter)
88120

89121
def _get_token(self, *scopes: str, **kwargs: Any) -> Union["AccessToken", "AccessTokenInfo"]:
90122
if self._enable_cae:
@@ -108,6 +140,7 @@ def _request_token(self, *scopes: str, **kwargs: Any) -> None:
108140
:param str scopes: The type of access needed.
109141
"""
110142
self._token = self._get_token(*scopes, **kwargs)
143+
self._refresh_jitter = random.randint(0, MAX_REFRESH_JITTER_SECONDS)
111144

112145

113146
class BearerTokenCredentialPolicy(_BearerTokenCredentialPolicyBase, HTTPPolicy[HTTPRequestType, HTTPResponseType]):

sdk/core/azure-core/azure/core/pipeline/policies/_authentication_async.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Licensed under the MIT License. See LICENSE.txt in the project root for
44
# license information.
55
# -------------------------------------------------------------------------
6-
import time
6+
import random
77
import base64
88
from typing import Any, Awaitable, Optional, cast, TypeVar, Union
99

@@ -18,6 +18,8 @@
1818
from azure.core.pipeline.policies import AsyncHTTPPolicy
1919
from azure.core.pipeline.policies._authentication import (
2020
_BearerTokenCredentialPolicyBase,
21+
_should_refresh_token,
22+
MAX_REFRESH_JITTER_SECONDS,
2123
)
2224
from azure.core.pipeline.transport import (
2325
AsyncHttpResponse as LegacyAsyncHttpResponse,
@@ -50,6 +52,7 @@ def __init__(self, credential: AsyncTokenProvider, *scopes: str, **kwargs: Any)
5052
self._lock_instance = None
5153
self._token: Optional[Union["AccessToken", "AccessTokenInfo"]] = None
5254
self._enable_cae: bool = kwargs.get("enable_cae", False)
55+
self._refresh_jitter = 0
5356

5457
@property
5558
def _lock(self):
@@ -192,9 +195,7 @@ def on_exception(self, request: PipelineRequest[HTTPRequestType]) -> None:
192195
return
193196

194197
def _need_new_token(self) -> bool:
195-
now = time.time()
196-
refresh_on = getattr(self._token, "refresh_on", None)
197-
return not self._token or (refresh_on and refresh_on <= now) or self._token.expires_on - now < 300
198+
return _should_refresh_token(self._token, self._refresh_jitter)
198199

199200
async def _get_token(self, *scopes: str, **kwargs: Any) -> Union["AccessToken", "AccessTokenInfo"]:
200201
if self._enable_cae:
@@ -226,3 +227,4 @@ async def _request_token(self, *scopes: str, **kwargs: Any) -> None:
226227
:param str scopes: The type of access needed.
227228
"""
228229
self._token = await self._get_token(*scopes, **kwargs)
230+
self._refresh_jitter = random.randint(0, MAX_REFRESH_JITTER_SECONDS)

sdk/core/azure-core/tests/async_tests/test_authentication_async.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
AsyncRedirectPolicy,
2121
SensitiveHeaderCleanupPolicy,
2222
)
23+
from azure.core.pipeline.policies._authentication import MAX_REFRESH_JITTER_SECONDS
2324
from azure.core.pipeline.transport import AsyncHttpTransport, HttpRequest
2425
import pytest
2526
import trio
@@ -244,7 +245,9 @@ async def test_bearer_policy_access_token_info_caching(http_request):
244245
await pipeline.run(http_request("GET", "https://spam.eggs"))
245246
assert credential.get_token_info.call_count == 2 # token is expired -> policy should call get_token_info again
246247

247-
refreshable_token = AccessTokenInfo("token", int(time.time() + 3600), refresh_on=int(time.time() - 1))
248+
refreshable_token = AccessTokenInfo(
249+
"token", int(time.time() + 3600), refresh_on=int(time.time() - (MAX_REFRESH_JITTER_SECONDS + 5))
250+
)
248251
credential.get_token_info.reset_mock()
249252
credential.get_token_info.return_value = refreshable_token
250253
pipeline = AsyncPipeline(transport=AsyncMock(), policies=[AsyncBearerTokenCredentialPolicy(credential, "scope")])
@@ -735,3 +738,33 @@ async def mock_transport_send(request, **kwargs):
735738
# Verify the exception chaining
736739
assert exc_info.value.__cause__ is not None
737740
assert isinstance(exc_info.value.__cause__, HttpResponseError)
741+
742+
743+
@pytest.mark.asyncio
async def test_jitter_set_on_token_request_async():
    """Verify that the async policy stores a newly drawn _refresh_jitter on every _request_token call."""
    credential = AsyncMock(spec_set=["get_token"])
    credential.get_token.return_value = AccessToken("test_token", int(time.time()) + 3600)
    policy = AsyncBearerTokenCredentialPolicy(credential, "scope")

    # Before any token has been requested the jitter is still at its initial value.
    assert policy._refresh_jitter == 0

    randint_target = "azure.core.pipeline.policies._authentication_async.random.randint"

    # The first value checks that jitter is set on a token request; the second checks that a
    # subsequent token request replaces the previously stored jitter.
    for expected_jitter in (42, 25):
        with patch(randint_target) as mock_randint:
            mock_randint.return_value = expected_jitter

            await policy._request_token("scope")

            assert policy._refresh_jitter == expected_jitter
            mock_randint.assert_called_once_with(0, MAX_REFRESH_JITTER_SECONDS)

0 commit comments

Comments
 (0)