feat(releases): Cache calls to compare-commits #112494
Conversation
- Added a new org feature flag in `src/sentry/features/temporary.py`: - `organizations:integrations-github-fetch-commits-compare-cache` - Refactored commit fetching in `src/sentry/tasks/commits.py`: - Added `fetch_compare_commits(...)` helper to centralize compare-commits behavior. - Added cache key generation with `get_github_compare_commits_cache_key(...)`. - Added cache-backed reuse of GitHub compare-commits results (TTL: 120s), gated by the new feature flag. - Limited caching to GitHub providers (`integrations:github`, `integrations:github_enterprise`) and only when `start_sha` is present. - Added lifecycle extras for cache telemetry (`compare_commits_cache_enabled`, `compare_commits_cache_hit`). - `fetch_commits(...)` now evaluates the feature flag once and routes compare calls through the helper. - Added provider-level test coverage in `tests/sentry/integrations/github/test_repository.py`: - New test verifies repeated compare calls reuse cached patchset data and reduce API calls. - Added task-level cache behavior tests in `tests/sentry/tasks/test_commits.py`: - Cache disabled: compare called on each fetch. - Cache enabled: compare result reused across releases with the same compare range. - Cache key variance: different `end_sha` values do not share cache entries. - Included a follow-up typing fix in `src/sentry/tasks/commits.py`: - Narrowed `repo.provider` to `str` before cache-key creation to satisfy mypy (`str | None` -> `str`).
88b35be to
6b1a01d
Compare
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
Autofix Details
Bugbot Autofix prepared a fix for the issue found in the latest run.
- ✅ Fixed: Provider parameter overwritten with string, breaking compare_commits calls
- Renamed repo.provider assignment to repo_provider_name to preserve the provider object parameter for compare_commits calls.
Or push these changes by commenting:
@cursor push 24519ed48a
Preview (24519ed48a)
diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py
--- a/src/sentry/tasks/commits.py
+++ b/src/sentry/tasks/commits.py
@@ -95,15 +95,15 @@
lifecycle,
):
cache_key = None
- provider = repo.provider
+ repo_provider_name = repo.provider
if (
cache_enabled
- and isinstance(provider, str)
- and provider in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
+ and isinstance(repo_provider_name, str)
+ and repo_provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
and start_sha is not None
):
cache_key = get_github_compare_commits_cache_key(
- repo.organization_id, repo.id, provider, start_sha, end_sha
+ repo.organization_id, repo.id, repo_provider_name, start_sha, end_sha
)
if cache_key is not None:You can send follow-ups to the cloud agent here.
This comment was marked as outdated.
This comment was marked as outdated.
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
Bugbot Autofix prepared a fix for the issue found in the latest run.
- ✅ Fixed: Tests pass wrong kwarg, silently breaking cache validation
- Changed all 13 occurrences of 'previous_release_id=' to 'prev_release_id=' to match the actual function parameter name, enabling proper cache validation testing.
Or push these changes by commenting:
@cursor push c275806d00
Preview (c275806d00)
diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py
--- a/tests/sentry/tasks/test_commits.py
+++ b/tests/sentry/tasks/test_commits.py
@@ -52,7 +52,7 @@
release_id=release2.id,
user_id=user.id,
refs=refs,
- previous_release_id=release.id,
+ prev_release_id=release.id,
)
commit_list = list(
@@ -129,13 +129,13 @@
release_id=first_release.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
assert mock_compare_commits.call_count == 2
@@ -178,13 +178,13 @@
release_id=first_release.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
assert mock_compare_commits.call_count == 1
@@ -231,13 +231,13 @@
release_id=first_release.id,
user_id=self.user.id,
refs=refs_first,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs_second,
- previous_release_id=previous_release.id,
+ prev_release_id=previous_release.id,
)
assert mock_compare_commits.call_count == 2
@@ -264,7 +264,7 @@
release_id=new_release.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=old_release.id,
+ prev_release_id=old_release.id,
)
count_query = ReleaseHeadCommit.objects.filter(release=new_release)
# No release commits should be made as the task should return early.
@@ -297,7 +297,7 @@
mock_compare_commits.side_effect = InvalidIdentity(identity=usa)
fetch_commits(
- release_id=release2.id, user_id=self.user.id, refs=refs, previous_release_id=release.id
+ release_id=release2.id, user_id=self.user.id, refs=refs, prev_release_id=release.id
)
mock_handle_invalid_identity.assert_called_once_with(identity=usa, commit_failure=True)
@@ -334,7 +334,7 @@
release_id=release2.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=release.id,
+ prev_release_id=release.id,
)
msg = mail.outbox[-1]
@@ -375,7 +375,7 @@
release_id=release2.id,
user_id=sentry_app.proxy_user_id,
refs=refs,
- previous_release_id=release.id,
+ prev_release_id=release.id,
)
msg = mail.outbox[-1]
@@ -415,7 +415,7 @@
release_id=release2.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=release.id,
+ prev_release_id=release.id,
)
msg = mail.outbox[-1]
@@ -456,7 +456,7 @@
release_id=release2.id,
user_id=self.user.id,
refs=refs,
- previous_release_id=release.id,
+ prev_release_id=release.id,
)
msg = mail.outbox[-1]You can send follow-ups to the cloud agent here.
Reviewed by Cursor Bugbot for commit 6bab863. Configure here.
|
@sentry review |
Backend Test FailuresFailures on
|
|
@sentry review |
| @@ -1,12 +1,15 @@ | |||
| from __future__ import annotations | |||
|
|
|||
| import logging | |||
| from collections.abc import Mapping, Sequence | |||
There was a problem hiding this comment.
I'm adding stronger typing for this PR to be a bit more certain of the changes.
| user: RpcUser | None, | ||
| lifecycle: Any, | ||
| ) -> list[dict[str, Any]]: | ||
| if cache_enabled: |
There was a problem hiding this comment.
This if/else block is the new caching mechanism.
| return f"fetch-commits:compare-commits:v1:{digest}" | ||
|
|
||
|
|
||
| def fetch_compare_commits( |
There was a problem hiding this comment.
As part of this PR, I'm extracting some of the code in the main for/loop into a couple of new functions (to make the final code easier to read):
- fetch_compare_commits
- get_repo_and_provider_for_ref
sentry/src/sentry/tasks/commits.py
Line 100 in 82814b2
| else: | ||
| lifecycle.add_extra("compare_commits_cache_enabled", False) | ||
|
|
||
| if is_integration_repo_provider: |
There was a problem hiding this comment.
This if/else comes from the main loop:
sentry/src/sentry/tasks/commits.py
Lines 174 to 177 in 82814b2
| ref: Mapping[str, str], | ||
| user_id: int, | ||
| ) -> tuple[Repository, Any, bool, str] | None: | ||
| repo = ( |
There was a problem hiding this comment.
The code here comes from the main loop (no logic is changed):
sentry/src/sentry/tasks/commits.py
Lines 101 to 130 in 82814b2
sentry/src/sentry/tasks/commits.py
Lines 149 to 155 in 82814b2
| continue | ||
| repo, provider, is_integration_repo_provider, provider_key = resolved |
There was a problem hiding this comment.
All the removed lines above are replaced with these four lines.
| provider_cls.repo_provider | ||
| if is_integration_repo_provider | ||
| else provider_cls.auth_provider | ||
| ) |
There was a problem hiding this comment.
Moved within get_repo_and_provider_for_ref.
| compare_commits_cache_enabled = ( | ||
| github_compare_commits_cache_feature_enabled | ||
| and isinstance(provider_name, str) | ||
| and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS |
There was a problem hiding this comment.
I don't want to mess with all the other integrations.
| prev_release_id=previous_release.id, | ||
| ) | ||
|
|
||
| assert mock_compare_commits.call_count == 2 |
There was a problem hiding this comment.
Without the feature we call it twice.
| {"id": end_sha, "repository": repo_name}, | ||
| ] | ||
|
|
||
| def _setup_github_compare_commits_cache_context(self): |
There was a problem hiding this comment.
This helper function will simplify a lot of the tests in this module.
| @@ -44,7 +87,7 @@ def _test_simple_action(self, user, org): | |||
| release_id=release2.id, | |||
| user_id=user.id, | |||
| refs=refs, | |||
| previous_release_id=release.id, | |||
| prev_release_id=release.id, | |||
There was a problem hiding this comment.
This bug can be found in many of the tests in this file. The task does not have previous_release_id but prev_release_id:
sentry/src/sentry/tasks/commits.py
Line 83 in fb92eb8
| prev_release_id=previous_release.id, | ||
| ) | ||
|
|
||
| assert mock_compare_commits.call_count == 1 |
There was a problem hiding this comment.
When the feature flag is enabled, we only make one call.
| assert mock_compare_commits.call_count == 1 | ||
|
|
||
| @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") | ||
| def test_github_compare_commits_cache_key_variance_on_end_sha( |
There was a problem hiding this comment.
This test verifies that cache entries are keyed by commit range, not just repo/org.
| fetch_commits( | ||
| release_id=first_release.id, | ||
| user_id=self.user.id, | ||
| refs=refs_first, |
| fetch_commits( | ||
| release_id=second_release.id, | ||
| user_id=self.user.id, | ||
| refs=refs_second, |
| prev_release_id=previous_release.id, | ||
| ) | ||
|
|
||
| assert mock_compare_commits.call_count == 2 |
There was a problem hiding this comment.
They each will make a call.
| @@ -80,9 +192,14 @@ def handle_invalid_identity(identity, commit_failure=False): | |||
| silo_mode=SiloMode.CELL, | |||
| ) | |||
| @retry(exclude=(Release.DoesNotExist, User.DoesNotExist)) | |||
| def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **kwargs): | |||
| # TODO(dcramer): this function could use some cleanup/refactoring as it's a bit unwieldy | |||
|
|
||
| lifecycle.add_extra("compare_commits_cache_hit", False) | ||
| else: | ||
| lifecycle.add_extra("compare_commits_cache_enabled", False) |
There was a problem hiding this comment.
It's something the integrations team added to add more context to what events happen in the lifecycle of an event.
| end_sha: str, | ||
| ) -> str: | ||
| digest = hash_values( | ||
| [organization_id, repository_id, provider or "", start_sha or "", end_sha], |
There was a problem hiding this comment.
is it actually possible that provider is null or is this mostly a typing issue?
There was a problem hiding this comment.
A typing thing. It should be tighter higher up but I did not want to diverge this PR. A lot of the integrations code has lose typing.
A customer has a large number of GitHub API rate limit emails coming in and the highest referrer is compare-commits. This branch aims to temporarily cache and protect from many incoming calls to compare-commits and see if it solves the problem.


A customer has a large number of GitHub API rate limit emails coming in and the highest referrer is compare-commits.
This branch aims to temporarily cache and protect from many incoming calls to compare-commits and see if it solves the problem.