diff --git a/pyproject.toml b/pyproject.toml index cf052bb24e5359..49aff6b2f4e498 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -677,6 +677,7 @@ module = [ "sentry.tasks.beacon", "sentry.tasks.codeowners.*", "sentry.tasks.commit_context", + "sentry.tasks.commits", "sentry.tasks.on_demand_metrics", "sentry.tasks.reprocessing2", "sentry.tasks.seer.delete_seer_grouping_records", diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 1122038668af8e..c0b0c8a52856de 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -133,6 +133,7 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:integrations-claude-code", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-cursor", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) + manager.add("organizations:integrations-github-fetch-commits-compare-cache", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Project Management Integrations Feature Parity Flags diff --git a/src/sentry/tasks/commits.py b/src/sentry/tasks/commits.py index a1056a65f2420b..bb68885efa4d89 100644 --- a/src/sentry/tasks/commits.py +++ b/src/sentry/tasks/commits.py @@ -1,12 +1,15 @@ from __future__ import annotations import logging +from collections.abc import Mapping, Sequence +from typing import Any import sentry_sdk from django.urls import reverse from sentry_sdk import set_tag from taskbroker_client.retry import Retry +from sentry import features from sentry.constants import ObjectStatus from sentry.exceptions import InvalidIdentity, PluginError from sentry.integrations.source_code_management.metrics import ( @@ -28,13 +31,23 @@ from sentry.users.models.user import User from sentry.users.services.user import RpcUser from sentry.users.services.user.service import user_service +from sentry.utils.cache import cache from sentry.utils.email import MessageBuilder +from sentry.utils.hashlib import hash_values from sentry.utils.http import absolute_uri logger = logging.getLogger(__name__) +GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE = ( + "organizations:integrations-github-fetch-commits-compare-cache" +) +GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS = 120 +GITHUB_CACHEABLE_REPOSITORY_PROVIDERS = frozenset( + ("integrations:github", "integrations:github_enterprise") +) + -def generate_invalid_identity_email(identity, commit_failure=False): +def generate_invalid_identity_email(identity: Any, commit_failure: bool = False) -> MessageBuilder: new_context = { "identity": identity, "auth_url": absolute_uri(reverse("socialauth_associate", args=[identity.provider])), @@ -49,7 +62,9 @@ def generate_invalid_identity_email(identity, commit_failure=False): ) -def generate_fetch_commits_error_email(release, repo, error_message): +def generate_fetch_commits_error_email( + release: Release, repo: Repository, error_message: str +) -> MessageBuilder: new_context = {"release": release, "error_message": error_message, "repo": repo} return MessageBuilder( @@ -63,7 +78,7 @@ def generate_fetch_commits_error_email(release, repo, error_message): # we're future proofing this function a bit so it could be used with other code -def handle_invalid_identity(identity, commit_failure=False): +def handle_invalid_identity(identity: Any, commit_failure: bool = False) -> None: # email the user msg = generate_invalid_identity_email(identity, commit_failure) msg.send_async(to=[identity.user.email]) @@ -72,6 +87,103 @@ def handle_invalid_identity(identity, commit_failure=False): identity.delete() +def get_github_compare_commits_cache_key( + organization_id: int, + repository_id: int, + provider: str | None, + start_sha: str | None, + end_sha: str, +) -> str: + digest = hash_values( + [organization_id, repository_id, provider or "", start_sha or "", end_sha], + seed="fetch-commits:compare-commits", + ) + return f"fetch-commits:compare-commits:v1:{digest}" + + +def fetch_compare_commits( + *, + cache_enabled: bool, + repo: Repository, + provider: Any, + is_integration_repo_provider: bool, + start_sha: str | None, + end_sha: str, + user: RpcUser | None, + lifecycle: Any, +) -> list[dict[str, Any]]: + if cache_enabled: + cache_key = get_github_compare_commits_cache_key( + repo.organization_id, repo.id, repo.provider, start_sha, end_sha + ) + cached_repo_commits = cache.get(cache_key) + lifecycle.add_extra("compare_commits_cache_enabled", True) + if cached_repo_commits is not None: + lifecycle.add_extra("compare_commits_cache_hit", True) + return cached_repo_commits + + lifecycle.add_extra("compare_commits_cache_hit", False) + else: + lifecycle.add_extra("compare_commits_cache_enabled", False) + + if is_integration_repo_provider: + repo_commits = provider.compare_commits(repo, start_sha, end_sha) + else: + # XXX: This only works for plugins that support actor context + repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user) + + if cache_enabled: + cache.set( + cache_key, + repo_commits, + GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS, + ) + return repo_commits + + +def get_repo_and_provider_for_ref( + *, + release: Release, + ref: Mapping[str, str], + user_id: int, +) -> tuple[Repository, Any, bool, str] | None: + repo = ( + Repository.objects.filter( + organization_id=release.organization_id, + name=ref["repository"], + status=ObjectStatus.ACTIVE, + ) + .order_by("-pk") + .first() + ) + if not repo: + logger.info( + "repository.missing", + extra={ + "organization_id": release.organization_id, + "user_id": user_id, + "repository": ref["repository"], + }, + ) + return None + + is_integration_repo_provider = is_integration_provider(repo.provider) + binding_key = ( + "integration-repository.provider" if is_integration_repo_provider else "repository.provider" + ) + try: + provider_cls = bindings.get(binding_key).get(repo.provider) + except KeyError: + return None + + provider = provider_cls(id=repo.provider) + provider_key = ( + provider_cls.repo_provider if is_integration_repo_provider else provider_cls.auth_provider + ) + + return repo, provider, is_integration_repo_provider, provider_key + + @instrumented_task( name="sentry.tasks.commits.fetch_commits", namespace=issues_tasks, @@ -80,9 +192,14 @@ def handle_invalid_identity(identity, commit_failure=False): silo_mode=SiloMode.CELL, ) @retry(exclude=(Release.DoesNotExist, User.DoesNotExist)) -def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **kwargs): - # TODO(dcramer): this function could use some cleanup/refactoring as it's a bit unwieldy - commit_list = [] +def fetch_commits( + release_id: int, + user_id: int, + refs: Sequence[Mapping[str, str]], + prev_release_id: int | None = None, + **kwargs: Any, +) -> None: + commit_list: list[dict[str, Any]] = [] release = Release.objects.get(id=release_id) set_tag("organization.slug", release.organization.slug) @@ -97,37 +214,16 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k except Release.DoesNotExist: pass - for ref in refs: - repo = ( - Repository.objects.filter( - organization_id=release.organization_id, - name=ref["repository"], - status=ObjectStatus.ACTIVE, - ) - .order_by("-pk") - .first() - ) - if not repo: - logger.info( - "repository.missing", - extra={ - "organization_id": release.organization_id, - "user_id": user_id, - "repository": ref["repository"], - }, - ) - continue + organization = release.organization + github_compare_commits_cache_feature_enabled = features.has( + GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user + ) - is_integration_repo_provider = is_integration_provider(repo.provider) - binding_key = ( - "integration-repository.provider" - if is_integration_repo_provider - else "repository.provider" - ) - try: - provider_cls = bindings.get(binding_key).get(repo.provider) - except KeyError: + for ref in refs: + resolved = get_repo_and_provider_for_ref(release=release, ref=ref, user_id=user_id) + if resolved is None: continue + repo, provider, is_integration_repo_provider, provider_key = resolved # if previous commit isn't provided, try to get from # previous release otherwise, try to get @@ -146,13 +242,6 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k pass end_sha = ref["commit"] - provider = provider_cls(id=repo.provider) - - provider_key = ( - provider_cls.repo_provider - if is_integration_repo_provider - else provider_cls.auth_provider - ) with SCMIntegrationInteractionEvent( SCMIntegrationInteractionType.COMPARE_COMMITS, @@ -171,10 +260,23 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k } ) try: - if is_integration_repo_provider: - repo_commits = provider.compare_commits(repo, start_sha, end_sha) - else: - repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user) + provider_name = repo.provider + compare_commits_cache_enabled = ( + github_compare_commits_cache_feature_enabled + and isinstance(provider_name, str) + and provider_name in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS + and start_sha is not None + ) + repo_commits = fetch_compare_commits( + cache_enabled=compare_commits_cache_enabled, + repo=repo, + provider=provider, + is_integration_repo_provider=is_integration_repo_provider, + start_sha=start_sha, + end_sha=end_sha, + user=user, + lifecycle=lifecycle, + ) except NotImplementedError: pass except IntegrationResourceNotFoundError: @@ -277,11 +379,11 @@ def fetch_commits(release_id: int, user_id: int, refs, prev_release_id=None, **k Deploy.notify_if_ready(deploy_id, fetch_complete=True) -def is_integration_provider(provider): - return provider and provider.startswith("integrations:") +def is_integration_provider(provider: str | None) -> bool: + return bool(provider and provider.startswith("integrations:")) -def get_emails_for_user_or_org(user: RpcUser | None, orgId: int): +def get_emails_for_user_or_org(user: RpcUser | None, orgId: int) -> list[str]: emails: list[str] = [] if not user: return [] diff --git a/tests/sentry/integrations/github/test_repository.py b/tests/sentry/integrations/github/test_repository.py index f974e752ba636e..76afa972ab9403 100644 --- a/tests/sentry/integrations/github/test_repository.py +++ b/tests/sentry/integrations/github/test_repository.py @@ -206,6 +206,33 @@ def test_patchset_caching(self, get_jwt: mock.MagicMock) -> None: # Now that patchset was cached, github shouldn't have been called again assert len(responses.calls) == 1 + @mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1") + @responses.activate + def test_compare_commits_reuses_cached_patchset_across_calls( + self, get_jwt: mock.MagicMock + ) -> None: + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef", + json=orjson.loads(COMPARE_COMMITS_EXAMPLE), + ) + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef", + json=orjson.loads(COMPARE_COMMITS_EXAMPLE), + ) + responses.add( + responses.GET, + "https://api.github.com/repos/getsentry/example-repo/commits/6dcb09b5b57875f334f61aebed695e2e4193db5e", + json=orjson.loads(GET_COMMIT_EXAMPLE), + ) + + first = self.provider.compare_commits(self.repository, "xyz123", "abcdef") + second = self.provider.compare_commits(self.repository, "xyz123", "abcdef") + + assert first == second + assert len(responses.calls) == 3 + @responses.activate def test_compare_commits_failure(self) -> None: responses.add( diff --git a/tests/sentry/tasks/test_commits.py b/tests/sentry/tasks/test_commits.py index 96736c5fab418a..7cf3f4bad547e1 100644 --- a/tests/sentry/tasks/test_commits.py +++ b/tests/sentry/tasks/test_commits.py @@ -13,15 +13,58 @@ from sentry.models.releaseheadcommit import ReleaseHeadCommit from sentry.models.repository import Repository from sentry.silo.base import SiloMode -from sentry.tasks.commits import fetch_commits, handle_invalid_identity +from sentry.tasks.commits import ( + fetch_commits, + get_github_compare_commits_cache_key, + handle_invalid_identity, +) from sentry.testutils.asserts import assert_slo_metric from sentry.testutils.cases import TestCase from sentry.testutils.silo import assume_test_silo_mode, control_silo_test +from sentry.utils.cache import cache from social_auth.models import UserSocialAuth @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") class FetchCommitsTest(TestCase): + def _github_compare_commits_result(self, repo_name: str, end_sha: str) -> list[dict[str, str]]: + return [ + {"id": "62de626b7c7cfb8e77efb4273b1a3df4123e6216", "repository": repo_name}, + {"id": "58de626b7c7cfb8e77efb4273b1a3df4123e6345", "repository": repo_name}, + {"id": end_sha, "repository": repo_name}, + ] + + def _setup_github_compare_commits_cache_context(self): + org = self.create_organization(owner=self.user, name="baz") + repo = Repository.objects.create( + name="example", + provider="integrations:github", + organization_id=org.id, + ) + previous_release = Release.objects.create(organization_id=org.id, version="old-release") + previous_commit = Commit.objects.create( + organization_id=org.id, repository_id=repo.id, key="a" * 40 + ) + ReleaseHeadCommit.objects.create( + organization_id=org.id, + repository_id=repo.id, + release=previous_release, + commit=previous_commit, + ) + + refs = [{"repository": repo.name, "commit": "b" * 40}] + first_release = Release.objects.create(organization_id=org.id, version="new-release-1") + second_release = Release.objects.create(organization_id=org.id, version="new-release-2") + return org, repo, previous_release, first_release, second_release, refs + + def test_github_compare_commits_cache_key_avoids_ambiguous_id_collisions( + self, mock_record: MagicMock + ) -> None: + key_one = get_github_compare_commits_cache_key(1, 23, "integrations:github", "a", "b") + key_two = get_github_compare_commits_cache_key(12, 3, "integrations:github", "a", "b") + + assert key_one != key_two + def _test_simple_action(self, user, org): repo = Repository.objects.create(name="example", provider="dummy", organization_id=org.id) release = Release.objects.create(organization_id=org.id, version="abcabcabc") @@ -44,7 +87,7 @@ def _test_simple_action(self, user, org): release_id=release2.id, user_id=user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) commit_list = list( @@ -86,6 +129,100 @@ def test_duplicate_repositories(self, mock_record: MagicMock) -> None: Repository.objects.create(name="example", provider="dummy", organization_id=org.id) self._test_simple_action(user=self.user, org=org) + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_flag_disabled( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + _, repo, previous_release, first_release, second_release, refs = ( + self._setup_github_compare_commits_cache_context() + ) + mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) + + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs, + prev_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs, + prev_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 2 + + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_flag_enabled( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + org, repo, previous_release, first_release, second_release, refs = ( + self._setup_github_compare_commits_cache_context() + ) + mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40) + + with self.feature( + {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} + ): + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs, + prev_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs, + prev_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 1 + + @patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits") + def test_github_compare_commits_cache_key_variance_on_end_sha( + self, mock_compare_commits: MagicMock, mock_record: MagicMock + ) -> None: + self.login_as(user=self.user) + cache.clear() + + org, repo, previous_release, first_release, second_release, refs_first = ( + self._setup_github_compare_commits_cache_context() + ) + refs_second = [{"repository": repo.name, "commit": "c" * 40}] + mock_compare_commits.side_effect = [ + self._github_compare_commits_result(repo.name, "b" * 40), + self._github_compare_commits_result(repo.name, "c" * 40), + ] + + with self.feature( + {"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]} + ): + with self.tasks(): + fetch_commits( + release_id=first_release.id, + user_id=self.user.id, + refs=refs_first, + prev_release_id=previous_release.id, + ) + fetch_commits( + release_id=second_release.id, + user_id=self.user.id, + refs=refs_second, + prev_release_id=previous_release.id, + ) + + assert mock_compare_commits.call_count == 2 + def test_release_locked(self, mock_record_event: MagicMock) -> None: self.login_as(user=self.user) org = self.create_organization(owner=self.user, name="baz") @@ -108,7 +245,7 @@ def test_release_locked(self, mock_record_event: MagicMock) -> None: release_id=new_release.id, user_id=self.user.id, refs=refs, - previous_release_id=old_release.id, + prev_release_id=old_release.id, ) count_query = ReleaseHeadCommit.objects.filter(release=new_release) # No release commits should be made as the task should return early. @@ -141,7 +278,7 @@ def test_fetch_error_invalid_identity( mock_compare_commits.side_effect = InvalidIdentity(identity=usa) fetch_commits( - release_id=release2.id, user_id=self.user.id, refs=refs, previous_release_id=release.id + release_id=release2.id, user_id=self.user.id, refs=refs, prev_release_id=release.id ) mock_handle_invalid_identity.assert_called_once_with(identity=usa, commit_failure=True) @@ -178,7 +315,7 @@ def test_fetch_error_plugin_error( release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -219,7 +356,7 @@ def test_fetch_error_plugin_error_for_sentry_app( release_id=release2.id, user_id=sentry_app.proxy_user_id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -259,7 +396,7 @@ def test_fetch_error_random_exception( release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1] @@ -300,7 +437,7 @@ def test_fetch_error_random_exception_integration(self, mock_record: MagicMock) release_id=release2.id, user_id=self.user.id, refs=refs, - previous_release_id=release.id, + prev_release_id=release.id, ) msg = mail.outbox[-1]