Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:integrations-claude-code", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
manager.add("organizations:integrations-cursor", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
manager.add("organizations:integrations-github-copilot-agent", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
manager.add("organizations:integrations-github-fetch-commits-compare-cache", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
manager.add("organizations:integrations-github-platform-detection", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
manager.add("organizations:integrations-perforce", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Project Management Integrations Feature Parity Flags
Expand Down
87 changes: 83 additions & 4 deletions src/sentry/tasks/commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from sentry_sdk import set_tag
from taskbroker_client.retry import Retry

from sentry import features
from sentry.constants import ObjectStatus
from sentry.exceptions import InvalidIdentity, PluginError
from sentry.integrations.source_code_management.metrics import (
Expand All @@ -28,11 +29,21 @@
from sentry.users.models.user import User
from sentry.users.services.user import RpcUser
from sentry.users.services.user.service import user_service
from sentry.utils.cache import cache
from sentry.utils.email import MessageBuilder
from sentry.utils.hashlib import md5_text
from sentry.utils.http import absolute_uri

logger = logging.getLogger(__name__)

GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE = (
"organizations:integrations-github-fetch-commits-compare-cache"
)
GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS = 120
GITHUB_CACHEABLE_REPOSITORY_PROVIDERS = frozenset(
("integrations:github", "integrations:github_enterprise")
)


def generate_invalid_identity_email(identity, commit_failure=False):
new_context = {
Expand Down Expand Up @@ -63,6 +74,63 @@
# we're future proofing this function a bit so it could be used with other code


def get_github_compare_commits_cache_key(
organization_id: int, repository_id: int, provider: str, start_sha: str | None, end_sha: str
) -> str:
digest = md5_text(
organization_id, repository_id, provider, start_sha or "", end_sha
).hexdigest()
return f"fetch-commits:compare-commits:v1:{digest}"


def fetch_compare_commits(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As part of this PR, I'm extracting some of the code in the main for/loop into a couple of new functions (to make the final code easier to read):

*,
cache_enabled: bool,
repo: Repository,
provider,
is_integration_repo_provider: bool,
start_sha: str | None,
end_sha: str,
user: RpcUser | None,
lifecycle,
):
cache_key = None
provider = repo.provider

Check failure on line 98 in src/sentry/tasks/commits.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Variable shadowing causes AttributeError: 'str' object has no attribute 'compare_commits'

On line 98, `provider = repo.provider` reassigns the `provider` parameter to a string (the repository's provider field, e.g., `"integrations:github"`). Later, lines 120-123 call `provider.compare_commits(...)` which will fail with `AttributeError` because strings don't have a `compare_commits` method. This will crash every call to `fetch_compare_commits` that results in a cache miss.
if (
cache_enabled
and isinstance(provider, str)
and provider in GITHUB_CACHEABLE_REPOSITORY_PROVIDERS
and start_sha is not None
):
cache_key = get_github_compare_commits_cache_key(
repo.organization_id, repo.id, provider, start_sha, end_sha
)

if cache_key is not None:
cached_repo_commits = cache.get(cache_key)
lifecycle.add_extra("compare_commits_cache_enabled", True)
if cached_repo_commits is not None:
lifecycle.add_extra("compare_commits_cache_hit", True)
return cached_repo_commits

lifecycle.add_extra("compare_commits_cache_hit", False)
else:
lifecycle.add_extra("compare_commits_cache_enabled", False)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is lifecycle?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's something the integrations team added to add more context to what events happen in the lifecycle of an event.


if is_integration_repo_provider:
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if/else comes from the main loop:

if is_integration_repo_provider:
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
else:
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)

repo_commits = provider.compare_commits(repo, start_sha, end_sha)
else:
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)

if cache_key is not None:
cache.set(
cache_key,
repo_commits,
GITHUB_FETCH_COMMITS_COMPARE_CACHE_TTL_SECONDS,
)
return repo_commits


def handle_invalid_identity(identity, commit_failure=False):
# email the user
msg = generate_invalid_identity_email(identity, commit_failure)
Expand Down Expand Up @@ -97,6 +165,11 @@
except Release.DoesNotExist:
pass

organization = release.organization
github_compare_commits_cache_enabled = features.has(
GITHUB_FETCH_COMMITS_COMPARE_CACHE_FEATURE, organization, actor=user
)

for ref in refs:
repo = (
Repository.objects.filter(
Expand Down Expand Up @@ -171,10 +244,16 @@
}
)
try:
if is_integration_repo_provider:
repo_commits = provider.compare_commits(repo, start_sha, end_sha)
else:
repo_commits = provider.compare_commits(repo, start_sha, end_sha, actor=user)
repo_commits = fetch_compare_commits(
cache_enabled=github_compare_commits_cache_enabled,
repo=repo,
provider=provider,
is_integration_repo_provider=is_integration_repo_provider,
start_sha=start_sha,
end_sha=end_sha,
user=user,
lifecycle=lifecycle,
)

Check failure on line 256 in src/sentry/tasks/commits.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[N57-GFA] Variable shadowing causes AttributeError: 'str' object has no attribute 'compare_commits' (additional location)

On line 98, `provider = repo.provider` reassigns the `provider` parameter to a string (the repository's provider field, e.g., `"integrations:github"`). Later, lines 120-123 call `provider.compare_commits(...)` which will fail with `AttributeError` because strings don't have a `compare_commits` method. This will crash every call to `fetch_compare_commits` that results in a cache miss.
except NotImplementedError:
pass
except IntegrationResourceNotFoundError:
Expand Down
27 changes: 27 additions & 0 deletions tests/sentry/integrations/github/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,33 @@ def test_patchset_caching(self, get_jwt: mock.MagicMock) -> None:
# Now that patchset was cached, github shouldn't have been called again
assert len(responses.calls) == 1

@mock.patch("sentry.integrations.github.client.get_jwt", return_value="jwt_token_1")
@responses.activate
def test_compare_commits_reuses_cached_patchset_across_calls(
self, get_jwt: mock.MagicMock
) -> None:
responses.add(
responses.GET,
"https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef",
json=orjson.loads(COMPARE_COMMITS_EXAMPLE),
)
responses.add(
responses.GET,
"https://api.github.com/repos/getsentry/example-repo/compare/xyz123...abcdef",
json=orjson.loads(COMPARE_COMMITS_EXAMPLE),
)
responses.add(
responses.GET,
"https://api.github.com/repos/getsentry/example-repo/commits/6dcb09b5b57875f334f61aebed695e2e4193db5e",
json=orjson.loads(GET_COMMIT_EXAMPLE),
)

first = self.provider.compare_commits(self.repository, "xyz123", "abcdef")
second = self.provider.compare_commits(self.repository, "xyz123", "abcdef")

assert first == second
assert len(responses.calls) == 3

@responses.activate
def test_compare_commits_failure(self) -> None:
responses.add(
Expand Down
156 changes: 156 additions & 0 deletions tests/sentry/tasks/test_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,19 @@
from sentry.testutils.asserts import assert_slo_metric
from sentry.testutils.cases import TestCase
from sentry.testutils.silo import assume_test_silo_mode, control_silo_test
from sentry.utils.cache import cache
from social_auth.models import UserSocialAuth


@patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
class FetchCommitsTest(TestCase):
def _github_compare_commits_result(self, repo_name: str, end_sha: str) -> list[dict[str, str]]:
return [
{"id": "62de626b7c7cfb8e77efb4273b1a3df4123e6216", "repository": repo_name},
{"id": "58de626b7c7cfb8e77efb4273b1a3df4123e6345", "repository": repo_name},
{"id": end_sha, "repository": repo_name},
]

def _test_simple_action(self, user, org):
repo = Repository.objects.create(name="example", provider="dummy", organization_id=org.id)
release = Release.objects.create(organization_id=org.id, version="abcabcabc")
Expand Down Expand Up @@ -86,6 +94,154 @@ def test_duplicate_repositories(self, mock_record: MagicMock) -> None:
Repository.objects.create(name="example", provider="dummy", organization_id=org.id)
self._test_simple_action(user=self.user, org=org)

@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
def test_github_compare_commits_cache_flag_disabled(
self, mock_compare_commits: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()

org = self.create_organization(owner=self.user, name="baz")
repo = Repository.objects.create(
name="example",
provider="integrations:github",
organization_id=org.id,
)
previous_release = Release.objects.create(organization_id=org.id, version="old-release")
previous_commit = Commit.objects.create(
organization_id=org.id, repository_id=repo.id, key="a" * 40
)
ReleaseHeadCommit.objects.create(
organization_id=org.id,
repository_id=repo.id,
release=previous_release,
commit=previous_commit,
)

refs = [{"repository": repo.name, "commit": "b" * 40}]
mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40)

first_release = Release.objects.create(organization_id=org.id, version="new-release-1")
second_release = Release.objects.create(organization_id=org.id, version="new-release-2")

with self.tasks():
fetch_commits(
release_id=first_release.id,
user_id=self.user.id,
refs=refs,
previous_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs,
previous_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 2
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the feature we call it twice.


@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
def test_github_compare_commits_cache_flag_enabled(
self, mock_compare_commits: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()

org = self.create_organization(owner=self.user, name="baz")
repo = Repository.objects.create(
name="example",
provider="integrations:github",
organization_id=org.id,
)
previous_release = Release.objects.create(organization_id=org.id, version="old-release")
previous_commit = Commit.objects.create(
organization_id=org.id, repository_id=repo.id, key="a" * 40
)
ReleaseHeadCommit.objects.create(
organization_id=org.id,
repository_id=repo.id,
release=previous_release,
commit=previous_commit,
)

refs = [{"repository": repo.name, "commit": "b" * 40}]
mock_compare_commits.return_value = self._github_compare_commits_result(repo.name, "b" * 40)

first_release = Release.objects.create(organization_id=org.id, version="new-release-1")
second_release = Release.objects.create(organization_id=org.id, version="new-release-2")

with self.feature(
{"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]}
):
with self.tasks():
fetch_commits(
release_id=first_release.id,
user_id=self.user.id,
refs=refs,
previous_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs,
previous_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 1
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the feature flag is enabled, we only make one call.


@patch("sentry.integrations.github.repository.GitHubRepositoryProvider.compare_commits")
def test_github_compare_commits_cache_key_variance_on_end_sha(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test verifies that cache entries are keyed by commit range, not just repo/org.

self, mock_compare_commits: MagicMock, mock_record: MagicMock
) -> None:
self.login_as(user=self.user)
cache.clear()

org = self.create_organization(owner=self.user, name="baz")
repo = Repository.objects.create(
name="example",
provider="integrations:github",
organization_id=org.id,
)
previous_release = Release.objects.create(organization_id=org.id, version="old-release")
previous_commit = Commit.objects.create(
organization_id=org.id, repository_id=repo.id, key="a" * 40
)
ReleaseHeadCommit.objects.create(
organization_id=org.id,
repository_id=repo.id,
release=previous_release,
commit=previous_commit,
)

refs_first = [{"repository": repo.name, "commit": "b" * 40}]
refs_second = [{"repository": repo.name, "commit": "c" * 40}]
mock_compare_commits.side_effect = [
self._github_compare_commits_result(repo.name, "b" * 40),
self._github_compare_commits_result(repo.name, "c" * 40),
]

first_release = Release.objects.create(organization_id=org.id, version="new-release-1")
second_release = Release.objects.create(organization_id=org.id, version="new-release-2")

with self.feature(
{"organizations:integrations-github-fetch-commits-compare-cache": [org.slug]}
):
with self.tasks():
fetch_commits(
release_id=first_release.id,
user_id=self.user.id,
refs=refs_first,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing refs_first.

previous_release_id=previous_release.id,
)
fetch_commits(
release_id=second_release.id,
user_id=self.user.id,
refs=refs_second,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing refs_second.

previous_release_id=previous_release.id,
)

assert mock_compare_commits.call_count == 2
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They each will make a call.


def test_release_locked(self, mock_record_event: MagicMock) -> None:
self.login_as(user=self.user)
org = self.create_organization(owner=self.user, name="baz")
Expand Down
Loading