diff --git a/pontoon/base/migrations/0108_add_translation_value_and_properties_schema.py b/pontoon/base/migrations/0108_add_translation_value_and_properties_schema.py new file mode 100644 index 0000000000..3ebabc3146 --- /dev/null +++ b/pontoon/base/migrations/0108_add_translation_value_and_properties_schema.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.22 on 2025-10-06 20:59 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [("base", "0107_add_search_settings_to_userprofile")] + + operations = [ + migrations.AddField( + model_name="translation", + name="value", + field=models.JSONField(default=list), + ), + migrations.AddField( + model_name="translation", + name="properties", + field=models.JSONField(blank=True, null=True), + ), + ] diff --git a/pontoon/base/migrations/0109_add_translation_value_and_properties_data.py b/pontoon/base/migrations/0109_add_translation_value_and_properties_data.py new file mode 100644 index 0000000000..d4601ad803 --- /dev/null +++ b/pontoon/base/migrations/0109_add_translation_value_and_properties_data.py @@ -0,0 +1,102 @@ +from math import ceil + +from moz.l10n.formats import Format +from moz.l10n.formats.fluent import fluent_parse_entry +from moz.l10n.message import message_to_json, parse_message +from moz.l10n.model import CatchallKey, PatternMessage, SelectMessage + +from django.db import migrations, models + + +batch_size = 10000 + + +def set_value_and_properties(apps, schema_editor): + Resource = apps.get_model("base", "Resource") + Translation = apps.get_model("base", "Translation") + + batch_total = ceil(Translation.objects.count() / batch_size) + batch_count = 0 + + def print_progress(): + nonlocal batch_count + if batch_count % 10 == 0: + print(f".({(batch_count / batch_total):.1%})", end="", flush=True) + else: + print(".", end="", flush=True) + batch_count += 1 + + pv_trans = [] + v_trans = [] + format_q = models.Subquery( + 
Resource.objects.filter(id=models.OuterRef("entity__resource_id")).values( + "format" + ) + ) + for trans in Translation.objects.annotate(format=format_q).iterator(): + string = trans.string + try: + match trans.format: + case "fluent": + fe = fluent_parse_entry(string, with_linepos=False) + msg = fe.value + trans.properties = { + name: message_to_json(msg) + for name, msg in fe.properties.items() + } or None + case "lang" | "properties" | "": + msg = PatternMessage([string]) + case "android" | "gettext" | "webext" | "xcode" | "xliff": + msg = parse_message(Format.mf2, string) + case _: + msg = parse_message(Format[trans.format], string) + + # MF2 syntax does not retain the catchall name/label + if isinstance(msg, SelectMessage) and trans.format != "fluent": + for keys in msg.variants: + for key in keys: + if isinstance(key, CatchallKey): + key.value = "other" + + trans.value = message_to_json(msg) + if trans.properties: + pv_trans.append(trans) + else: + v_trans.append(trans) + except Exception: + if ( + trans.approved + and not trans.entity.obsolete + and not trans.entity.resource.project.disabled + ): + print( + f"\nUsing fallback value for approved and active {trans.format} translation {trans.pk} " + f"for entity {trans.entity.pk}, locale {trans.locale.code}:\n{trans.string}", + flush=True, + ) + trans.value = [trans.string] + v_trans.append(trans) + if len(pv_trans) == batch_size: + Translation.objects.bulk_update(pv_trans, ["value", "properties"]) + pv_trans.clear() + print_progress() + if len(v_trans) == batch_size: + Translation.objects.bulk_update(v_trans, ["value"]) + v_trans.clear() + print_progress() + if pv_trans: + Translation.objects.bulk_update(pv_trans, ["value", "properties"]) + print_progress() + if v_trans: + Translation.objects.bulk_update(v_trans, ["value"]) + print_progress() + + +class Migration(migrations.Migration): + dependencies = [("base", "0108_add_translation_value_and_properties_schema")] + + operations = [ + migrations.RunPython( + 
set_value_and_properties, reverse_code=migrations.RunPython.noop + ), + ] diff --git a/pontoon/base/migrations/0110_require_translation_value.py b/pontoon/base/migrations/0110_require_translation_value.py new file mode 100644 index 0000000000..f7ba364185 --- /dev/null +++ b/pontoon/base/migrations/0110_require_translation_value.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.29 on 2026-03-31 10:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("base", "0109_add_translation_value_and_properties_data"), + ] + + operations = [ + migrations.AlterField( + model_name="translation", + name="value", + field=models.JSONField(), + ), + ] diff --git a/pontoon/base/models/entity.py b/pontoon/base/models/entity.py index 816da8d2c5..0b5db25c92 100644 --- a/pontoon/base/models/entity.py +++ b/pontoon/base/models/entity.py @@ -46,7 +46,7 @@ def combine_entity_filters(entities, filter_choices, filters, *args): return reduce(ior, filters) -class EntityQuerySet(models.QuerySet): +class EntityQuerySet(models.QuerySet["Entity"]): def _get_query(self, locale: Locale, project: Project | None, query: Q) -> Q: from pontoon.base.models.translation import Translation @@ -253,7 +253,9 @@ def prefetch_entities_data(self, locale: Locale, preferred_source_locale: str): class Entity(DirtyFieldsMixin, models.Model): - resource = models.ForeignKey(Resource, models.CASCADE, related_name="entities") + resource: models.ForeignKey["Resource"] = models.ForeignKey( + Resource, models.CASCADE, related_name="entities" + ) section = models.ForeignKey( Section, models.SET_NULL, related_name="entities", null=True, blank=True ) diff --git a/pontoon/base/models/resource.py b/pontoon/base/models/resource.py index 593c9cc2bc..9e08e5b507 100644 --- a/pontoon/base/models/resource.py +++ b/pontoon/base/models/resource.py @@ -2,9 +2,13 @@ from django.db import models from django.utils import timezone +from pontoon.base.models.project import Project + class 
Resource(models.Model): - project = models.ForeignKey("Project", models.CASCADE, related_name="resources") + project: models.ForeignKey["Project"] = models.ForeignKey( + "Project", models.CASCADE, related_name="resources" + ) path = models.TextField() # Path to localization file meta = ArrayField(ArrayField(models.TextField(), size=2), default=list) comment = models.TextField(blank=True) diff --git a/pontoon/base/models/translation.py b/pontoon/base/models/translation.py index fb04a76497..82682a7fbd 100644 --- a/pontoon/base/models/translation.py +++ b/pontoon/base/models/translation.py @@ -19,7 +19,7 @@ from pontoon.checks.utils import save_failed_checks -class TranslationQuerySet(models.QuerySet): +class TranslationQuerySet(models.QuerySet["Translation"]): def aggregate_stats(self) -> dict[str, int]: """ Aggregate translation stats for this queryset. @@ -154,6 +154,8 @@ class Translation(DirtyFieldsMixin, models.Model): locale = models.ForeignKey(Locale, models.CASCADE) user = models.ForeignKey(User, models.SET_NULL, null=True, blank=True) string = models.TextField() + value = models.JSONField() + properties = models.JSONField(null=True, blank=True) date = models.DateTimeField(default=timezone.now) # Active translations are displayed in the string list and as the first diff --git a/pontoon/base/tests/__init__.py b/pontoon/base/tests/__init__.py index 9407cabb75..a08582d967 100644 --- a/pontoon/base/tests/__init__.py +++ b/pontoon/base/tests/__init__.py @@ -135,6 +135,7 @@ class TranslationFactory(DjangoModelFactory): entity = SubFactory(EntityFactory) locale = SubFactory(LocaleFactory) string = Sequence(lambda n: f"translation {n}") + value = Sequence(lambda n: [f"translation {n}"]) user = SubFactory(UserFactory) class Meta: diff --git a/pontoon/batch/utils.py b/pontoon/batch/utils.py index a550678fae..bb63e76731 100644 --- a/pontoon/batch/utils.py +++ b/pontoon/batch/utils.py @@ -8,9 +8,11 @@ from django.utils import timezone -from pontoon.base.models import 
Entity, Resource +from pontoon.base.models import Entity, Resource, User +from pontoon.base.models.translation import TranslationQuerySet from pontoon.checks import DB_FORMATS from pontoon.checks.libraries import run_checks +from pontoon.translations.utils import parse_db_string_to_json parser = FluentParser() @@ -64,7 +66,9 @@ def visit_TextElement(self, node): return serializer.serialize_entry(new_ast) -def find_and_replace(translations, find, replace, user): +def find_and_replace( + translations: TranslationQuerySet, find: str, replace: str, user: User +): """Replace text in a set of translation. :arg QuerySet translations: a list of Translation objects in which to search @@ -97,7 +101,8 @@ def find_and_replace(translations, find, replace, user): # Cache the old value to identify changed translations new_translation = deepcopy(translation) - if translation.entity.resource.format == Resource.Format.FLUENT: + res_format = translation.entity.resource.format + if res_format == Resource.Format.FLUENT: new_translation.string = ftl_find_and_replace( translation.string, find, replace ) @@ -119,15 +124,21 @@ def find_and_replace(translations, find, replace, user): new_translation.pretranslated = False new_translation.fuzzy = False - if new_translation.entity.resource.format in DB_FORMATS: + errors = False + try: + new_translation.value, new_translation.properties = parse_db_string_to_json( + res_format, new_translation.string + ) + except ValueError: + errors = True + + if not errors and res_format in DB_FORMATS: errors = run_checks( new_translation.entity, new_translation.locale.code, new_translation.string, use_tt_checks=False, ) - else: - errors = {} if errors: translations_with_errors.append(translation.pk) diff --git a/pontoon/pretranslation/tasks.py b/pontoon/pretranslation/tasks.py index 118a2af203..52c5f3eff1 100644 --- a/pontoon/pretranslation/tasks.py +++ b/pontoon/pretranslation/tasks.py @@ -21,6 +21,7 @@ from pontoon.base.tasks import PontoonTask from 
pontoon.checks.libraries import run_checks from pontoon.checks.utils import bulk_run_checks +from pontoon.translations.utils import parse_db_string_to_json from . import AUTHORS from .pretranslate import get_pretranslation @@ -135,11 +136,16 @@ def pretranslate(project: Project, paths: set[str] | None): log.info(f"Pretranslation error: {e}") continue + string, author_key = pretranslation + value, properties = parse_db_string_to_json(entity.resource.format, string) + t = Translation( entity=entity, locale=locale, - string=pretranslation[0], - user=pt_authors[pretranslation[1]], + string=string, + value=value, + properties=properties, + user=pt_authors[author_key], approved=False, pretranslated=True, active=True, diff --git a/pontoon/sync/core/translations_from_repo.py b/pontoon/sync/core/translations_from_repo.py index 2abd2b5c46..bbc0bc3e45 100644 --- a/pontoon/sync/core/translations_from_repo.py +++ b/pontoon/sync/core/translations_from_repo.py @@ -7,6 +7,7 @@ from fluent.syntax import FluentParser from moz.l10n.formats import l10n_extensions +from moz.l10n.message import message_to_json from moz.l10n.model import Id as L10nId from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths, parse_android_locale from moz.l10n.resource import parse_resource @@ -32,13 +33,13 @@ from pontoon.checks.utils import bulk_run_checks from pontoon.sync.core.checkout import Checkout, Checkouts from pontoon.sync.core.paths import UploadPaths -from pontoon.sync.formats import as_repo_translations +from pontoon.sync.formats import RepoTranslation, as_repo_translations log = logging.getLogger(__name__) -Updates = dict[tuple[int, int], tuple[str | None, bool]] -""" (entity.id, locale.id) -> (string, fuzzy) """ +Updates = dict[tuple[int, int], RepoTranslation | None] +""" (entity.id, locale.id) -> RepoTranslation """ def sync_translations_from_repo( @@ -129,7 +130,7 @@ def find_db_updates( db_changes: Iterable[ChangedEntityLocale], ) -> Updates | None: """ - `(entity.id, locale.id) -> 
(string|None, fuzzy)` + `(entity.id, locale.id) -> RepoTranslation` Translations in changed resources, excluding: - Exact matches with previous approved or pretranslated translations @@ -140,8 +141,8 @@ def find_db_updates( resource_paths: set[str] = set() # db_path -> {locale.id} translated_resources: dict[str, set[int]] = defaultdict(set) - # (db_path, tx.key, locale.id) -> (string|None, fuzzy) - translations: dict[tuple[str, L10nId, int], tuple[str | None, bool]] = {} + # (db_path, tx.key, locale.id) -> RepoTranslation|None + translations: dict[tuple[str, L10nId, int], RepoTranslation | None] = {} for target_path in changed_target_paths: ref = paths.find_reference(target_path) if ref: @@ -161,8 +162,8 @@ def find_db_updates( resource_paths.add(db_path) translated_resources[db_path].add(locale.pk) translations.update( - ((db_path, tx.key, locale.pk), (tx.string, tx.fuzzy)) - for tx in as_repo_translations(l10n_res) + ((db_path, rt.key, locale.pk), rt) + for rt in as_repo_translations(l10n_res) ) except Exception as error: scope = f"[{project.slug}:{db_path}, {locale.code}]" @@ -215,18 +216,18 @@ def find_db_updates( trans_values["locale_id"], ) if key in translations: - string, _ = translations[key] - if translations_equal( + rt = translations[key] + if rt is not None and translations_equal( project, key[0], trans_values["entity__resource__format"], - string, + rt.string, trans_values["string"], ): del translations[key] else: # The translation has been removed from the repo - translations[key] = (None, False) + translations[key] = None if paginator.num_pages > 3: log.debug( f"[{project.slug}] Filtering matches from translations... 
{page_number}/{paginator.num_pages}" @@ -258,10 +259,10 @@ def find_db_updates( .iterator() } updates: Updates = {} - for (db_path, ent_key, locale_id), tx in translations.items(): + for (db_path, ent_key, locale_id), rt in translations.items(): entity_id = entities.get((db_path, ent_key), None) if entity_id is not None: - updates[(entity_id, locale_id)] = tx + updates[(entity_id, locale_id)] = rt log.debug(f"[{project.slug}] Compiling updates... Found {len(updates)}") return updates @@ -302,14 +303,14 @@ def update_db_translations( # Approve matching suggestions matching_suggestions_q = Q() repo_rm_count = 0 - for (entity_id, locale_id), (string, _) in repo_translations.items(): - if string is None: + for (entity_id, locale_id), rt in repo_translations.items(): + if rt is None: # The translation has been removed from the repo translations_to_reject |= Q(entity_id=entity_id, locale_id=locale_id) repo_rm_count += 1 else: matching_suggestions_q |= Q( - entity_id=entity_id, locale_id=locale_id, string=string + entity_id=entity_id, locale_id=locale_id, string=rt.string ) # (entity_id, locale_id) => translation suggestions: dict[tuple[int, int], Translation] = ( @@ -325,7 +326,8 @@ def update_db_translations( update_fields: set[str] = set() approve_count = 0 for tx in suggestions.values(): - _, fuzzy = repo_translations[(tx.entity_id, tx.locale_id)] + rt = repo_translations[(tx.entity_id, tx.locale_id)] + fuzzy = rt.fuzzy if rt is not None else False if fuzzy and tx.fuzzy: # Keep fuzzy suggestions unchanged continue @@ -371,17 +373,24 @@ def update_db_translations( new_translations: list[Translation] = [] if repo_translations: # Add new approved translations for the remainder - for (entity_id, locale_id), (string, fuzzy) in repo_translations.items(): - if string is not None: + for (entity_id, locale_id), rt in repo_translations.items(): + if rt is not None: + json_properties = ( + {key: message_to_json(msg) for key, msg in rt.properties.items()} + if rt.properties + 
else None + ) tx = Translation( entity_id=entity_id, locale_id=locale_id, - string=string, + string=rt.string, + value=message_to_json(rt.value), + properties=json_properties, date=now, active=True, user=user, ) - if fuzzy: + if rt.fuzzy: tx.fuzzy = True else: tx.approved = True diff --git a/pontoon/sync/formats/__init__.py b/pontoon/sync/formats/__init__.py index 65c2cfabe4..c9f7709959 100644 --- a/pontoon/sync/formats/__init__.py +++ b/pontoon/sync/formats/__init__.py @@ -34,6 +34,8 @@ class RepoTranslation: key: tuple[str, ...] string: str + value: Message + properties: dict[str, Message] | None = None fuzzy: bool = False @@ -94,6 +96,8 @@ def as_repo_translations(res: MozL10nResource[Message]) -> Iterator[RepoTranslat yield RepoTranslation( key=section.id + entry.id, string=_as_string(res.format, entry), + value=entry.value, + properties=entry.properties, fuzzy=fuzzy, ) diff --git a/pontoon/test/factories.py b/pontoon/test/factories.py index 9698904e1e..c35e8a6e4a 100644 --- a/pontoon/test/factories.py +++ b/pontoon/test/factories.py @@ -133,6 +133,7 @@ class TranslationFactory(DjangoModelFactory): entity = SubFactory(EntityFactory) locale = SubFactory(LocaleFactory) string = Sequence(lambda n: f"translation {n}") + value = Sequence(lambda n: [f"translation {n}"]) user = SubFactory(UserFactory) class Meta: diff --git a/pontoon/translations/forms.py b/pontoon/translations/forms.py index 642d7f3bb4..fdf415240d 100644 --- a/pontoon/translations/forms.py +++ b/pontoon/translations/forms.py @@ -29,7 +29,7 @@ class CreateTranslationForm(forms.Form): def clean_entity(self): pk = self.cleaned_data["entity"] try: - return Entity.objects.get(pk=pk) + return Entity.objects.select_related("resource__project").get(pk=pk) except Entity.DoesNotExist: raise forms.ValidationError(f"Entity `{pk}` could not be found") diff --git a/pontoon/translations/tests/test_views.py b/pontoon/translations/tests/test_views.py index df39997440..d9c6b42cc9 100644 --- 
@pytest.mark.django_db
def test_create_translation_success_fluent(
    member, project_a, locale_a, project_locale_a
):
    """Submitting a Fluent translation stores its parsed pattern as `value`."""
    ftl_resource = ResourceFactory(
        project=project_a, path="resource.ftl", format="fluent"
    )
    ftl_entity = EntityFactory(
        resource=ftl_resource, key=["key"], string="key = value\n"
    )
    submitted = "key = Bonjour !\n"

    request_args = {
        "entity": ftl_entity.pk,
        "locale": locale_a.code,
        "translation": submitted,
    }
    response = request_create_translation(member.client, **request_args)
    assert response.status_code == 200
    assert response.json()["status"]

    stored = Translation.objects.get(
        entity=ftl_entity, locale=locale_a, string=submitted
    )
    assert stored.value == ["Bonjour !"]


@pytest.mark.django_db
def test_create_translation_success_mf2(member, project_a, locale_a, project_locale_a):
    """Submitting an Android translation stores its parsed MF2 pattern as `value`."""
    android_resource = ResourceFactory(
        project=project_a, path="strings.xml", format="android"
    )
    android_entity = EntityFactory(
        resource=android_resource,
        key=["key"],
        string="the {$arg :string @source=|%s|} message",
    )
    submitted = "le {$arg :string @source=|%s|} message"

    request_args = {
        "entity": android_entity.pk,
        "locale": locale_a.code,
        "translation": submitted,
    }
    response = request_create_translation(member.client, **request_args)
    assert response.status_code == 200
    assert response.json()["status"]

    expected_value = [
        "le ",
        {"$": "arg", "fn": "string", "attr": {"source": "%s"}},
        " message",
    ]
    stored = Translation.objects.get(
        entity=android_entity, locale=locale_a, string=submitted
    )
    assert stored.value == expected_value
test_create_translation_not_logged_in(client, entity_a, locale_a): response = request_create_translation( @@ -311,6 +355,8 @@ def test_view_translation_delete(approved_translation, rejected_translation, mem rejected_translation = Translation.objects.create( entity=approved_translation.entity, locale=approved_translation.locale, + string="", + value=[], user=member.user, rejected=True, ) diff --git a/pontoon/translations/utils.py b/pontoon/translations/utils.py new file mode 100644 index 0000000000..3f25e07a04 --- /dev/null +++ b/pontoon/translations/utils.py @@ -0,0 +1,42 @@ +from typing import Any + +from moz.l10n.formats.fluent import fluent_parse_entry +from moz.l10n.formats.mf2 import mf2_parse_message +from moz.l10n.message import message_to_json +from moz.l10n.model import CatchallKey, SelectMessage + +from pontoon.base.models import Resource + + +JsonMessage = list[Any] | dict[str, Any] + + +def parse_db_string_to_json( + res_format: str, + source: str, +) -> tuple[JsonMessage, dict[str, JsonMessage] | None]: + match res_format: + case Resource.Format.FLUENT: + fe = fluent_parse_entry(source) + value = message_to_json(fe.value) + properties = { + name: message_to_json(prop) for name, prop in fe.properties.items() + } or None + return value, properties + case ( + Resource.Format.ANDROID + | Resource.Format.GETTEXT + | Resource.Format.WEBEXT + | Resource.Format.XCODE + | Resource.Format.XLIFF + ): + msg = mf2_parse_message(source) + # MF2 syntax does not retain the catchall name/label + if isinstance(msg, SelectMessage): + for keys in msg.variants: + for key in keys: + if isinstance(key, CatchallKey): + key.value = "other" + return message_to_json(msg), None + case _: + return [source] if source else [], None diff --git a/pontoon/translations/views.py b/pontoon/translations/views.py index 8a0c5fecd3..1e0db4b5ec 100644 --- a/pontoon/translations/views.py +++ b/pontoon/translations/views.py @@ -1,3 +1,5 @@ +from typing import cast + from notifications.signals 
import notify from django.conf import settings @@ -14,16 +16,21 @@ from pontoon.actionlog.utils import log_action from pontoon.base import utils from pontoon.base.models import ( + Entity, + Locale, + Resource, TranslatedResource, Translation, ) from pontoon.checks.libraries import run_checks from pontoon.checks.utils import are_blocking_checks from pontoon.messaging.notifications import send_badge_notification -from pontoon.translations import forms + +from .forms import CreateTranslationForm +from .utils import parse_db_string_to_json -def _add_stats(response_data, resource, locale, stats): +def _add_stats(response_data, resource: Resource, locale: Locale, stats): if stats: paths = [resource.path] if stats == "resource" else [] response_data["stats"] = TranslatedResource.objects.query_stats( @@ -51,25 +58,22 @@ def create_translation(request): """ Create a new translation. """ - form = forms.CreateTranslationForm(request.POST) + form = CreateTranslationForm(request.POST) if not form.is_valid(): problems = [] for field, errors in form.errors.items(): - problems.append( - 'Error validating field `{}`: "{}"'.format(field, " ".join(errors)) - ) + problems.append(f'Error validating field `{field}`: "{" ".join(errors)}"') return JsonResponse( {"status": False, "message": "\n".join(problems)}, status=400 ) - entity = form.cleaned_data["entity"] + entity = cast(Entity, form.cleaned_data["entity"]) string = form.cleaned_data["translation"] - locale = form.cleaned_data["locale"] + locale = cast(Locale, form.cleaned_data["locale"]) ignore_warnings = form.cleaned_data["ignore_warnings"] approve = form.cleaned_data["approve"] force_suggestions = form.cleaned_data["force_suggestions"] - machinery_sources = form.cleaned_data["machinery_sources"] stats = form.cleaned_data["stats"] resource = entity.resource @@ -109,6 +113,8 @@ def create_translation(request): if are_blocking_checks(failed_checks, ignore_warnings): return JsonResponse({"status": False, "failedChecks": 
failed_checks}) + value, properties = parse_db_string_to_json(resource.format, string) + now = timezone.now() can_translate = user.can_translate(project=project, locale=locale) and ( not force_suggestions or approve @@ -118,10 +124,12 @@ def create_translation(request): entity=entity, locale=locale, string=string, + value=value, + properties=properties, user=user, date=now, approved=can_translate, - machinery_sources=machinery_sources, + machinery_sources=form.cleaned_data["machinery_sources"], ) if can_translate: