Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions pontoon/base/migrations/0101_webext_as_mf2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from json import dumps
from re import compile

from moz.l10n.formats.mf2 import mf2_parse_message, mf2_serialize_message
from moz.l10n.formats.webext import webext_serialize_message
from moz.l10n.message import message_from_json
from moz.l10n.model import Expression, PatternMessage, VariableRef

from django.db import migrations


# Matches the `$` constructs recognised in a WebExtension message string,
# one capture group per alternative:
#   group 1: the name inside a named placeholder such as `$NAME$`
#            (letters, digits, `_` and `@`; the surrounding dollars excluded)
#   group 2: an indexed placeholder `$1`..`$9`, including its leading `$`
#   group 3: every `$` after the first in a run of two or more dollars
webext_placeholder = compile(r"\$([a-zA-Z0-9_@]+)\$|(\$[1-9])|\$(\$+)")


def webext_translation_parse(translation):
    """Build a PatternMessage from a translation stored in WebExtension syntax.

    The translation's ``string`` is scanned with ``webext_placeholder``; the
    declarations are taken from the message stored as JSON in
    ``translation.entity.value``. Literal text is kept as string parts and
    each placeholder becomes an ``Expression`` whose ``source`` attribute
    holds the exact text that was matched.
    """
    text = translation.string
    declarations = message_from_json(translation.entity.value).declarations
    parts = []
    cursor = 0
    for match in webext_placeholder.finditer(text):
        match_start = match.start()
        if match_start > cursor:
            # Literal text between the previous match and this one.
            parts.append(text[cursor:match_start])
        cursor = match.end()

        named, indexed, dollars = match.groups()
        if named:
            # Named placeholder, with content & optional example in the
            # placeholders object. Normalise it to a usable variable name.
            candidate = named.replace("@", "_")
            if candidate[0].isdigit():
                candidate = f"_{candidate}"
            # Prefer the spelling of a declaration that differs only in case.
            folded = candidate.lower()
            for declared in declarations:
                if declared.lower() == folded:
                    candidate = declared
                    break
            parts.append(
                Expression(VariableRef(candidate), attributes={"source": match[0]})
            )
        elif indexed:
            # Indexed placeholder: `$3` is referenced as the variable `arg3`.
            parts.append(
                Expression(
                    VariableRef(f"arg{indexed[1]}"), attributes={"source": match[0]}
                )
            )
        elif parts and isinstance(parts[-1], str):
            # Escaped literal dollar sign, joined onto the preceding text.
            parts[-1] += dollars
        else:
            # Escaped literal dollar sign with no preceding text part.
            parts.append(dollars)

    if cursor < len(text):
        # Trailing literal text after the last match.
        parts.append(text[cursor:])
    return PatternMessage(parts, declarations)


def mf2_entity_changed(entity):
    """Rewrite ``entity.string`` as the MF2 serialization of ``entity.value``.

    Returns ``False`` and leaves the entity untouched when the stored string
    already equals the MF2 serialization. Otherwise the string is replaced,
    every ``"placeholders"`` item is dropped from ``entity.meta``, and
    ``True`` is returned.
    """
    current = entity.string
    serialized = mf2_serialize_message(message_from_json(entity.value))
    if serialized != current:
        entity.string = serialized
        entity.meta = [item for item in entity.meta if item[0] != "placeholders"]
        return True
    return False


def mf2_translation_changed(translation):
    """Rewrite ``translation.string`` from WebExtension syntax to MF2 syntax.

    Returns ``True`` when the string was replaced, and ``False`` when the MF2
    serialization equals what is already stored.
    """
    current = translation.string
    serialized = mf2_serialize_message(webext_translation_parse(translation))
    if serialized != current:
        translation.string = serialized
        return True
    return False


def webext_as_mf2(apps, schema_editor):
    """Forward migration: store webext entities and translations as MF2.

    Entities and then translations belonging to resources with format
    ``"webext"`` are converted in memory; only the rows that actually changed
    are written back with ``bulk_update``. The value returned by each
    ``bulk_update`` call is printed as progress output.
    """
    Entity = apps.get_model("base", "Entity")
    webext_entities = Entity.objects.filter(resource__format="webext")
    changed_entities = list(filter(mf2_entity_changed, webext_entities))
    updated = Entity.objects.bulk_update(
        changed_entities, ["meta", "string"], batch_size=10_000
    )
    print(f" ({updated} entities)", end="", flush=True)

    Translation = apps.get_model("base", "Translation")
    # The entity is fetched alongside each translation because the
    # conversion reads `translation.entity.value`.
    webext_translations = Translation.objects.filter(
        entity__resource__format="webext"
    ).select_related("entity")
    changed_translations = list(filter(mf2_translation_changed, webext_translations))
    updated = Translation.objects.bulk_update(
        changed_translations, ["string"], batch_size=10_000
    )
    print(f" ({updated} translations)", end="", flush=True)


def webext_string_changed(obj, with_placeholders: bool):
    """Rewrite ``obj.string`` from MF2 syntax back to WebExtension syntax.

    Returns ``False`` without touching ``obj`` when the WebExtension
    serialization equals the stored string. Otherwise the string is replaced
    and ``True`` is returned; when ``with_placeholders`` is set, a
    ``["placeholders", <JSON>]`` item is also appended to ``obj.meta``.
    """
    stored = obj.string
    webext_source, placeholders = webext_serialize_message(mf2_parse_message(stored))
    if webext_source == stored:
        return False

    obj.string = webext_source
    if with_placeholders:
        placeholders_meta = ["placeholders", dumps(placeholders)]
        obj.meta.append(placeholders_meta)
    return True


def mf2_as_webext(apps, schema_editor):
    """Reverse migration: store webext entities and translations in WebExtension syntax.

    Entities get their ``"placeholders"`` meta item appended again;
    translations only have their string rewritten. Only changed rows are
    written back, and the value returned by each ``bulk_update`` call is
    printed as progress output.
    """
    Entity = apps.get_model("base", "Entity")
    changed_entities = []
    for entity in Entity.objects.filter(resource__format="webext"):
        if webext_string_changed(entity, True):
            changed_entities.append(entity)
    updated = Entity.objects.bulk_update(
        changed_entities, ["meta", "string"], batch_size=10_000
    )
    print(f" ({updated} entities)", end="", flush=True)

    Translation = apps.get_model("base", "Translation")
    changed_translations = []
    for translation in Translation.objects.filter(entity__resource__format="webext"):
        if webext_string_changed(translation, False):
            changed_translations.append(translation)
    updated = Translation.objects.bulk_update(
        changed_translations, ["string"], batch_size=10_000
    )
    print(f" ({updated} translations)", end="", flush=True)


class Migration(migrations.Migration):
    """Data migration converting stored webext strings to MF2, reversibly."""

    dependencies = [
        ("base", "0100_android_as_mf2"),
    ]
    operations = [
        migrations.RunPython(webext_as_mf2, reverse_code=mf2_as_webext),
    ]
2 changes: 1 addition & 1 deletion pontoon/base/simple_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def get_simple_preview(format: str, string: str):
msg = mf2_parse_message(string)
return android_simple_preview(msg)

case Resource.Format.GETTEXT:
case Resource.Format.GETTEXT | Resource.Format.WEBEXT:
msg = mf2_parse_message(string)
msg = as_pattern_message(msg)
return serialize_message(None, msg)
Expand Down
8 changes: 8 additions & 0 deletions pontoon/checks/libraries/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from moz.l10n.formats.mf2 import mf2_parse_message, mf2_serialize_pattern
from moz.l10n.formats.webext import webext_serialize_message
from moz.l10n.model import CatchallKey, Pattern, PatternMessage, SelectMessage

from pontoon.base.models import Entity, Resource
Expand Down Expand Up @@ -107,6 +108,13 @@ def run_checks(
(as_gettext(src_msg.pattern), as_gettext(tgt_msg.pattern))
)

case Resource.Format.WEBEXT:
src_msg = mf2_parse_message(entity.string)
tgt_msg = mf2_parse_message(string)
src_str, _ = webext_serialize_message(src_msg)
tgt_str, _ = webext_serialize_message(tgt_msg)
tt_patterns.append((src_str, tgt_str))

case _:
tt_patterns.append((entity.string, string))
tt_warnings = {}
Expand Down
37 changes: 37 additions & 0 deletions pontoon/checks/libraries/custom.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from html import escape
from re import fullmatch
from typing import Iterable, Iterator

from fluent.syntax import FluentParser, ast
from fluent.syntax.visitor import Visitor
from moz.l10n.formats.android import android_parse_message
from moz.l10n.formats.mf2 import mf2_parse_message
from moz.l10n.formats.webext import webext_parse_message, webext_serialize_message
from moz.l10n.model import (
Expression,
Markup,
Expand Down Expand Up @@ -161,6 +163,41 @@ def run_custom_checks(entity: Entity, string: str) -> dict[str, list[str]]:
if visitor.is_empty:
warnings.append("Empty translation")

case Resource.Format.WEBEXT:
try:
msg = mf2_parse_message(string)
except ValueError as e:
msg = None
errors.append(f"Parse error: {e}")
if isinstance(msg, PatternMessage):
try:
orig_msg = mf2_parse_message(entity.string)
_, placeholders = webext_serialize_message(orig_msg)
except ValueError:
placeholders = None

# The default moz.l10n serialization would escape $ in literal content,
# which we don't want here -- instead looking for typos in placeholders.
webext_src = ""
for part in msg.pattern:
if isinstance(part, str):
webext_src += part
else:
part_source = part.attributes.get("source", None)
if isinstance(part_source, str):
webext_src += part_source
else:
errors.append(f"Unsupported placeholder: {part}")
try:
webext_parse_message(webext_src, placeholders)
except Exception as e:
bad_ph = fullmatch(r"Missing placeholders entry for (\w+)", str(e))
errors.append(
f"Placeholder ${bad_ph.group(1).upper()}$ not found in reference"
if bad_ph
else f"Parse error: {e}"
)

checks: dict[str, list[str]] = {}
if errors:
checks["pErrors"] = errors
Expand Down
68 changes: 68 additions & 0 deletions pontoon/checks/tests/test_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def mock_entity(
ext = "ftl"
case "gettext":
ext = "po"
case "webext":
ext = "json"
case _:
ext = format
entity = MagicMock()
Expand Down Expand Up @@ -258,3 +260,69 @@ def test_android_bad_html():
"pErrors": ["Placeholder <a> not found in reference"],
"pndbWarnings": ["Placeholder <b> not found in translation"],
}


def test_webext_literal_index_placeholder_as_placeholder():
    """An indexed placeholder kept as an MF2 expression produces no checks."""
    source = "Source string with a {$arg1 @source=|$1|}"
    target = "Translation with a {$arg1 @source=|$1|}"
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == {}


def test_webext_literal_index_placeholder_as_literal():
    """An indexed placeholder typed as literal `$1` text produces no checks."""
    source = "Source string with a {$arg1 @source=|$1|}"
    target = "Translation with a $1"
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == {}


def test_webext_literal_named_placeholder_as_placeholder():
    """A declared named placeholder kept as an MF2 expression produces no checks."""
    source = (
        ".local $FOO = {$arg1 @source=|$1|}\n"
        "{{Source string with a {$FOO @source=|$FOO$|}}}"
    )
    target = (
        ".local $FOO = {$arg1 @source=|$1|}\n"
        "{{Translation with a {$FOO @source=|$FOO$|}}}"
    )
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == {}


def test_webext_literal_named_placeholder_as_literal():
    """A declared named placeholder typed as literal `$FOO$` produces no checks."""
    source = (
        ".local $FOO = {$arg1 @source=|$1|}\n"
        "{{Source string with a {$FOO @source=|$FOO$|}}}"
    )
    target = "Translation with a $FOO$"
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == {}


def test_webext_extra_index_placeholder():
original = "Source string"
translation = "Translation with a $1"
entity = mock_entity("webext", string=original)
# This should probably also be caught
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason it is not?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have no strings using indexed rather than named variables in production, and this will get refactored with the upcoming wider changes to validation/linting, so adding the complexity needed for this isn't worthwhile at the moment.

assert run_custom_checks(entity, translation) == {}


def test_webext_extra_named_placeholder_as_literal():
    """A literal `$FOO$` absent from the source is reported as an error."""
    source = "Source string"
    target = "Translation with a $FOO$"
    expected = {"pErrors": ["Placeholder $FOO$ not found in reference"]}
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == expected


def test_webext_extra_named_placeholder_as_placeholder():
    """An MF2 `$FOO` placeholder absent from the source is reported as an error."""
    source = "Source string"
    target = (
        ".local $FOO = {$arg1 @source=|$1|}\n"
        "{{Translation with a {$FOO @source=|$FOO$|}}}"
    )
    expected = {"pErrors": ["Placeholder $FOO$ not found in reference"]}
    checks = run_custom_checks(mock_entity("webext", string=source), target)
    assert checks == expected
12 changes: 10 additions & 2 deletions pontoon/pretranslation/pretranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ def get_pretranslation(
set_accesskey(entry, key, prop)
pt_res = FluentSerializer().serialize_entry(fluent_astify_entry(entry))
else:
if entity.resource.format in {Resource.Format.ANDROID, Resource.Format.GETTEXT}:
if entity.resource.format in {
Resource.Format.ANDROID,
Resource.Format.GETTEXT,
Resource.Format.WEBEXT,
}:
format = Format.mf2
msg = parse_message(format, entity.string)
else:
Expand All @@ -82,7 +86,11 @@ def __init__(self, entity: Entity, locale: Locale, preserve_placeables: bool):
match entity.resource.format:
case Resource.Format.FLUENT:
self.format = Format.fluent
case Resource.Format.ANDROID | Resource.Format.GETTEXT:
case (
Resource.Format.ANDROID
| Resource.Format.GETTEXT
| Resource.Format.WEBEXT
):
self.format = Format.mf2
case _:
self.format = None
Expand Down
37 changes: 1 addition & 36 deletions pontoon/sync/core/translations_to_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@
from moz.l10n.model import (
CatchallKey,
Entry,
Expression,
Id,
Metadata,
PatternMessage,
Resource,
Section,
SelectMessage,
VariableRef,
)
from moz.l10n.paths import L10nConfigPaths, L10nDiscoverPaths
from moz.l10n.resource import parse_resource, serialize_resource
Expand Down Expand Up @@ -353,7 +351,7 @@ def set_translation(
return False

match format:
case Format.android | Format.gettext:
case Format.android | Format.gettext | Format.webext:
msg = parse_message(Format.mf2, tx.string)
if isinstance(entry.value, SelectMessage):
entry.value.variants = (
Expand All @@ -373,39 +371,6 @@ def set_translation(
elif fuzzy_flag in entry.meta:
entry.meta = [m for m in entry.meta if m != fuzzy_flag]

case Format.webext if (
isinstance(entry.value, PatternMessage) and entry.value.declarations
):
# With a message value, placeholders in string parts would have their
# $ characters doubled to escape them.
entry.value.pattern = []
pos = 0
for m in webext_placeholder.finditer(tx.string):
start = m.start()
if start > pos:
entry.value.pattern.append(tx.string[pos:start])
if m[1]:
ph_name = m[1].replace("@", "_")
if ph_name[0].isdigit():
ph_name = f"_{ph_name}"
ph_name = next(
(
name
for name in entry.value.declarations
if name.lower() == ph_name.lower()
),
ph_name,
)
pass
else:
ph_name = m[0]
entry.value.pattern.append(
Expression(VariableRef(ph_name), attributes={"source": m[0]})
)
pos = m.end()
if pos < len(tx.string):
entry.value.pattern.append(tx.string[pos:])

case _:
entry.value = tx.string

Expand Down
Loading