Skip to content

Commit f82c3c9

Browse files
authored
Disable localisation when SOURCE_DATE_EPOCH is set (#10949)
This commit disables Sphinx's localisation features when reproducible builds are requested, as determined by a non-empty SOURCE_DATE_EPOCH_ environment variable.

The `Reproducible Builds`_ project aims to provide confidence to consumers of packaged software that the artefacts they're downloading and installing have not been altered by the environment they were built in, and can be replicated at a later date if required.

Builds of localised documentation using Sphinx currently account for a large category of reproducible build testing failures, because the builders intentionally use varying environment locales at build-time. This can affect the contents of the ``objects.inv`` file.

During investigation, it turned out that many ``gettext``-localised values (particularly in Python modules under ``sphinx.domains``) were being translated at module-load-time and would not subsequently be re-localised. This creates two unusual effects:

1. Attempting to write a test case to build the same application in two different languages was not initially possible, as the first-loaded translation catalogue (as found in the ``sphinx.locale.translators`` global variable) would remain in use for subsequent application builds under different locales.
2. Localisation of strings could vary depending on whether the relevant modules were loaded before or after the resource catalogues were populated.

We fix this by performing all translations lazily so that module imports can occur in any order and localisation of inventory entries should occur only when translations of those items are requested.

Localisation can then be disabled by configuring the ``gettext`` language to the ISO-639-3 'undetermined' code (``'und'``), as this should not have an associated translation catalogue. We also want to prevent ``gettext`` from attempting to determine the host's locale from environment variables (including ``LANGUAGE``).

.. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/
.. _Reproducible Builds: https://www.reproducible-builds.org/
1 parent 4659fc2 commit f82c3c9

File tree

13 files changed

+128
-20
lines changed

13 files changed

+128
-20
lines changed

CHANGES

+2
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ Bugs fixed
8383
* #11192: Restore correct parallel search index building.
8484
Patch by Jeremy Maitin-Shepard
8585
* Use the new Transifex ``tx`` client
86+
* #9778: Disable localisation when the ``SOURCE_DATE_EPOCH`` environment
87+
variable is set, to assist with 'reproducible builds'. Patch by James Addison
8688

8789
Testing
8890
--------

sphinx/builders/html/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def prepare_writing(self, docnames: set[str]) -> None:
502502
# typically doesn't include the time of day
503503
lufmt = self.config.html_last_updated_fmt
504504
if lufmt is not None:
505-
self.last_updated = format_date(lufmt or _('%b %d, %Y'),
505+
self.last_updated = format_date(lufmt or str(_('%b %d, %Y')),
506506
language=self.config.language)
507507
else:
508508
self.last_updated = None

sphinx/builders/latex/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def init_context(self) -> None:
179179
if self.config.today:
180180
self.context['date'] = self.config.today
181181
else:
182-
self.context['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'),
182+
self.context['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
183183
language=self.config.language)
184184

185185
if self.config.latex_logo:

sphinx/domains/std.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,9 @@ def add_target_and_index(self, firstname: str, sig: str, signode: desc_signature
242242

243243
# create an index entry
244244
if currprogram:
245-
descr = _('%s command line option') % currprogram
245+
descr = str(_('%s command line option') % currprogram)
246246
else:
247-
descr = _('command line option')
247+
descr = str(_('command line option'))
248248
for option in signode.get('allnames', []):
249249
entry = '; '.join([descr, option])
250250
self.indexnode['entries'].append(('pair', entry, signode['ids'][0], '', None))

sphinx/locale/__init__.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import locale
44
from gettext import NullTranslations, translation
5-
from os import path
5+
from os import getenv, path
66
from typing import Any, Callable, Dict, List, Optional, Tuple
77

88

@@ -111,9 +111,21 @@ def init(
111111
# the None entry is the system's default locale path
112112
has_translation = True
113113

114-
if language and '_' in language:
114+
if getenv('SOURCE_DATE_EPOCH') is not None:
115+
# Disable localization during reproducible source builds
116+
# See https://reproducible-builds.org/docs/source-date-epoch/
117+
#
118+
# Note: Providing an empty/none value to gettext.translation causes
119+
# it to consult various language-related environment variables to find
120+
# locale(s). We don't want that during a reproducible build; we want
121+
# to run through the same code path, but to return NullTranslations.
122+
#
123+
# To achieve that, specify the ISO-639-3 'undetermined' language code,
124+
# which should not match any translation catalogs.
125+
languages: Optional[List[str]] = ['und']
126+
elif language and '_' in language:
115127
# for language having country code (like "de_AT")
116-
languages: Optional[List[str]] = [language, language.split('_')[0]]
128+
languages = [language, language.split('_')[0]]
117129
elif language:
118130
languages = [language]
119131
else:
@@ -167,7 +179,7 @@ def is_translator_registered(catalog: str = 'sphinx', namespace: str = 'general'
167179
return (namespace, catalog) in translators
168180

169181

170-
def _lazy_translate(catalog: str, namespace: str, message: str) -> str:
182+
def _lazy_translate(catalog: str, namespace: str, message: str, *args: Any) -> str:
171183
"""Used instead of _ when creating TranslationProxy, because _ is
172184
not bound yet at that time.
173185
"""
@@ -200,13 +212,8 @@ def setup(app):
200212
201213
.. versionadded:: 1.8
202214
"""
203-
def gettext(message: str) -> str:
204-
if not is_translator_registered(catalog, namespace):
205-
# not initialized yet
206-
return _TranslationProxy(_lazy_translate, catalog, namespace, message) # type: ignore[return-value] # noqa: E501
207-
else:
208-
translator = get_translator(catalog, namespace)
209-
return translator.gettext(message)
215+
def gettext(message: str, *args: Any) -> str:
216+
return _TranslationProxy(_lazy_translate, catalog, namespace, message, *args) # type: ignore[return-value] # NOQA
210217

211218
return gettext
212219

sphinx/transforms/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def apply(self, **kwargs: Any) -> None:
106106
text = self.config[refname]
107107
if refname == 'today' and not text:
108108
# special handling: can also specify a strftime format
109-
text = format_date(self.config.today_fmt or _('%b %d, %Y'),
109+
text = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
110110
language=self.config.language)
111111
ref.replace_self(nodes.Text(text))
112112

sphinx/writers/manpage.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(self, document: nodes.document, builder: Builder) -> None:
9393
if self.config.today:
9494
self._docinfo['date'] = self.config.today
9595
else:
96-
self._docinfo['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'),
96+
self._docinfo['date'] = format_date(self.config.today_fmt or str(_('%b %d, %Y')),
9797
language=self.config.language)
9898
self._docinfo['copyright'] = self.config.copyright
9999
self._docinfo['version'] = self.config.version

sphinx/writers/texinfo.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def init_settings(self) -> None:
220220
'project': self.escape(self.config.project),
221221
'copyright': self.escape(self.config.copyright),
222222
'date': self.escape(self.config.today or
223-
format_date(self.config.today_fmt or _('%b %d, %Y'),
223+
format_date(self.config.today_fmt or str(_('%b %d, %Y')),
224224
language=self.config.language)),
225225
})
226226
# title

sphinx/writers/text.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -791,8 +791,8 @@ def visit_acks(self, node: Element) -> None:
791791

792792
def visit_image(self, node: Element) -> None:
793793
if 'alt' in node.attributes:
794-
self.add_text(_('[image: %s]') % node['alt'])
795-
self.add_text(_('[image]'))
794+
self.add_text(str(_('[image: %s]') % node['alt']))
795+
self.add_text(str(_('[image]')))
796796
raise nodes.SkipNode
797797

798798
def visit_transition(self, node: Element) -> None:
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
msgid "Hello world"
2+
msgstr "Tere maailm"

tests/test_locale.py

+41
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,44 @@ def test_add_message_catalog(app, rootdir):
5555
assert _('Hello world') == 'HELLO WORLD'
5656
assert _('Hello sphinx') == 'Hello sphinx'
5757
assert _('Hello reST') == 'Hello reST'
58+
59+
60+
def _empty_language_translation(rootdir):
61+
locale_dirs, catalog = [rootdir / 'test-locale' / 'locale1'], 'myext'
62+
locale.translators.clear()
63+
locale.init(locale_dirs, language=None, catalog=catalog)
64+
return locale.get_translation(catalog)
65+
66+
67+
def test_init_environment_language(rootdir, monkeypatch):
68+
with monkeypatch.context() as m:
69+
m.setenv("LANGUAGE", "en_US:en")
70+
_ = _empty_language_translation(rootdir)
71+
assert _('Hello world') == 'HELLO WORLD'
72+
73+
with monkeypatch.context() as m:
74+
m.setenv("LANGUAGE", "et_EE:et")
75+
_ = _empty_language_translation(rootdir)
76+
assert _('Hello world') == 'Tere maailm'
77+
78+
79+
def test_init_reproducible_build_language(rootdir, monkeypatch):
80+
with monkeypatch.context() as m:
81+
m.setenv("SOURCE_DATE_EPOCH", "0")
82+
m.setenv("LANGUAGE", "en_US:en")
83+
_ = _empty_language_translation(rootdir)
84+
sde_en_translation = str(_('Hello world')) # str cast to evaluate lazy method
85+
86+
with monkeypatch.context() as m:
87+
m.setenv("SOURCE_DATE_EPOCH", "0")
88+
m.setenv("LANGUAGE", "et_EE:et")
89+
_ = _empty_language_translation(rootdir)
90+
sde_et_translation = str(_('Hello world')) # str cast to evaluate lazy method
91+
92+
with monkeypatch.context() as m:
93+
m.setenv("LANGUAGE", "et_EE:et")
94+
_ = _empty_language_translation(rootdir)
95+
loc_et_translation = str(_('Hello world')) # str cast to evaluate lazy method
96+
97+
assert sde_en_translation == sde_et_translation
98+
assert sde_et_translation != loc_et_translation

tests/test_util_inventory.py

+56
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import zlib
55
from io import BytesIO
66

7+
from sphinx.testing.util import SphinxTestApp
78
from sphinx.util.inventory import InventoryFile
89

910
inventory_v1 = b'''\
@@ -83,3 +84,58 @@ def test_read_inventory_v2_not_having_version():
8384
invdata = InventoryFile.load(f, '/util', posixpath.join)
8485
assert invdata['py:module']['module1'] == \
8586
('foo', '', '/util/foo.html#module-module1', 'Long Module desc')
87+
88+
89+
def _write_appconfig(dir, language, prefix=None):
90+
prefix = prefix or language
91+
(dir / prefix).makedirs()
92+
(dir / prefix / 'conf.py').write_text(f'language = "{language}"', encoding='utf8')
93+
(dir / prefix / 'index.rst').write_text('index.rst', encoding='utf8')
94+
assert sorted((dir / prefix).listdir()) == ['conf.py', 'index.rst']
95+
assert (dir / prefix / 'index.rst').exists()
96+
return (dir / prefix)
97+
98+
99+
def _build_inventory(srcdir):
100+
app = SphinxTestApp(srcdir=srcdir)
101+
app.build()
102+
app.cleanup()
103+
return (app.outdir / 'objects.inv')
104+
105+
106+
def test_inventory_localization(tempdir):
107+
# Build an app using Estonian (EE) locale
108+
srcdir_et = _write_appconfig(tempdir, "et")
109+
inventory_et = _build_inventory(srcdir_et)
110+
111+
# Build the same app using English (US) locale
112+
srcdir_en = _write_appconfig(tempdir, "en")
113+
inventory_en = _build_inventory(srcdir_en)
114+
115+
# Ensure that the inventory contents differ
116+
assert inventory_et.read_bytes() != inventory_en.read_bytes()
117+
118+
119+
def test_inventory_reproducible(tempdir, monkeypatch):
120+
with monkeypatch.context() as m:
121+
# Configure reproducible builds
122+
# See: https://reproducible-builds.org/docs/source-date-epoch/
123+
m.setenv("SOURCE_DATE_EPOCH", "0")
124+
125+
# Build an app using Estonian (EE) locale
126+
srcdir_et = _write_appconfig(tempdir, "et")
127+
reproducible_inventory_et = _build_inventory(srcdir_et)
128+
129+
# Build the same app using English (US) locale
130+
srcdir_en = _write_appconfig(tempdir, "en")
131+
reproducible_inventory_en = _build_inventory(srcdir_en)
132+
133+
# Also build the app using Estonian (EE) locale without build reproducibility enabled
134+
srcdir_et = _write_appconfig(tempdir, "et", prefix="localized")
135+
localized_inventory_et = _build_inventory(srcdir_et)
136+
137+
# Ensure that the reproducible inventory contents are identical
138+
assert reproducible_inventory_et.read_bytes() == reproducible_inventory_en.read_bytes()
139+
140+
# Ensure that inventory contents are different between a localized and non-localized build
141+
assert reproducible_inventory_et.read_bytes() != localized_inventory_et.read_bytes()

0 commit comments

Comments
 (0)