152 lines
4.8 KiB
Python
152 lines
4.8 KiB
Python
|
#!/usr/bin/env python3
|
||
|
from collections import defaultdict
|
||
|
import codecs
|
||
|
import argparse
|
||
|
import os
|
||
|
import polib
|
||
|
import struct
|
||
|
import sys
|
||
|
|
||
|
import lib.charset as cs
|
||
|
from lib.io import info, warn, fatal, load_map
|
||
|
|
||
|
FW_MAGIC = 0x4bb45aa5
|
||
|
|
||
|
|
||
|
def translation_ref(translation):
    """Return a short human-readable reference for a PO entry.

    Prefers the ``MSG_*`` firmware symbol name embedded at the start of
    the entry's extracted comment; falls back to ``repr`` of the msgid
    when no such marker is present.
    """
    comment = translation.comment
    if not (comment and comment.startswith('MSG_')):
        return repr(translation.msgid)
    # first whitespace-separated token is the symbol name
    return comment.split(' ', 1)[0]
|
||
|
|
||
|
|
||
|
def main():
    """Compile a PO translation file into the firmware's binary catalog.

    Reads the firmware symbol map and a PO file, matches translations to
    firmware message symbols, and writes a binary catalog consisting of a
    16-byte header, a 16-bit offset table (one slot per symbol id) and a
    NUL-separated string pool.  Repeated translated strings are coalesced
    to a single copy.  Returns 0 on success; exits via fatal() on error.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--warn-unused', action='store_true',
                    help='Warn about unused translations')
    ap.add_argument('--show-coalesced', action='store_true',
                    help='List coalesced translations')
    ap.add_argument('map', help='Firmware symbol map file')
    ap.add_argument('po', help='PO file')
    ap.add_argument('out', help='output')
    args = ap.parse_args()

    # check arguments
    for path in [args.map, args.po]:
        if not os.path.isfile(path):
            # fix: report the file that is actually missing (the original
            # always formatted args.po, even when the map file was at fault)
            fatal("{} does not exist or is not a regular file".format(path))

    # load the map file
    syms = load_map(args.map)
    fw_sig_data = None
    msgid_data = defaultdict(list)   # msgid -> list of symbol entries
    id_msgid = {}                    # symbol id -> msgid
    sym_cnt = 0                      # one past the highest symbol id seen
    for sym in syms:
        if sym['name'] == '_PRI_LANG_SIGNATURE':
            fw_sig_data = sym['data']
        else:
            # redo forward text transformation for transparent matching
            msgid = cs.source_to_unicode(codecs.decode(sym['data'], 'unicode_escape', 'strict'))
            msgid_data[msgid].append(sym)
            id_msgid[sym['id']] = msgid

            # update the max symbol count
            if sym_cnt <= sym['id']:
                sym_cnt = sym['id'] + 1

    if fw_sig_data is None:
        fatal('_PRI_LANG_SIGNATURE not found in map')

    # open translations
    po = polib.pofile(args.po)
    lang_code = po.metadata['Language']
    # the header stores the language as exactly two ASCII bytes
    if not lang_code.isascii() or len(lang_code) != 2:
        fatal(f'unsupported language code {lang_code}')

    # build a catalog of all translations
    trans_table = {}
    for translation in po:
        msgid = translation.msgid
        if msgid in msgid_data:
            trans_table[msgid] = (translation, msgid_data[msgid])
        elif args.warn_unused:
            err = "{}:{}".format(args.po, translation.linenum)
            err += ": unused translation "
            err += translation_ref(translation)
            warn(err)

    for msgid, syms in msgid_data.items():
        if msgid not in trans_table:
            # warn about missing translations
            warn("untranslated text: " + repr(msgid))

    # write the binary catalog
    with open(args.out, "w+b") as fd:
        # data begins after the 16-byte header and the 16-bit offset table
        fixed_offset = 16+2*sym_cnt
        written_locs = {}  # encoded string -> offset of first occurrence

        # compute final data tables
        offsets = b''
        strings = b'\0'  # slot 0 (fixed_offset) is the shared empty string
        for i in range(sym_cnt):
            msgid = id_msgid.get(i)
            translation = trans_table.get(msgid)
            if translation is None or len(translation[0].msgstr) == 0 or translation[0].msgstr == msgid:
                # first slot reserved for untranslated/identical entries
                offsets += struct.pack("<H", fixed_offset)
            else:
                string_bin = cs.unicode_to_source(translation[0].msgstr)

                # check for invalid characters
                invalid_char = cs.translation_check(string_bin)
                if invalid_char is not None:
                    line = translation[0].linenum
                    warn(f'{args.po}:{line} contains unhandled character ' + repr(invalid_char))

                string_bin = string_bin.encode('raw_unicode_escape', 'ignore')
                string_off = written_locs.get(string_bin)
                offset = fixed_offset + len(strings)
                if string_off is not None:
                    # coalesce repeated strings
                    if args.show_coalesced:
                        info(f'coalescing {offset:04x}:{string_off:04x} {string_bin}')
                    offset = string_off
                else:
                    # allocate a new string
                    written_locs[string_bin] = offset
                    strings += string_bin + b'\0'
                offsets += struct.pack("<H", offset)

        # header
        size = 16 + len(offsets) + len(strings)
        header = struct.pack(
            "<IHHHHI",
            FW_MAGIC,
            size,
            sym_cnt,
            0, # no checksum yet
            (ord(lang_code[0]) << 8) + ord(lang_code[1]),
            fw_sig_data)

        fd.write(header)
        fd.write(offsets)
        fd.write(strings)

        # calculate and update the checksum: 16-bit sum with even bytes
        # weighted high, odd bytes low (read the whole image once instead
        # of the original one-byte-at-a-time loop)
        fd.seek(0)
        data = fd.read(size)
        cksum = 0
        for i, byte in enumerate(data):
            cksum += byte << (0 if i % 2 else 8)
            cksum &= 0xffff
        # checksum field lives at offset 8 in the header (<IHH = 8 bytes)
        fd.seek(8)
        fd.write(struct.pack("<H", cksum))

    return 0
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # sys.exit instead of the builtin exit(): the latter is injected by the
    # site module and is not guaranteed to exist (e.g. under python -S)
    sys.exit(main())
|