Prusa-Firmware/lang/lang-extract.py

#!/usr/bin/env python3
import argparse
import bisect
import codecs
import polib
import regex
import sys
import lib.charset as cs

def line_warning(path, line, msg):
    """Print a warning for `path` at `line` to stderr as 'path:line: msg'."""
    report = f'{path}:{line}: {msg}'
    print(report, file=sys.stderr)

def line_error(path, line, msg):
    """Print an error for `path` at `line` to stderr as 'path:line: msg'."""
    report = '{}:{}: {}'.format(path, line, msg)
    print(report, file=sys.stderr)

def entry_warning_locs(entries):
    """Print the text and the source locations of each entry to stderr.

    `entries` is an iterable of (msgid, data) pairs; data['occurrences']
    holds the (path, line) locations to list.
    """
    for msgid, data in entries:
        print('   text: ' + repr(msgid), file=sys.stderr)
        locations = [loc[0] + ':' + str(loc[1]) for loc in data['occurrences']]
        print('     in: ' + ', '.join(locations), file=sys.stderr)

def entries_warning(entries, msg):
    """Print a 'warning: msg' header to stderr, then the affected entries."""
    header = 'warning: ' + msg
    print(header, file=sys.stderr)
    entry_warning_locs(entries)

def entry_warning(entry, msg):
    """Emit a warning about a single (msgid, data) catalog entry."""
    entries_warning((entry,), msg)


def newline_positions(source):
    """Return the offset at which each line of `source` ends.

    The result lists the index of every newline character in ascending
    order. When the text does not end with a newline, the index of its
    last character is appended so the final line is also terminated.
    """
    ends = []
    pos = source.find('\n')
    while pos >= 0:
        ends.append(pos)
        pos = source.find('\n', pos + 1)

    # terminate an unfinished last line (-1 stands for "nothing seen yet")
    final = len(source) - 1
    last_seen = ends[-1] if ends else -1
    if last_seen != final:
        ends.append(final)
    return ends

def index_to_line(index, lines):
    """Convert a character offset into a 1-based line number.

    `lines` is the sorted list of line-end offsets; the line number is the
    count of line ends strictly before `index`, plus one.
    """
    preceding = bisect.bisect_left(lines, index)
    return preceding + 1


def extract_file(path, catalog, warn_skipped=False):
    """Extract translatable strings from the source file `path` into `catalog`.

    Every `_i(...)`, `_I(...)` or `ISTR(...)` occurrence that is not on a
    commented or preprocessor line is parsed: the quoted chunks are unescaped
    and concatenated into the message text, and the optional `////` metadata
    is split into annotations and a free-form `//` comment.

    `catalog` maps the message text to a dict with the set-valued keys
    'occurrences', 'data', 'cat_name', 'comments' and 'ref_type', and is
    updated in place. Texts annotated with IGNORE are skipped (reported only
    when `warn_skipped` is true); empty texts are skipped with a warning.
    """
    # read the whole file, making sure the handle is closed afterwards
    with open(path) as fd:
        source = fd.read()
    newlines = newline_positions(source)

    # match internationalized quoted strings
    RE_START = r'\b (_[iI]|ISTR) \s* \('
    RE_META = r'//// \s* ([^\n]*)$'

    RE_I = fr'''
        (?<!(?:/[/*]|^\s*\#) [^\n]*)  # not on a comment or preprocessor
        {RE_START}                    # $1 ref type _i( or ISTR(
        (?:
          \s*
          ("(?:[^"\\]|\\.)*")         # $2 quoted string (chunk)
          (?:\s* {RE_META} )?         # $3 inline metadata
        )+
        \s* \)                        # )
        (?:
          (?:[^\n] (?!{RE_START}))*   # anything except another entry
          {RE_META}                   # $5 final metadata
        )?
    '''

    r_ref_type = 1
    r_quoted_chunk = 2
    r_inline_data = 3
    r_eol_data = 5

    for m in regex.finditer(RE_I, source, regex.M|regex.X):
        # parse the text
        line = index_to_line(m.start(0), newlines)

        chunks = []
        for block in m.captures(r_quoted_chunk):
            # remove quotes and unescape
            block = block[1:-1]
            block = codecs.decode(block, 'unicode-escape', 'strict')
            chunks.append(cs.source_to_unicode(block))
        text = ''.join(chunks)

        # check if text is non-empty
        if len(text) == 0:
            line_warning(path, line, 'empty source text, ignored')
            continue

        # split each metadata string into annotations and translator comment
        data = set()
        comments = set()
        for n in [r_inline_data, r_eol_data]:
            meta = m.group(n)
            if meta is not None:
                meta_parts = meta.split('//', 1)
                data.add(meta_parts[0].strip())
                if len(meta_parts) > 1:
                    comments.add(meta_parts[1].strip())

        # check if this message should be ignored
        ignored = any(regex.search(r'\bIGNORE\b', meta) is not None
                      for meta in data)
        if ignored:
            if warn_skipped:
                line_warning(path, line, 'skipping explicitly ignored translation')
            continue

        # extra message catalog name (if any)
        cat_name = set()
        for meta in data:
            sm = regex.search(r'\b(MSG_\w+)', meta)
            if sm is not None:
                cat_name.add(sm.group(1))

        # reference type annotation
        ref_type = 'def' if m.group(r_ref_type) == 'ISTR' else 'ref'
        if ref_type == 'def':
            # ISTR definition: lookup nearby assignment, from the start of the
            # current line up to the ISTR token. The first line has no
            # preceding newline: indexing newlines[-1] there would wrap around
            # to the end of the file and yield an empty slice.
            line_start = newlines[line-2] + 1 if line > 1 else 0
            lineup_def = source[line_start:m.end(r_ref_type)]
            sm = regex.search(r'\b PROGMEM_(\S+) \s*=\s* ISTR $', lineup_def, regex.M|regex.X)
            if sm is None:
                line_warning(path, line, 'ISTR not used in an assignment')
            elif sm.group(1) != 'I1':
                line_warning(path, line, 'ISTR not used with PROGMEM_I1')

        # append the translation to the catalog
        pos = [(path, line)]
        entry = catalog.get(text)
        if entry is None:
            catalog[text] = {'occurrences': set(pos),
                             'data': data,
                             'cat_name': cat_name,
                             'comments': comments,
                             'ref_type': set([ref_type])}
        else:
            entry['occurrences'] = entry['occurrences'].union(pos)
            entry['data'] = entry['data'].union(data)
            entry['cat_name'] = entry['cat_name'].union(cat_name)
            entry['comments'] = entry['comments'].union(comments)
            entry['ref_type'].add(ref_type)


def extract_refs(path, catalog):
    """Record `_T(NAME)` / `_O(NAME)` catalog references found in `path`.

    For each reference that is not on a commented or preprocessor line, every
    entry of `catalog` whose 'cat_name' set contains NAME gets the location
    added to 'occurrences' and 'ref' added to 'ref_type'. A message is printed
    when NAME matches no entry, or when none of the matching entries carries
    a 'def' reference type.
    """
    # read the whole file, making sure the handle is closed afterwards
    with open(path) as fd:
        source = fd.read()
    newlines = newline_positions(source)

    # match message catalog references to add backrefs
    RE_CAT = r'''
        (?<!(?:/[/*]|^\s*\#) [^\n]*)         # not on a comment or preprocessor
        \b (?:_[TO]) \s* \( \s* (\w+) \s* \) # $1 catalog name
    '''

    for m in regex.finditer(RE_CAT, source, regex.M|regex.X):
        line = index_to_line(m.start(0), newlines)
        pos = [(path, line)]
        cat_name = m.group(1)
        found = False
        defined = False
        for entry in catalog.values():
            if cat_name in entry['cat_name']:
                entry['occurrences'] = entry['occurrences'].union(pos)
                entry['ref_type'].add('ref')
                found = True
                if 'def' in entry['ref_type']:
                    defined = True
        if not found:
            line_error(path, line, f'{cat_name} not found')
        elif not defined:
            line_error(path, line, f'{cat_name} referenced but never defined')


def check_entries(catalog, warn_missing, warn_same_line):
    """Run consistency checks over the extracted catalog, printing warnings.

    Checks performed per entry: presence of a MSG identifier (only when
    `warn_missing` is true), definitions that are never referenced, custom
    characters rejected by the charset helper, malformed annotations and
    repeated `key=value` annotations. Across entries: the same MSG
    identifier attached to more than one text and, when `warn_same_line` is
    true, more than one translation at the same (path, line) location.
    """
    cat_entries = {}

    for entry in catalog.items():
        msgid, data = entry

        # ensure we have at least one name
        if len(data['cat_name']) == 0 and warn_missing:
            entry_warning(entry, 'missing MSG identifier')

        # ensure references are being defined
        if data['ref_type'] == set(['def']):
            if len(data['cat_name']) == 0:
                if warn_missing:
                    entry_warning(entry, 'entry defined, but never used')
            else:
                id_name = next(iter(data['cat_name']))
                entry_warning(entry, f'{id_name} defined, but never used')

        # check custom characters
        invalid_char = cs.source_check(msgid)
        if invalid_char is not None:
            entry_warning(entry, 'source contains unhandled custom character ' + repr(invalid_char))

        tokens = []
        for meta in data['data']:
            tokens.extend(regex.split(r'\s+', meta))
        seen_keys = set()
        for token in tokens:
            if len(token) == 0:
                continue

            # check metadata syntax
            if regex.match(r'[cr]=\d+$', token) is None and \
               regex.match(r'MSG_[A-Z_0-9]+$', token) is None:
                entry_warning(entry, 'bogus annotation: ' + repr(token))

            # check for repeated keys; the repetition must be inside the group
            # so that the whole key is captured and not just its last character
            key = regex.match(r'([^=]+)=', token)
            if key is not None:
                key_name = key.group(1)
                if key_name in seen_keys:
                    entry_warning(entry, 'repeated annotation: ' + repr(token))
                else:
                    seen_keys.add(key_name)

            # build the inverse catalog map; an entry can carry the same id in
            # several metadata strings, so record it only once per id
            if token.startswith('MSG_'):
                holders = cat_entries.setdefault(token, [])
                if not holders or holders[-1] is not entry:
                    holders.append(entry)

    # ensure the same id is not used in multiple entries
    for cat_name, entries in cat_entries.items():
        if len(entries) > 1:
            entries_warning(entries, f'{cat_name} used in multiple translations')

    if warn_same_line:
        # build the inverse location map
        entry_locs = {}
        for entry in catalog.items():
            msgid, data = entry
            for loc in data['occurrences']:
                entry_locs.setdefault(loc, []).append(entry)

        # check for multiple translations on the same location
        for loc, entries in entry_locs.items():
            if len(entries) > 1:
                line_warning(loc[0], loc[1], 'line contains multiple translations')


def main():
    """Command-line entry point: extract strings and write the PO template.

    Parses the arguments, extracts the translatable strings from every input
    file, resolves catalog back-references in a second pass, runs the catalog
    checks and saves the resulting template to the `-o` path. Returns 0.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-o', dest='pot', required=True, help='PO template output file')
    ap.add_argument('--no-missing', action='store_true',
                    help='Do not warn about missing MSG entries')
    ap.add_argument('--warn-same-line', action='store_true',
                    help='Warn about multiple translations on the same line')
    ap.add_argument('--warn-skipped', action='store_true',
                    help='Warn about explicitly ignored translations')
    ap.add_argument('-s', '--sort', action='store_true',
                    help='Sort output catalog')
    ap.add_argument('file', nargs='+', help='Input files')
    args = ap.parse_args()

    # extract strings
    catalog = {}
    for path in args.file:
        extract_file(path, catalog, warn_skipped=args.warn_skipped)

    # process backreferences in a 2nd pass
    for path in args.file:
        extract_refs(path, catalog)

    # check the catalog entries
    check_entries(catalog, warn_missing=not args.no_missing, warn_same_line=args.warn_same_line)

    # write the output PO template
    po = polib.POFile()
    po.metadata = {
        'Language': 'en',
        'MIME-Version': '1.0',
        'Content-Type': 'text/plain; charset=utf-8',
        'Content-Transfer-Encoding': '8bit'}

    messages = catalog.keys()
    if args.sort:
        messages = sorted(messages)
    for msgid in messages:
        data = catalog[msgid]
        meta = data['data']
        notes = data['comments']
        occurrences = data['occurrences']
        if args.sort:
            # all three are sets: their iteration order is not reproducible
            # between runs, so order them as well to keep the output stable
            meta = sorted(meta)
            notes = sorted(notes)
            occurrences = sorted(occurrences)
        comment = ', '.join(meta)
        if notes:
            comment += '\n' + '\n'.join(notes)
        po.append(
            polib.POEntry(
                msgid=msgid,
                comment=comment,
                occurrences=occurrences))

    po.save(args.pot)
    return 0

if __name__ == '__main__':
    # sys.exit is always available; the bare exit() builtin is injected by
    # the site module and is intended for interactive sessions only
    sys.exit(main())
New PO-based language translation support (#3471) * lang: Add a PO language extractor with FW metadata support Implement a straight-to-po language extractor which supports our custom language requirements: - _i/_I/ISTR for text string definitions - _T for catalog translations (with back-reference support) - //// EOL comments with: - MSG_ catalog entry name identifiers - c=X r=Y annotations for screen dimensioning checks - Crude support for commented lines All source locations are correctly referenced in the PO, with the metadata collected in the comment for further processing. Several checks are implemented already during extraction: - Correct catalog name assignment (no duplicates) - Metadata checks for each entry Further checks will be implemented by directly checking the translated PO file. Requires "polib" and "regex" python modules. * lang: Adapt lang-check to work directly on PO/POT files * lang: Allow lang-extract to generate stable (pre-sorted) output directly * lang: Further extend lang-extract consistency/error checking - Do not parse inside preprocessor conditionals - Distinguish between references and definitions - Warn about missing references and definitions * lang: lang-extract: warn about incorrect PROGMEM assignments Check that ISTR is used along with PROGMEM_I1 in an attempt to spot useless translated catalogs. * lang: lang-extract: Improved handling of same-line translations Correctly reference metadata on same-line translations. * lang: lang-extract: Handle _O as a cat-ref https://github.com/prusa3d/Prusa-Firmware/pull/3434 * lang: lang-extract: Warn about unused catalog definitions * lang: lang-extract: Allow propagating translation comments via // The definition: code //// definition [// comment] will check [definition] as before, but blindly accumulate // comment. The comment is then re-appended back into the PO files for translators with the form: definition comment comment... 
* lang: Fix incorrect display definitions * lang: lang-extract: Check source encoding/charmap * lang: Translate the degree symbol * lang: Unbreak/cleanup DEBUG_SEC_LANG * lang: Improve meaning of comment * lang: Split charset conversions into an aux lib for future use * lang: Implement lang-map.py to extract the translation symbol map - Extracts the translatable symbol map for further use - Computes a stable "language signature" from the map itself - Optionally patches the binary to update the symbols * lang: Check for translation recoding problems * lang: Implement a transliteration map to post-process translations TRANS_CHARS is now used to replace unavailable symbols to the source encoding, only while producing the language catalog. * lang: Handle/check character replacements in lang-check Filter the translation through TRANS_CHARS, so that the preview and length check are performed correctly for expanding replacements such as 'ß' to 'ss'. * lang: Implement lang-build.py to generate the final language catalog * Cleanup .gitignore * lang: Drop txt language files * lang: Remove outdated translation scripts and obsolete docs * lang: Update build scripts for new infrastructure * lang: [no] Integrate accents from po/new/no.po We now support accents natively * lang: Remove redundant directory po/new/ * lang: Fix encoding of LCD characters in PO files * lang: [hr] Fix wrapping in MSG_CRASH_DET_ONLY_IN_NORMAL * lang: Sort and reformat PO files for further massaging * lang: Switch to developer (dot) comments for PO metadata * lang: Allow the IGNORE annotation to skip extraction * lang: Fix missing/broken language metadata in sources * lang: Add update-pot.sh and regenerate po/Firmware.pot * lang: Add update-po.sh and refresh all PO files * lang: Add summary documentation about the new translation workflow * Add more ignored files * CI: Add new required dependencies to travis * lang: lang-build: Improve warning message "referenced" was really meaning that data is being 
duplicated. * lang: Respect the language order as defined in config.sh This correctly splits normal and community-made entries during language selection. * lang: More typos in the documentation * lang: Check for the maximum size of each language Each table needs to fit within LANG_SIZE_RESERVED * lang: Properly align _SEC_LANG to page boundaries ... instead of relying on _SEC_LANG_TABLE to calculate the offset * lang: Build support for dual-language hex files Detect the printer type by checking the current variant type. On printers with no xflash (MK2), generate one hex file for each additional language file by patching the built-in secondary language table during the build process lang: Mention lang-patchsec.py * lang: Use color() instead of tput for clarity * lang: Allow disabling terminal colors with NO_COLOR/TERM=dumb * lang: Consistent use of redirection in config.sh * lang: Stricter variant-type check for xflash support * lang: Output size stats when building double-language hex files * lang: Respect NO_COLOR in lang-check.py * lang: Check for repeated/incorrect annotations Catch errors such as "c=1 c=2" * lang: Correct MSG_SLIGHT_SKEW/MSG_SEVERE_SKEW annotations * lang: [it] Improve MSG__SKEW translation lang: Use INTLHEX instead of OUTHEX_P/S for configuration We already have OUTHEX which is the compiled firmware. Use INTLHEX for the final internationalized firmware, which is less confusing. Also, assume it being a prefix for all generated hex files, which reduces the number of variables set. 
* lang: Move lang_map to lib.io for further use * lang: lang-check: Accept a firmware map file to suppress unused string warnings * lang: Use the map file to reduce useless warnings during fw-build * lang: lang-check: Also suppress unused empty annotations * lang: Fix MSG_MOVE_CARRIAGE_TO_THE_TOP_Z annotation Refresh pot file * lang: lang-check: Do not warn about same-word translations by default Do not warn when one-word translations such as "No" result in "No" also in other languages, since this is common in latin languages. Allow to re-enable the warning with --warn-same * lang: lang-build: Handle same-source/translation efficiently * lang: [it] Explicitly add On/Off/Reset/Wizard to suppress warnings Instead of displaying a warning, supress the warning and explicitly translate each entry using english (which is the common/acceptable word in these cases). * lang: [it] Suppress more warnings * lang: lang-check: Add intermediate "suggest" warning category Warnings in the "suggest" category as shown as [S] as based on pure speculation from the checking tool, such as the translation being significantly shorter than the original. As a result, they can be suppressed with --no-suggest * lang: Return translation status from lang-check - 0 if the translation only contains suggestions - 1 if the translation contains warnings or errors Check for the exit status in fw-build.sh, but do nothing at the moment except printing a non-fatal error. * lang: Remove "trim_trailing_whitespace=false" for po files PO files got cleaned up/rewritten. We can now ensure they stay consistent. * lang: [sv] Re-integrate changes from 70c73cb * lang: [no] Reintegrate changes from @pkg2000 2022-06-16 13:03:30 +00:00			`#!/usr/bin/env python3`
			`import argparse`
			`import bisect`
			`import codecs`
			`import polib`
			`import regex`
			`import sys`
			`import lib.charset as cs`

			`def line_warning(path, line, msg):`
			`print(f'{path}:{line}: {msg}', file=sys.stderr)`

			`def line_error(path, line, msg):`
			`print(f'{path}:{line}: {msg}', file=sys.stderr)`

			`def entry_warning_locs(entries):`
			`for msgid, data in entries:`
			`print(' text: ' + repr(msgid), file=sys.stderr)`
			`positions = ', '.join(map(lambda x: x[0] + ':' + str(x[1]), data['occurrences']))`
			`print(' in: ' + positions, file=sys.stderr)`

			`def entries_warning(entries, msg):`
			`print('warning: ' + msg, file=sys.stderr)`
			`entry_warning_locs(entries)`

			`def entry_warning(entry, msg):`
			`entries_warning([entry], msg)`


			`def newline_positions(source):`
			`lines = [-1]`
			`while True:`
			`idx = source.find('\n', lines[-1] + 1)`
			`if idx < 0:`
			`break`
			`lines.append(idx)`
			`if lines[-1] != len(source) - 1:`
			`lines.append(len(source) - 1)`
			`return lines[1:]`

			`def index_to_line(index, lines):`
			`return bisect.bisect_left(lines, index) + 1`


			`def extract_file(path, catalog, warn_skipped=False):`
			`source = open(path).read()`
			`newlines = newline_positions(source)`

			`# match internationalized quoted strings`
			`RE_START = r'\b (_[iI]\|ISTR) \s* \('`
			`RE_META = r'//// \s* ([^\n]*)$'`

			`RE_I = fr'''`
			`(?<!(?:/[/]\|^\s\#) [^\n]*) # not on a comment or preprocessor`
			`{RE_START} # $1 ref type _i( or ISTR(`
			`(?:`
			`\s*`
			`("(?:[^"\\]\|\\.)*") # $2 quoted string (chunk)`
			`(?:\s* {RE_META} )? # $3 inline metadata`
			`)+`
			`\s* \) # )`
			`(?:`
			`(?:[^\n] (?!{RE_START}))* # anything except another entry`
			`{RE_META} # $5 final metadata`
			`)?`
			`'''`

			`r_ref_type = 1`
			`r_quoted_chunk = 2`
			`r_inline_data = 3`
			`r_eol_data = 5`

			`for m in regex.finditer(RE_I, source, regex.M\|regex.X):`
			`# parse the text`
			`line = index_to_line(m.start(0), newlines)`

			`text = ""`
			`for block in m.captures(r_quoted_chunk):`
			`# remove quotes and unescape`
			`block = block[1:-1]`
			`block = codecs.decode(block, 'unicode-escape', 'strict')`
			`block = cs.source_to_unicode(block)`
			`text += block`

			`# check if text is non-empty`
			`if len(text) == 0:`
			`line_warning(path, line, 'empty source text, ignored')`
			`continue`

			`data = set()`
			`comments = set()`
			`for n in [r_inline_data, r_eol_data]:`
			`meta = m.group(n)`
			`if meta is not None:`
			`meta_parts = meta.split('//', 1)`
			`data.add(meta_parts[0].strip())`
			`if len(meta_parts) > 1:`
			`comments.add(meta_parts[1].strip())`

			`# check if this message should be ignored`
			`ignored = False`
			`for meta in data:`
			`if regex.search(r'\bIGNORE\b', meta) is not None:`
			`ignored = True`
			`break`
			`if ignored:`
			`if warn_skipped:`
			`line_warning(path, line, 'skipping explicitly ignored translation')`
			`continue`

			`# extra message catalog name (if any)`
			`cat_name = set()`
			`for meta in data:`
			`sm = regex.search(r'\b(MSG_\w+)', meta)`
			`if sm is not None:`
			`cat_name.add(sm.group(1))`

			`# reference type annotation`
			`ref_type = 'def' if m.group(r_ref_type) == 'ISTR' else 'ref'`
			`if ref_type == 'def':`
			`# ISTR definition: lookup nearby assignment`
			`lineup_def = source[newlines[line-2]+1:m.end(r_ref_type)]`
			`sm = regex.search(r'\b PROGMEM_(\S+) \s=\s ISTR $', lineup_def, regex.M\|regex.X)`
			`if sm is None:`
			`line_warning(path, line, 'ISTR not used in an assignment')`
			`elif sm.group(1) != 'I1':`
			`line_warning(path, line, 'ISTR not used with PROGMEM_I1')`

			`# append the translation to the catalog`
			`pos = [(path, line)]`
			`entry = catalog.get(text)`
			`if entry is None:`
			`catalog[text] = {'occurrences': set(pos),`
			`'data': data,`
			`'cat_name': cat_name,`
			`'comments': comments,`
			`'ref_type': set([ref_type])}`
			`else:`
			`entry['occurrences'] = entry['occurrences'].union(pos)`
			`entry['data'] = entry['data'].union(data)`
			`entry['cat_name'] = entry['cat_name'].union(cat_name)`
			`entry['comments'] = entry['comments'].union(comments)`
			`entry['ref_type'].add(ref_type)`


			`def extract_refs(path, catalog):`
			`source = open(path).read()`
			`newlines = newline_positions(source)`

			`# match message catalog references to add backrefs`
			`RE_CAT = r'''`
			`(?<!(?:/[/]\|^\s\#) [^\n]*) # not on a comment or preprocessor`
			`\b (?:_[TO]) \s* \( \s* (\w+) \s* \) # $1 catalog name`
			`'''`

			`for m in regex.finditer(RE_CAT, source, regex.M\|regex.X):`
			`line = index_to_line(m.start(0), newlines)`
			`pos = [(path, line)]`
			`cat_name = m.group(1)`
			`found = False`
			`defined = False`
			`for entry in catalog.values():`
			`if cat_name in entry['cat_name']:`
			`entry['occurrences'] = entry['occurrences'].union(pos)`
			`entry['ref_type'].add('ref')`
			`found = True`
			`if 'def' in entry['ref_type']:`
			`defined = True`
			`if not found:`
			`line_error(path, line, f'{cat_name} not found')`
			`elif not defined:`
			`line_error(path, line, f'{cat_name} referenced but never defined')`


			`def check_entries(catalog, warn_missing, warn_same_line):`
			`cat_entries = {}`

			`for entry in catalog.items():`
			`msgid, data = entry`

			`# ensure we have at least one name`
			`if len(data['cat_name']) == 0 and warn_missing:`
			`entry_warning(entry, 'missing MSG identifier')`

			`# ensure references are being defined`
			`if data['ref_type'] == set(['def']):`
			`if len(data['cat_name']) == 0:`
			`if warn_missing:`
			`entry_warning(entry, 'entry defined, but never used')`
			`else:`
			`id_name = next(iter(data['cat_name']))`
			`entry_warning(entry, f'{id_name} defined, but never used')`

			`# check custom characters`
			`invalid_char = cs.source_check(msgid)`
			`if invalid_char is not None:`
			`entry_warning(entry, 'source contains unhandled custom character ' + repr(invalid_char))`

			`tokens = []`
			`for meta in data['data']:`
			`tokens.extend(regex.split(r'\s+', meta))`
			`seen_keys = set()`
			`for token in tokens:`
			`if len(token) == 0:`
			`continue`

			`# check metadata syntax`
			`if regex.match(r'[cr]=\d+$', token) is None and \`
			`regex.match(r'MSG_[A-Z_0-9]+$', token) is None:`
			`entry_warning(entry, 'bogus annotation: ' + repr(token))`

			`# check for repeated keys`
			`key = regex.match(r'([^=])+=', token)`
			`if key is not None:`
			`key_name = key.group(1)`
			`if key_name in seen_keys:`
			`entry_warning(entry, 'repeated annotation: ' + repr(token))`
			`else:`
			`seen_keys.add(key_name)`

			`# build the inverse catalog map`
			`if token.startswith('MSG_'):`
			`if token not in cat_entries:`
			`cat_entries[token] = [entry]`
			`else:`
			`cat_entries[token].append(entry)`

			`# ensure the same id is not used in multiple entries`
			`for cat_name, entries in cat_entries.items():`
			`if len(entries) > 1:`
			`entries_warning(entries, f'{cat_name} used in multiple translations')`

			`if warn_same_line:`
			`# build the inverse location map`
			`entry_locs = {}`
			`for entry in catalog.items():`
			`msgid, data = entry`
			`for loc in data['occurrences']:`
			`if loc not in entry_locs:`
			`entry_locs[loc] = [loc]`
			`else:`
			`entry_locs[loc].append(loc)`

			`# check for multiple translations on the same location`
			`for loc, entries in entry_locs.items():`
			`if len(entries) > 1:`
			`line_warning(loc[0], loc[1], f'line contains multiple translations')`


			`def main():`
			`ap = argparse.ArgumentParser()`
			`ap.add_argument('-o', dest='pot', required=True, help='PO template output file')`
			`ap.add_argument('--no-missing', action='store_true',`
			`help='Do not warn about missing MSG entries')`
			`ap.add_argument('--warn-same-line', action='store_true',`
			`help='Warn about multiple translations on the same line')`
			`ap.add_argument('--warn-skipped', action='store_true',`
			`help='Warn about explicitly ignored translations')`
			`ap.add_argument('-s', '--sort', action='store_true',`
			`help='Sort output catalog')`
			`ap.add_argument('file', nargs='+', help='Input files')`
			`args = ap.parse_args()`

			`# extract strings`
			`catalog = {}`
			`for path in args.file:`
			`extract_file(path, catalog, warn_skipped=args.warn_skipped)`

			`# process backreferences in a 2nd pass`
			`for path in args.file:`
			`extract_refs(path, catalog)`

			`# check the catalog entries`
			`check_entries(catalog, warn_missing=not args.no_missing, warn_same_line=args.warn_same_line)`

			`# write the output PO template`
			`po = polib.POFile()`
			`po.metadata = {`
			`'Language': 'en',`
			`'MIME-Version': '1.0',`
			`'Content-Type': 'text/plain; charset=utf-8',`
			`'Content-Transfer-Encoding': '8bit'}`

			`messages = catalog.keys()`
			`if args.sort:`
			`messages = sorted(messages)`
			`for msgid in messages:`
			`data = catalog[msgid]`
			`comment = ', '.join(data['data'])`
			`if len(data['comments']):`
			`comment += '\n' + '\n'.join(data['comments'])`
			`occurrences = data['occurrences']`
			`if args.sort:`
			`occurrences = list(sorted(occurrences))`
			`po.append(`
			`polib.POEntry(`
			`msgid=msgid,`
			`comment=comment,`
			`occurrences=occurrences))`

			`po.save(args.pot)`
			`return 0`

			`if __name__ == '__main__':`
			`exit(main())`