#! /usr/bin/python3

"""
This program extracts translatable contents from xml files under
the subdirectory "doc/", and builds a PO template file with them.

Currently translatable contents are provided by tags
'title', 'ulink', 'term' and 'para'.

Each entry is preceded by a comment giving a means to access the
translatable text chunk with xml.etree.ElementTree

How to use this script:
$ devtools/blend-doc-makepot doc/en > path_to_blends.pot
"""

import xml.etree.ElementTree as ET
import sys, re, os, hashlib, collections
sys.path.append(os.path.dirname(__file__))

from utils import normalizedText

def content(tag):
    """
    Return the inner content of an element as a single string.

    The result is the element's leading text (empty string when there is
    none) followed by each direct child serialized with ET.tostring.
    """
    leading = tag.text if tag.text else ""
    children = [ET.tostring(child, encoding='unicode') for child in tag]
    return leading + ''.join(children)

class PotEntry():
    """
    Class to implement POT entries.

    Every instance registers itself in the class-level ``entries`` mapping,
    keyed by the SHA-256 digest of its formatted text, so instances whose
    formatted text is identical end up in the same list.

    Parameters of the constructor:
    ------------------------------

    - comment (str) a comment to access the text in a source file
    - text (str) the original text to process
    """

    # digest of the formatted text -> list of the PotEntry instances sharing it
    entries = collections.OrderedDict()

    def __init__(self, comment, text):
        self.comment = comment
        # one double-quoted line per item produced by normalizedText
        self.text = "\n".join(
            f'"{l}"' for l in normalizedText(text, escape=True))
        self.hash = hashlib.sha256(self.text.encode("UTF-8")).hexdigest()
        # group entries bearing the same text under a single key
        self.entries.setdefault(self.hash, []).append(self)

    def __str__(self):
        """
        Returns the msgid/msgstr stanza of this entry, without comments
        """
        # must be an f-string, otherwise "{self.text}" is emitted verbatim
        return f"""\
msgid ""
{self.text}
msgstr ""

"""

    @staticmethod
    def header():
        """
        Returns a template for the begin of the POT file

        NOTE(review): the project id and the bug report address read
        "blender" while the leading comment lines mention the blends
        package, and POT-Creation-Date is a fixed literal -- confirm
        that both are intended.
        """
        return """\
# LANG translations for Debian Blends documentation.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the blends package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: blender-doc\\n"
"Report-Msgid-Bugs-To: blender@packages.debian.org\\n"
"POT-Creation-Date: 2024-11-20 10:11+0200\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"Language: \\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"

"""

    @staticmethod
    def entries_str():
        """
        Returns all registered entries as POT stanzas, in insertion order

        Each stanza lists one "#. " comment line per instance sharing the
        text, followed by the msgid taken from the first of them.
        """
        chunks = []
        for list_p in PotEntry.entries.values():
            comments = "\n".join(f"#. {p.comment}" for p in list_p)
            text = list_p[0].text
            chunks.append(f"""
{comments}
msgid ""
{text}
msgstr ""

""")
        return "".join(chunks)

    @staticmethod
    def potfile():
        """
        Returns the whole POT file: the header, then every entry
        """
        return f"""
{PotEntry.header()}
{PotEntry.entries_str()}
"""
            
def _next_child_xpath(seen, tag):
    """Count one more child named *tag* in *seen*, return its relative xpath."""
    seen[tag] = seen.get(tag, 0) + 1
    return f"./{tag}[{seen[tag]}]"


def pot_entries(filename):
    """
    collects POT entries from a file

    Parameter:
    ----------

    - filename (str) path to an xml file

    Returns:
    --------
    None

    Side-effects:
    -------------

    PotEntry.entries will bear all information collected from the file

    For every 'title', 'ulink', 'term' and 'para' element, entries are
    created for its non-blank leading text, for the non-blank tail of each
    direct child, and for the text of each direct 'emphasis' child together
    with the non-blank tails of that emphasis' own children.
    """
    root = ET.parse(filename).getroot()
    for tag in ("title", "ulink", "term", "para"):
        xpath = ".//" + tag
        for i, t in enumerate(root.findall(xpath)):
            # expression locating the current element, reused in each comment
            base = f'{filename} : root.findall("{xpath}")[{i}]'
            if t.text and t.text.strip():
                PotEntry(f'{base}.text', t.text)
            seen = {}
            for e in t:
                # positions are counted per tag name, matching the
                # "./tag[n]" predicate used in the locator
                xpath2 = _next_child_xpath(seen, e.tag)
                child = f'{base}.findall("{xpath2}")[0]'
                if e.tag == "emphasis" and e.text and e.text.strip():
                    # it is an emphasis inside some translatable text
                    # so we must output its text into the POT file
                    PotEntry(f'{child}.text', e.text)
                    # NOTE(review): children of an emphasis are only scanned
                    # when the emphasis has leading text -- confirm this is
                    # intended
                    seen_em = {}
                    for e_em in e:
                        # we are under one layer of emphasis tag now!
                        # the counter is keyed by tag name, so the n-th
                        # same-tag child gets index n
                        xpath2_em = _next_child_xpath(seen_em, e_em.tag)
                        if e_em.tail and e_em.tail.strip():
                            PotEntry(
                                f'{child}.findall("{xpath2_em}")[0].tail',
                                e_em.tail)
                if e.tail and e.tail.strip():
                    PotEntry(f'{child}.tail', e.tail)


if __name__ == "__main__":
    # Scan the directory given as first argument (default: the current
    # directory) and print the resulting POT file on the standard output.
    path = sys.argv[1] if len(sys.argv) > 1 else "."
    for root, dirs, files in os.walk(path):
        xml_paths = (os.path.join(root, f) for f in files if f.endswith(".xml"))
        for filename in xml_paths:
            pot_entries(filename)
    print(PotEntry.potfile())
