refactor: rewrite some parts of fluentscanner.py

code is now much better organized and maintainable also made the 2nd parameter (en,ru,es,etc) not required when the command "unused" or "missing" is being used
2023-06-11 16:06:28 -04:00 · 2023-06-11 16:06:28 -04:00 · 956d4f85d2
commit 956d4f85d2
parent 4db774eb6d
1 changed files with 112 additions and 69 deletions
--- a/fluentscan.py
+++ b/fluentscan.py
@ -7,21 +7,23 @@ import os
 import sys
 import glob
 import re
+import pydoc

 valid_commands = ["diff", "unused", "missing"]
+if len(sys.argv) >= 2 and sys.argv[1] != "diff":
+    path = "assets/locales/en/"

-if len(sys.argv) < 3:
+elif len(sys.argv) < 3:
    print(f"Command format: ./fluentscan.py [command] [locale]\nAvailable commands: {valid_commands}")
-
    sys.exit()

-if sys.argv[1] not in valid_commands:
-    print(f"Invalid command \"{sys.argv[1]}\". Available commands: {valid_commands}")
+else: path = "assets/locales/" + sys.argv[2] + "/"

+command = sys.argv[1]
+if command not in valid_commands:
+    print(f"Invalid command \"{command}\". Available commands: {valid_commands}")
    sys.exit()

-path = "assets/locales/" + sys.argv[2] + "/"
-
 try:
    open(path + "/main.ftl", "r").close()

@ -30,8 +32,6 @@ except:

    sys.exit()

-all_entries = {}
-
 def dict_compare(d1, d2):
    d1_keys = set(d1.keys())
    d2_keys = set(d2.keys())
@ -61,92 +61,135 @@ def to_dict(text):

    return result

-def get_line_num(text,pattern):
+def get_line_num(file,pattern):
    line = 1

+    file.seek(0)
+    text = file.read()
+
    for i in text.split("\n"):
+        if sys.argv[1] == "diff":
+            if i == pattern:
+                return line
+
        if pattern in i:
            return line

        line += 1

+# format = {variable : file name}
+used_entries = {}
+unused_entries = {}
+missing_entries = {}
+# format = {variable : [file name, line number]}
+script_entries = {}
+all_entries = {}
+
+# format = {variable : definition}
+every_variable = {}
+
+files = glob.glob("src/**/*.rs", recursive=True)
+
+output=""
+
 for filename in os.listdir("assets/locales/en"):
    with open(os.path.join("assets/locales/en", filename), 'r') as locale_file:
        created_locale = open(path + filename)

-        expected = to_dict(locale_file)
-        expected2 = to_dict(created_locale)
+        en_variables = to_dict(locale_file)
+        every_variable.update(en_variables)

-        all_entries.update(expected)
+        for i in en_variables:
+            all_entries[i] = [filename, get_line_num(locale_file, i)]
+        # resets the file read
+        locale_file.seek(0)

-        added, removed, same = dict_compare(expected, expected2)
+        locale_variables = to_dict(created_locale)

-        if sys.argv[1] == "unused" or sys.argv[1] == "missing":
-            files = glob.glob("src/**/*.rs", recursive=True)
-
-            used = []
-            vars = {}
-
-            for i in files:
-                with open(i, "r") as script:
-                    text = script.read()
-
-                    if sys.argv[1] == "unused":
-                        for j in expected:
-                            if f"\"{j}\"" in text:
-                                used.append(j)
-
-                    elif sys.argv[1] == "missing":
-                        for j in text.split():
-                            # TODO: ignore comments
-                            if 'tr("' in j:
-                                index = j.find('tr("')
-
-                                var_name = re.sub('[^\\w-]+', '', j[index:].replace('tr("', '').replace("Some", ""))
-
-                                # TODO: index multiple matches
-                                vars[var_name] = [script.name, get_line_num(text,var_name)]
-
-            if sys.argv[1] == "unused":
-                for i in expected:
-                    if i not in used:
-                        print(f"[{locale_file.name}]\n"
-                               "  [Unused]\n"
-                              f"    {i}")
-
-            continue
-
-        if (added or removed or same) and sys.argv[1] == "diff":
-            print(f"[{created_locale.name[15:]}]")
+        added, removed, same = dict_compare(en_variables, locale_variables)

+        # perhaps theres a better way to do this
+        # FIXME : output isn't sorted
+        if command == "diff" and (added or removed or same):
+            output += f"[{created_locale.name[15:]}]\n"
            if added:
-                print("  [Added]")
-
+                output += "  [Added]\n"
                for i in added:
-                    print(f"    {i} = {expected[i]}")
+                    output += f"    {get_line_num(locale_file,i)} | {i} = {en_variables[i].strip()}\n"

            if removed:
-                print("  [Removed]")
-
+                output += "  [Removed]\n"
                for i in removed:
-                    print(f"    {i} = {expected2[i]}")
-
-            #workaround
-            if same and same != "set()":
-                print("  [Untranslated]")
+                    output += f"    {get_line_num(created_locale,i)} | {i} = {locale_variables[i].strip()}\n"

+            if same:
+                output += "  [Untranslated]\n"
                for i in same:
-                    print(f"    {i} = {expected[i]}")
+                    output += f"    {get_line_num(locale_file,i)} | {i} = {en_variables[i].strip()}\n"
+            output += "\n"
+            continue
+        # on some cycles the code below runs, even if there are differences
+        #else:
+            #print(f"No differences spotted between \"en\" and \"{sys.argv[2]}\" locale")
+            #sys.exit()

-            print("")
+        # for "missing" and "unused" command
+        for i in files:
+            with open(i, "r") as script:
+                # for unused entries
+                for j in list(en_variables.keys()):
+                    script.seek(0)

-if sys.argv[1] == "missing":
-    added, removed, same = dict_compare(vars, all_entries)
+                    if f"\"{j}\"" in script.read():
+                        used_entries[j] = script.name

-    if not added:
+                    script.seek(0)
+                # forgive me for my programming war crimes, this is 6 indentations!!!!!!
+                # FIXME: it only indexes the first match
+                for line in script.read().split("\n"):
+                    if ("tr(\"" in line) and ("#" not in line):
+                        index = line.find('tr("')
+                        indexLast = line.find('")',index)
+                        var_name = re.sub('[^\\w-]+', '', line[index:indexLast].replace('tr("', '').replace("Some", ""))
+
+                        script_entries[var_name] = [script.name, get_line_num(script, var_name)]
+
+## results stage ##
+
+if command == "unused":
+    printed = []
+    for i in all_entries:
+        if i not in used_entries:
+            unused_entries[i] = all_entries[i]
+
+    for i in unused_entries:
+        if all_entries[i][0] not in printed:
+            printed.append(all_entries[i][0])
+            output += f"[en/{all_entries[i][0]}]\n  [Unused]\n"
+        output += f"    {all_entries[i][1]} | {i} = {every_variable[i].strip()}\n"
+
+    output = re.sub("\[en", "\n[en", output)
+
+    if not output:
+        print("Nothing is unused")
+        sys.exit()
+
+# The way "unused" and "missing" process the data is very similar, perhaps it will be possible to do this in a function instead?
+elif command == "missing":
+    printed = []
+    for i in script_entries:
+        if i not in all_entries:
+            missing_entries[i] = script_entries[i][0]
+
+    for i in missing_entries:
+        if script_entries[i][0] not in printed:
+            printed.append(script_entries[i][0])
+            output += f"[{missing_entries[i]}]\n  [Missing]\n"
+        output += f"    {script_entries[i][1]} | {i}\n"
+
+    output = re.sub("\[src/", "\n[src/", output)
+    if not output:
        print("Nothing is missing")
+        sys.exit()

-    for i in added:
-        print(f"[{vars[i][0]}, line {vars[i][1]}]\n"
-              "  [Missing]\n"
-              f"    {i}")
+pydoc.pager(output)