summary refs log tree commit diff
path: root/bin/unidump
diff options
context:
space:
mode:
Diffstat (limited to 'bin/unidump')
-rwxr-xr-x bin/unidump 57
1 files changed, 57 insertions, 0 deletions
diff --git a/bin/unidump b/bin/unidump
new file mode 100755
index 0000000..702f8cc
--- /dev/null
+++ b/bin/unidump
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+# unidump version 0.1.1
+# made by ubq323 in the year 2021
+# please use this software for GOOD, not for EVIL
+
+import unicodedata
+import sys
+
+uniquify = True
+# Leading options: "-a" disables deduplication, "-h"/"--help" prints usage
+# and exits, "--" ends option parsing and is not part of the text. Any other
+# argument (even "" or a dash-prefixed word like "-5") starts the text.
+args = sys.argv[1:]
+while args and args[0] in ("-a", "-h", "--help", "--"):
+    opt = args.pop(0)
+    if opt == "--":
+        break
+    if opt == "-a":
+        uniquify = False
+        continue
+    print(f"usage: {sys.argv[0]} [-a] [text...]")
+    print("\t-a: don't deduplicate input characters")
+    print("\tif no text supplied, takes input from stdin")
+    sys.exit(0)
+
+# Join all text words with single spaces; fall back to stdin when empty.
+s = " ".join(args)
+if not s:
+    s = sys.stdin.read()
+
+def row(c):
+    """Return one table row describing the single character c.
+
+    Columns, separated by " | ": the character itself, its two-letter
+    Unicode general category, its name right-aligned to 50 columns (blank
+    when it has none), and its code point as U+XXXX right-aligned to 7.
+    """
+    cat = unicodedata.category(c)
+    name = unicodedata.name(c, "").rjust(50)
+    number = f"U+{ord(c):04X}".rjust(7)
+    to_c = c
+    if cat in ("Cc", "Zl", "Zp"):
+        # Control chars and line breaks would corrupt the table layout.
+        to_c = " "
+    elif cat[0] == "M":
+        # Give combining marks a dotted circle (U+25CC) to attach to.
+        to_c = chr(0x25cc) + to_c
+    return f"{to_c} | {cat} | {name} | {number}"
+
+# One row per character, skipping repeats unless deduplication is off.
+shown = set()
+for ch in s:
+    if not uniquify or ch not in shown:
+        print(row(ch))
+        if uniquify:
+            shown.add(ch)