1 files changed, 57 insertions, 0 deletions
diff --git a/bin/unidump b/bin/unidump
new file mode 100755
index 0000000..702f8cc
--- /dev/null
+++ b/bin/unidump
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+# unidump version 0.1.1
+# made by ubq323 in the year 2021
+# please use this software for GOOD, not for EVIL
+
+import unicodedata
+import sys
+
+uniquify = True  # cleared by -a so that repeated characters are all shown
+args = sys.argv[1:]
+# Consume leading options; whatever is left afterwards is the text to dump.
+while args and args[0].startswith("-"):  # startswith: "" must not crash
+	arg = args.pop(0)
+	if arg == "--":  # explicit end of options; not part of the text
+		break
+	if arg == "-a":
+		uniquify = False
+	elif arg in ("-h", "--help"):
+		print(f"usage: {sys.argv[0]} [-a] [text...]")
+		print("\t-a: don't deduplicate input characters")
+		print("\tif no text supplied, takes input from stdin")
+		sys.exit(0)
+	else:  # not an option we know: it is text, so put it back and stop
+		args.insert(0, arg)
+		break
+
+s = " ".join(args)  # words keep the single space that separated them
+if not s:  # no text on the command line (or only empty arguments)
+	s = sys.stdin.read()
+
+def row(c):
+	"""Return one table line for character c: glyph | category | name | U+XXXX.
+
+	The name is right-aligned in 50 columns and left blank when unicodedata
+	has none for c; the code point is upper-case hex, at least 4 digits,
+	right-aligned in 7 columns.
+	"""
+	cat = unicodedata.category(c)
+	name = unicodedata.name(c, "").rjust(50)
+	number = f"U+{ord(c):04X}".rjust(7)
+
+	to_c = c
+	if cat == "Cc":
+		# Raw control characters (newline, tab, CR, ESC...) would break the layout.
+		to_c = ' '
+	elif cat[0] == "M":
+		to_c = chr(0x25cc) + c  # give combining marks a dotted circle to sit on
+	return f"{to_c} | {cat} | {name} | {number}"
+
+seen = set()  # characters already printed; only maintained when uniquify is on
+for ch in s:
+	if uniquify and ch in seen:
+		continue  # repeat of an earlier character
+	if uniquify:
+		seen.add(ch)
+	print(row(ch))