summaryrefslogtreecommitdiff
path: root/bin/unidump
blob: 702f8cc168cf11f3d083518786206bc69653712c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3

# unidump version 0.1.1
# made by ubq323 in the year 2021
# please use this software for GOOD, not for EVIL

import unicodedata
import sys

def parse_args(argv):
	"""Split command-line arguments into the dedup flag and the text to dump.

	argv: the arguments after the program name; the list is not modified.

	Returns (uniquify, text).  uniquify is False once -a has been seen.
	text is every remaining argument joined by single spaces, or "" when
	no text was supplied.

	-h / --help prints the usage message and exits with status 0.
	"""
	uniquify = True
	rest = list(argv)
	while rest:
		arg = rest[0]
		if arg == "--":
			# explicit end of options: drop the marker itself, keep what follows
			rest.pop(0)
			break
		if arg == "-a":
			uniquify = False
		elif arg == "-h" or arg == "--help":
			print(f"usage: {sys.argv[0]} [-a] [text...]")
			print("\t-a: don't deduplicate input characters")
			print("\tif no text supplied, takes input from stdin")
			sys.exit(0)
		else:
			# The text starts at the first argument that is not a known
			# option.  That includes "" (indexing arg[0] used to crash on it),
			# a lone "-", and unrecognised -x style arguments, which were
			# previously dumped as text only when they came last.
			break
		rest.pop(0)
	# join every word with a space, including the first one
	return uniquify, " ".join(rest)


uniquify, s = parse_args(sys.argv[1:])

# no text on the command line: dump whatever arrives on stdin instead
if len(s) == 0:
	s = sys.stdin.read()

def row(c):
	"""Format one table line describing the single character c.

	The columns, separated by " | ", are: the character itself, its
	two-letter Unicode general category, its name right-aligned to 50
	columns (blank when unicodedata has no name for it), and its code
	point as U+XXXX right-aligned to 7 columns.
	"""
	try:
		name = unicodedata.name(c).rjust(50)
	except ValueError:
		# unicodedata.name raises ValueError for characters without a name
		name = " "*50

	number = f"U+{ord(c):04X}".rjust(7)

	cat = unicodedata.category(c)

	to_c = c
	if cat == "Cc":
		# Control characters (newline, tab, CR, backspace, ESC, ...) would be
		# acted on by the terminal and break the table, so show a blank.
		to_c = ' '
	elif cat[0] == "M":
		# combining marks need a base to sit on: use U+25CC DOTTED CIRCLE
		to_c = chr(0x25cc) + to_c

	return f"{to_c} | {cat} | {name} | {number}"

# When deduplicating, keep only the first occurrence of each character;
# dict keys preserve insertion order, so the output order is unchanged.
chars = dict.fromkeys(s) if uniquify else s
for ch in chars:
	print(row(ch))