~dricottone/ado-personal

ref: fa4c1624b47056a024f2e931cc742209fa3c0612 ado-personal/count_nonascii.ado -rw-r--r-- 607 bytes
fa4c1624Dominic Ricottone Time travel 3 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Dominic R, 7/22/2021

// count_nonascii [varlist]
//
// Reports how many characters fall outside the range " " through "~"
// (i.e. match the regular expression [^ -~]) in the given string variables,
// and how many values contain at least one such character.
//
// Arguments:
//   varlist   optional list of string variables; when omitted, every string
//             variable in the dataset (as listed by -ds, has(type string)-)
//             is examined.
//
// Output:
//   Displays a one-line summary.
//
// Stored results:
//   r(N_chars)    total number of matching characters across all variables
//   r(N_values)   number of values containing at least one matching character
//
// The dataset in memory is left unchanged.
program count_nonascii, rclass
	// The ustr*() functions used below require Stata 14 or newer.
	version 14
	syntax [varlist(string default=none)]

	// No variables named: fall back to every string variable in the dataset.
	if "`varlist'"=="" {
		quietly ds, has(type string)
		local varlist `r(varlist)'
	}

	// A temporary variable cannot collide with a user's variable name and is
	// removed automatically if the program exits early with an error.
	tempvar n_bad

	local affected 0
	local total 0
	// The list is already validated and unabbreviated, so iterate over the
	// local directly; an empty list simply runs the loop zero times.
	foreach v of local varlist {
		// Values with at least one character outside " ".."~".
		quietly count if ustrregexm(`v',"[^ -~]")
		local affected = `affected' + r(N)

		// Per-value count: length before minus length after stripping the
		// matching characters. -long- rather than -int- so that very long
		// values do not overflow to missing.
		quietly generate long `n_bad' = ustrlen(`v') - ustrlen(ustrregexra(`v',"[^ -~]",""))

		// r(sum) is held in double precision and is 0 when there are no
		// observations, so the running total never becomes missing.
		quietly summarize `n_bad', meanonly
		local total = `total' + r(sum)

		// Free the name so it can be regenerated for the next variable.
		drop `n_bad'
	}
	display "There are `total' Unicode characters in `affected' values"

	return scalar N_values = `affected'
	return scalar N_chars = `total'
end