From 0e0681676824534589231032090e2035965e1744 Mon Sep 17 00:00:00 2001
From: dricottone
Date: Thu, 22 Jul 2021 13:09:33 -0400
Subject: [PATCH] Add count_nonascii

---
 count_nonascii.ado | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 count_nonascii.ado

diff --git a/count_nonascii.ado b/count_nonascii.ado
new file mode 100644
index 0000000..5d15b74
--- /dev/null
+++ b/count_nonascii.ado
@@ -0,0 +1,42 @@
+*! count_nonascii: count characters outside the printable-ASCII range (space to ~).
+*!
+*! Syntax:   count_nonascii [varlist]
+*! varlist:  string variables to scan; when omitted, every string variable is used.
+*! Output:   one summary line; also returns r(total) (matched characters) and
+*!           r(affected) (values containing at least one match).
+program count_nonascii, rclass
+    syntax [varlist(string default=none)]
+
+    // No variables named: fall back to every string variable in memory.
+    if "`varlist'"=="" {
+        quietly ds, has(type string)
+        local varlist `r(varlist)'
+    }
+
+    local affected 0
+    local total 0
+
+    // A temporary variable cannot collide with a variable already in the data
+    // and is dropped automatically when the program ends, even on error.
+    tempvar nonascii
+
+    foreach v of varlist `varlist' {
+        quietly count if ustrregexm(`v',"[^ -~]")
+        local affected = `affected' + r(N)
+
+        // Matches per value = characters lost when every match is removed.
+        // Stored as long: an int overflows to missing past 32,740 (possible with strL).
+        quietly generate long `nonascii' = ustrlen(`v') - ustrlen(ustrregexra(`v',"[^ -~]",""))
+
+        // r(sum) is a double and is 0 when there are no observations.
+        quietly summarize `nonascii', meanonly
+        local total = `total' + r(sum)
+
+        quietly drop `nonascii'
+    }
+
+    display "There are `total' Unicode characters in `affected' values"
+
+    return scalar total = `total'
+    return scalar affected = `affected'
+end
-- 
2.45.2