From 5e992c25e37a87afa0bbb2c2e36a368a9b8bee9a Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Wed, 14 Sep 2022 00:32:24 -0500 Subject: [PATCH] Rebuild project Added modern packaging process. Rewrote code to be executable as a module, more readable, and more maintainable. --- Makefile | 51 +++++++++++++++ README.md | 44 +++++-------- hist.py | 121 ----------------------------------- hist/__init__.py | 0 hist/__main__.py | 29 +++++++++ hist/cli.py | 126 +++++++++++++++++++++++++++++++++++++ hist/cli.toml | 16 +++++ hist/histogram.py | 156 ++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 17 +++++ 9 files changed, 412 insertions(+), 148 deletions(-) create mode 100644 Makefile delete mode 100644 hist.py create mode 100644 hist/__init__.py create mode 100644 hist/__main__.py create mode 100644 hist/cli.py create mode 100644 hist/cli.toml create mode 100644 hist/histogram.py create mode 100644 pyproject.toml diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1e09d15 --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +VERSION=1.0.0 + +PY_COMPILE_BIN=python -m py_compile + +#BUILD_BIN=python -m build +BUILD_BIN=pyproject-build + +#UNITTEST_FILE_BIN=python -m unittest +#UNITTEST_DIR_BIN=python -m unittest discover --top-level-directory . +UNITTEST_FILE_BIN=unittest --color +UNITTEST_DIR_BIN=unittest --color --working-directory . 
+ +#MYPY_BIN=python -m mypy +MYPY_BIN=MYPY_CACHE_DIR=hist/__mypycache__ mypy + +#PIPX_BIN=python -m pipx +PIPX_BIN=pipx + +.PHONY: clean +clean: + rm -rf **/__pycache__ **/__mypycache__ **/*.pyc build *.egg-info + +hist/cli.py: hist/cli.toml + gap hist/cli.toml --no-debug-mode --output=hist/cli.py + +.PHONY: test +test: + $(PY_COMPILE_BIN) hist/*.py + $(MYPY_BIN) -p hist + +PY_FILES=hist/cli.py hist/__main__.py hist/histogram.py +PYBUILD_FILES=pyproject.toml LICENSE.md README.md + +build/hist-$(VERSION)-py3-none-any.whl: $(PY_FILES) $(PYBUILD_FILES) + mkdir -p build + $(BUILD_BIN) --wheel --no-isolation --outdir build/ + +.PHONY: build +build: build/hist-$(VERSION)-py3-none-any.whl + +.PHONY: reinstall +reinstall: uninstall install + +.PHONY: install +install: build/hist-$(VERSION)-py3-none-any.whl + $(PIPX_BIN) install build/hist-$(VERSION)-py3-none-any.whl + +.PHONY: uninstall +uninstall: + $(PIPX_BIN) uninstall hist + diff --git a/README.md b/README.md index 2d189f6..d0e51e4 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,23 @@ -syntax: `hist.hist(list_object [, number_bins])` +# hist -- list_object can be any list-like object (lists, tuples, etc.) -- number_bins can be any numeric type (default is 1) +Create a histogram on the terminal. - -Example output: +## Usage ``` -python3 hist.py -│ 17 -│ █ -│ █ -│ █ -│ █ -│ █ -│ █ -│ 10 10 █ 10 10 -│ █ █ █ █ █ -│ █ █ █ █ █ 8 -│ █ █ █ █ 7 █ 7 █ -│ 6 6 █ █ █ █ █ █ █ █ -│ █ █ █ █ █ █ █ █ █ █ -│ █ █ █ █ █ █ █ 4 █ █ █ -│ █ █ █ █ █ █ █ █ █ █ █ -│ █ █ █ █ █ █ █ █ █ █ █ -│ █ █ █ █ █ █ █ █ █ █ █ -┼──────────────────────────────────────────── - 8 16 24 32 40 48 56 64 72 80 88 - Average = 48.63 - Excluded outliers: 1, 2, 2, 3, 6 +$ ps -eo pmem --sort=-pmem --no-headers | head -n 20 | python -m hist --bins=20 --positive +│ 10 +│ █ +│ █ +│ █ +│ █ +│ █ 5 +│ █ █ +│ █ █ +│ █ █ 2 2 +│ █ █ █ █ 1 +┼──────────────────────────────────────────────────────────── + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +Avg. 
= 2.25 ``` diff --git a/hist.py b/hist.py deleted file mode 100644 index 642ab69..0000000 --- a/hist.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 - -def _bin_iter(avg, bin_width, num_bins): - bin_ = int(avg - (bin_width * (num_bins // 2))) - a = 0 - while a < num_bins: - yield bin_ - bin_ = bin_ + bin_width - a = a + 1 - -def hist(args, bin_num=1): - """ - Creates a formatted unicode histogram. Requires an iterable of numeric - values. Optionally set number of bins. By default, creates a single bin - histogram. - """ - - avg = sum(args)/len(args) - bin_num = int(bin_num) - - # a single bin - if bin_num == 1: - hist_height = len(args) - print(u'\u2502' + ' {:>3}'.format(hist_height)) - hist_height = hist_height - 1 - while hist_height > 0: - print(u'\u2502' + ' ' + u'\u2588') - hist_height = hist_height - 1 - print('{:{}<5}\n'.format(u'\u253c', u'\u2500') - + ' all\n' - + ' Avg. = {:.2f}'.format(avg)) - return - - # histogram design - avg = sum(args)/len(args) - - ordered = sorted(args) - range_ = ordered[-1] - ordered[0] - bin_width = range_ // bin_num - if bin_width == 0: - bin_width = 1 - - - - # collect histogram data - bin_labels = [] - bin_freq = [] - outliers = [] - for bin_ in _bin_iter(avg, bin_width, bin_num): - bin_labels.append(str(bin_)) - - freq = 0 - x = 0 - while x < len(ordered): - if (ordered[x] < bin_): - x = x + 1 - elif (ordered[x] >= bin_) and (ordered[x] < bin_ + bin_width): - ordered.pop(x) - freq = freq + 1 - else: - break - bin_freq.append(freq) - - for unbinned in ordered: - outliers.append(unbinned) - - # draw histogram - hist_width = (bin_num * 4) + 1 - hist_height = max(bin_freq) - - y_val = max(bin_freq) - while y_val > 0: - row = u'\u2502' - for x_val in range(bin_num): - if bin_freq[x_val] == y_val: - cell_val = str(y_val) - elif bin_freq[x_val] > y_val: - cell_val = u'\u2588' - else: - cell_val = '' - row = row + ' {:>3}'.format(cell_val) - print(row) - y_val = y_val - 1 - - # draw histogram labels - hist_width = 
(bin_num * 4) + 1 - horizontal_line = '{corner:{line}<{length}}'.format(corner = u'\u253c', - line = u'\u2500', - length = hist_width) - print(horizontal_line) - - labels = ' ' - for label in bin_labels: - labels = labels + ' {:>3}'.format(label) - print(labels) - - # draw histogram details as applicable - if hist_width > 15: - details_width = hist_width - else: - details_width = 15 - - print('{text:>{length}} {average:<.2f}'.format(text = 'Average =', - length = details_width - 6, - average = avg)) - - if outliers != []: - string = ', '.join(str(outlier) for outlier in outliers) - string = 'Excluded outliers: ' + string - print('{text:>{length}}'.format(text = string, length = details_width)) - - return - -if __name__ == '__main__': - import random - i = 0 - lst = [] - while i < 100: - lst.append(random.randrange(1,101)) - i = i + 1 - hist(lst, 11) diff --git a/hist/__init__.py b/hist/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hist/__main__.py b/hist/__main__.py new file mode 100644 index 0000000..1fdcc12 --- /dev/null +++ b/hist/__main__.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +import sys +import fileinput + +from . import cli +from . 
# NOTE: these source lines come from a whitespace-collapsed git patch and hold
# the bodies of two files, hist/__main__.py and hist/cli.py.  Patch text that
# shared the lines but is not Python is preserved in comments.
#
# (patch residue) "import histogram" -- the tail of the
# "from . import histogram" statement begun on the previous source line.

# ---- hist/__main__.py -------------------------------------------------------


def main():
    """Read numbers (one per line) from files or stdin and draw a histogram.

    Command-line options are parsed by ``cli.main``; remaining positional
    arguments are treated as input files (stdin when there are none).

    Raises:
        ValueError: if ``--bins`` or a non-blank input line is not numeric.
    """
    # Imported here so this function does not depend on the split import
    # statement that straddles the preceding source line.
    from . import cli, histogram

    _config, _positionals = cli.main(sys.argv[1:])

    # NOTE(review): --help and --version exit successfully without printing
    # anything -- confirm whether usage/version text was intended.
    if "version" in _config or "help" in _config:
        sys.exit(0)

    _bins = int(_config.get("bins", 10))
    _positive = "positive" in _config

    # ``fileinput.input(encoding=...)`` only exists on Python >= 3.10, while
    # the project declares ``requires-python = ">=3.6"``; ``hook_encoded`` is
    # the spelling that works on every supported version.
    opener = fileinput.hook_encoded("utf-8")
    with fileinput.input(files=_positionals, openhook=opener) as f:
        # Skip blank lines (e.g. a trailing newline) instead of crashing on
        # float(''); float() already tolerates surrounding whitespace.
        data = [float(line) for line in f if line.strip()]

    histogram.draw(data, _bins, _positive)


if __name__ == '__main__':
    main()

# (patch residue, kept verbatim)
# diff --git a/hist/cli.py b/hist/cli.py
# new file mode 100644
# index 0000000..5b9fe39
# --- /dev/null
# +++ b/hist/cli.py
# @@ -0,0 +1,126 @@

# ---- hist/cli.py ------------------------------------------------------------
#!/usr/bin/env python3
#
# NOTE(review): the Makefile builds hist/cli.py from hist/cli.toml with `gap`,
# so hand edits here may be overwritten when that rule runs -- confirm.

import re

# Every accepted spelling (without leading dashes) -> canonical option name.
_ALIASES = {
    "bins": "bins", "b": "bins",
    "help": "help", "h": "help", "x": "help",
    "positive": "positive", "p": "positive",
    "version": "version", "v": "version", "V": "version",
}

# Options that take exactly one value; every other option is a bare flag.
_VALUE_OPTIONS = frozenset({"bins"})

_OPTION_PATTERN = re.compile(r"(?:-(?:b|h|x|p|v|V)|--(?:bins|help|positive|version))(?:=.*)?$")


def main(arguments):
    """Split ``arguments`` into recognised options and positional arguments.

    Args:
        arguments: command-line words, e.g. ``sys.argv[1:]``.  The list is
            not modified.

    Returns:
        ``(config, positional)``: ``config`` maps canonical option names to
        ``True`` (flags) or to the value string (``bins``); ``positional`` is
        the list of everything else, including all words after ``--``.

    Raises:
        ValueError: if a flag is given ``=value``, or if the arguments end
            (or ``--`` is reached) while ``bins`` still needs its value.
    """
    config = {}
    positional = []
    pending = None  # value-taking option still waiting for its value
    index = 0
    total = len(arguments)

    while index < total and arguments[index] != "--":
        argument = arguments[index]
        index += 1

        if pending is not None:
            # The word after a value option is its value, whatever it looks like.
            config[pending] = argument
            pending = None
            continue

        if not _OPTION_PATTERN.match(argument):
            positional.append(argument)
            continue

        name, has_value, value = argument.lstrip('-').partition('=')
        option = _ALIASES.get(name)
        if option is None:
            # Matched the pattern but is not a known spelling (the pattern's
            # `$` also matches before a trailing newline); dropped silently.
            continue

        if option in _VALUE_OPTIONS:
            if has_value:
                config[option] = value
            else:
                config[option] = None
                pending = option
        else:
            if has_value:
                message = (
                    f'unexpected value while parsing "{option}"'
                    ' (expected 0 values)'
                )
                raise ValueError(message) from None
            config[option] = True

    if pending is not None:
        message = (
            f'unexpected end while parsing "{pending}"'
            ' (expected 1 values)'
        )
        raise ValueError(message) from None

    # Everything after the "--" separator is positional, taken verbatim.
    positional.extend(arguments[index + 1:])
    return config, positional


if __name__ == "__main__":
    import sys
    cfg, pos = main(sys.argv[1:])
    cfg = {k: v for k, v in cfg.items() if v is not None}
    if len(cfg):
        print("Options:")
        for k, v in cfg.items():
            print(f"{k:20} = {v}")
    if len(pos):
        print("Positional arguments:", ", ".join(pos))

# (patch residue, kept verbatim; continues on the next source line)
# diff --git a/hist/cli.toml b/hist/cli.toml
# new file mode 100644
# index 0000000..4b3b8eb
# --- /dev/null
# +++
# NOTE: these source lines come from a whitespace-collapsed git patch.  Patch
# content that shared the lines but is not part of hist/histogram.py is kept
# verbatim in the comment blocks at the top and bottom.
#
# (patch residue: hist/cli.toml, continued from the previous source line)
# b/hist/cli.toml
# @@ -0,0 +1,16 @@
# +[bins]
# +number = 1
# +alternatives = ['b']
# +
# +[help]
# +number = 0
# +alternatives = ['h', 'x']
# +
# +[positive]
# +number = 0
# +alternatives = ['p']
# +
# +[version]
# +number = 0
# +alternatives = ['v', 'V']
# +
# diff --git a/hist/histogram.py b/hist/histogram.py
# new file mode 100644
# index 0000000..11f347e
# --- /dev/null
# +++ b/hist/histogram.py
# @@ -0,0 +1,156 @@

#!/usr/bin/env python3
"""Render a sequence of numbers as a Unicode bar histogram."""

from bisect import bisect_right
from math import floor
from typing import Callable, Iterator, List, Sequence, Tuple

VBAR = "\u2502"
HBAR = "\u2500"
CORNER = "\u253c"
BLOCK = "\u2588"


def bin_iter(average_value: float, bin_width: int, bins: int) -> Iterator[int]:
    """Iterate over lower bounds of bins that are evenly distributed around the
    average value.

    The first bound is ``average_value - bin_width * (bins // 2)`` truncated
    toward zero by ``int()``; each following bound is ``bin_width`` higher.
    """
    lower_bound = int(average_value - (bin_width * (bins // 2)))
    for index in range(bins):
        yield lower_bound + index * bin_width


def positive_bin_iter(highest_value: float, bin_width: int, bins: int) -> Iterator[int]:
    """Iterate over lower bounds of bins that start at zero.

    ``highest_value`` is accepted but not used.
    """
    for index in range(bins):
        yield index * bin_width


def spaces(count: int) -> str:
    """Return ``count`` space characters."""
    return " " * count


def bars(count: int) -> str:
    """Return ``count`` horizontal-bar characters."""
    return HBAR * count


def build_inclusion_func(low: int, high: int) -> Callable[[float], bool]:
    """Return a predicate that is true for ``low <= n < high``."""
    def inclusion_func(n: float) -> bool:
        return low <= n < high
    return inclusion_func


def format_tower(count_values: int, average_value: float) -> str:
    """Handle edge case where all values are in the same bin.

    Draws a single column labelled ``all`` whose height is ``count_values``,
    followed by the average.
    """
    # the column is as wide as the wider of 'all' and the count of values
    col_width = max(len("all"), len(str(count_values)))

    # y-axis, border space, and the count right-aligned above the bar
    lines = [f"{VBAR} {str(count_values).rjust(col_width)}"]

    # y-axis, border space, and bar at the right edge of the column
    lines.extend(f"{VBAR} {BLOCK.rjust(col_width)}" for _ in range(count_values - 1))

    # x-axis spanning the border space and the column
    lines.append(f"{CORNER}{bars(1 + col_width)}")

    # x-axis label right-aligned with the bar
    lines.append(f" {'all'.rjust(col_width)}")

    # summary statistics
    lines.append(f"Avg. = {average_value:.2f}")

    return "\n".join(lines) + "\n"


def _count_bins(
    values: Sequence[float], bin_lower_bounds: List[int], bin_range: int
) -> Tuple[List[int], int]:
    """Return (count per bin, count of values outside every bin) in one pass."""
    in_range = build_inclusion_func(
        bin_lower_bounds[0], bin_lower_bounds[-1] + bin_range
    )
    bin_counts = [0] * len(bin_lower_bounds)
    excluded_count = 0
    for value in values:
        if in_range(value):
            # Bounds are strictly increasing, so the bin holding ``value`` is
            # the last one whose lower bound is <= value.
            bin_counts[bisect_right(bin_lower_bounds, value) - 1] += 1
        else:
            excluded_count += 1
    return bin_counts, excluded_count


def format(values: Sequence[float], bins: int, positive: bool) -> str:
    """Creates a histogram.

    NOTE(review): this public name shadows the builtin ``format`` inside this
    module; it is kept because callers use it.

    Args:
        values: the numbers to chart; must not be empty.
        bins: number of columns to draw; must be at least 1.
        positive: when true and the lowest computed bin bound is negative,
            redraw the bins starting at zero instead.

    Returns:
        The chart as a newline-terminated multi-line string: bar rows, x-axis,
        bin labels, the average, and a count of values that fell outside the
        drawn bins (when there are any).

    Raises:
        ValueError: if ``values`` is empty or ``bins`` is less than 1.
    """
    count_values = len(values)
    if count_values == 0:
        raise ValueError("cannot draw a histogram without any values")
    if bins < 1:
        raise ValueError(f"bins must be at least 1 (got {bins})")

    average_value = sum(values) / count_values

    # handle edge case where there is a single bin
    if bins == 1:
        return format_tower(count_values, average_value)

    # floored bin width; forced to 1 when the data range is narrower than the
    # number of bins, accepting that some bins will then be empty
    highest_value = max(values)
    bin_range = floor((highest_value - min(values)) // bins)
    if bin_range < 1:
        bin_range = 1

    # bin lower bounds centred on the average, or starting at zero when
    # positive output was requested and the centred bounds dip below zero
    bin_lower_bounds = list(bin_iter(average_value, bin_range, bins))
    if positive and bin_lower_bounds[0] < 0:
        bin_lower_bounds = list(positive_bin_iter(highest_value, bin_range, bins))

    # flooring the bin width means some values (outliers) cannot be drawn
    bin_counts, excluded_count = _count_bins(values, bin_lower_bounds, bin_range)

    # widest cell content, either a bin label or a bar count
    col_width = max(len(str(n)) for n in bin_lower_bounds + bin_counts)

    # one row per height level, top first: y-axis, then a border space and a
    # column for each bin (count on top of the bar, blocks below, else blank)
    lines = []
    for level in range(max(bin_counts), 0, -1):
        cells = []
        for bin_count in bin_counts:
            if bin_count == level:
                cells.append(f" {str(bin_count).rjust(col_width)}")
            elif bin_count > level:
                cells.append(f" {BLOCK.rjust(col_width)}")
            else:
                cells.append(spaces(col_width + 1))
        lines.append(VBAR + "".join(cells))

    # x-axis labels right-aligned with the bars; the axis line matches their width
    x_axis_labels = " " + "".join(
        f" {str(bin_label).rjust(col_width)}" for bin_label in bin_lower_bounds
    )
    lines.append(f"{CORNER}{bars(len(x_axis_labels) - 1)}")
    lines.append(x_axis_labels)

    # statistics
    lines.append(f"Avg. = {average_value:.2f}")
    if excluded_count != 0:
        lines.append(f"excluded {excluded_count} values")

    return "\n".join(lines) + "\n"


def draw(values: Sequence[float], bins: int, positive: bool) -> None:
    """Print the histogram produced by ``format`` to standard output."""
    print(format(values, bins, positive))


# (patch residue, kept verbatim: pyproject.toml and the patch trailer)
# diff --git a/pyproject.toml b/pyproject.toml
# new file mode 100644
# index 0000000..8c38ad3
# --- /dev/null
# +++ b/pyproject.toml
# @@ -0,0 +1,17 @@
# +[build-system]
# +requires = ["setuptools"]
# +build-backend = "setuptools.build_meta"
# +
# +[project]
# +name = "hist"
# +description = "Draw a histogram"
# +readme = "README.md"
# +version = "1.0.0"
# +authors = [ { name = "Dominic Ricottone", email = "me@dominic-ricottone.com" } ]
# +urls = { source = "git.dominic-ricottone.com/~dricottone/hist" }
# +license = { file = "LICENSE.md" }
# +requires-python = ">=3.6"
# +
# +[project.scripts]
# +hist = "hist.__main__:main"
# +
# --
# 2.45.2