~dricottone/hist

5e992c25e37a87afa0bbb2c2e36a368a9b8bee9a — Dominic Ricottone 2 years ago 98a50e2
Rebuild project

Added modern packaging process.

Rewrote code to be executable as a module, more readable, and more
maintainable.
9 files changed, 412 insertions(+), 148 deletions(-)

A Makefile
M README.md
D hist.py
A hist/__init__.py
A hist/__main__.py
A hist/cli.py
A hist/cli.toml
A hist/histogram.py
A pyproject.toml
A Makefile => Makefile +51 -0
@@ 0,0 1,51 @@
# Release version. The wheel target below is named after it, so it has to
# agree with the `version` field in pyproject.toml.
VERSION := 1.0.0

# Byte-compilation check run by `make test`.
PY_COMPILE_BIN := python -m py_compile

# Wheel builder. Module-style alternative: python -m build
BUILD_BIN := pyproject-build

# Unit test runners (single file / whole directory).
# Module-style alternatives:
#   python -m unittest
#   python -m unittest discover --top-level-directory .
UNITTEST_FILE_BIN := unittest --color
UNITTEST_DIR_BIN := unittest --color --working-directory .

# Type checker; its cache is redirected into the package directory.
# Module-style alternative: python -m mypy
MYPY_BIN := MYPY_CACHE_DIR=hist/__mypycache__ mypy

# Installer used by the install/uninstall targets.
# Module-style alternative: python -m pipx
PIPX_BIN := pipx

# Remove build products and the Python/mypy caches.
# make runs recipes with /bin/sh, where `**` is not a recursive glob (it
# behaves like a single `*`), so find(1) is used to reach caches at any depth.
.PHONY: clean
clean:
	find . -type d \( -name __pycache__ -o -name __mypycache__ \) -prune -exec rm -rf {} +
	find . -type f -name '*.pyc' -exec rm -f {} +
	rm -rf build *.egg-info

# The argument parser is generated: rebuild it with `gap` whenever its TOML
# specification changes.
hist/cli.py: hist/cli.toml
	gap $< --no-debug-mode --output=$@

# Static checks: byte-compile every module in the package, then type-check
# the package with mypy. Either command failing fails the target.
.PHONY: test
test:
	$(PY_COMPILE_BIN) hist/*.py
	$(MYPY_BIN) -p hist

# Every module that ships in the wheel. hist/__init__.py is listed too so that
# editing it also marks the wheel as out of date.
PY_FILES=hist/__init__.py hist/__main__.py hist/cli.py hist/histogram.py
# Packaging metadata that is baked into the wheel.
PYBUILD_FILES=pyproject.toml LICENSE.md README.md

# Build the wheel into build/; $(@D) is the directory part of the target.
build/hist-$(VERSION)-py3-none-any.whl: $(PY_FILES) $(PYBUILD_FILES)
	mkdir -p $(@D)
	$(BUILD_BIN) --wheel --no-isolation --outdir $(@D)/

# Convenience alias. It must be phony because a directory named `build` exists
# once the wheel has been built.
.PHONY: build
build: build/hist-$(VERSION)-py3-none-any.whl

# Uninstall, then install again. The two steps run as sequential sub-makes
# because prerequisite order is not guaranteed under `make -j`.
.PHONY: reinstall
reinstall:
	$(MAKE) uninstall
	$(MAKE) install

# Install the freshly built wheel ($< is the wheel prerequisite) with pipx.
.PHONY: install
install: build/hist-$(VERSION)-py3-none-any.whl
	$(PIPX_BIN) install $<

# Remove the pipx-managed installation.
.PHONY: uninstall
uninstall:
	$(PIPX_BIN) uninstall hist


M README.md => README.md +17 -27
@@ 1,33 1,23 @@
syntax: `hist.hist(list_object [, number_bins])`
# hist

- list_object can be any list-like object (lists, tuples, etc.)
- number_bins can be any numeric type (default is 1)
Create a histogram on the terminal.


Example output:
## Usage

```
python3 hist.py
│                  17                        
│                   █                        
│                   █                        
│                   █                        
│                   █                        
│                   █                        
│                   █                        
│          10  10   █  10          10        
│           █   █   █   █           █        
│           █   █   █   █           █       8
│           █   █   █   █   7       █   7   █
│   6   6   █   █   █   █   █       █   █   █
│   █   █   █   █   █   █   █       █   █   █
│   █   █   █   █   █   █   █   4   █   █   █
│   █   █   █   █   █   █   █   █   █   █   █
│   █   █   █   █   █   █   █   █   █   █   █
│   █   █   █   █   █   █   █   █   █   █   █
┼────────────────────────────────────────────
    8  16  24  32  40  48  56  64  72  80  88
                              Average = 48.63
             Excluded outliers: 1, 2, 2, 3, 6
$ ps -eo pmem --sort=-pmem --no-headers | head -n 20 | python -m hist --bins=20 --positive
│ 10                                                         
│  █                                                         
│  █                                                         
│  █                                                         
│  █                                                         
│  █  5                                                      
│  █  █                                                      
│  █  █                                                      
│  █  █  2        2                                          
│  █  █  █        █                          1               
┼────────────────────────────────────────────────────────────
   0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
Avg. = 2.25
```


D hist.py => hist.py +0 -121
@@ 1,121 0,0 @@
#!/usr/bin/env python3

def _bin_iter(avg, bin_width, num_bins):
    bin_ = int(avg - (bin_width * (num_bins // 2)))
    a = 0
    while a < num_bins:
        yield bin_
        bin_ = bin_ + bin_width
        a = a + 1

def hist(args, bin_num=1):
    """
    Print a unicode histogram of `args` to standard output.

    `args` must be a sized collection of numbers. `bin_num` is coerced with
    int() and defaults to a single bin that holds every value. With more than
    one bin, the bins are centred on the average and any value that falls
    outside all of them is reported as an excluded outlier. Returns None.
    """
    axis, bar = u'\u2502', u'\u2588'
    corner, rule = u'\u253c', u'\u2500'

    avg = sum(args)/len(args)
    bin_num = int(bin_num)

    # a single bin: one tower as tall as the number of values
    if bin_num == 1:
        total = len(args)
        print(axis + ' {:>3}'.format(total))
        for _ in range(total - 1):
            print(axis + '   ' + bar)
        print(corner.ljust(5, rule) + '\n'
              + '  all\n'
              + '  Avg. = {:.2f}'.format(avg))
        return

    # histogram design: floored bin width, never narrower than 1
    remaining = sorted(args)
    bin_width = (remaining[-1] - remaining[0]) // bin_num
    if bin_width == 0:
        bin_width = 1

    # collect histogram data; binned values are removed from `remaining`,
    # so whatever is left afterwards belongs to no bin
    bin_labels = []
    bin_freq = []
    for lower in _bin_iter(avg, bin_width, bin_num):
        bin_labels.append(str(lower))
        upper = lower + bin_width

        position = 0
        freq = 0
        while position < len(remaining):
            value = remaining[position]
            if value < lower:
                # below this bin: leave it in place and move on
                position += 1
            elif not (value >= lower and value < upper):
                # sorted input: nothing further can fall in this bin
                break
            else:
                del remaining[position]
                freq += 1
        bin_freq.append(freq)
    outliers = list(remaining)

    # draw histogram, top row first; a column shows its count on the row
    # matching its height and a block on every row below that
    for level in range(max(bin_freq), 0, -1):
        cells = []
        for freq in bin_freq:
            if freq == level:
                mark = str(level)
            elif freq > level:
                mark = bar
            else:
                mark = ''
            cells.append(' {:>3}'.format(mark))
        print(axis + ''.join(cells))

    # draw histogram labels
    hist_width = (bin_num * 4) + 1
    print(corner.ljust(hist_width, rule))
    print(' ' + ''.join(' {:>3}'.format(label) for label in bin_labels))

    # draw histogram details as applicable
    details_width = max(hist_width, 15)
    print('Average ='.rjust(details_width - 6) + ' ' + '{:<.2f}'.format(avg))

    if outliers:
        listing = 'Excluded outliers: ' + ', '.join(str(outlier) for outlier in outliers)
        print(listing.rjust(details_width))

    return

if __name__ == '__main__':
    # Demo: draw an 11-bin histogram of 100 random integers in 1..100.
    import random
    lst = [random.randrange(1,101) for _ in range(100)]
    hist(lst, 11)

A hist/__init__.py => hist/__init__.py +0 -0
A hist/__main__.py => hist/__main__.py +29 -0
@@ 0,0 1,29 @@
#!/usr/bin/env python

import sys
import fileinput

from . import cli
from . import histogram

def main():
    """Command-line entry point: read numbers and draw their histogram.

    Options come from `cli.main`. One number is read per line from the files
    given as positional arguments, or from standard input when there are
    none. Blank lines are ignored. Exits with an error message when no
    values were read; a non-numeric line still raises ValueError.
    """
    _config, _positionals = cli.main(sys.argv[1:])

    # NOTE(review): --help and --version exit successfully without printing
    # anything; no help or version text is defined in this module.
    if "version" in _config or "help" in _config:
        sys.exit(0)

    _bins = int(_config.get("bins", 10))
    _positive = "positive" in _config

    data = []
    # hook_encoded() is used instead of the `encoding` keyword, which only
    # exists on Python 3.10+, while the project declares support for 3.6+.
    with fileinput.input(files=_positionals, openhook=fileinput.hook_encoded("utf-8")) as f:
        for line in f:
            text = line.strip()
            if text:
                data.append(float(text))

    if not data:
        # without this the average computation would divide by zero
        sys.exit("hist: no input values")

    histogram.draw(data, _bins, _positive)


# Run the entry point when the package is executed, e.g. `python -m hist`.
if __name__ == '__main__':
    main()


A hist/cli.py => hist/cli.py +126 -0
@@ 0,0 1,126 @@
#!/usr/bin/env python3

import re

def main(arguments):
	"""Parse `arguments` and return a `(config, positional)` tuple.

	`config` maps canonical option names to values: "bins" holds the string
	given as `--bins=N`, `-b=N`, or as the argument following `--bins`/`-b`;
	"help", "positive" and "version" are set to True when present.
	`positional` collects every other argument, plus everything after a
	literal "--". Parsed items are popped from `arguments` in place.

	Raises ValueError when a flag is given an attached value or when
	"bins" is the last argument and has no value.
	"""
	# every accepted spelling (without leading dashes) -> canonical name
	canonical_names={
		"bins":"bins","b":"bins",
		"help":"help","h":"help","x":"help",
		"positive":"positive","p":"positive",
		"version":"version","v":"version","V":"version",
	}
	pattern=re.compile(r"(?:-(?:b|h|x|p|v|V)|--(?:bins|help|positive|version))(?:=.*)?$")
	config=dict()
	positional=[]
	awaiting=None	# option whose value is expected as the next argument
	while len(arguments) and arguments[0]!="--":
		token=arguments.pop(0)
		if awaiting is not None:
			config[awaiting]=token
			awaiting=None
			continue
		if not pattern.match(token):
			positional.append(token)
			continue
		name,equals,attached=token.lstrip('-').partition('=')
		target=canonical_names.get(name)
		if target is None:
			# `$` also matches before a trailing newline, so a token such
			# as "-b\n" passes the pattern but names no option; drop it
			continue
		if target=="bins":
			if equals:
				config["bins"]=attached
			else:
				config["bins"]=None
				awaiting="bins"
			continue
		if equals:
			message=(
				f'unexpected value while parsing "{target}"'
				' (expected 0 values)'
			)
			raise ValueError(message) from None
		config[target]=True
	if awaiting is not None:
		message=(
			f'unexpected end while parsing "{awaiting}"'
			' (expected 1 values)'
		)
		raise ValueError(message) from None
	# anything left starts with the "--" separator; skip it and keep the rest
	positional.extend(arguments[1:])
	return config,positional

if __name__=="__main__":
	# Debug entry point: parse the process arguments and print the result.
	import sys
	cfg,pos = main(sys.argv[1:])
	# hide options that were started but never given a value
	cfg = {k:v for k,v in cfg.items() if v is not None}
	if cfg:
		print("Options:")
		for name,value in cfg.items():
			print(f"{name:20} = {value}")
	if pos:
		print("Positional arguments:", ", ".join(pos))

A hist/cli.toml => hist/cli.toml +16 -0
@@ 0,0 1,16 @@
# Command-line option specification. The Makefile feeds this file to `gap`
# to generate hist/cli.py.
# Each table is a long option (`--name`).
#   number       - how many values the option takes (0 = plain flag)
#   alternatives - additional single-dash spellings of the same option

# Number of bins in the histogram.
[bins]
number = 1
alternatives = ['b']

[help]
number = 0
alternatives = ['h', 'x']

# NOTE(review): appears to request bins starting at zero; confirm against
# hist/histogram.py.
[positive]
number = 0
alternatives = ['p']

[version]
number = 0
alternatives = ['v', 'V']


A hist/histogram.py => hist/histogram.py +156 -0
@@ 0,0 1,156 @@
#!/usr/bin/env python3

from typing import Iterator, Sequence, Callable
from math import floor

VBAR = "\u2502"
HBAR = "\u2500"
CORNER = "\u253c"
BLOCK = "\u2588"

def bin_iter(average_value: float, bin_width: int, bins: int) -> Iterator[int]:
    """Yield `bins` lower bounds, `bin_width` apart, centred on the average.

    The first bound is the average minus half of the bins' combined width,
    truncated to an integer.
    """
    lower_bound = int(average_value - (bin_width * (bins // 2)))
    for _ in range(bins):
        yield lower_bound
        lower_bound += bin_width

def positive_bin_iter(highest_value: float, bin_width: int, bins: int) -> Iterator[int]:
    """Yield `bins` lower bounds, `bin_width` apart, with the first at zero.

    `highest_value` is accepted but not used.
    """
    lower_bound = 0
    for _ in range(bins):
        yield lower_bound
        lower_bound += bin_width

def spaces(count: int) -> str:
    """Return `count` space characters (an empty string if count <= 0)."""
    return "".ljust(count)

def bars(count: int) -> str:
    """Return a horizontal rule made of `count` HBAR characters."""
    return "".ljust(count, HBAR)

def build_inclusion_func(low: int, high: int) -> Callable[[float], bool]:
    """Return a predicate that is true for values in the range [low, high)."""
    return lambda n: low <= n < high

def format_tower(count_values: int, average_value: float) -> str:
    """Render the single-bin histogram: one bar as tall as the value count.

    Layout, top to bottom: the count on the first row, a bar segment on each
    of the remaining `count_values - 1` rows, the x-axis, the label 'all',
    and the average. Every row ends with a newline.
    """
    count_label = str(count_values)

    # the column is as wide as the count or the label 'all', whichever is wider
    col_width = max(len(count_label), 3)

    # y-axis, one border space, then the bar right-aligned in the column
    bar_row = f"{VBAR} {BLOCK.rjust(col_width)}"

    rows = [f"{VBAR} {count_label.rjust(col_width)}"]
    rows.extend(bar_row for _ in range(count_values - 1))

    # x-axis spans the border space plus the column
    rows.append(f"{CORNER}{bars(1 + col_width)}")

    # x-axis label, right-aligned under the bar
    rows.append(f"  {'all'.rjust(col_width)}")

    # summary statistics
    rows.append(f"Avg. = {average_value:.2f}")

    return "".join(row + "\n" for row in rows)

def format(values: Sequence[float], bins: int, positive: bool) -> str:
    """Render `values` as a text histogram and return it as a string.

    Args:
        values: the numbers to chart; must not be empty.
        bins: number of bins to draw; must be at least 1. With exactly one
            bin the result is the single tower from `format_tower`.
        positive: when true and the first bin would start below zero, the
            bins are laid out from zero instead of around the average.

    Returns:
        The chart, one row per line: bars, x-axis, bin labels, the average,
        and (if any values fell outside every bin) an exclusion count.

    Raises:
        ValueError: if `values` is empty or `bins` is less than 1.
    """
    count_values = len(values)

    # reject inputs that previously failed with an opaque ZeroDivisionError
    # or IndexError further down
    if count_values == 0:
        raise ValueError("cannot draw a histogram of zero values")
    if bins < 1:
        raise ValueError(f"bins must be at least 1 (got {bins})")

    average_value = sum(values) / count_values

    # handle edge cases where there is a single bin
    if bins == 1:
        return format_tower(count_values, average_value)

    # determine the floored bin range
    ordered_values = sorted(values)
    value_range = ordered_values[-1] - ordered_values[0]
    bin_range = floor(value_range // bins)

    # handle edge cases where floored bin range is less than 1;
    # force to 1 and accept that there will be empty trailing bins
    if bin_range == 0:
        bin_range = 1

    # determine the bin lower bounds
    bin_lower_bounds = list(bin_iter(average_value, bin_range, bins))

    # if positive is True and the lowest bound is negative, redraw bin lower
    # bounds starting at zero
    if positive and bin_lower_bounds[0] < 0:
        bin_lower_bounds = list(positive_bin_iter(ordered_values[-1], bin_range, bins))

    # handle outlier values that cannot be drawn;
    # flooring the bin ranges causes values (esp. lower outliers) to be cut
    check_inclusion = build_inclusion_func(bin_lower_bounds[0], bin_lower_bounds[-1] + bin_range)
    included_values = list(filter(check_inclusion, values))
    excluded_count = count_values - len(included_values)

    # count values within each bin's half-open range
    bin_counts = [
        len(list(filter(build_inclusion_func(low, low + bin_range), included_values)))
        for low in bin_lower_bounds
    ]

    # get the widest column, either a bin label or a bar count
    col_width = max(
        max(len(str(n)) for n in bin_lower_bounds),
        max(len(str(n)) for n in bin_counts),
    )

    # draw y-axis, border space, and a column and border space for each bin;
    # rows run from the tallest bar down to 1
    buf = ""
    for level in range(max(bin_counts), 0, -1):
        buf += VBAR
        for bin_count in bin_counts:
            if bin_count == level:
                # top of this bar: show its count
                buf += f" {str(bin_count).rjust(col_width)}"
            elif bin_count > level:
                buf += f" {BLOCK.rjust(col_width)}"
            else:
                buf += spaces(col_width + 1)
        buf += "\n"

    # draw x-axis labels that will be right-aligned with the bars;
    # their total length determines the width of the x-axis
    x_axis_labels = " " + "".join(
        f" {str(bin_label).rjust(col_width)}" for bin_label in bin_lower_bounds
    )
    chart_width = len(x_axis_labels)

    # draw x-axis the full length of the chart
    buf += f"{CORNER}{bars(chart_width - 1)}\n"

    # now push x-axis labels
    buf += f"{x_axis_labels}\n"

    # draw statistics
    buf += f"Avg. = {average_value:.2f}\n"
    if excluded_count != 0:
        buf += f"excluded {excluded_count} values\n"

    return buf

def draw(values: Sequence[float], bins: int, positive: bool) -> None:
    """Print the histogram built by `format` to standard output."""
    # format() already ends its output with a newline, so print() leaves one
    # blank line after the chart.
    print(format(values, bins, positive))


A pyproject.toml => pyproject.toml +17 -0
@@ 0,0 1,17 @@
# Build with setuptools through the standard PEP 517 interface.
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "hist"
description = "Draw a histogram"
readme = "README.md"
# Keep in sync with VERSION in the Makefile, which names the wheel after it.
version = "1.0.0"
authors = [ { name = "Dominic Ricottone", email = "me@dominic-ricottone.com" } ]
# Project URLs need a scheme to be valid URLs in the package metadata.
urls = { source = "https://git.dominic-ricottone.com/~dricottone/hist" }
license = { file = "LICENSE.md" }
requires-python = ">=3.6"

# Installs a `hist` command that calls main() in hist/__main__.py.
[project.scripts]
hist = "hist.__main__:main"