Example Sweep 2 Float 1 Cat With Repeats

"""Auto-generated example: 2 Float, 1 Categorical (with repeats)."""

import random
import math

import bencher as bn

class CompressionCodec(bn.ParametrizedSweep):
    """Compression ratio across block size, entropy, and codec."""

    block_size = bn.FloatSweep(default=4096, bounds=[512, 65536], doc="Block size in bytes")
    entropy = bn.FloatSweep(default=0.5, bounds=[0.0, 1.0], doc="Input data entropy")
    codec = bn.StringSweep(["zlib", "lz4", "zstd"], doc="Compression codec")

    ratio = bn.ResultFloat(units="x", doc="Compression ratio")

    def benchmark(self):
        codec_eff = {"zlib": 1.0, "lz4": 0.7, "zstd": 1.1}[self.codec]
        self.ratio = codec_eff * (1.0 - 0.7 * self.entropy) * (1.0 + 0.3 * math.log2(self.block_size / 512))
        self.ratio += random.gauss(0, 0.15 * 0.3)


def example_sweep_2_float_1_cat_with_repeats(run_cfg: bn.BenchRunCfg | None = None) -> bn.Bench:
    """2 Float, 1 Categorical (with repeats)."""
    bench = CompressionCodec().to_bench(run_cfg)
    bench.plot_sweep(input_vars=['block_size', 'entropy', 'codec'], result_vars=['ratio'], description='A 2 float + 1 categorical parameter sweep with multiple repeats per combination. Repeating measurements reveals the noise structure of your benchmark. If your function is deterministic, all repeats will be identical; if it has stochastic components, repeats let you estimate confidence intervals and distinguish signal from noise. The benchmark function must be pure -- if past calls affect future calls through side effects, the statistics will be invalid. A 2D float sweep produces a heatmap. Additional categorical variables create faceted heatmaps, one per category combination.', post_description='Swarm/violin plots show the distribution of repeated measurements. If repeat has high variance, it suggests either measurement noise or unintended side effects in the benchmark function.')

    return bench


if __name__ == "__main__":
    bn.run(example_sweep_2_float_1_cat_with_repeats, subsampling_divisions=4, repeats=3)