概述
在本项目中,我们将15中的想法转化为实际工作流:在编译时生成小型查找表,并在运行时以零开销使用它们。这种技术从热循环中移除分支,用常量数据替换重复工作,并保持代码简单。我们将采用"先测量"的心态,展示表格何时有帮助以及何时不值得二进制大小。
我们将实现三个自包含的演示:
- ASCII分类表:常量时间字符分类(数字/字母/空格/标点)
- Popcount表:字节的快速位计数,可组合用于更大的聚合
- 乘法表:紧凑渲染的参数化N×N矩阵
每个示例都使用Zig的现代stdout写入器(参见Writergate更改),并在直接运行时打印可见结果。参见v0.15.2和ascii.zig。
学习目标
ASCII分类表
我们构建一个256个条目的表格,将字节映射到数字/字母/空格/标点的位掩码。在运行时,我们对输入字符串进行汇总。"标点"集合派生自isPrint && !isAlphanumeric && !isWhitespace(对于ASCII足够)。
const std = @import("std");
/// 辅助函数,获取缓冲的标准输出写入器。
/// 使用静态缓冲区避免重复分配。
fn stdout() *std.Io.Writer {
const g = struct {
var buf: [4096]u8 = undefined;
var w = std.fs.File.stdout().writer(&buf);
};
return &g.w.interface;
}
/// 表示ASCII字符类的位标志。
/// 可以使用按位OR组合多个标志。
const Class = struct {
pub const digit: u8 = 0x01; // 0-9
pub const alpha: u8 = 0x02; // A-Z, a-z
pub const space: u8 = 0x04; // 空格、换行、制表符、回车
pub const punct: u8 = 0x08; // 标点符号
};
/// 构建查找表,将每个字节(0-255)映射到其字符类标志。
/// 此函数在编译时运行,产生嵌入在二进制文件中的常量表。
fn buildAsciiClassTable() [256]u8 {
// 将所有条目初始化为0(未设置类标志)
var t: [256]u8 = .{0} ** 256;
// 在编译时迭代所有可能的字节值
comptime var b: usize = 0;
inline while (b < 256) : (b += 1) {
const ch: u8 = @intCast(b);
var m: u8 = 0; // 类标志的累加器
// 检查字符是否为数字(0-9)
if (ch >= '0' and ch <= '9') m |= Class.digit;
// 检查字符是否为字母(A-Z或a-z)
if ((ch >= 'A' and ch <= 'Z') or (ch >= 'a' and ch <= 'z')) m |= Class.alpha;
// 检查字符是否为空白字符(空格、换行、制表符、回车)
if (ch == ' ' or ch == '\n' or ch == '\t' or ch == '\r') m |= Class.space;
// 检查字符是否为标点符号(可打印、非字母数字、非空白)
if (std.ascii.isPrint(ch) and !std.ascii.isAlphanumeric(ch) and !std.ascii.isWhitespace(ch)) m |= Class.punct;
// 为此字节值存储计算出的标志
t[b] = m;
}
return t;
}
/// 计算输入字符串中每个字符类的出现次数。
/// 使用预计算的查找表实现每个字符的O(1)分类。
fn countKinds(s: []const u8) struct { digits: usize, letters: usize, spaces: usize, punct: usize } {
// 构建分类表(在编译时发生)
const T = buildAsciiClassTable();
// 为每个字符类初始化计数器
var c = struct { digits: usize = 0, letters: usize = 0, spaces: usize = 0, punct: usize = 0 }{};
// 遍历输入字符串中的每个字节
var i: usize = 0;
while (i < s.len) : (i += 1) {
// 查找当前字节的类标志
const m = T[s[i]];
// 测试每个标志并增加相应的计数器
if ((m & Class.digit) != 0) c.digits += 1;
if ((m & Class.alpha) != 0) c.letters += 1;
if ((m & Class.space) != 0) c.spaces += 1;
if ((m & Class.punct) != 0) c.punct += 1;
}
// 返回计数作为匿名结构
return .{ .digits = c.digits, .letters = c.letters, .spaces = c.spaces, .punct = c.punct };
}
pub fn main() !void {
// 获取缓冲输出写入器
const out = stdout();
// 定义包含各种字符类的测试字符串
const s = "Hello, Zig 0.15.2! \t\n";
// 计算测试字符串中每个字符类
const c = countKinds(s);
// 打印输入字符串
try out.print("input: {s}\n", .{s});
// 打印每个字符类的计算计数
try out.print("digits={} letters={} spaces={} punct={}\n", .{ c.digits, c.letters, c.spaces, c.punct });
// 确保缓冲输出写入stdout
try out.flush();
}
$ zig run ascii_class_table.ziginput: Hello, Zig 0.15.2!
digits=4 letters=8 spaces=6 punct=4像这样的表格可以移除内循环中的重复分支。保持推导逻辑易于审查,并尽可能优先使用std.ascii助手。15
字节的Popcount表
与其为每个字节调用位操作例程,我们烘焙一个256个条目的popcount表并在输入上进行归约。这可以从玩具示例扩展到"计算缓冲区中设置位"的原语。
const std = @import("std");
// Returns a reference to a buffered stdout writer.
// The buffer and writer are stored in a private struct to persist across calls.
fn stdout() *std.Io.Writer {
const g = struct {
// Static buffer for stdout writes—survives function returns
var buf: [4096]u8 = undefined;
// Writer wraps stdout with the buffer; created once
var w = std.fs.File.stdout().writer(&buf);
};
// Return pointer to the writer's generic interface
return &g.w.interface;
}
// Counts the number of set bits (1s) in a single byte using bit manipulation.
// Uses a well-known parallel popcount algorithm that avoids branches.
fn popcountByte(x: u8) u8 {
var v = x;
// Step 1: Count bits in pairs (2-bit groups)
// Subtracts neighbor bit from each 2-bit group to get counts 0-2
v = v - ((v >> 1) & 0x55);
// Step 2: Count bits in nibbles (4-bit groups)
// Adds adjacent 2-bit counts to get nibble counts 0-4
v = (v & 0x33) + ((v >> 2) & 0x33);
// Step 3: Combine nibbles and mask low 4 bits (result 0-8)
// Adding the two nibbles gives total count, truncate to u8
return @truncate(((v + (v >> 4)) & 0x0F));
}
// Builds a 256-entry lookup table at compile time.
// Each entry [i] holds the number of set bits in byte value i.
fn buildPopcountTable() [256]u8 {
// Initialize table with zeros (all 256 entries)
var t: [256]u8 = .{0} ** 256;
// Compile-time loop index (required for inline while)
comptime var i: usize = 0;
// Unrolled loop: compute popcount for each possible byte value
inline while (i < 256) : (i += 1) {
// Store the bit count for byte value i
t[i] = popcountByte(@intCast(i));
}
// Return the fully populated table as a compile-time constant
return t;
}
pub fn main() !void {
// Acquire the buffered stdout writer
const out = stdout();
// Generate the popcount lookup table at compile time
const T = buildPopcountTable();
// Test data: array of bytes to analyze
const bytes = [_]u8{ 0x00, 0x0F, 0xF0, 0xAA, 0xFF };
// Accumulator for total set bits across all test bytes
var sum: usize = 0;
// Sum up set bits by indexing into the precomputed table
for (bytes) |b| sum += T[b];
// Print label for the output
try out.print("bytes: ", .{});
// Print each byte in hex format with spacing
for (bytes, 0..) |b, idx| {
// Add space separator between bytes (not before first)
if (idx != 0) try out.print(" ", .{});
// Format as 0x-prefixed 2-digit hex (e.g., 0x0F)
try out.print("0x{X:0>2}", .{b});
}
// Print the final sum of all set bits
try out.print(" -> total set bits = {}\n", .{sum});
// Flush the buffered writer to ensure all output appears
try out.flush();
}
$ zig run popcount_table.zigbytes: 0x00 0x0F 0xF0 0xAA 0xFF -> total set bits = 20在许多工作负载中,CPU的POPCNT指令(或std.math.popCount)已经很快。仅当您的性能分析显示它有助于您的数据访问模式和平台时,才优先使用表格。52
参数化乘法表 (N×N)
这里表格维度是一个comptime参数,因此编译器展开生成并存储紧凑的[N][N]u16。我们格式化一个12×12的"乘法表",并且只打印子集以保持输出可读。
const std = @import("std");
// Returns a reference to a buffered stdout writer.
// The buffer and writer are stored in a private struct to persist across calls.
fn stdout() *std.Io.Writer {
const g = struct {
// Static buffer for stdout writes—survives function returns
var buf: [8192]u8 = undefined;
// Writer wraps stdout with the buffer; created once
var w = std.fs.File.stdout().writer(&buf);
};
// Return pointer to the writer's generic interface
return &g.w.interface;
}
// Builds an N×N multiplication table at compile time.
// Each cell [i][j] holds (i+1) * (j+1) (1-indexed).
fn buildMulTable(comptime N: usize) [N][N]u16 {
// Declare the result table; will be computed entirely at compile time
var t: [N][N]u16 = undefined;
// Outer loop: row index (compile-time variable required for inline while)
comptime var i: usize = 0;
inline while (i < N) : (i += 1) {
// Inner loop: column index
comptime var j: usize = 0;
inline while (j < N) : (j += 1) {
// Store (row+1) * (col+1) in the table
t[i][j] = @intCast((i + 1) * (j + 1));
}
}
// Return the fully populated table as a compile-time constant
return t;
}
pub fn main() !void {
// Acquire the buffered stdout writer
const out = stdout();
// Table dimension (classic 12×12 times table)
const N = 12;
// Generate the multiplication table at compile time
const T = buildMulTable(N);
// Print header line
try out.print("{s}x{s} multiplication table (partial):\n", .{ "12", "12" });
// Print only first 6 rows to keep output concise (runtime loop)
var i: usize = 0;
while (i < 6) : (i += 1) {
// Print all 12 columns for this row
var j: usize = 0;
while (j < N) : (j += 1) {
// Format each cell right-aligned in a 4-character field
try out.print("{d: >4}", .{T[i][j]});
}
// End the row with a newline
try out.print("\n", .{});
}
// Flush the buffered writer to ensure all output appears
try out.flush();
}
$ zig run mult_table.zig12x12 multiplication table (partial):
1 2 3 4 5 6 7 8 9 10 11 12
2 4 6 8 10 12 14 16 18 20 22 24
3 6 9 12 15 18 21 24 27 30 33 36
4 8 12 16 20 24 28 32 36 40 44 48
5 10 15 20 25 30 35 40 45 50 55 60
6 12 18 24 30 36 42 48 54 60 66 72inline while/for构造需要编译时已知的边界;将它们与comptime var索引配对使意图明确。除非有展开的理由,否则选择普通循环。15