/* Test speed of proportional pixel-font rendering in C.
*
* To the extent possible under law, Kragen Javier Sitaker has waived
* all copyright and related or neighboring rights to this code. This
* work is published from Argentina. For more information, please see
* .
*
* Performance tests compiled with:
cc -O -Wall -Werror -std=gnu99 propfont.c -o propfont
* To generate an assembly listing:
cc -fverbose-asm -Wa,-adhln=propfont.lst -g -Os -Wall -std=gnu99 \
propfont.c -o propfont
* Basically, the objective here is to come up with some kind of upper
* limit on how fast computers can reasonably format text from plain
* ASCII to plain pixels. Surely you can do 128 kilobytes per second;
* that’s just repainting a VT100 at 64Hz. But can you render 1
* megabyte per second on a modern machine? 8? 64? 512? 4096? I
* want to get to where memcpy bandwidth into the output pixel buffers is
* the bottleneck.
*
* This code does a little more than a VT100, in that it does
* proportional font rendering and word wrap.
*
* Current results have it rendering 12.6 megabytes in 1.05" on my
* netbook, so 12 megabytes of text per second,
* including word wrapping, but always rendered into the same buffer.
* On my laptop, it runs at 70 megabytes of text per second,
* about seven times as fast.
* This
* is far from memcpy-limited!
* 12.6 megabytes of text input is about 260 megapixels of pixel output,
* at one byte per pixel.
* Even my netbook is capable of 1700 megabytes per second from main memory!
* On a newer but lower-power laptop (Pentium N3700 1.6GHz) it does 126
* megabytes in 5.7", 22 megabytes per second.
*
* Possible ways to speed this up further:
*
* - Maybe have the word-wrap code store the glyph offsets and byte
* widths into a “compositing stick” that the rendering code can
* then use, rather than recomputing them.
*
* - Use the GPU!
*
* - Compile each line of text into machine code that copies the
* relevant part of the glyph into the relevant part of the image.
* This hoists the memcpy-size test out of the inner loop even more
* effectively.
*
* - Maybe initially compose the pixels as bits using shifts, only
* translating to bytes or whole pixels later (with, say, a
* 256-by-8-pixel lookup buffer)?
*/
#include
#include
#include
#include
/* I designed this 6-pixel-tall proportional square-pixel pixel font
* (based, obviously, on the work of others) in 2012 for
*
* and rendered the KJV Bible with it at
* . This is the output
* of, basically, pngtopnm | pnmtopgm | pgmtopbm | pbmtoxbm. Though I
* haven’t read the XBM spec, it seems to be organized as 7 sequential
* rows of 47 bytes, each containing 8 horizontal pixels in
* little-endian order (376 in total, even though the original image
* is only 374 pixel wide), which are 1 for ink (originally black) or
* 0 for paper.
*/
#define propfont_width 374
#define propfont_height 7
const static char propfont_bits[] = {
0x54,0x8a,0x84,0x48,0x29,0x00,0x20,0x91,0x5c,0x9d,0x9d,0x09,0x20,0xc4,0x38,
0x19,0x99,0x3b,0xab,0xa3,0x8a,0x28,0xc9,0xc8,0xd8,0x55,0x45,0x55,0xdf,0x62,
0x82,0x40,0x00,0x01,0x41,0xa4,0x18,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x40,
0x85,0x12,0x54,0xdf,0x51,0x29,0x12,0x01,0xa0,0x5a,0x51,0x45,0x50,0x55,0x92,
0x09,0xa5,0xaa,0xaa,0x88,0x28,0xa1,0x8a,0x6d,0x55,0x55,0x85,0x54,0x55,0x55,
0x48,0x42,0x05,0xd9,0x98,0x89,0xd8,0x80,0x92,0x65,0x64,0xac,0xec,0xaa,0xa2,
0xaa,0x27,0x49,0x19,0x04,0xca,0x88,0x20,0xba,0xc3,0x90,0x13,0xc9,0xcd,0x88,
0x18,0x08,0x90,0xb4,0x9b,0xa8,0x99,0x3a,0xa1,0x89,0xaa,0xd5,0xd4,0x88,0x54,
0x55,0x22,0x44,0x44,0x00,0x54,0x45,0xd5,0x55,0xa5,0x91,0xaa,0xaa,0x6a,0x46,
0xaa,0x2a,0x29,0x32,0x18,0x18,0x00,0x1f,0x45,0x21,0x12,0x01,0x88,0x92,0x10,
0x51,0x45,0x11,0x90,0x09,0x84,0xaa,0xaa,0x88,0x2a,0xa9,0x8a,0x2a,0x55,0x54,
0x91,0xd4,0x6d,0x25,0x42,0x48,0x00,0x54,0x45,0x8d,0x58,0xa5,0x92,0xaa,0x6a,
0x2a,0x48,0xaa,0x2a,0x31,0x21,0x09,0x18,0x04,0xca,0x91,0x22,0x2a,0x10,0x0a,
0xd9,0x0d,0x8d,0xc4,0x4c,0x22,0x84,0xb8,0x1a,0x99,0x0b,0xab,0x93,0xba,0x28,
0x49,0x48,0x8d,0x9c,0x44,0x25,0xdf,0x68,0x00,0xd8,0x98,0x99,0x50,0xa5,0xba,
0xaa,0x24,0x2c,0x86,0x4c,0x94,0xa2,0x27,0x09,0x10,0x00,0x80,0x00,0x40,0x01,
0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x0c,
0x10,0x00,0x00,0x20,0x08,0x00,0x00,0x00,0x18,0x40,0x04,0x10,0x8a,0x20,0x22,
0x94,0x44,0x24,0x45,0x24,0x22,0x22,0x22,0xa2,0x44,0x22,0x42,0x44,0x44,0x44,
0x44,0x44,0x44,0x10,0x22,0x22,0x22,0x22,0x82,0x88,0x20,0x91,0x48,0x22,0x22,
0x22,0x22,0x4a,0x44,0x10,0x11,0x11,0x11,0x11,0x41,0x44,0x88,0x22,0x24};
enum { propfont_glyphs = 96 };
/* Safely convert an ASCII byte into a glyph index for propfont. */
static inline int _glyph_index_from_ascii(char c) {
unsigned x = c;
if (x == '\n') x = 127;
x -= ' ';
return x > 127 - ' ' ? 127 : x;
}
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x)))
static char glyph_indices[256];
static void setup_glyph_indices() {
for (int i = 0; i < ARRAY_SIZE(glyph_indices); i++) {
glyph_indices[i] = _glyph_index_from_ascii(i);
}
}
static inline int glyph_index_from_ascii(char c) {
return glyph_indices[(int)(unsigned char)c];
}
/* Return true if the propfont pixel at x, y has ink */
static inline int propfont_ink(int x, int y) {
return (propfont_bits[y * ((propfont_width + 7) / 8) + x / 8] >> x % 8) & 1;
}
static int *propfont_columns = 0;
static void setup_propfont_columns() {
static int columns[propfont_glyphs + 1];
/* Compute char widths from last row at startup */
int propfont_index = 1; /* char 0, the space, starts at 0 */
for (int x = 0; x < propfont_width; x++) {
if (propfont_ink(x, propfont_height - 1)) {
assert(propfont_index < ARRAY_SIZE(columns));
columns[propfont_index++] = x + 1;
}
}
assert(propfont_index == propfont_glyphs + 1); /* otherwise, invalid font */
columns[propfont_glyphs] = propfont_width; /* quasi-sentinel */
propfont_columns = columns;
}
/* Compute the pixel width in propfont of ASCII char c. Also return
* the column where it starts via passed-by-reference int column.
*/
static inline int pixel_width_from_glyph_index(int glyph_index, int *column) {
int *columns = propfont_columns;
int col = columns[glyph_index];
*column = col;
return columns[glyph_index + 1] - col;
}
static int columns_by_ascii[256], pixel_widths_by_ascii[256];
static inline void setup_ascii_tables() {
for (int c = 0; c < 256; c++) {
int glyph_index = glyph_index_from_ascii(c);
int *column = &columns_by_ascii[c];
pixel_widths_by_ascii[c] = pixel_width_from_glyph_index(glyph_index,
column);
}
}
static inline int pixel_width_from_ascii(unsigned char ascii_char,
int *column) {
*column = columns_by_ascii[(int)ascii_char];
return pixel_widths_by_ascii[(int)ascii_char];
}
/* This is sort of like a two-dimensional, dynamically-typed pointer
* into pixels.
*/
typedef struct {
char *pixels;
int pixel_bytes, line_stride;
} pixel_buf;
static inline char *index_pixbuf(pixel_buf buf, int x, int y) {
return buf.pixels + y * buf.line_stride + x * buf.pixel_bytes;
}
static inline pixel_buf offset_pixbuf(pixel_buf buf, int dx, int dy) {
pixel_buf result = {
index_pixbuf(buf, dx, dy),
buf.pixel_bytes,
buf.line_stride,
};
return result;
}
static inline void fill_pixbuf(pixel_buf buf, char *ink, int w, int h) {
for (int x = 0; x < w; x++) {
memcpy(index_pixbuf(buf, x, 0), ink, buf.pixel_bytes);
}
for (int y = 0; y < h; y++) {
memcpy(index_pixbuf(buf, 0, y), index_pixbuf(buf, 0, 0), w * buf.pixel_bytes);
}
}
/* Render a version of propfont at a given size, suitable for
* memcpying chunks of pixels from later.
*/
static void render_propfont(pixel_buf buf, char *ink_pixel, char *paper_pixel) {
setup_propfont_columns();
setup_glyph_indices();
setup_ascii_tables();
for (int y = 0; y < propfont_height; y++) {
char *pixel = buf.pixels + buf.line_stride * y;
for (int x = 0; x < propfont_width; x++) {
char *color = propfont_ink(x, y) ? ink_pixel : paper_pixel;
memcpy(pixel, color, buf.pixel_bytes);
pixel += buf.pixel_bytes;
}
}
}
/* A version of memcpy optimized for
* the very short chunks of bytes we’re mostly copying here. Using
* this makes the program as a whole run twice as fast.
* GCC is smart enough to use optimized versions of memcpy for a
* particular constant number of bytes if it knows what that number
* is. So this code looks sort of stupid but it actually makes a big
* performance difference.
*/
static void short_memcpy(char *dest, const char *src, size_t nbytes) {
if (nbytes == 4) {
memcpy(dest, src, 4);
} else if (nbytes < 4) {
if (nbytes == 2) {
memcpy(dest, src, 2);
} else if (nbytes < 2) {
if (nbytes == 1) {
*dest = *src;
}
} else { /* nbytes == 3 */
memcpy(dest, src, 3);
}
} else {
memcpy(dest, src, nbytes);
}
}
/* Render the given line of ASCII into the given pixel buffer. */
static void render_line(char *line, int len, pixel_buf font, pixel_buf output) {
int column;
assert(output.pixel_bytes == font.pixel_bytes);
for (int i = 0; i < len; i++) {
int pixel_width = pixel_width_from_ascii(line[i], &column);
int glyph_width = output.pixel_bytes * pixel_width;
for (int y = 0; y < propfont_height - 1; y++) {
short_memcpy(index_pixbuf(output, 0, y),
index_pixbuf(font, column, y),
glyph_width);
}
output = offset_pixbuf(output, pixel_width, 0);
}
}
/* Return the number of characters from this line that can fit into
* the given pixel width, using a crude word-wrap. */
static int line_word_wrap_point(char *line, int len, int pixel_width) {
int i, x = 0, last_break_point = -1, column;
for (i = 0; i < len; i++) {
if (line[i] == '\n') return i;
x += pixel_width_from_ascii(line[i], &column);
if (x > pixel_width) {
return last_break_point == -1 ? i - 1 : last_break_point;
}
if (line[i] == ' ') {
last_break_point = i;
} else if (line[i] == '-') {
last_break_point = i+1;
}
}
return i;
}
static void render_text(char *text,
int len,
pixel_buf font,
pixel_buf output,
int pixel_width)
{
int line_start = 0;
int lineno = 0;
while (line_start < len) {
char *line = text + line_start;
int line_len = line_word_wrap_point(line, len - line_start, pixel_width);
render_line(line, line_len, font, output);
line_start += line_len;
if (line_start < len
&& (text[line_start] == '\n'
|| (text[line_start] == ' '
&& line_start > 0
&& text[line_start-1] != '\n'
)
)
) {
line_start++;
}
output = offset_pixbuf(output, 0, propfont_height - 1);
lineno++;
}
}
/* PPM P6 file format; see
; copied from
*/
static void
ppm_header(int ww, int hh)
{ printf("P6\n%d %d\n255\n", ww, hh); }
static void
ppm_pixel(unsigned char r, unsigned char g, unsigned char b)
{ putchar(r); putchar(g); putchar(b); }
enum { canvas_width = 96, canvas_height = 96 };
int main() {
char buf[canvas_height * canvas_width] = {0};
pixel_buf canvas = { buf, 1, canvas_width };
pixel_buf dest = offset_pixbuf(canvas, 2, 2);
char fontbuf[propfont_height * propfont_width];
pixel_buf font = { fontbuf, 1, propfont_width };
char black = 0, white = 255, gray = 127;
render_propfont(font, &black, &white);
char *hello = "hello, world\nThis is a test of tiny text rendering!\n"
"And it was an I'll-be-gobsmacked-upon-the-butt moment when it worked!\n"
"Yay!";
fill_pixbuf(canvas, &gray, canvas_width - 1, canvas_height);
int n = 100*1000;
int len = strlen(hello);
fprintf(stderr, "running %d iterations of rendering %d bytes\n", n, len);
for (int i = 0; i < n; i++) {
render_text(hello, len, font, dest, canvas_width - 2);
}
ppm_header(canvas_width, canvas_height);
for (int i = 0; i < ARRAY_SIZE(buf); i++) {
ppm_pixel(buf[i], buf[i], buf[i]);
}
return 0;
}