#include // Bresenham’s line algorithm with clipping, size-optimized as far as // I could. d here is the doubled, negated y distance of the ideal // line from the pixel center? Scaled by some factor? I forget. void octant_1(int x, int y, int x1, int y1, char *canvas, int w, int h) { int dy = y1 - y, dx = x1 - x, d = dx; do { d -= 2*dy; // https://stackoverflow.com/a/2711560 in C++ and // https://stackoverflow.com/a/50632 in C explains that this // conversion is safe; GCC doesn’t optimize the straightforward // code that contains four comparisons to it. if ((unsigned)x < (unsigned)w && (unsigned)y < (unsigned)h) { canvas[y*w + x] = 1; } if (d < 0) { y++; d += 2*dx; } x++; } while (x <= x1); } // That’s 61 bytes with GCC 12.2.0 with -Os. Here’s my attempted // disassembly: // // 0000000000000000 : // 0: 41 89 d2 mov %edx,%r10d copy x1 to r10d so we don’t clobber it // 3: 29 f1 sub %esi,%ecx compute dy in ecx, clobbering y1 (esi is y) // 5: 29 fa sub %edi,%edx compute dx in edx (edi is x) // 7: 89 f8 mov %edi,%eax move x to eax for no good reason // 9: 01 c9 add %ecx,%ecx double dy // b: 44 8d 1c 12 lea (%rdx,%rdx,1),%r11d compute dx*2 in r11d // f: 29 ca sub %ecx,%edx initialize d to dx-dy??? // 11: 44 39 c8 cmp %r9d,%eax x < w? eax is x, r9d is w // 14: 73 16 jae 2c // 16: 3b 74 24 08 cmp 0x8(%rsp),%esi y < h? esi is y, rsp[8] is h // 1a: 73 10 jae 2c // 1c: 89 f7 mov %esi,%edi copy y // 1e: 41 0f af f9 imul %r9d,%edi multiply by w // 22: 01 c7 add %eax,%edi add x // 24: 48 63 ff movslq %edi,%rdi sign-extend // 27: 41 c6 04 38 01 movb $0x1,(%r8,%rdi,1) canvas[*] = 1 r8 is canvas // 2c: 85 d2 test %edx,%edx if (d < 0) edx is d // 2e: 79 05 jns 35 { // 30: ff c6 inc %esi y++ // 32: 44 01 da add %r11d,%edx d += 2*dx; } r11d is 2*dx // 35: ff c0 inc %eax x++ // 37: 41 39 c2 cmp %eax,%r10d while (x <= x1) r10d is x1 // 3a: 7d d3 jge f // 3c: c3 ret // In C it’s more compact with conditional expressions, also closer to // a hardware implementation, but GCC compiles this to 77 bytes instead of 61: void octant_1_x(int x, int y, int x1, int y1, char *canvas, int w, int h) { int dy = y1 - y, dx = x1 - x, d = dx - 2*dy; do { if ((unsigned)x < (unsigned)w && (unsigned)y < (unsigned)h) { canvas[y*w + x] = 1; } y += d < 0; d += (d < 0 ? 2*dx - 2*dy : -2*dy); x++; } while (x <= x1); } // Here’s a version that generalizes to a second octant, octant 8. // It’s already 93 bytes! void octant_18(int x, int y, int x1, int y1, char *canvas, int w, int h) { int dy = y1 - y, dx = x1 - x, d = dx, ystep = w, pix = y*w + x; if (dy < 0) { dy = -dy; ystep = -ystep; } do { d -= 2*dy; if ((unsigned)x < (unsigned)w && (unsigned)pix < (unsigned)(w*h)) { canvas[pix] = 1; } if (d < 0) { pix += ystep; d += 2*dx; } x++; pix++; } while (x <= x1); } // And it’s easy enough to generalize that to octants 4 and 5, but // this adds another 36 bytes, though I think that’s GCC being stupid: void octant_1458(int x, int y, int x1, int y1, char *canvas, int w, int h) { if (x <= x1) return octant_18(x, y, x1, y1, canvas, w, h); return octant_18(x1, y1, x, y, canvas, w, h); } // Here’s a representation of a canvas that can support transposing // and reversing rows or columns: typedef struct { char *pixels; int w, h, xstride, ystride; } tranvas; // A version of the original function implemented in terms of that // comes out to 79 bytes: void octrant_1(int x, int y, int x1, int y1, tranvas *t) { int dy = y1 - y, dx = x1 - x, d = dx; do { d -= 2*dy; if ((unsigned)x < (unsigned)t->w && (unsigned)y < (unsigned)t->h) { t->pixels[y * t->ystride + x * t->xstride] = 1; } if (d < 0) { y++; d += 2*dx; } x++; } while (x <= x1); } // A façade that supports the original interface is simple (though // bloated): void tranwrap(int x, int y, int x1, int y1, char *canvas, int w, int h) { tranvas t = { .pixels = canvas, .w = w, .h = h, .xstride = 1, .ystride = w }; octrant_1(x, y, x1, y1, &t); } // But you can easily extend it to all octants by flipping and // transposing it. 25 bytes: void transpose(tranvas *t) { int tmp = t->h; t->h = t->w; t->w = tmp; tmp = t->ystride; t->ystride = t->xstride; t->xstride = tmp; } // 19 bytes: void xflip(tranvas *t) { int xstride = t->xstride; t->pixels += xstride - 1; t->xstride = -xstride; } // Hmm, okay, well, maybe not *easily*. You still have to translate // the points! // Here’s an attempt to generalize to the whole first quadrant. It // doesn’t work right. Only 80 bytes tho. void quadrant_1(int x, int y, int x1, int y1, char *canvas, int w, int h) { int dy = y1 - y, dx = x1 - x, d = dx; do { d -= 2*dy; while (y <= y1) { if ((unsigned)x < (unsigned)w && (unsigned)y < (unsigned)h) { canvas[y*w + x] = 1; } if (d >= 0) break; y++; d += 2*dx; } x++; } while (x <= x1); } // Minimal smoke test. int main(int argc, char **argv) { enum {w = 80, h = 24}; char canvas[w*h] = {0}; for (int i = -64; i <= 24; i += 8) { //quadrant_1(10, i, 90, 20, canvas, w, h); octant_1_x(10, i, 90, 20, canvas, w, h); //tranwrap(10, i, 90, 20, canvas, w, h); //octant_1458(10, i, 90, 20, canvas, w, h); //octant_1458(90, i, 10, 20, canvas, w, h); } for (int line = 0; line < h; line++) { for (int col = 0; col < w; col++) { printf(canvas[line * w + col] ? "#" : " "); } printf("\n"); } return 0; }