game-engine / devlog / triangle-rasterization

Software rasterization: a fast and accurate algorithm for rasterizing triangles in C

Оригинал статьи на русском

The basis of three-dimensional graphics is triangle rasterization algorithm. In this article, I explain how a software triangle rasterization works in my own game engine.

First, I will present the full algorithm, and then we'll break it down in detail:

static inline void SwapFloat(float* a, float* b) { float c = *a; *a = *b; *b = c; }

void DrawTriangle(
    float ax, float ay, float az, float au, float av,
    float bx, float by, float bz, float bu, float bv,
    float cx, float cy, float cz, float cu, float cv
) {
    // For every vertex:
    // x, y - screen coordinates
    // z    - depth
    // u, v - texture coordinates

    // Sort vertices by y-coordinate ascending (y1 <= y2 <= y3)
    if (ay > by) {
        // swap(a, b)
        SwapFloat(&ax, &bx);
        SwapFloat(&ay, &by);
        SwapFloat(&az, &bz);
        SwapFloat(&au, &bu);
        SwapFloat(&av, &bv);
    }
    if (ay > cy) {
        // swap(a, c)
        SwapFloat(&ax, &cx);
        SwapFloat(&ay, &cy);
        SwapFloat(&az, &cz);
        SwapFloat(&au, &cu);
        SwapFloat(&av, &cv);
    }
    if (by > cy) {
        // swap(b, c)
        SwapFloat(&bx, &cx);
        SwapFloat(&by, &cy);
        SwapFloat(&bz, &cz);
        SwapFloat(&bu, &cu);
        SwapFloat(&bv, &cv);
    }

    // Round y-coordinates and clamp to screen bounds
    int y_min = max(    (int)ceilf(ay - 0.5f), 0);
    int y_mid = min(max((int)ceilf(by - 0.5f), 0), screen_height);
    int y_max = min(    (int)ceilf(cy - 0.5f),     screen_height);

    // Prepare values for perspective-correct interpolation
    az = 1.0f / az;
    bz = 1.0f / bz;
    cz = 1.0f / cz;

    au *= az;
    av *= az;

    bu *= bz;
    bv *= bz;

    cu *= cz;
    cv *= cz;

    // Rasterize triangle scanlines
    for (int y = y_min; y < y_max; y++) {
        float pixel_center = (float)y + 0.5f;

        float lt, lx, lz, lu, lv; // left edge
        float rt, rx, rz, ru, rv; // right edge

        // Calculate left edge
        lt = (pixel_center - ay) / (cy - ay);
        lx = ax + (cx - ax) * lt;
        lz = az + (cz - az) * lt;
        lu = au + (cu - au) * lt;
        lv = av + (cv - av) * lt;

        // Determine if we are in the first or the second half of the triangle
        if (y < y_mid) {
            // Calculate right edge (first half)
            rt = (pixel_center - ay) / (by - ay);
            rx = ax + (bx - ax) * rt;
            rz = az + (bz - az) * rt;
            ru = au + (bu - au) * rt;
            rv = av + (bv - av) * rt;
        } else {
            // Calculate right edge (second half)
            rt = (pixel_center - by) / (cy - by);
            rx = bx + (cx - bx) * rt;
            rz = bz + (cz - bz) * rt;
            ru = bu + (cu - bu) * rt;
            rv = bv + (cv - bv) * rt;
        }

        // Ensure "l" is always to the left of "r"
        if (lx > rx) {
            // swap(l, r)
            SwapFloat(&lx, &rx);
            SwapFloat(&lz, &rz);
            SwapFloat(&lu, &ru);
            SwapFloat(&lv, &rv);
        }

        // Round x-coordinates and clamp to screen bounds
        int ilx = (int) ceilf(lx - 0.5f); // top-left rule: ceil to include left edge
        int irx = (int)floorf(rx - 0.5f); // top-left rule: floor to exclude right edge
        int x_min = max(ilx, 0);
        int x_max = min(irx + 1, screen_width);

        // Loop over the horizontal range of the current scanline
        for (int x = x_min; x < x_max; x++) {
            int i = (y * screen_width) + x;

            float t = (float)(x - ilx) / (rx - lx); // x interpolation factor (0.0 - 1.0)
            float z = lz + (rz - lz) * t;
            if (z > screen_depth[i]) {

                float u = (lu + (ru - lu) * t) / z;
                float v = (lv + (rv - lv) * t) / z;

                screen_color[i] = GetTextureColor(texture, u, v);
                screen_depth[i] = z;
            }
        }
    }
}

Algorithm Breakdown

We will perform scanline rasterization - drawing pixels row by row to ensure linear memory access for cache efficiency.

1. Sort vertices by y-coordinate

if (ay > by) { SwapPoint(&a, &b); }
if (ay > cy) { SwapPoint(&a, &c); }
if (by > cy) { SwapPoint(&b, &c); }

Let's start by sorting the vertices by y-coordinate. So we have this order: a - min, b - middle, c - max. This splits the triangle into upper and lower parts, simplifying edge calculation.

2. Top-left filling convention

To avoid gaps between adjacent triangles, use consistent rounding: round y - 0.5 up for y, round left x using ceil up and right x using floor down. This matches Vulkan and DirectX conventions:

// ...
// Before y loop:
int y_min = (int)ceilf(ay - 0.5f);
int y_mid = (int)ceilf(by - 0.5f);
int y_max = (int)ceilf(cy - 0.5f);

// ...
// Before x loop:
int x_min = (int) ceilf(lx - 0.5f); // top-left rule: ceil to include left edge
int x_max = (int)floorf(rx - 0.5f); // top-left rule: floor to exclude right edge

3. Perspective‑correct interpolation

az = 1.0f / az;
bz = 1.0f / bz;
cz = 1.0f / cz;

au *= az;
av *= az;

bu *= bz;
bv *= bz;

cu *= cz;
cv *= cz;

Before interpolation, z should be inverted (like this: 1/z) and texture coordinates (u, v) should be multiplied by inverted z. Without this step, textures will look distorted (as in PS1 graphics).

4. Y-loop

for (int y = y_min; y < y_max; y++) {
    // we need to find the x-start and x-end of the scanline segment...
}

For each scanline, compute segment start (lx) and end (rx) x-coordinates. That's the step where the vertex sorting from step 1 comes in handy.

Left edge is always from A → C (min to max):

lt = (pixel_center - ay) / (cy - ay);
lx = ax + (cx - ax) * lt;
lz = az + (cz - az) * lt;

Right edge from either A → B (upper part) or B → C (lower part) based on the current y:

if (y < y_mid) {
    rt = (center_y - ay) / (by - ay);
    rx = ax + (bx - ax) * rt;
    rz = az + (bz - az) * rt;
    // ...
} else {
    rt = (center_y - by) / (cy - by);
    rx = bx + (cx - bx) * rt;
    rz = bz + (cz - bz) * rt;
    // ...
}

5. Ensure correct edge ordering

Swap left/right if needed to maintain left <= right.

if (lx > rx) {
    SwapFloat(&lx, &rx); 
    SwapFloat(&lz, &rz);
    // ... 
}

6. X-loop

For each x on the scanline, interpolate depth z, compare it to the z-buffer, and if closer, interpolate texture coords (u, v) with perspective correction and sample the texture.:

for (int x = x_min; x < x_max; x++) {
    int i = (y * screen_width) + x;

    float t = (float)(x - ilx) / (rx - lx); // x interpolation factor (0.0 - start, 1.0 - end)

    float z = lz + (rz - lz) * t; // depth

    if (z > screen_depth[i]) { // depth-test

        float u = (lu + (ru - lu) * t) / z; // interpolated vertex attributes (like texture UVs)
        float v = (lv + (rv - lv) * t) / z;

        screen_color[i] = GetTextureColor(texture, u, v);
        screen_depth[i] = z;
    }
}

7. Vertex attribute extensibility

The algorithm can handle additional per-vertex attributes (e.g. lighting factor, extra UVs for multitexturing or lightmaps, etc.). Just add extra variables in same way as u and v are implemented.

8. Optimizations

I intentionally simplified the code shown at the beginning of the article to improve its readability at the expense of performance. If you've read this far and are interested in further optimization, below you will find the final version of the algorithm intended for a real renderer.

static inline void SwapFloat(float *a, float *b) { float c = *a; *a = *b; *b = c; }

static void DrawTriangle_Span(
    const Texture *texture,
    int y,
    float lx, float lz, float lu, float lv, // left edge
    float rx, float rz, float ru, float rv  // right edge
) {
    // Ensure "l" is always to the left of "r"
    if (lx > rx) {
        SwapFloat(&lx, &rx);
        SwapFloat(&lz, &rz);
        SwapFloat(&lu, &ra);
        SwapFloat(&lb, &rb);
    }

    // Precalculate spans
    float span_x = rx - lx;
    if (span_x <= 0.0f) {
        return; // skip if no pixels to draw
    }
    float inv_span_x = 1.0f / span_x;
    float span_z = rz - lz;
    float span_u = ru - lu;
    float span_v = rv - lv;

    // Round x-coordinates and clamp to screen bounds
    int ilx = (int) ceilf(lx - 0.5f); // top-left rule: ceil to include left edge
    int irx = (int)floorf(rx - 0.5f); // top-left rule: floor to exclude right edge
    int x_min = max(ilx, 0);
    int x_max = min(irx + 1, screen_width);

    // Precompute interpolant steps
    float z_step = span_z * inv_span_x;
    float u_step = span_u * inv_span_x;
    float v_step = span_v * inv_span_x;

    // Initial interpolant values
    float t = ((float)x_min + 0.5f - lx) * inv_span_x;
    float z = lz + span_z * t;
    float u = lu + span_u * t;
    float v = lv + span_v * t;

    // Get scanline pointers
    int scanline = (screen_width * y) + x_min;
    DWORD *color_ptr = &screen_color[scanline];
    float *depth_ptr = &screen_depth[scanline];

    // Loop over the horizontal range of the current scanline
    for (int x = x_min; x < x_max; x++) {

        // Depth test
        if (z <= *depth_ptr) {
            goto DISCARD;
        }
        float inv_z = 1.0f / z; // precompute 1/z for perspective-correct interpolation

        // Perspective-correct texture interpolation
        float correct_u = u * inv_z;
        float correct_v = v * inv_z;

        // Sample texture color
        *color_ptr = SampleTexture(texture, correct_u, correct_v);
        *depth_ptr = z;

    DISCARD:
        // Advance interpolants
        z += z_step;
        u += u_step;
        v += v_step;

        // Advance scanline pointers
        color_ptr++;
        depth_ptr++;
    }
}

void DrawTriangle(
    const Texture *texture,
    float x1, float y1, float z1, float u1, float v1,
    float x2, float y2, float z2, float u2, float v2,
    float x3, float y3, float z3, float u3, float v3
) {
    // Sort vertices by y-coordinate ascending (y1 <= y2 <= y3)
    if (y1 > y2) {
        SwapFloat(&x1, &x2);
        SwapFloat(&y1, &y2);
        SwapFloat(&z1, &z2);
        SwapFloat(&u1, &u2);
        SwapFloat(&v1, &v2);
    }
    if (y1 > y3) {
        SwapFloat(&x1, &x3);
        SwapFloat(&y1, &y3);
        SwapFloat(&z1, &z3);
        SwapFloat(&u1, &u3);
        SwapFloat(&v1, &v3);
    }
    if (y2 > y3) {
        SwapFloat(&x2, &x3);
        SwapFloat(&y2, &y3);
        SwapFloat(&z2, &z3);
        SwapFloat(&u2, &u3);
        SwapFloat(&v2, &v3);
    }

    // Round y-coordinates and clamp to screen bounds
    int y_min = max(    (int)ceilf(y1 - 0.5f), 0);
    int y_mid = min(max((int)ceilf(y2 - 0.5f), 0), screen_height);
    int y_max = min(    (int)ceilf(y3 - 0.5f),     screen_height);

    if (y_min >= y_max) {
        return; // skip if no pixels to draw
    }

    // Prepare the triangle vertices for perspective-correct interpolation
    z1 = 1.0f / z1;
    z2 = 1.0f / z2;
    z3 = 1.0f / z3;

    u1 *= z1;
    v1 *= z1;

    u2 *= z2;
    v2 *= z2;

    u3 *= z3;
    v3 *= z3;

    // Precalculate edges
    float dy13 = y3 - y1;
    float dx13 = x3 - x1;
    float dz13 = z3 - z1;
    float du13 = u3 - u1;
    float dv13 = v3 - v1;

    float dy12 = y2 - y1;
    float dx12 = x2 - x1;
    float dz12 = z2 - z1;
    float du12 = u2 - u1;
    float dv12 = v2 - v1;

    float dy23 = y3 - y2;
    float dx23 = x3 - x2;
    float dz23 = z3 - z2;
    float du23 = u3 - u2;
    float dv23 = v3 - v2;

    // Draw the top part of the triangle
    for (int y = y_min; y < y_mid; y++) {
        float pixel_center = (float)y + 0.5f;
        float lt = (pixel_center - y1) / dy13;
        float rt = (pixel_center - y1) / dy12;
        DrawTriangle_Span(
            texture, y,
            x1 + dx13 * lt,
            z1 + dz13 * lt,
            u1 + du13 * lt,
            v1 + dv13 * lt,
            x1 + dx12 * rt,
            z1 + dz12 * rt,
            u1 + du12 * rt,
            v1 + dv12 * rt
        );
    }

    // Draw the bottom part of the triangle
    for (int y = y_mid; y < y_max; y++) {
        float pixel_center = (float)y + 0.5f;
        float lt = (pixel_center - y1) / dy13;
        float rt = (pixel_center - y2) / dy23;
        DrawTriangle_Span(
            texture, y,
            x1 + dx13 * lt,
            z1 + dz13 * lt,
            u1 + du13 * lt,
            v1 + dv13 * lt,
            x2 + dx23 * rt,
            z2 + dz23 * rt,
            u2 + du23 * rt,
            v2 + dv23 * rt
        );
    }
}

Thank you for reading! I hope you find this algorithm useful in your projects.

rasterization graphics optimization 3d algorithms