Code from Data

source code

WASM bytecode is just a byte array that can be validated. If we want to write simple, valid bytecode without relying on the WASM Text Format or higher-level toolchains, attention needs to be paid to the details.

Let's start from scratch again to see if we can find a simple, straightforward, easy-to-understand way to construct WASM bytecode from data in JS without using advanced features like types. The goal is not to make a robust, general-purpose WASM AST generator that covers all the cases, but to make it easy to write small and simple WASM modules.

Empty module

Generator

We declare wasm with a mutable let binding (let wasm = ...) so that we can reassign it in later sections.

// Stage 1 generator: compiles a module that consists of nothing but the
// 8-byte preamble (4 magic bytes followed by 4 version bytes).
let wasm = async () => {
    const bytes = [
        0x00, 0x61, 0x73, 0x6d, // magic: "\0asm"
        0x01, 0x00, 0x00, 0x00, // version
    ];
    console.log({ bytes });
    const binary = Uint8Array.from(bytes);
    return WebAssembly.compile(binary.buffer);
};

Test

{ // begin block namespace
    // Compile the generated bytes, then instantiate with an empty import object.
    const imports = {};
    const compiled = await wasm();
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('empty module', instance);
} // end block namespace

Module sections

The binary encoding of modules is organized into 13 optional sections. We can rewrite our wasm generator to take an array of 13 arrays, one per section, all of them empty to begin with.

// One slot per section, indexed by section id:
//    0 custom     1 type      2 import    3 function   4 table
//    5 memory     6 global    7 export    8 start      9 element
//   10 code      11 data     12 data count
// Every slot is its own (initially empty) array.
const empty_data = Array.from({ length: 13 }, () => []);

// Stage 2 generator: the preamble followed by whatever bytes each slot of
// `data` holds, appended in slot order.
wasm = async (data = empty_data) => {
    const preamble = [
        0x00, 0x61, 0x73, 0x6d, // magic: "\0asm"
        0x01, 0x00, 0x00, 0x00, // version
    ];
    // A single concat() call appends every slot's contents after the preamble.
    const bytes = preamble.concat(...data);
    console.log({ bytes });
    return WebAssembly.compile(Uint8Array.from(bytes).buffer);
};

Test

{ // begin block namespace
    // Called without arguments, so the generator falls back to its default data.
    const imports = {};
    const compiled = await wasm();
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('empty sections module', instance);
} // end block namespace

Think in arrays

Since WASM is an abstraction over a stack machine and linear memory, one of the goals of this exercise of generating WASM bytecode from data is to help programmers think in arrays.

Let's see if we can get rid of the noise of variable names and labels so that we can focus on the basic concepts of array and index.

Empty function

$ cat empty_func.wat
(func)
$ wat2wasm empty_func.wat
$ hexdump -C empty_func.wasm 
00000000  00 61 73 6d 01 00 00 00  01 04 01 60 00 00 03 02  |.asm.......`....|
00000010  01 00 0a 04 01 02 00 0b                           |........|
00000018
$ 
// Stage 1 section encoder: returns the hard-coded bytes of section `i` for a
// module containing one empty function (the bytes match the hexdump of
// empty_func.wasm). `data` is accepted but not consulted yet.
let section = (i, data) => {
    switch (i) {
        case 0x01: // type section
            return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
        case 0x03: // function section
            return [i, 0x02, 0x01, 0x00];
        case 0x0a: // code section
            return [i, 0x04, 0x01, 0x02, 0x00, 0x0b];
        default: // every other section is omitted
            return [];
    }
};

// Stage 3 generator: the preamble followed by the encoding of every section,
// obtained by calling section(i, data) for each slot index of `data`.
// Returns a promise for the compiled WebAssembly.Module.
wasm = async (data = empty_data) => {
    const magic = [0x00, 0x61, 0x73, 0x6d];
    const version = [0x01, 0x00, 0x00, 0x00];
    let bytes = magic.concat(version);
    for (let i = 0; i < data.length; i++) {
        // Encode each section exactly once and reuse the result for both the
        // log line and the output.
        const encoded = section(i, data);
        console.log('section', i, encoded);
        bytes = bytes.concat(encoded);
    }
    console.log({ bytes });
    return await WebAssembly.compile((new Uint8Array(bytes)).buffer);
};

Test

{ // begin block namespace
    // Compile with the default data and instantiate without any imports.
    const imports = {};
    const compiled = await wasm();
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('empty_func', instance);
} // end block namespace

Empty function export

$ cat empty_func_export.wat  
(func (export "f"))
$ hexdump -C empty_func_export.wasm 
00000000  00 61 73 6d 01 00 00 00  01 04 01 60 00 00 03 02  |.asm.......`....|
00000010  01 00 07 05 01 01 66 00  00 0a 04 01 02 00 0b     |......f........|
0000001f

// Stage 2 section encoder: adds an export section (id 0x07).
// When data[7] holds exactly one string, function 0 is exported under that
// name; otherwise the export section is omitted. Type, function and code
// sections are still hard-coded for a single empty function.
// Names are encoded one byte per character, so they are expected to be ASCII.
section = (i, data) => {
    if (i === 0x01) { // type
        return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
    }
    if (i === 0x03) { // function
        return [i, 0x02, 0x01, 0x00];
    }
    if (i === 0x07) { // export
        if (data[i].length !== 1) {
            return [];
        }
        // `const` keeps the counter local; an undeclared assignment would leak a
        // global in sloppy mode and throw in strict mode / ES modules.
        const count = 0x01;
        const name = data[i][0].split('');
        // Section size: count + name-length byte + name + kind + index.
        const total = name.length + 4;
        return [
            i,
            total,
            count,
            name.length, ...name.map((d) => d.charCodeAt(0)),
            0x00, // export kind: function
            0x00, // function index
        ];
    }
    if (i === 0x0a) { // code
        return [i, 0x04, 0x01, 0x02, 0x00, 0x0b];
    }
    return [];
};

// Generator: the preamble followed by the encoding of every section, obtained
// by calling section(i, data) for each slot index of `data`.
// Returns a promise for the compiled WebAssembly.Module.
wasm = async (data = empty_data) => {
    const magic = [0x00, 0x61, 0x73, 0x6d];
    const version = [0x01, 0x00, 0x00, 0x00];
    let bytes = magic.concat(version);
    for (let i = 0; i < data.length; i++) {
        // Encode each section exactly once and reuse the result for both the
        // log line and the output.
        const encoded = section(i, data);
        console.log('section', i, encoded);
        bytes = bytes.concat(encoded);
    }
    console.log({ bytes });
    return await WebAssembly.compile((new Uint8Array(bytes)).buffer);
};

Test

{ // begin block namespace
    // Thirteen section slots (ids 0-12); only the export slot (7) carries
    // data: the name under which the function is exported.
    const data = Array.from({ length: 13 }, () => []);
    data[7] = ["f"];
    const imports = {};
    const compiled = await wasm(data);
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('empty_func_export', instance, instance.exports);
} // end block namespace

Import memory

$ cat memory.wat
(memory (import "j" "m") 1)
$ wat2wasm memory.wat
$ hexdump -C memory.wasm
00000000  00 61 73 6d 01 00 00 00  02 08 01 01 6a 01 6d 02  |.asm........j.m.|
00000010  00 01                                             |..|
00000012
// Stage 3 section encoder: adds a hard-coded import section (id 0x02) that
// imports a memory as "j"."m" (the bytes match the hexdump of memory.wasm).
// The export section is still driven by data[7]: one string exports
// function 0 under that name, anything else omits the section.
// Names are encoded one byte per character, so they are expected to be ASCII.
section = (i, data) => {
    if (i === 0x01) { // type
        return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
    }
    if (i === 0x02) { // import: size, count, "j", "m", memory kind, limits
        return [i, 0x08, 0x01, 0x01, 0x6a, 0x01, 0x6d, 0x02, 0x00, 0x01];
    }
    if (i === 0x03) { // function
        return [i, 0x02, 0x01, 0x00];
    }
    if (i === 0x07) { // export
        if (data[i].length !== 1) {
            return [];
        }
        // `const` keeps the counter local; an undeclared assignment would leak a
        // global in sloppy mode and throw in strict mode / ES modules.
        const count = 0x01;
        const name = data[i][0].split('');
        // Section size: count + name-length byte + name + kind + index.
        const total = name.length + 4;
        return [
            i,
            total,
            count,
            name.length, ...name.map((d) => d.charCodeAt(0)),
            0x00, // export kind: function
            0x00, // function index
        ];
    }
    if (i === 0x0a) { // code
        return [i, 0x04, 0x01, 0x02, 0x00, 0x0b];
    }
    return [];
};

Test

{ // begin block namespace
    // The import object is keyed first by module name ("j"), then by field
    // name ("m"), matching the names baked into the import section.
    const m = new WebAssembly.Memory({ initial: 1, maximum: 1 });
    const imports = { j: { m } };
    const compiled = await wasm();
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('memory', instance);
} // end block namespace

Import memory with a pair of mod and name

Import Section

$ cat memory.wat        
(memory (import "js" "mem") 1 )
$ wat2wasm memory.wat
$ hexdump -C memory.wasm 
00000000  00 61 73 6d 01 00 00 00  02 0b 01 02 6a 73 03 6d  |.asm........js.m|
00000010  65 6d 02 00 01                                    |em...|
00000015
// Stage 4 section encoder: the import section (id 0x02) is now data-driven.
//   data[2] = [module name, field name, import kind, limits flag, initial pages]
//             e.g. ["js", "mem", 0x02, 0x00, 0x01] imports a memory of at
//             least one page with no maximum (bytes 02 00 01 in the hexdump).
//   data[7] = [export name] exports function 0 under that name.
// A slot with any other length omits its section. The three trailing import
// values are emitted as raw bytes. Names are encoded one byte per character,
// so they are expected to be ASCII.
section = (i, data) => {
    if (i === 0x01) { // type
        return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
    }
    if (i === 0x02) { // import
        if (data[i].length !== 5) {
            return [];
        }
        const [modName, fieldName, kind, limitsFlag, initial] = data[i];
        const mod = modName.split('');
        const name = fieldName.split('');
        // Section size: count + two name-length bytes + kind + flag + initial,
        // plus the two names themselves.
        const total = mod.length + name.length + 6;
        return [
            i,
            total,
            0x01, // number of imports
            mod.length, ...mod.map((d) => d.charCodeAt(0)),
            name.length, ...name.map((d) => d.charCodeAt(0)),
            kind,
            limitsFlag,
            initial,
        ];
    }
    if (i === 0x03) { // function
        return [i, 0x02, 0x01, 0x00];
    }
    if (i === 0x07) { // export
        if (data[i].length !== 1) {
            return [];
        }
        // `const` keeps the counter local; an undeclared assignment would leak a
        // global in sloppy mode and throw in strict mode / ES modules.
        const count = 0x01;
        const name = data[i][0].split('');
        // Section size: count + name-length byte + name + kind + index.
        const total = name.length + 4;
        return [
            i,
            total,
            count,
            name.length, ...name.map((d) => d.charCodeAt(0)),
            0x00, // export kind: function
            0x00, // function index
        ];
    }
    if (i === 0x0a) { // code
        return [i, 0x04, 0x01, 0x02, 0x00, 0x0b];
    }
    return [];
};

Test

{ // begin block namespace
    // Thirteen section slots (ids 0-12). Slot 2 describes the memory import,
    // slot 7 names the exported function; every other slot stays empty.
    const data = Array.from({ length: 13 }, () => []);
    data[2] = ["js", "mem", 0x02, 0x00, 0x01];
    data[7] = ["fun"];
    const mem = new WebAssembly.Memory({ initial: 1, maximum: 1 });
    const imports = { js: { mem } };
    const compiled = await wasm(data);
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('import memory js.mem, export fun()', instance, instance.exports);
    // Call the exported function once.
    instance.exports.fun();
} // end block namespace

Start section

$ cat start.wat
(module (func) (start 0))
$ hexdump -C start.wasm
00000000  00 61 73 6d 01 00 00 00  01 04 01 60 00 00 03 02  |.asm.......`....|
00000010  01 00 08 01 00 0a 04 01  02 00 0b                 |...........|
0000001b
// Stage 5 section encoder: adds the start section (id 0x08).
//   data[2] = [module name, field name, import kind, limits flag, initial pages]
//             e.g. ["js", "mem", 0x02, 0x00, 0x01] imports a memory of at
//             least one page with no maximum.
//   data[7] = [export name] exports function 0 under that name.
//   data[8] = [function index] makes that function the start function.
// A slot with any other length omits its section. Numeric values are emitted
// as raw single bytes. Names are encoded one byte per character, so they are
// expected to be ASCII.
section = (i, data) => {
    if (i === 0x01) { // type
        return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
    }
    if (i === 0x02) { // import
        if (data[i].length !== 5) {
            return [];
        }
        const [modName, fieldName, kind, limitsFlag, initial] = data[i];
        const mod = modName.split('');
        const name = fieldName.split('');
        // Section size: count + two name-length bytes + kind + flag + initial,
        // plus the two names themselves.
        const total = mod.length + name.length + 6;
        return [
            i,
            total,
            0x01, // number of imports
            mod.length, ...mod.map((d) => d.charCodeAt(0)),
            name.length, ...name.map((d) => d.charCodeAt(0)),
            kind,
            limitsFlag,
            initial,
        ];
    }
    if (i === 0x03) { // func
        return [i, 0x02, 0x01, 0x00];
    }
    if (i === 0x07) { // export
        if (data[i].length !== 1) {
            return [];
        }
        // `const` keeps the counter local; an undeclared assignment would leak a
        // global in sloppy mode and throw in strict mode / ES modules.
        const count = 0x01;
        const name = data[i][0].split('');
        // Section size: count + name-length byte + name + kind + index.
        const total = name.length + 4;
        return [
            i,
            total,
            count,
            name.length, ...name.map((d) => d.charCodeAt(0)),
            0x00, // export kind: function
            0x00, // function index
        ];
    }
    if (i === 0x08) { // start
        if (data[i].length !== 1) {
            return [];
        }
        return [i, 0x01, data[i][0]];
    }
    if (i === 0x0a) { // code
        return [i, 0x04, 0x01, 0x02, 0x00, 0x0b];
    }
    return [];
};

Test

{ // begin block namespace
    // Thirteen section slots (ids 0-12). Slot 2 describes the memory import
    // and slot 8 names function 0 as the start function; nothing is exported.
    const data = Array.from({ length: 13 }, () => []);
    data[2] = ["js", "mem", 0x02, 0x00, 0x01];
    data[8] = [0x00];
    const mem = new WebAssembly.Memory({ initial: 1, maximum: 1 });
    const imports = { js: { mem } };
    const compiled = await wasm(data);
    const instance = await WebAssembly.instantiate(compiled, imports);
    console.log('import memory js.mem, with start but no export', instance);
} // end block namespace

A minimum wasm program

Set the first 4 bytes of the memory to the i32 value 0x01.


// Final section encoder: the code section (id 0x0a) is now data-driven.
//   data[2]  = [module name, field name, import kind, limits flag, initial pages]
//              e.g. ["js", "mem", 0x02, 0x00, 0x01] imports a memory of at
//              least one page with no maximum.
//   data[7]  = [export name] exports function 0 under that name.
//   data[8]  = [function index] makes that function the start function.
//   data[10] = instruction bytes of the single function's body (may be empty);
//              the local-declaration count (0) and the trailing `end` opcode
//              are added here.
// An import/export/start slot with any other length omits its section.
// Sizes, counts and the start index are written as unsigned LEB128, so
// payloads longer than 127 bytes are encoded correctly; names are written as
// UTF-8. The import kind, limits flag and initial pages are emitted as the raw
// bytes supplied by the caller.
section = (i, data) => {
    // Unsigned LEB128: 7 value bits per byte, high bit set on all but the last.
    const u32 = (value) => {
        const out = [];
        let rest = value >>> 0;
        do {
            const low = rest & 0x7f;
            rest >>>= 7;
            out.push(rest === 0 ? low : low | 0x80);
        } while (rest !== 0);
        return out;
    };
    // A name is its UTF-8 byte length followed by the UTF-8 bytes.
    const name = (text) => {
        const utf8 = [...new TextEncoder().encode(text)];
        return [...u32(utf8.length), ...utf8];
    };
    // A section is its id, the payload size, then the payload itself.
    const wrap = (payload) => [i, ...u32(payload.length), ...payload];

    if (i === 0x01) { // type
        return [i, 0x04, 0x01, 0x60, 0x00, 0x00];
    }
    if (i === 0x02) { // import
        if (data[i].length !== 5) {
            return [];
        }
        const [modName, fieldName, kind, limitsFlag, initial] = data[i];
        return wrap([
            0x01, // number of imports
            ...name(modName),
            ...name(fieldName),
            kind,
            limitsFlag,
            initial,
        ]);
    }
    if (i === 0x03) { // func
        return [i, 0x02, 0x01, 0x00];
    }
    if (i === 0x07) { // export
        if (data[i].length !== 1) {
            return [];
        }
        return wrap([
            0x01, // number of exports
            ...name(data[i][0]),
            0x00, // export kind: function
            0x00, // function index
        ]);
    }
    if (i === 0x08) { // start
        if (data[i].length !== 1) {
            return [];
        }
        return wrap(u32(data[i][0]));
    }
    if (i === 0x0a) { // code
        // Any body length is accepted, including a single instruction byte;
        // an empty slot yields the empty function.
        const body = [0x00, ...data[i], 0x0b];
        return wrap([0x01, ...u32(body.length), ...body]);
    }
    return [];
};
{
  // Demo: run the generated module against one page of imported memory, then
  // show that memory as canvas pixels and as a hexdump that follows the pointer.
  const canvas = document.getElementById('minimum_canvas');
  const hex = document.getElementById('minimum_hex');
  const width = 0x80; // 128
  const height = 0x80;
  canvas.width = width;
  canvas.height = height;
  canvas.style.border = '1px solid black';
  const ctx = canvas.getContext('2d');
  ctx.fillStyle = '#00000000';
  ctx.fillRect(0, 0, width, height);
  const imageData = ctx.getImageData(0, 0, width, height);
  let x = 0;
  let y = 0;
  let offset = 0;
  const length = imageData.data.length;
  const mem = new WebAssembly.Memory({ initial: 1, maximum: 1 });
  const heap = new Uint8ClampedArray(mem.buffer);
  // Seed the memory with the current canvas pixels
  // (128 * 128 pixels * 4 bytes = 0x10000 bytes).
  heap.set(imageData.data.subarray(0, heap.length));

  // Clamp the pointer to the canvas and recompute the byte offset of its pixel.
  const check_boundary = () => {
    x = Math.min(Math.max(x, 0), width - 1);
    y = Math.min(Math.max(y, 0), height - 1);
    offset = 4 * (y * width + x);
  };

  // Pointer handler shared by mouse and touch: track the position relative to
  // the canvas and refresh the hexdump.
  const move = (e) => {
    e.preventDefault();
    const rect = canvas.getBoundingClientRect();

    // Touch events carry their coordinates in changedTouches; both kinds of
    // event are measured against the same viewport-relative rectangle.
    const point = e.changedTouches?.[0] ?? e;
    x = Math.floor(point.clientX - rect.left);
    y = Math.floor(point.clientY - rect.top);
    check_boundary();
    hexdump();
  };

  canvas.addEventListener('mousemove', move, false);
  canvas.addEventListener('touchmove', move, false);

  // Print up to 0x100 bytes starting at `offset` as 32-bit words, 8 per row.
  // Each word's bytes are printed from index 3 down to 0.
  const hexdump = () => {
    const { data } = imageData;
    let output = '';
    for (let i = offset; i < offset + 0x100; i += 4) {
      // Skip words that would run past the end of the data; the final word
      // (starting at data.length - 4) is still included.
      if (i + 3 < data.length) {
        for (let j = 3; j > -1; j--) {
          output += data[i + j].toString(16).padStart(2, '0');
        }
        // Break the row after the eighth word, otherwise separate with a space.
        output += ((i - offset) % 0x20) === 0x1c ? '\n' : ' ';
      }
    }
    // The text contains no markup, so it is assigned as plain text.
    hex.textContent = `
x: 0x${x.toString(16)}
y: 0x${y.toString(16)}
offset: 0x${offset.toString(16)} 
(mouse point = upper left corner of hexdump)        

(8 x 8) i32 = 256 bytes (1/256 of the total memory of a 64K page)
(0x${0x10000.toString(16)} = 0x${0x100.toString(16)} * 0x${0x100.toString(16)} = ${0x100 * 0x100} )

${output}
`;
  };

  // Copy the wasm memory back into the image buffer and paint it.
  const canvas_render = () => {
    imageData.data.set(heap.subarray(0, length));
    ctx.putImageData(imageData, 0, 0);
  };

  const code = [
    0x41, 0x00, // address: i32.const 0x00
    0x41, 0x01, // value: i32.const 0x01
    0x36, 0x02, 0x00, // i32.store
  ];

  const data = [
    [], // 0 custom section
    [], // 1 type section
    ["js", "mem", 0x02, 0x00, 0x01], // 2 import section memory type
    [], // 3 function section
    [], // 4 table section
    [], // 5 memory section
    [], // 6 global section
    [], // 7 export section
    [0x00], // 8 start section
    [], // 9 element section
    code, // 10 code section store 0x01 at the first byte
    [], // 11 data section
    [], // 12 data count section
  ];
  const module = await wasm(data);
  const env = { js: { mem } };
  // Function 0 is the start function, so instantiating already runs the store.
  const instance = await WebAssembly.instantiate(module, env);
  console.log('a minimum wasm program', instance);

  canvas_render();
  hexdump();
}