You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mvm-tests/microvium.c

7719 lines
284 KiB
C

// Copyright 2020 Michael Hunter. Part of the Microvium project. Links to full code at https://microvium.com for license details.
/*
* Microvium Bytecode Interpreter
*
* Version: 0.0.21
*
* This file contains the Microvium virtual machine C implementation.
*
* The key functions are mvm_restore() and mvm_call(), which perform the
* initialization and run loop respectively.
*
* I've written Microvium in C because lots of embedded projects for small
* processors are written in pure-C, and so integration for them will be easier.
* Also, there are a surprising number of C++ compilers in the embedded world
* that deviate from the standard, and I don't want to be testing on all of them
* individually.
*
* For the moment, I'm keeping Microvium all in one file for usability. Users
* can treat this file as a black box that contains the VM, and there's only one
* file they need to have built into their project in order to have Microvium
* running. The build process also pulls in the dependent header files, so
* there's only one header file and it's the one that users of Microvium need to
* see. Certain compilers and optimization settings also do a better job when
* related functions are co-located in the same compilation unit.
*
* User-facing functions and definitions are all prefixed with `mvm_` to
* namespace them separately from other functions in their project, some of
* which use the prefix `vm_` and some without a prefix. (TODO: this should be
* consolidated)
*/
#include "microvium.h"
#include <ctype.h>
#include <stddef.h>
#include <stdlib.h>
#include "math.h"
// See microvium.c for design notes.
#include "stdbool.h"
#include "stdint.h"
#include "assert.h"
#include "string.h"
#include "stdlib.h"
#include "microvium.h"
#include "microvium_port.h"
#include "stdint.h"
// Version of the bytecode file format. The `bytecodeVersion` field of
// `mvm_TsBytecodeHeader` is documented as holding this value.
#define MVM_BYTECODE_VERSION 6
// Note: MVM_ENGINE_VERSION is at the top of `microvium_internals.h`
// Identifies the sections of a bytecode image.
//
// These sections appear in the bytecode in the order they appear in this
// enumeration. The final enumerator, BCS_SECTION_COUNT, is not a section: it
// evaluates to the number of sections and is used to size the table of section
// offsets in the bytecode header.
typedef enum mvm_TeBytecodeSection {
/**
* Import Table
*
* List of host function IDs (vm_TsImportTableEntry) which are called by the
* VM. References from the VM to host functions are represented as indexes
* into this table. These IDs are resolved to their corresponding host
* function pointers when a VM is restored.
*/
BCS_IMPORT_TABLE,
/**
* Export Table
*
* A list of immutable `vm_TsExportTableEntry` that the VM exports, mapping
* export IDs to their corresponding VM Value. Mostly these values will just
* be function pointers.
*/
// TODO: We need to test what happens if we export numbers and objects
BCS_EXPORT_TABLE,
/**
* Short Call Table. Table of vm_TsShortCallTableEntry.
*
* To make the representation of function calls in IL more compact, up to 256
* of the most frequent function calls are listed in this table, including the
* function target and the argument count.
*
* See `LBL_CALL_SHORT`
*/
BCS_SHORT_CALL_TABLE,
/**
* Builtins
*
* Table of `Value`s that need to be directly identifiable by the engine, such
* as the Array prototype.
*
* These are not copied into RAM, they are just constant values like the
* exports, but like other values in ROM they are permitted to hold mutable
* values by pointing (as BytecodeMappedPtr) to the corresponding global
* variable slot.
*
* Note: at one point, I had these as single-byte offsets into the global
* variable space, but this made the assumption that all accessible builtins
* are also mutable, which is probably not true. The new design makes the
* opposite assumption: most builtins will be immutable at runtime (e.g.
* nobody changes the array prototype), so they can be stored in ROM and
* referenced by immutable Value pointers, making them usable but not
* consuming RAM at all. It's the exception rather than the rule that some of
* these may be mutable and require indirection through the global slot table.
*/
BCS_BUILTINS,
/**
* Interned Strings Table
*
* To keep property lookup efficient, Microvium requires that strings used as
* property keys can be compared using pointer equality. This requires that
* there is only one instance of each string (see
* https://en.wikipedia.org/wiki/String_interning). This table is the
* alphabetical listing of all the strings in ROM (or at least, all those
* which are valid property keys). See also TC_REF_INTERNED_STRING.
*
* There may be two string tables: one in ROM and one in RAM. The latter is
* required in general if the program might use arbitrarily-computed strings
* as property keys. For efficiency, the ROM string table is contiguous and
* sorted, to allow for binary searching, while the RAM string table is a
* linked list for efficiency in appending (expected to be used only
* occasionally).
*/
BCS_STRING_TABLE,
/**
* ROM
*
* Functions and other immutable data structures.
*
* While the whole bytecode is essentially "ROM", only this ROM section
* contains addressable allocations.
*/
BCS_ROM,
/**
* Globals
*
* One `Value` entry for the initial value of each global variable. The number
* of global variables is determined by the size of this section.
*
* This section will be copied into RAM at startup (restore).
*
* Note: the global slots are used both for global variables and for "handles"
* (these are different to the user-defined handles for referencing VM objects
* from user space). Handles allow ROM allocations to reference RAM
* allocations, even though the ROM can't be updated when the RAM allocation
* moves during a GC collection. A handle is a slot in the "globals" space,
* where the slot itself is pointed to by a ROM value and it points to the
* corresponding RAM value. During a GC cycle, the RAM value may move and the
* handle slot is updated, but the handle slot itself doesn't move. See
* `offsetToDynamicPtr` in `encode-snapshot.ts`.
*
* The handles appear as the *last* global slots, and will generally not be
* referenced by `LOAD_GLOBAL` instructions.
*/
BCS_GLOBALS,
/**
* Heap Section: heap allocations.
*
* This section is copied into RAM when the VM is restored. It becomes the
* initial value of the GC heap. It contains allocations that are mutable
* (like the DATA section) but also subject to garbage collection.
*
* Note: the heap must be at the end, because it is the only part that changes
* size from one snapshot to the next. There is code that depends on this
* being the last section because the size of this section is computed as
* running to the end of the bytecode image.
*/
BCS_HEAP,
// Number of sections (not itself a section). Must remain the last enumerator.
BCS_SECTION_COUNT,
} mvm_TeBytecodeSection;
// Identifiers for the builtin values that the engine needs to locate directly.
// NOTE(review): these are presumably indexes into the BCS_BUILTINS section of
// the bytecode -- confirm against the code that reads that section.
typedef enum mvm_TeBuiltins {
BIN_INTERNED_STRINGS,
BIN_ARRAY_PROTO,
BIN_STR_PROTOTYPE, // If the string "prototype" is interned, this builtin points to it.
BIN_BUILTIN_COUNT // Number of builtins (not itself a builtin); must remain last
} mvm_TeBuiltins;
// Minimal bytecode is 32 bytes (sizeof(mvm_TsBytecodeHeader) + BCS_SECTION_COUNT*2 + BIN_BUILTIN_COUNT*2)
// NOTE(review): `sectionOffsets` is a member of the header struct, so
// sizeof(mvm_TsBytecodeHeader) appears to already include the
// BCS_SECTION_COUNT*2 term -- confirm whether the 32-byte figure and formula
// above are still accurate.
//
// Header describing a bytecode image: format/engine versions, total size, CRC,
// required feature flags, and the offset of each section.
typedef struct mvm_TsBytecodeHeader {
uint8_t bytecodeVersion; // MVM_BYTECODE_VERSION
uint8_t headerSize;
uint8_t requiredEngineVersion;
uint8_t reserved; // =0
uint16_t bytecodeSize; // Including header
uint16_t crc; // CCITT16 (header and data, of everything after the CRC)
uint32_t requiredFeatureFlags;
/*
Note: the sections are assumed to be in order as per mvm_TeBytecodeSection, so
that the size of a section can be computed as the difference between the
adjacent offsets. The last section runs up until the end of the bytecode.
*/
uint16_t sectionOffsets[BCS_SECTION_COUNT]; // One offset per mvm_TeBytecodeSection
} mvm_TsBytecodeHeader;
// Optional engine features that a bytecode image may depend on.
// NOTE(review): presumably each enumerator is a bit index into the
// `requiredFeatureFlags` field of mvm_TsBytecodeHeader (rather than a mask) --
// confirm against the code that checks the flags.
typedef enum mvm_TeFeatureFlags {
FF_FLOAT_SUPPORT = 0,
} mvm_TeFeatureFlags;
// One entry of the export table (see BCS_EXPORT_TABLE): associates an export
// ID with the VM value exported under that ID.
typedef struct vm_TsExportTableEntry {
mvm_VMExportID exportID; // ID of the export
mvm_Value exportValue; // The exported value
} vm_TsExportTableEntry;
// One entry of the short call table (see BCS_SHORT_CALL_TABLE): a call target
// together with the argument count to call it with.
typedef struct vm_TsShortCallTableEntry {
/* Note: the `function` field has been broken up into separate low and high
* bytes, `functionL` and `functionH` respectively, for alignment purposes,
* since this is a 3-byte structure occurring in a packed table.
*
* `functionL` and `functionH` together make an `mvm_Value` which should be a
* callable value (a pointer to a `TsBytecodeFunc`, `TsHostFunc`, or
* `TsClosure`). TODO: I don't think this currently works, and I'm not even
* sure how we would test it. */
uint8_t functionL; // Low byte of the target `mvm_Value`
uint8_t functionH; // High byte of the target `mvm_Value`
uint8_t argCount; // Number of arguments for the call
} vm_TsShortCallTableEntry;
/*
Note: the instruction set documentation is in
`microvium/doc/internals/instruction-set`
Microvium categorizes operations into groups based on common features. The first
nibble of an instruction is its vm_TeOpcode. This is followed by 4 bits which
can either be interpreted as a data parameter or as another opcode (e.g.
vm_TeOpcodeEx1). I call the first nibble the "primary opcode" and the second
nibble is the "secondary opcode".
There are a number of possible secondary opcodes, and each group has common
preparation logic across the group. Preparation logic means the code that runs
before the operation. For example, many operations require popping a value off
the stack before operating on the value. The VM implementation is more compact
if the pop code is common to all instructions that do the pop.
Operations can have different "follow through" logic grouped arbitrarily, since
the implementation of all instructions requires a "jump", those that have common
follow through logic simply jump to the same follow through without additional
cost, which eventually lands up back at the loop start. So the instruction
grouping does not need to cater for follow through logic, only preparation
logic.
To keep operation commonality as seamless as possible, the VM implementation
uses 16-bit "registers", which have overloaded meaning depending on the context:
- `reg1`
- Initially holds the zero-extended 4-bit secondary nibble
- Operations that load an 8- or 16-bit literal will overwrite `reg1` with
the literal.
- "Pure" operations use reg1 as the first popped operand (none of the pure
operations have an embedded literal). "Pure" are what I'm calling
operations whose entire effect is to pop some operands off the stack,
operate on them, and push a result back onto the stack. For example,
`ADD`.
- `reg1` is also used as the "result" value for the common push-result tail
logic
- `reg2`
- used as the second popped value of binary operations
- used as the value to store, in store-like operations
- `reg3`
- can be used arbitrarily by operations and does not have a common meaning
Additionally, the number operations have variations that work on 32 or 64 bit
values. These have their own local/ephemeral registers:
- `reg1I`: the value of the reg1 register unpacked to a `uint32_t`
- `reg2I`: the value of the reg2 register unpacked to a `uint32_t`
- `reg1F`: the value of the reg1 register unpacked to a `double`
- `reg2F`: the value of the reg2 register unpacked to a `double`
Operation groups and their corresponding preparation logic
- vm_TeOpcodeEx1:
- The prep does not read a literal (all these instructions are single-byte).
- The prep pops 0, 1, or 2 values from the stack depending on the
instruction range
- vm_TeOpcodeEx2:
- Prep reads 8-bit literal into reg1
- Two separate instruction ranges specify whether to sign extend or not.
- Two instruction ranges specify whether the prep will also pop an arg into
reg2.
- vm_TeOpcodeEx3:
- Prep reads a 16-bit value from byte stream into reg1. This can be
interpreted as either signed or unsigned by the particular instruction.
- A sub-range within the instruction specifies whether an argument is popped
from the stack.
- (Edit: there are violations of this pattern because I ran out of space in
vm_TeOpcodeEx1)
- vm_TeOpcodeEx4:
- Not really any common logic. Just a bucket of miscellaneous instructions.
- vm_TeNumberOp:
- These are all dual-implementation instructions which have both 32 and 64
bit implementations.
- Prep pops one or two values off the stack and reads them into reg1 and
reg2 respectively. The choice of 1 or 2 depends on the sub-range. If
popping one value, the second is left as zero.
- Prep unpacks to either int32 or float64 depending on the corresponding
data types.
- The operations can dispatch to a different tail/follow through routine
depending on whether they overflow or not.
- vm_TeBitwiseOp:
- These operations all operate on 32-bit integers and produce 32-bit integer
results.
- Prep pops one or two values off the stack and reads them into reg1 and
reg2 respectively. The choice of 1 or 2 depends on the sub-range. If
popping one value, the second is left as zero.
- Prep unpacks reg1 and reg2 to int32
Follow-through/tail routines:
- Push float (reg1F)
- Push int32 (reg1I)
- Push 16-bit result (reg1)
*/
// TODO: I think this instruction set needs an overhaul. The categorization has
// become chaotic and not that efficient.
// TODO: If we wanted to make space in the primary opcode range, we could remove
// `VM_OP_LOAD_ARG_1` and just leave `VM_OP2_LOAD_ARG_2`, since static analysis
// should be able to convert many instances of `LoadArg` into `LoadVar`
// 4-bit enum
//
// Primary opcode: the first nibble of an instruction. The trailing comment on
// each enumerator describes how the second nibble is interpreted.
typedef enum vm_TeOpcode {
VM_OP_LOAD_SMALL_LITERAL = 0x0, // (+ 4-bit vm_TeSmallLiteralValue)
VM_OP_LOAD_VAR_1 = 0x1, // (+ 4-bit variable index relative to stack pointer)
VM_OP_LOAD_SCOPED_1 = 0x2, // (+ 4-bit scoped variable index)
VM_OP_LOAD_ARG_1 = 0x3, // (+ 4-bit arg index)
VM_OP_CALL_1 = 0x4, // (+ 4-bit index into short-call table)
VM_OP_FIXED_ARRAY_NEW_1 = 0x5, // (+ 4-bit length)
VM_OP_EXTENDED_1 = 0x6, // (+ 4-bit vm_TeOpcodeEx1)
VM_OP_EXTENDED_2 = 0x7, // (+ 4-bit vm_TeOpcodeEx2)
VM_OP_EXTENDED_3 = 0x8, // (+ 4-bit vm_TeOpcodeEx3)
VM_OP_CALL_5 = 0x9, // (+ 4-bit arg count)
// Range marker, not an opcode. It has no explicit value, so it evaluates to
// 0xA and deliberately shares its value with the first opcode after it.
VM_OP_DIVIDER_1, // <-- ops after this point pop at least one argument (reg2)
VM_OP_STORE_VAR_1 = 0xA, // (+ 4-bit variable index relative to stack pointer)
VM_OP_STORE_SCOPED_1 = 0xB, // (+ 4-bit scoped variable index)
VM_OP_ARRAY_GET_1 = 0xC, // (+ 4-bit item index)
VM_OP_ARRAY_SET_1 = 0xD, // (+ 4-bit item index)
VM_OP_NUM_OP = 0xE, // (+ 4-bit vm_TeNumberOp)
VM_OP_BIT_OP = 0xF, // (+ 4-bit vm_TeBitwiseOp)
VM_OP_END // One past the last opcode (0x10); not an opcode
} vm_TeOpcode;
// Secondary opcode used in the second nibble when the primary opcode is
// VM_OP_EXTENDED_1. Comments of the form `(a, b) -> c` show the stack operands
// and the result.
typedef enum vm_TeOpcodeEx1 {
VM_OP1_RETURN = 0x0,
VM_OP1_THROW = 0x1,
// (target) -> TsClosure
VM_OP1_CLOSURE_NEW = 0x2,
// (TsClass, ...args) -> object
VM_OP1_NEW = 0x3, // (+ 8-bit unsigned arg count. Target is dynamic)
// (state, type) -> TsVirtual
VM_OP1_RESERVED_VIRTUAL_NEW = 0x4, // For future use for creating TsVirtual
VM_OP1_SCOPE_PUSH = 0x5, // (+ 8-bit variable count)
// (value) -> mvm_TeType
VM_OP1_TYPE_CODE_OF = 0x6, // More efficient than VM_OP1_TYPEOF
VM_OP1_POP = 0x7, // Pop one item
VM_OP1_TYPEOF = 0x8,
VM_OP1_OBJECT_NEW = 0x9,
// boolean -> boolean
VM_OP1_LOGICAL_NOT = 0xA,
// Range marker, not an opcode. It has no explicit value, so it evaluates to
// 0xB and deliberately shares its value with the first opcode after it.
VM_OP1_DIVIDER_1, // <-- ops after this point are treated as having at least 2 stack arguments
// (object, prop) -> any
VM_OP1_OBJECT_GET_1 = 0xB, // (field ID is dynamic)
// (string, string) -> string
// (number, number) -> number
VM_OP1_ADD = 0xC,
// (any, any) -> boolean
VM_OP1_EQUAL = 0xD,
VM_OP1_NOT_EQUAL = 0xE,
// (object, prop, any) -> void
VM_OP1_OBJECT_SET_1 = 0xF, // (field ID is dynamic)
VM_OP1_END // One past the last opcode (0x10); not an opcode
} vm_TeOpcodeEx1;
// Secondary opcode used in the second nibble when the primary opcode is
// VM_OP_EXTENDED_2.
//
// All of these operations are implemented with an 8-bit literal embedded into
// the instruction. The literal is stored in reg1.
typedef enum vm_TeOpcodeEx2 {
VM_OP2_BRANCH_1 = 0x0, // (+ 8-bit signed offset)
VM_OP2_STORE_ARG = 0x1, // (+ 8-bit unsigned arg index)
VM_OP2_STORE_SCOPED_2 = 0x2, // (+ 8-bit unsigned scoped variable index)
VM_OP2_STORE_VAR_2 = 0x3, // (+ 8-bit unsigned variable index relative to stack pointer)
VM_OP2_ARRAY_GET_2_RESERVED = 0x4, // (+ 8-bit unsigned field index)
VM_OP2_ARRAY_SET_2_RESERVED = 0x5, // (+ 8-bit unsigned field index)
// Range marker, not an opcode. It has no explicit value, so it evaluates to
// 0x6 and deliberately shares its value with the first opcode after it.
VM_OP2_DIVIDER_1, // <-- ops before this point pop from the stack into reg2
VM_OP2_JUMP_1 = 0x6, // (+ 8-bit signed offset)
VM_OP2_CALL_HOST = 0x7, // (+ 8-bit arg count + 8-bit unsigned index into resolvedImports)
VM_OP2_CALL_3 = 0x8, // (+ 8-bit unsigned arg count. Target is dynamic)
VM_OP2_CALL_6 = 0x9, // (+ 8-bit index into short-call table)
VM_OP2_LOAD_SCOPED_2 = 0xA, // (+ 8-bit unsigned scoped variable index)
VM_OP2_LOAD_VAR_2 = 0xB, // (+ 8-bit unsigned variable index relative to stack pointer)
VM_OP2_LOAD_ARG_2 = 0xC, // (+ 8-bit unsigned arg index)
VM_OP2_EXTENDED_4 = 0xD, // (+ 8-bit unsigned vm_TeOpcodeEx4)
VM_OP2_ARRAY_NEW = 0xE, // (+ 8-bit capacity count)
VM_OP2_FIXED_ARRAY_NEW_2 = 0xF, // (+ 8-bit length count)
VM_OP2_END // One past the last opcode (0x10); not an opcode
} vm_TeOpcodeEx2;
// Secondary opcode used in the second nibble when the primary opcode is
// VM_OP_EXTENDED_3.
//
// Most of these instructions have an embedded 16-bit literal value
typedef enum vm_TeOpcodeEx3 {
VM_OP3_POP_N = 0x0, // (+ 8-bit pop count) Pops N items off the stack
VM_OP3_SCOPE_POP = 0x1,
VM_OP3_SCOPE_CLONE = 0x2,
VM_OP3_LONG_JMP_RESERVED = 0x3,
// Range marker, not an opcode. It has no explicit value, so it evaluates to
// 0x4. No opcode in this enumeration is assigned 0x4 or 0x5.
VM_OP3_DIVIDER_1, // <-- ops before this point are miscellaneous and don't automatically get any literal values or stack values
VM_OP3_SET_JMP_RESERVED = 0x6, // (+ 16-bit unsigned bytecode address)
VM_OP3_JUMP_2 = 0x7, // (+ 16-bit signed offset)
VM_OP3_LOAD_LITERAL = 0x8, // (+ 16-bit value)
VM_OP3_LOAD_GLOBAL_3 = 0x9, // (+ 16-bit global variable index)
VM_OP3_LOAD_SCOPED_3 = 0xA, // (+ 16-bit scoped variable index)
// Range marker, not an opcode. It has no explicit value, so it evaluates to
// 0xB and deliberately shares its value with the first opcode after it.
VM_OP3_DIVIDER_2, // <-- ops after this point pop an argument into reg2
VM_OP3_BRANCH_2 = 0xB, // (+ 16-bit signed offset)
VM_OP3_STORE_GLOBAL_3 = 0xC, // (+ 16-bit global variable index)
VM_OP3_STORE_SCOPED_3 = 0xD, // (+ 16-bit scoped variable index)
VM_OP3_OBJECT_GET_2 = 0xE, // (+ 16-bit property key)
VM_OP3_OBJECT_SET_2 = 0xF, // (+ 16-bit property key)
VM_OP3_END // One past the last opcode (0x10); not an opcode
} vm_TeOpcodeEx3;
// This is a bucket of less frequently used instructions that didn't fit into the other opcodes.
// The opcode is carried as the 8-bit literal of VM_OP2_EXTENDED_4.
typedef enum vm_TeOpcodeEx4 {
VM_OP4_START_TRY = 0x0, // (+ 16-bit label to the catch block)
VM_OP4_END_TRY = 0x1, // (No literal operands)
VM_OP4_OBJECT_KEYS = 0x2, // (No literal operands)
VM_OP4_UINT8_ARRAY_NEW = 0x3, // (No literal operands)
// (constructor, props) -> TsClass
VM_OP4_CLASS_CREATE = 0x4, // Creates TsClass (does not instantiate a class)
VM_OP4_TYPE_CODE_OF = 0x5, // Opcode for mvm_typeOf
VM_OP4_END // One past the last opcode (0x6); not an opcode
} vm_TeOpcodeEx4;
// Number operations. These are operations which take one or two arguments from
// the stack and coerce them to numbers. Each of these will have two
// implementations: one for 32-bit int, and one for 64-bit float.
//
// Used in the second nibble when the primary opcode is VM_OP_NUM_OP.
typedef enum vm_TeNumberOp {
// (number, number) -> boolean
VM_NUM_OP_LESS_THAN = 0x0,
VM_NUM_OP_GREATER_THAN = 0x1,
VM_NUM_OP_LESS_EQUAL = 0x2,
VM_NUM_OP_GREATER_EQUAL = 0x3,
// (number, number) -> number
VM_NUM_OP_ADD_NUM = 0x4,
VM_NUM_OP_SUBTRACT = 0x5,
VM_NUM_OP_MULTIPLY = 0x6,
VM_NUM_OP_DIVIDE = 0x7,
VM_NUM_OP_DIVIDE_AND_TRUNC = 0x8, // Represented in JS as `x / y | 0`
VM_NUM_OP_REMAINDER = 0x9,
VM_NUM_OP_POWER = 0xA,
// Range marker, not an operation. It has no explicit value, so it evaluates
// to 0xB and deliberately shares its value with the first op after it.
VM_NUM_OP_DIVIDER, // <-- ops after this point are unary
// number -> number
VM_NUM_OP_NEGATE = 0xB,
VM_NUM_OP_UNARY_PLUS = 0xC,
VM_NUM_OP_END // One past the last operation (0xD); not an operation
} vm_TeNumberOp;
// Bitwise operations. Used in the second nibble when the primary opcode is
// VM_OP_BIT_OP.
typedef enum vm_TeBitwiseOp {
// (bits, bits) -> bits
VM_BIT_OP_SHR_ARITHMETIC = 0x0, // Aka signed shift right. Aka sign-propagating right shift.
VM_BIT_OP_SHR_LOGICAL = 0x1, // Aka unsigned shift right. Aka zero-fill right shift.
VM_BIT_OP_SHL = 0x2, // Shift left
// Range marker, not an operation. It has no explicit value, so it evaluates
// to 0x3 and deliberately shares its value with the first op after it.
VM_BIT_OP_END_OF_SHIFT_OPERATORS, // <-- ops before this point need their operand in the 0-32 range
VM_BIT_OP_OR = 0x3,
VM_BIT_OP_AND = 0x4,
VM_BIT_OP_XOR = 0x5,
// Range marker, not an operation. It has no explicit value, so it evaluates
// to 0x6 and deliberately shares its value with the first op after it.
VM_BIT_OP_DIVIDER_2, // <-- ops after this point are unary
// bits -> bits
VM_BIT_OP_NOT = 0x6,
VM_BIT_OP_END // One past the last operation (0x7); not an operation
} vm_TeBitwiseOp;
// vm_TeSmallLiteralValue : 4-bit enum
//
// The literal selected by the second nibble of a VM_OP_LOAD_SMALL_LITERAL
// instruction.
//
// Note: Only up to 16 values are allowed here (12 are currently defined).
typedef enum vm_TeSmallLiteralValue {
VM_SLV_DELETED = 0x0,
VM_SLV_UNDEFINED = 0x1,
VM_SLV_NULL = 0x2,
VM_SLV_FALSE = 0x3,
VM_SLV_TRUE = 0x4,
VM_SLV_INT_MINUS_1 = 0x5,
VM_SLV_INT_0 = 0x6,
VM_SLV_INT_1 = 0x7,
VM_SLV_INT_2 = 0x8,
VM_SLV_INT_3 = 0x9,
VM_SLV_INT_4 = 0xA,
VM_SLV_INT_5 = 0xB,
} vm_TeSmallLiteralValue;
// Version of this engine. The bytecode header carries a
// `requiredEngineVersion` field; presumably it is checked against this value
// when bytecode is loaded -- confirm against the restore code.
#define MVM_ENGINE_VERSION 6
// Version of the port file that this engine expects.
// NOTE(review): presumably compared with a version defined in
// `microvium_port.h` -- confirm.
#define MVM_EXPECTED_PORT_FILE_VERSION 1
// Note: MVM_BYTECODE_VERSION is at the top of `microvium_bytecode.h`
// Short internal aliases for the public `mvm_`-prefixed types.
typedef mvm_VM VM;
typedef mvm_TeError TeError;
/**
* mvm_Value
*
* Hungarian prefix: v
*
* Internally, the name `Value` refers to `mvm_Value`
*
* The Microvium Value type is 16 bits with a 1 or 2 bit discriminator in the
* lowest bits:
*
* - If the lowest bit is `0`, interpret the value as a `ShortPtr`. Note that
* in a snapshot bytecode file, a ShortPtr is measured relative to the
* beginning of the RAM section of the file.
* - If the lowest bits are `11`, interpret the high 14-bits as a signed 14 bit
* integer. The Value is a `VirtualInt14`
* - If the lowest bits are `01`, interpret the high 15-bits as a
* `BytecodeMappedPtr` or a well-known value.
*/
typedef mvm_Value Value;
// Predicates that classify a `Value` by the discriminator held in its lowest
// one or two bits.

// True if the lowest bit is clear, i.e. the value is a ShortPtr.
static inline bool Value_isShortPtr(Value value) {
  return !(value & 1);
}

// True if the lowest two bits are `01`, i.e. the value is either a
// BytecodeMappedPtr or a well-known value.
static inline bool Value_isBytecodeMappedPtrOrWellKnown(Value value) {
  const unsigned tag = value & 3u;
  return tag == 1u;
}

// True if the lowest two bits are `11`, i.e. the value is a VirtualInt14.
static inline bool Value_isVirtualInt14(Value value) {
  const unsigned tag = value & 3u;
  return tag == 3u;
}

// True if the value is a VirtualInt14 whose two highest bits are clear, so the
// encoded integer fits in 12 unsigned bits.
static inline bool Value_isVirtualUInt12(Value value) {
  const bool hasInt14Tag = (value & 0x0003) == 0x0003;
  const bool highBitsClear = (value & 0xC000) == 0;
  return hasInt14Tag && highBitsClear;
}

// True if the value is a VirtualInt14 whose six highest bits are clear, so the
// encoded integer fits in 8 unsigned bits.
static inline bool Value_isVirtualUInt8(Value value) {
  const bool hasInt14Tag = (value & 0x0003) == 0x0003;
  const bool highBitsClear = (value & 0xFC00) == 0;
  return hasInt14Tag && highBitsClear;
}
/**
* ShortPtr
*
* Hungarian prefix: sp
*
* A ShortPtr is a 16-bit **non-nullable** reference which references into GC
* memory, but not to data memory or bytecode.
*
* Note: To avoid confusion of when to use different kinds of null values,
* ShortPtr should be considered non-nullable. When null is required, use
* VM_VALUE_NULL for consistency, which is not defined as a short pointer.
*
* The GC assumes that anything with a low bit 0 is a non-null pointer into GC
* memory (it does not do null checking on these, since this is a hot loop).
*
* Note: At runtime, pointers _to_ GC memory must always be encoded as
* `ShortPtr` or indirectly through a BytecodeMappedPtr to a global variable.
* This is because the GC assumes (for efficiency reasons) only values with the
* lower bit `0` need to be traced/moved.
*
* A ShortPtr is interpreted in one of 3 ways depending on the context:
*
* 1. On 16-bit architectures (when MVM_NATIVE_POINTER_IS_16_BIT is set),
* while the script is running, ShortPtr can be a native pointer, allowing
* for fast access. On other architectures, ShortPtr is encoded as an
* offset from the beginning of the virtual heap.
*
* 2. On non-16-bit architectures (when MVM_NATIVE_POINTER_IS_16_BIT is not
* set), ShortPtr is an offset into the allocation buckets. Access is
* linear time to the number of buckets, but the buckets are compacted
* together during a GC cycle so the number should typically be 1 or low.
*
* 3. In the hibernating GC heap, in the snapshot, ShortPtr is treated as an
* offset into the bytecode image, but always an offset back into the
* GC-RAM section. See `loadPointers`
*
* TODO: Rather than just MVM_NATIVE_POINTER_IS_16_BIT, we could better serve
* small 32-bit devices by having a "page" #define that is added to ShortPtr to
* get the real address. This is because on ARM architectures, the RAM pointers
* are mapped to a higher address space.
*
* A ShortPtr must never exist in a ROM slot, since they need to have a
* consistent representation in all cases, and ROM slots are not visited by
* `loadPointers`. Also short pointers are used iff they point to GC memory,
* which is subject to relocation and therefore cannot be referenced from an
* immutable medium.
*
* If the lowest bit of the `ShortPtr` is 0 (i.e. points to an even boundary),
* then the `ShortPtr` is also a valid `Value`.
*
* NULL short pointers are only allowed in some special circumstances, but are
* mostly not valid.
*/
typedef uint16_t ShortPtr;
/**
* Bytecode-mapped Pointer
*
* If `b` is a BytecodeMappedPtr then `b & 0xFFFE` is treated as an offset into
* the bytecode address space, and its meaning depends on where in the bytecode
* image it points:
*
*
* 1. If the offset points to the BCS_ROM section of bytecode, it is interpreted
* as pointing to that ROM allocation or function.
*
* 2. If the offset points to the BCS_GLOBALS region of the bytecode image, the
* `BytecodeMappedPtr` is treated as being a reference to the allocation
* referenced by the corresponding global variable.
*
* This allows ROM Values, such as literals, exports, and builtins, to reference
* RAM allocations. *Note*: for the moment, behavior is not defined if the
* corresponding global has non-pointer contents, such as an Int14 or well-known
* value. In future this may be explicitly allowed.
*
* A `BytecodeMappedPtr` is only a pointer type and is not defined to encode the
* well-known values or null.
*
* Note that in practice, BytecodeMappedPtr is not used anywhere except in
* decoding DynamicPtr.
*
* See `BytecodeMappedPtr_decode_long`
*/
typedef uint16_t BytecodeMappedPtr;
/**
* Dynamic Pointer
*
* Hungarian prefix: `dp`
*
* A `Value` that is a pointer. I.e. its lowest bits are not `11` and it does
* not encode a well-known value. Can be one of:
*
* - `ShortPtr`
* - `BytecodeMappedPtr`
* - `VM_VALUE_NULL`
*
* Note that the only valid representation of null for this pointer type is
* `VM_VALUE_NULL`, not 0.
*/
typedef Value DynamicPtr;
/**
* ROM Pointer
*
* Hungarian prefix: none
*
* A `DynamicPtr` which is known to point only to ROM.
*/
typedef Value RomPtr;
/**
* Int14 encoded as a Value
*
* Hungarian prefix: `vi`
*
* A 14-bit signed integer represented in the high 14 bits of a 16-bit Value,
* with the low 2 bits set to `11`, as per the `Value` type.
*
* See VM_MIN_INT14 and VM_MAX_INT14 for the limits of a signed 14-bit integer.
*/
typedef Value VirtualInt14;
/**
* Long Pointer
*
* Hungarian prefix: `lp`
*
* A nullable pointer that can reference bytecode and RAM in the same address
* space. Not necessarily 16-bit.
*
* The null representation for LongPtr is assumed to be 0.
*
* Values of this type are only managed through macros in the port file, never
* directly, since the exact type depends on the architecture.
*
* See description of MVM_LONG_PTR_TYPE
*/
typedef MVM_LONG_PTR_TYPE LongPtr;
// Read the field `fieldName` of a `structType` located at `longPtr`. The
// field's address is computed by adding the field's offset (OFFSETOF) to the
// LongPtr, and the value is read with LongPtr_read2_aligned.
// NOTE(review): presumably intended for 2-byte fields that are 2-byte aligned
// -- confirm against LongPtr_read2_aligned.
#define READ_FIELD_2(longPtr, structType, fieldName) \
LongPtr_read2_aligned(LongPtr_add(longPtr, OFFSETOF(structType, fieldName)))
// As READ_FIELD_2, but the value is read with LongPtr_read1 (presumably for
// 1-byte fields -- confirm against LongPtr_read1).
#define READ_FIELD_1(longPtr, structType, fieldName) \
LongPtr_read1(LongPtr_add(longPtr, OFFSETOF(structType, fieldName)))
// NOTE: In no way are assertions meant to be present in production. They're
// littered everywhere on the assumption that they consume no overhead.
//
// VM_ASSERT(vm, predicate): when MVM_SAFE_MODE is non-zero, invokes
// MVM_FATAL_ERROR with MVM_E_ASSERTION_FAILED if `predicate` is false.
// Otherwise it expands to nothing, so `predicate` is not evaluated and must
// not be relied on for side effects.
#if MVM_SAFE_MODE
#define VM_ASSERT(vm, predicate) do { if (!(predicate)) MVM_FATAL_ERROR(vm, MVM_E_ASSERTION_FAILED); } while (false)
#else
#define VM_ASSERT(vm, predicate)
#endif
// Fallback for compilers that do not provide `__has_builtin`: report every
// builtin as unavailable so that `#if __has_builtin(...)` checks still compile.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
// Offset, in bytes, of field ELEMENT within struct TYPE, narrowed to 16 bits.
//
// Implemented with the standard `offsetof` macro from <stddef.h> rather than
// by taking the address of a member through a null pointer, which formally
// dereferences a null pointer and is therefore undefined behavior.
#define OFFSETOF(TYPE, ELEMENT) ((uint16_t)offsetof(TYPE, ELEMENT))
// Maximum size of an allocation (4kB; the exact limit is 0xFFF = 4095)
#define MAX_ALLOCATION_SIZE 0xFFF
// The macros below test a Value for equality with one specific VM_VALUE_*
// constant.
// This is the only valid way of representing NaN
#define VM_IS_NAN(v) ((v) == VM_VALUE_NAN)
// This is the only valid way of representing infinity
#define VM_IS_INF(v) ((v) == VM_VALUE_INF)
// This is the only valid way of representing -infinity
#define VM_IS_NEG_INF(v) ((v) == VM_VALUE_NEG_INF)
// This is the only valid way of representing negative zero
#define VM_IS_NEG_ZERO(v) ((v) == VM_VALUE_NEG_ZERO)
// Raises a fatal error for functionality that has not been implemented
#define VM_NOT_IMPLEMENTED(vm) MVM_FATAL_ERROR(vm, MVM_E_NOT_IMPLEMENTED)
// Raises a fatal "unexpected" error for reserved functionality
#define VM_RESERVED(vm) MVM_FATAL_ERROR(vm, MVM_E_UNEXPECTED)
// An error corresponding to an internal inconsistency in the VM. Such an error
// cannot be caused by incorrect usage of the VM. In safe mode, this function
// should terminate the application. If not in safe mode, it is assumed that
// this function will never be invoked.
//
// This is a comma expression that evaluates to -1, so it can be used where a
// value is expected.
#define VM_UNEXPECTED_INTERNAL_ERROR(vm) (MVM_FATAL_ERROR(vm, MVM_E_UNEXPECTED), -1)
// Given `v`, a pointer to an allocation header (TsAllocationHeader), yields a
// `void*` to the memory immediately following that header.
//
// The argument is parenthesized so that the cast applies to the whole argument
// expression (e.g. `p + n`) rather than only to its first operand.
#define VM_VALUE_OF_DYNAMIC(v) ((void*)((TsAllocationHeader*)(v) + 1))
// Given `v`, a pointer to an allocation header (TsAllocationHeader), yields
// the header's `type` field.
#define VM_DYNAMIC_TYPE(v) (((TsAllocationHeader*)(v))->type)
// Largest and smallest values of a signed 14-bit integer (8191 and -8192)
#define VM_MAX_INT14 0x1FFF
#define VM_MIN_INT14 (-0x2000)
// Helpers that are only active when MVM_SAFE_MODE is non-zero; otherwise they
// expand to nothing.
#if MVM_SAFE_MODE
// Emits `code` only in safe mode
#define VM_EXEC_SAFE_MODE(code) code
// If `v` is NULL, returns MVM_E_UNEXPECTED from the enclosing function
#define VM_SAFE_CHECK_NOT_NULL(v) do { if ((v) == NULL) return MVM_E_UNEXPECTED; } while (false)
// If `v` is NULL, raises a fatal error and returns NULL from the enclosing
// function. Requires a variable named `vm` to be in scope at the point of use.
#define VM_SAFE_CHECK_NOT_NULL_2(v) do { if ((v) == NULL) { MVM_FATAL_ERROR(vm, MVM_E_UNEXPECTED); return NULL; } } while (false)
// Marks a code path that should never be reached
#define VM_ASSERT_UNREACHABLE(vm) MVM_FATAL_ERROR(vm, MVM_E_UNEXPECTED)
#else
#define VM_EXEC_SAFE_MODE(code)
#define VM_SAFE_CHECK_NOT_NULL(v)
#define VM_SAFE_CHECK_NOT_NULL_2(v)
#define VM_ASSERT_UNREACHABLE(vm)
#endif
// Bytecode validation helpers. They are active when either
// MVM_DONT_TRUST_BYTECODE or MVM_SAFE_MODE is non-zero, and expand to nothing
// otherwise (in which case `condition` is not evaluated).
#if MVM_DONT_TRUST_BYTECODE || MVM_SAFE_MODE
// TODO: I think I need to do an audit of all the assertions and errors in the code, and make sure they're categorized correctly as bytecode errors or not
// Raises a fatal MVM_E_INVALID_BYTECODE error
#define VM_INVALID_BYTECODE(vm) MVM_FATAL_ERROR(vm, MVM_E_INVALID_BYTECODE)
// Raises a fatal MVM_E_INVALID_BYTECODE error if `condition` is false
#define VM_BYTECODE_ASSERT(vm, condition) do { if (!(condition)) VM_INVALID_BYTECODE(vm); } while (false)
#else
#define VM_INVALID_BYTECODE(vm)
#define VM_BYTECODE_ASSERT(vm, condition)
#endif
// Default (no-op) definitions of the coverage macros, used when CODE_COVERAGE
// has not already been defined (e.g. by the port file).
#ifndef CODE_COVERAGE
/*
* A set of macros for manual code coverage analysis (because the off-the-shelf
* tools appear to be quite expensive). This should be overridden in the port
* file for the unit tests. Each instance of this macro should occur on its own
* line. The unit tests can dumbly scan the source text for instances of this
* macro to establish what code paths _should_ be hit. Each instance should have
* its own unique numeric ID.
*
* If the ID is omitted or a non-integer placeholder (e.g. "x"), the script `npm
* run update-coverage-markers` will fill in a valid ID.
*
* Explicit IDs are used instead of line numbers because a previous analysis
* remains roughly correct even after the code has changed.
*/
#define CODE_COVERAGE(id)
#define CODE_COVERAGE_UNTESTED(id)
#define CODE_COVERAGE_UNIMPLEMENTED(id)
#define CODE_COVERAGE_ERROR_PATH(id)
/**
* In addition to recording code coverage, it's useful to have information about
* the coverage information for table entries. Code and tables can be
* alternative representations of the same thing. For example, a lookup table
* can be represented as a switch statement. However, only the switch statement
* form typically shows up in code coverage analysis. With Microvium coverage
* analysis, tables are covered as well.
*
* If the ID is omitted or a non-integer placeholder (e.g. "x"), the script `npm
* run update-coverage-markers` will fill in a valid ID.
*
* @param indexInTable The runtime expression for the case that is actually hit.
* @param tableSize The size of the table (can be a runtime expression)
* @param id A unique numeric ID to uniquely identify the marker
*/
#define TABLE_COVERAGE(indexInTable, tableSize, id)
#endif
// Default values for port options that have not already been defined (e.g. by
// the port file).
#ifndef MVM_SUPPORT_FLOAT
#define MVM_SUPPORT_FLOAT 1
#endif
#ifndef MVM_PORT_INT32_OVERFLOW_CHECKS
#define MVM_PORT_INT32_OVERFLOW_CHECKS 1
#endif
// Note: `#if MVM_SAFE_MODE` is already evaluated earlier in this file. An
// undefined identifier evaluates as 0 in `#if`, which agrees with this default.
#ifndef MVM_SAFE_MODE
#define MVM_SAFE_MODE 0
#endif
// TODO: The example port file sets to 1 because we want it enabled in the
// tests. But really we should have a separate test port file.
#ifndef MVM_VERY_EXPENSIVE_MEMORY_CHECKS
#define MVM_VERY_EXPENSIVE_MEMORY_CHECKS 0
#endif
#ifndef MVM_DONT_TRUST_BYTECODE
#define MVM_DONT_TRUST_BYTECODE 0
#endif
// Overridable wrappers for `switch` and `case`. The default MVM_SWITCH ignores
// its `upper` argument and expands to a plain `switch (tag)`.
#ifndef MVM_SWITCH
#define MVM_SWITCH(tag, upper) switch (tag)
#endif
#ifndef MVM_CASE
#define MVM_CASE(value) case value
#endif
/**
* Type code indicating the type of data.
*
* This enumeration is divided into reference types (TC_REF_) and value types
* (TC_VAL_). Reference type codes are used on allocations, whereas value type
* codes are never used on allocations. The space for the type code in the
* allocation header is 4 bits, so there are up to 16 reference types and these
* must be the first 16 types in the enumeration.
*
* The reference type range is subdivided into containers or non-containers. The
* GC uses this distinction to decide whether the body of the allocation should
* be interpreted as `Value`s (i.e. may contain pointers). To minimize the code,
* either ALL words in a container are `Value`s, or none.
*
* Value types are for the values that can be represented within the 16-bit
* mvm_Value without interpreting it as a pointer.
*/
typedef enum TeTypeCode {
  // Note: only type code values in the range 0-15 can be used as the types for
  // allocations, since the allocation header allows 4 bits for the type. Types
  // 0-8 are non-container types; types from TC_REF_DIVIDER_CONTAINER_TYPES
  // (0x9) up to 0xF are container types (0xA and 0xB are reserved). Every word
  // in a container must be a `Value`. No words in a non-container can be a
  // `Value` (the GC uses this to distinguish whether an allocation may contain
  // pointers). Note that buffer-like types would not count as containers by
  // this definition.

  /* --------------------------- Reference types --------------------------- */

  // A type used during garbage collection. Allocations of this type have a
  // single 16-bit forwarding pointer in the allocation.
  TC_REF_TOMBSTONE = 0x0,

  TC_REF_INT32 = 0x1, // 32-bit signed integer
  TC_REF_FLOAT64 = 0x2, // 64-bit float

  /**
   * UTF8-encoded string that may or may not be unique.
   *
   * Note: If a TC_REF_STRING is in bytecode, it is because it encodes a value
   * that is illegal as a property index in Microvium (i.e. it encodes an
   * integer).
   */
  TC_REF_STRING = 0x3,

  /**
   * A string whose address uniquely identifies its contents, and does not
   * encode an integer in the range 0 to 0x1FFF.
   *
   * To keep property lookup efficient, Microvium requires that strings used as
   * property keys can be compared using pointer equality. This requires that
   * there is only one instance of each of those strings (see
   * https://en.wikipedia.org/wiki/String_interning).
   *
   * A string with the type code TC_REF_INTERNED_STRING means that it exists in
   * one of the interning tables (either the one in ROM or the one in RAM). Not
   * all strings are interned, because it would be expensive if every string
   * concatenation resulted in a search of the intern table and possibly a new
   * entry (imagine if every JSON string landed up in the table!).
   *
   * In practice we do this:
   *
   * - All valid non-index property keys in ROM are interned. If a string is in
   *   ROM but it is not interned, the engine can conclude that it is not a
   *   valid property key or it is an index.
   * - Strings constructed in RAM are only interned when they're used to access
   *   properties.
   */
  TC_REF_INTERNED_STRING = 0x4,

  TC_REF_FUNCTION = 0x5, // TsBytecodeFunc
  TC_REF_HOST_FUNC = 0x6, // TsHostFunc
  TC_REF_UINT8_ARRAY = 0x7, // Byte buffer
  TC_REF_SYMBOL = 0x8, // Reserved: Symbol

  /* --------------------------- Container types --------------------------- */

  // <--- Marker. Types after or including this point but less than 0x10 are
  // container types. It deliberately shares its value with the first container
  // type (TC_REF_CLASS); the value is spelled out so that the two cannot drift
  // apart if entries above are reordered.
  TC_REF_DIVIDER_CONTAINER_TYPES = 0x9,

  TC_REF_CLASS = 0x9, // TsClass
  TC_REF_VIRTUAL = 0xA, // Reserved: TsVirtual
  TC_REF_RESERVED_1 = 0xB, // Reserved
  TC_REF_PROPERTY_LIST = 0xC, // TsPropertyList - Object represented as linked list of properties
  TC_REF_ARRAY = 0xD, // TsArray
  TC_REF_FIXED_LENGTH_ARRAY = 0xE, // TsFixedLengthArray
  TC_REF_CLOSURE = 0xF, // TsClosure

  /* ----------------------------- Value types ----------------------------- */

  TC_VAL_INT14 = 0x10,
  TC_VAL_UNDEFINED = 0x11,
  TC_VAL_NULL = 0x12,
  TC_VAL_TRUE = 0x13,
  TC_VAL_FALSE = 0x14,
  TC_VAL_NAN = 0x15,
  TC_VAL_NEG_ZERO = 0x16,
  TC_VAL_DELETED = 0x17, // Placeholder for properties and list items that have been deleted or holes in arrays
  TC_VAL_STR_LENGTH = 0x18, // The string "length"
  TC_VAL_STR_PROTO = 0x19, // The string "__proto__"

  TC_END, // One past the last type code (used to size lookup tables)
} TeTypeCode;
// Note: VM_VALUE_NAN must be used instead of a pointer to a double that has a
// NaN value (i.e. the values must be normalized to use the following table).
// Operations will assume this canonical form.
// Note: the `(... << 2) | 1` is so that these values don't overlap with the
// ShortPtr or BytecodeMappedPtr address spaces.
// Some well-known values
/*
 * Each well-known value is `4 * k + 1`, where `k` is the distance of its type
 * code from TC_VAL_UNDEFINED. The low two bits are therefore always `01`.
 */
typedef enum vm_TeWellKnownValues {
  VM_VALUE_UNDEFINED  = (((int)TC_VAL_UNDEFINED  - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x01
  VM_VALUE_NULL       = (((int)TC_VAL_NULL       - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x05
  VM_VALUE_TRUE       = (((int)TC_VAL_TRUE       - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x09
  VM_VALUE_FALSE      = (((int)TC_VAL_FALSE      - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x0D
  VM_VALUE_NAN        = (((int)TC_VAL_NAN        - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x11
  VM_VALUE_NEG_ZERO   = (((int)TC_VAL_NEG_ZERO   - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x15
  VM_VALUE_DELETED    = (((int)TC_VAL_DELETED    - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x19
  VM_VALUE_STR_LENGTH = (((int)TC_VAL_STR_LENGTH - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x1D
  VM_VALUE_STR_PROTO  = (((int)TC_VAL_STR_PROTO  - (int)TC_VAL_UNDEFINED) * 4) + 1, // = 0x21

  // One more than the last well-known value
  VM_VALUE_WELLKNOWN_END,
} vm_TeWellKnownValues;
// Encodes the integer `i` as a 16-bit value: `i` occupies the upper 14 bits and
// the two low bits are set to `11`. Usable in constant initializers.
#define VIRTUAL_INT14_ENCODE(i) ((uint16_t)(3u | (((unsigned int)(i)) << 2)))
// Body of a TC_REF_ARRAY allocation: a reference to separately-allocated item
// storage (dpData) plus the array's logical length (viLength).
typedef struct TsArray {
/*
* Note: the capacity of the array is the length of the TsFixedLengthArray
* pointed to by dpData, or 0 if dpData is VM_VALUE_NULL. The logical length
* of the array is determined by viLength.
*
* Note: If dpData is not null, it must be a unique pointer (it must be the
* only pointer that points to that allocation)
*
* Note: for arrays in GC memory, their dpData must point to GC memory as well
*
* Note: Values in dpData that are beyond the logical length MUST be filled
* with VM_VALUE_DELETED.
*/
DynamicPtr dpData; // Points to TsFixedLengthArray
VirtualInt14 viLength; // Logical length; may be less than the capacity of dpData
} TsArray;
// Body of a TC_REF_FIXED_LENGTH_ARRAY allocation. Also the storage type that
// TsArray.dpData points to.
typedef struct TsFixedLengthArray {
// Note: the length of the fixed-length-array is determined by the allocation header
// (`items` is declared with one element only as a placeholder for the real count)
Value items[1];
} TsFixedLengthArray;
// Forward declaration so that `struct mvm_VM` can hold a `vm_TsStack*` before
// the struct body is defined further down.
typedef struct vm_TsStack vm_TsStack;
/**
* Used to represent JavaScript objects.
*
* The `proto` pointer points to the prototype of the object.
*
* Properties on an object are stored in a linked list of groups. Each group
* has a `next` pointer to the next group (list). When assigning to a new
* property, rather than resizing a group, the VM will just append a new group
* to the list (a group with just the one new property).
*
* Only the `proto` field of the first group of properties in an object is used.
*
* The garbage collector compacts multiple groups into one large one, so it
* doesn't matter that appending a single property requires a whole new group on
* its own or that they have unused proto properties.
*/
typedef struct TsPropertyList {
// Note: if the property list is in GC memory, then dpNext must also point to
// GC memory, but dpProto can point to any memory (e.g. a prototype stored in
// ROM).
// Note: in the serialized form, the next pointer must be null
DynamicPtr dpNext; // TsPropertyList* or VM_VALUE_NULL, containing further appended properties
DynamicPtr dpProto; // Note: the prototype is only meaningful on the first in the list
/*
Followed by N of these pairs to the end of the allocated size:
Value key; // TC_VAL_INT14 or TC_REF_INTERNED_STRING
Value value;
*/
} TsPropertyList;
/**
* A property list with a single property. See TsPropertyList for description.
*
* The layout is the TsPropertyList header followed by exactly one key/value
* pair, i.e. the N = 1 case of the pairs that trail a TsPropertyList.
*/
typedef struct TsPropertyCell /* extends TsPropertyList */ {
TsPropertyList base;
Value key; // TC_VAL_INT14 or TC_REF_INTERNED_STRING
Value value;
} TsPropertyCell;
/**
* A closure is a function-like type that has access to an outer lexical scope
* (other than the globals, which are already accessible by any function).
*
* The `target` must reference a function, either a local function or host (it
* cannot itself be a TsClosure). This will be what is called when the closure
* is called. If it's an invalid type, the error is the same as if calling that
* type directly.
*
* The closure keeps a reference to the outer `scope`. The machine semantics for
* a `CALL` of a `TsClosure` is to set the `scope` register to the scope of the
* `TsClosure`, which is then accessible via the `VM_OP_LOAD_SCOPED_n` and
* `VM_OP_STORE_SCOPED_n` instructions. The `VM_OP1_CLOSURE_NEW` instruction
* automatically captures the current `scope` register in a new `TsClosure`.
*
* Scopes are created using `VM_OP1_SCOPE_PUSH` using the type
* `TC_REF_FIXED_LENGTH_ARRAY`, with one extra slot for the reference to the
* outer scope. An instruction like `VM_OP_LOAD_SCOPED_1` accepts an index into
* the slots in the scope chain (see `vm_findScopedVariable`)
*
* By convention, the caller passes `this` by the first argument. If the closure
* body wants to access the caller's `this` then it just accesses the first
* argument. If the body wants to access the outer scope's `this` then its
* parent must copy the `this` argument into the closure scope and the child can
* access it via `VM_OP_LOAD_SCOPED_1`, the same as would be done for any
* closed-over parameter.
*/
typedef struct TsClosure {
Value scope; // The captured outer scope
Value target; // Function type
} TsClosure;
/**
* This type is to provide support for a subset of the ECMAScript classes
* feature. Classes can be instantiated using `new`, but it is illegal to call
* them directly. Similarly, `new` doesn't work on arbitrary functions.
*
* Body of a TC_REF_CLASS allocation.
*/
typedef struct TsClass {
Value constructorFunc; // Function type
// NOTE(review): presumably a reference to the object holding the class's
// static members -- confirm against the code that reads this field.
Value staticProps;
} TsClass;
/**
* TsVirtual (at the time of this writing, this is just a placeholder type)
*
* This is a placeholder for an idea to have something like a "low-level proxy"
* type. See my private notes for details (if you have access to them). The
* `type` and `state` fields correspond roughly to the "handler" and "target"
* fields respectively in a normal ES `Proxy`.
*
* The corresponding type code, TC_REF_VIRTUAL, is marked as reserved.
*/
typedef struct TsVirtual {
Value state; // Roughly the "target" of an ES Proxy
Value type; // Roughly the "handler" of an ES Proxy
} TsVirtual;
// External function by index in import table
typedef struct TsHostFunc {
// Note: TC_REF_HOST_FUNC is not a container type, so its fields are not
// traced by the GC.
//
// Note: most host function references can be optimized to not require this
// allocation -- they can use VM_OP2_CALL_HOST directly. This allocation is
// only required when the reference to the host function is ambiguous or there
// are calls to more than 256 host functions.
uint16_t indexInImportTable;
} TsHostFunc;
// Header of one bucket of GC heap memory. Buckets are chained in a
// doubly-linked list (prev/next) and the bucket's data follows this header.
typedef struct TsBucket {
uint16_t offsetStart; // The number of bytes in the heap before this bucket
struct TsBucket* prev;
struct TsBucket* next;
/* Note: pEndOfUsedSpace used to be on the VM struct, rather than per-bucket.
* The main reason it's useful to have it on each bucket is in the hot GC-loop
* which needs to check if it's caught up with the write cursor in to-space or
* check if it's hit the end of the bucket. Without this value being in each
* bucket, the calculation to find the end of the bucket is expensive.
*
* Note that for the last bucket, `pEndOfUsedSpace` doubles up as the write
* cursor, since it's only recording the *used* space. The *capacity* of each
* bucket is not recorded, but the capacity of the *last* bucket is recorded
* in `pLastBucketEndCapacity` (on the VM and GC structures). */
uint16_t* pEndOfUsedSpace;
/* ...data */
} TsBucket;
// Node in the singly-linked list of breakpoints rooted at `vm->pBreakpoints`.
typedef struct TsBreakpoint {
struct TsBreakpoint* next; // Next breakpoint, or null at the end of the list
// Compared by the run loop against the program counter expressed as an offset
// from the start of the bytecode
uint16_t bytecodeAddress;
} TsBreakpoint;
/*
Minimum size:
- 6 pointers + 1 long pointer + 4 words
- = 24B on 16bit
- = 36B on 32bit.
Maximum size (on 64-bit machine):
- 9 pointers + 4 words
- = 80 bytes on 64-bit machine
See also the unit tests called "minimal-size"
*/
// Top-level state of a virtual machine instance.
struct mvm_VM {
uint16_t* globals;
// Start of the bytecode image. Bytecode addresses (e.g. return addresses saved
// on the call stack) are stored as offsets from this pointer.
LongPtr lpBytecode;
// Call stack and registers. Null when there is no stack; mvm_call creates one
// on entry if needed.
vm_TsStack* stack;
// Last bucket of GC memory
TsBucket* pLastBucket;
// End of the capacity of the last bucket of GC memory
uint16_t* pLastBucketEndCapacity;
// Handles - values to treat as GC roots
mvm_Handle* gc_handles;
void* context;
#if MVM_INCLUDE_DEBUG_CAPABILITY
// Head of the breakpoint list checked by the run loop before each instruction
TsBreakpoint* pBreakpoints;
// Invoked (if non-null) when execution reaches a breakpoint address
mvm_TfBreakpointCallback breakpointCallback;
#endif // MVM_INCLUDE_DEBUG_CAPABILITY
uint16_t heapSizeUsedAfterLastGC;
uint16_t stackHighWaterMark;
uint16_t heapHighWaterMark;
#if MVM_VERY_EXPENSIVE_MEMORY_CHECKS
// Amount to shift the heap over during each collection cycle
uint8_t gc_heap_shift;
#endif
#if MVM_SAFE_MODE
// A number that increments at every possible opportunity for a GC cycle
uint8_t gc_potentialCycleNumber;
#endif // MVM_SAFE_MODE
};
// One cell in a linked list of interned strings.
// NOTE(review): presumably this is the RAM interning table described at
// TC_REF_INTERNED_STRING -- confirm against the code that walks the list.
typedef struct TsInternedStringCell {
ShortPtr spNext; // Link to the next cell
Value str; // The string held by this cell
} TsInternedStringCell;
/**
 * Possible values for the `flags` machine register.
 *
 * The flags share a 16-bit word with the argument count (see
 * `argCountAndFlags`), so they live in the upper byte. Bit 8 is not assigned
 * to any flag here.
 */
typedef enum vm_TeActivationFlags {
  // Bit 9: set if the most-recent CALL operation involved a stack-based
  // function target (as opposed to a literal function target). If this is set,
  // then the next RETURN instruction will also pop the function reference off
  // the stack.
  AF_PUSHED_FUNCTION = 0x0200,

  // Bit 10: returning from the current frame should return to the host
  AF_CALLED_FROM_HOST = 0x0400
} vm_TeActivationFlags;
/**
* The VM's working registers. This struct is embedded as the first member of
* vm_TsStack, so it is allocated together with the call stack when the host
* calls into the VM.
*/
typedef struct vm_TsRegisters { // 24 B on 32-bit machine
uint16_t* pFrameBase;
uint16_t* pStackPointer;
LongPtr lpProgramCounter;
// Note: I previously used to infer the location of the arguments based on the
// number of values PUSHed by a CALL instruction to preserve the activation
// state (i.e. 3 words). But now that distance is dynamic, so we need an
// explicit register.
Value* pArgs;
uint16_t argCountAndFlags; // Lower 8 bits are argument count, upper 8 bits are vm_TeActivationFlags
Value scope; // Closure scope
uint16_t catchTarget; // 0 if no catch block
#if MVM_SAFE_MODE
// This will be true if the VM is operating on the local variables rather
// than the shared vm_TsRegisters structure.
uint8_t usingCachedRegisters;
uint8_t _reserved; // My compiler seems to pad this out anyway
#endif
} vm_TsRegisters;
/**
* This struct is malloc'd from the host when the host calls into the VM and
* freed when the VM finally returns to the host. This struct embeds both the
* working registers and the call stack in the same allocation since they are
* needed at the same time and it's more efficient to do a single malloc where
* possible.
*
* The stack memory is not a named member: it starts immediately after this
* struct (i.e. at `(uint16_t*)(stack + 1)`).
*/
struct vm_TsStack {
// Allocate registers along with the stack, because these are needed at the same time (i.e. while the VM is active)
vm_TsRegisters reg;
// Note: the stack grows upwards (towards higher addresses)
// ... (stack memory) ...
};
// The 16-bit header word that precedes each allocation.
typedef struct TsAllocationHeader {
/* The 4 most-significant bits are the type code (TeTypeCode), consistent with
how TOMBSTONE_HEADER is built (`typeCode << 12 | size`). The remaining 12
(low) bits are the allocation size, excluding the size of the header itself,
in bytes (measured in bytes so that we can represent the length of strings
exactly). See also `vm_getAllocationSizeExcludingHeaderFromHeaderWord` */
uint16_t headerData;
} TsAllocationHeader;
// Header at the start of a TC_REF_FUNCTION allocation.
typedef struct TsBytecodeFunc {
// NOTE(review): presumably the stack space the function may need, used to
// reserve stack before the call -- units not visible here, confirm.
uint8_t maxStackDepth;
/* Followed by the bytecode bytes */
} TsBytecodeFunc;
// One entry in the import table: identifies a host function by its ID.
// TsHostFunc.indexInImportTable is an index into this table.
typedef struct vm_TsImportTableEntry {
mvm_HostFunctionID hostFunctionID;
/*
Note: I considered having a `paramCount` field in the header since a common
scenario would be copying the arguments into the parameter slots. However,
most parameters are not actually mutated in a function, so the LOAD_ARG
instruction could just be used directly to get the parameter value (if the
optimizer can detect such cases).
*/
} vm_TsImportTableEntry;
// NOTE(review): not referenced in the part of the file visible here; confirm
// whether it is still used.
#define GC_TRACE_STACK_COUNT 20
// Working state for a garbage collection. The bucket fields parallel
// `pLastBucket` / `pLastBucketEndCapacity` on the VM struct.
// NOTE(review): presumably these describe the new heap being built during
// the collection -- confirm in the collector.
typedef struct gc_TsGCCollectionState {
VM* vm;
TsBucket* firstBucket;
TsBucket* lastBucket;
uint16_t* lastBucketEndCapacity;
} gc_TsGCCollectionState;
// Allocation header word for a tombstone: type code TC_REF_TOMBSTONE in the top
// 4 bits and a size of 2 bytes (the single 16-bit forwarding pointer).
#define TOMBSTONE_HEADER ((TC_REF_TOMBSTONE << 12) | 2)
// A CALL instruction saves the current registers to the stack. I'm calling this
// the "frame boundary" since it is a fixed-size sequence of words that marks
// the boundary between stack frames. The shape of this saved state is coupled
// to a few different places in the engine, so I'm versioning it here in case I
// need to make changes. (PUSH_REGISTERS and POP_REGISTERS in mvm_call assert
// this version.)
#define VM_FRAME_BOUNDARY_VERSION 2
// The number of words between one call stack frame and the next (i.e. the
// number of saved registers during a CALL). PUSH_REGISTERS pushes exactly
// these four words: frame size, scope, argCountAndFlags and return address.
#define VM_FRAME_BOUNDARY_SAVE_SIZE_WORDS 4
// Forward declarations of the file-local helpers, so that they can be used
// before their definitions further down in this file.
static inline mvm_HostFunctionID vm_getHostFunctionId(VM*vm, uint16_t hostFunctionIndex);
static TeError vm_createStackAndRegisters(VM* vm);
static TeError vm_requireStackSpace(VM* vm, uint16_t* pStackPointer, uint16_t sizeRequiredInWords);
static Value vm_convertToString(VM* vm, Value value);
static Value vm_concat(VM* vm, Value* left, Value* right);
static TeTypeCode deepTypeOf(VM* vm, Value value);
static bool vm_isString(VM* vm, Value value);
static int32_t vm_readInt32(VM* vm, TeTypeCode type, Value value);
static TeError vm_resolveExport(VM* vm, mvm_VMExportID id, Value* result);
static inline mvm_TfHostFunction* vm_getResolvedImports(VM* vm);
static void gc_createNextBucket(VM* vm, uint16_t bucketSize, uint16_t minBucketSize);
static void* gc_allocateWithHeader(VM* vm, uint16_t sizeBytes, TeTypeCode typeCode);
static void gc_freeGCMemory(VM* vm);
static Value vm_allocString(VM* vm, size_t sizeBytes, void** data);
static TeError getProperty(VM* vm, Value* pObjectValue, Value* pPropertyName, Value* out_propertyValue);
static TeError setProperty(VM* vm, Value* pOperands);
static TeError toPropertyName(VM* vm, Value* value);
static void toInternedString(VM* vm, Value* pValue);
static uint16_t vm_stringSizeUtf8(VM* vm, Value str);
static bool vm_ramStringIsNonNegativeInteger(VM* vm, Value str);
static TeError toInt32Internal(mvm_VM* vm, mvm_Value value, int32_t* out_result);
static inline uint16_t vm_getAllocationSizeExcludingHeaderFromHeaderWord(uint16_t headerWord);
static inline LongPtr LongPtr_add(LongPtr lp, int16_t offset);
static inline uint16_t LongPtr_read2_aligned(LongPtr lp);
static inline uint16_t LongPtr_read2_unaligned(LongPtr lp);
static void memcpy_long(void* target, LongPtr source, size_t size);
static void loadPointers(VM* vm, uint8_t* heapStart);
static inline ShortPtr ShortPtr_encode(VM* vm, void* ptr);
static inline uint8_t LongPtr_read1(LongPtr lp);
static LongPtr DynamicPtr_decode_long(VM* vm, DynamicPtr ptr);
static inline int16_t LongPtr_sub(LongPtr lp1, LongPtr lp2);
static inline uint16_t readAllocationHeaderWord(void* pAllocation);
static inline uint16_t readAllocationHeaderWord_long(LongPtr pAllocation);
static inline void* gc_allocateWithConstantHeader(VM* vm, uint16_t header, uint16_t sizeIncludingHeader);
static inline uint16_t vm_makeHeaderWord(VM* vm, TeTypeCode tc, uint16_t size);
static int memcmp_long(LongPtr p1, LongPtr p2, size_t size);
static LongPtr getBytecodeSection(VM* vm, mvm_TeBytecodeSection id, LongPtr* out_end);
static inline void* LongPtr_truncate(LongPtr lp);
static inline LongPtr LongPtr_new(void* p);
static inline uint16_t* getBottomOfStack(vm_TsStack* stack);
static inline uint16_t* getTopOfStackSpace(vm_TsStack* stack);
static inline void* getBucketDataBegin(TsBucket* bucket);
static uint16_t getBucketOffsetEnd(TsBucket* bucket);
static uint16_t getSectionSize(VM* vm, mvm_TeBytecodeSection section);
static Value vm_intToStr(VM* vm, int32_t i);
static Value vm_newStringFromCStrNT(VM* vm, const char* s);
static TeError vm_validatePortFileMacros(MVM_LONG_PTR_TYPE lpBytecode, mvm_TsBytecodeHeader* pHeader);
static LongPtr vm_toStringUtf8_long(VM* vm, Value value, size_t* out_sizeBytes);
static LongPtr vm_findScopedVariable(VM* vm, uint16_t index);
static Value vm_cloneFixedLengthArray(VM* vm, Value* pArr);
static Value vm_safePop(VM* vm, Value* pStackPointerAfterDecr);
static LongPtr vm_getStringData(VM* vm, Value value);
static inline VirtualInt14 VirtualInt14_encode(VM* vm, int16_t i);
static inline TeTypeCode vm_getTypeCodeFromHeaderWord(uint16_t headerWord);
static bool DynamicPtr_isRomPtr(VM* vm, DynamicPtr dp);
static inline void vm_checkValueAccess(VM* vm, uint8_t potentialCycleNumber);
static inline uint16_t vm_getAllocationSize(void* pAllocation);
static inline uint16_t vm_getAllocationSize_long(LongPtr lpAllocation);
static inline mvm_TeBytecodeSection vm_sectionAfter(VM* vm, mvm_TeBytecodeSection section);
static void* ShortPtr_decode(VM* vm, ShortPtr shortPtr);
static TeError vm_newError(VM* vm, TeError err);
static void* vm_malloc(VM* vm, size_t size);
static void vm_free(VM* vm, void* ptr);
static inline Value* getHandleTargetOrNull(VM* vm, Value value);
static TeError vm_objectKeys(VM* vm, Value* pObject);
static mvm_TeError vm_uint8ArrayNew(VM* vm, Value* slot);
static Value getBuiltin(VM* vm, mvm_TeBuiltins builtinID);
// Only declared in safe mode
#if MVM_SAFE_MODE
static inline uint16_t vm_getResolvedImportCount(VM* vm);
#endif // MVM_SAFE_MODE
// Lookup table for VM_OP_LOAD_SMALL_LITERAL: the instruction's 4-bit operand is
// used as an index into this table to get the Value to push.
static const Value smallLiterals[] = {
/* VM_SLV_DELETED (NOTE(review): label was a duplicate "VM_SLV_UNDEFINED"; confirm enum name) */ VM_VALUE_DELETED,
/* VM_SLV_UNDEFINED */ VM_VALUE_UNDEFINED,
/* VM_SLV_NULL */ VM_VALUE_NULL,
/* VM_SLV_FALSE */ VM_VALUE_FALSE,
/* VM_SLV_TRUE */ VM_VALUE_TRUE,
/* VM_SLV_INT_MINUS_1 */ VIRTUAL_INT14_ENCODE(-1),
/* VM_SLV_INT_0 */ VIRTUAL_INT14_ENCODE(0),
/* VM_SLV_INT_1 */ VIRTUAL_INT14_ENCODE(1),
/* VM_SLV_INT_2 */ VIRTUAL_INT14_ENCODE(2),
/* VM_SLV_INT_3 */ VIRTUAL_INT14_ENCODE(3),
/* VM_SLV_INT_4 */ VIRTUAL_INT14_ENCODE(4),
/* VM_SLV_INT_5 */ VIRTUAL_INT14_ENCODE(5),
};
// Number of entries in smallLiterals (used to range-check the operand when
// MVM_DONT_TRUST_BYTECODE is enabled)
#define smallLiteralsSize (sizeof smallLiterals / sizeof smallLiterals[0])
// Text of the two strings that have dedicated type codes (TC_VAL_STR_PROTO and
// TC_VAL_STR_LENGTH) rather than being stored as string allocations.
static const char PROTO_STR[] = "__proto__";
static const char LENGTH_STR[] = "length";
// Type-name strings packed back-to-back into one array, each terminated by a
// NUL. `typeStringOffsetByType` holds the starting offset of the name to use
// for each type, so the offsets noted here must stay in sync with that table.
static const char TYPE_STRINGS[] =
  "undefined\0"  /* offset  0 */
  "boolean\0"    /* offset 10 */
  "number\0"     /* offset 18 */
  "string\0"     /* offset 25 */
  "function\0"   /* offset 32 */
  "object\0"     /* offset 41 */
  "symbol\0"     /* offset 48 */
  "bigint";      /* offset 55 */
// Character offsets into TYPE_STRINGS, one entry per type (VM_T_END entries in
// total). Several types share a name: null, object, array and uint8-array all
// map to offset 41 ("object"), and function and class both map to offset 32
// ("function").
static const uint8_t typeStringOffsetByType[VM_T_END] = {
0 , /* VM_T_UNDEFINED */
41, /* VM_T_NULL */
10, /* VM_T_BOOLEAN */
18, /* VM_T_NUMBER */
25, /* VM_T_STRING */
32, /* VM_T_FUNCTION */
41, /* VM_T_OBJECT */
41, /* VM_T_ARRAY */
41, /* VM_T_UINT8_ARRAY */
32, /* VM_T_CLASS */
48, /* VM_T_SYMBOL */
55, /* VM_T_BIG_INT */
};
// TeTypeCode -> mvm_TeType
//
// Indexed by type code, with one entry for each value below TC_END. Index 9 is
// shared by TC_REF_DIVIDER_CONTAINER_TYPES and TC_REF_CLASS, which have the
// same value. VM_T_END is used as the entry for type codes that have no
// corresponding mvm_TeType here (tombstones and the reserved codes).
static const uint8_t typeByTC[TC_END] = {
VM_T_END, /* TC_REF_TOMBSTONE */
VM_T_NUMBER, /* TC_REF_INT32 */
VM_T_NUMBER, /* TC_REF_FLOAT64 */
VM_T_STRING, /* TC_REF_STRING */
VM_T_STRING, /* TC_REF_INTERNED_STRING */
VM_T_FUNCTION, /* TC_REF_FUNCTION */
VM_T_FUNCTION, /* TC_REF_HOST_FUNC */
VM_T_UINT8_ARRAY, /* TC_REF_UINT8_ARRAY */
VM_T_SYMBOL, /* TC_REF_SYMBOL */
VM_T_CLASS, /* TC_REF_CLASS */
VM_T_END, /* TC_REF_VIRTUAL */
VM_T_END, /* TC_REF_RESERVED_1 */
VM_T_OBJECT, /* TC_REF_PROPERTY_LIST */
VM_T_ARRAY, /* TC_REF_ARRAY */
VM_T_ARRAY, /* TC_REF_FIXED_LENGTH_ARRAY */
VM_T_FUNCTION, /* TC_REF_CLOSURE */
VM_T_NUMBER, /* TC_VAL_INT14 */
VM_T_UNDEFINED, /* TC_VAL_UNDEFINED */
VM_T_NULL, /* TC_VAL_NULL */
VM_T_BOOLEAN, /* TC_VAL_TRUE */
VM_T_BOOLEAN, /* TC_VAL_FALSE */
VM_T_NUMBER, /* TC_VAL_NAN */
VM_T_NUMBER, /* TC_VAL_NEG_ZERO */
VM_T_UNDEFINED, /* TC_VAL_DELETED */
VM_T_STRING, /* TC_VAL_STR_LENGTH */
VM_T_STRING, /* TC_VAL_STR_PROTO */
};
// Allocates GC memory for one `type` struct and yields it as a `type*`. The
// header word is built from `typeCode` and `sizeof (type)`; the `2 +` in the
// last argument adds the 2-byte allocation header to give the size including
// the header. Note that `vm` appears twice in the expansion, so pass an
// expression without side effects.
#define GC_ALLOCATE_TYPE(vm, type, typeCode) \
(type*)gc_allocateWithConstantHeader(vm, vm_makeHeaderWord(vm, typeCode, sizeof (type)), 2 + sizeof (type))
// Forward declaration; only present when float support is compiled in
#if MVM_SUPPORT_FLOAT
static int32_t mvm_float64ToInt32(MVM_FLOAT64 value);
#endif
// MVM_LOCAL declares a local variable whose value would become invalidated if
// the GC performs a cycle. All access to the local should use MVM_GET_LOCAL and
// MVM_SET_LOCAL. This only needs to be used for pointer values or values that
// might hold a pointer.
//
// In safe mode, each such local is paired with a snapshot of
// `vm->gc_potentialCycleNumber` taken when the local was last written, and
// MVM_GET_LOCAL passes that snapshot to vm_checkValueAccess before reading.
//
// Caution: in safe mode MVM_LOCAL and MVM_SET_LOCAL expand to two statements
// (not wrapped in do/while), so they must not be used as the unbraced body of
// an `if`/`else`/loop. All three macros require a variable named `vm` in scope.
#if MVM_SAFE_MODE
#define MVM_LOCAL(type, varName, initial) type varName ## Value = initial; uint8_t _ ## varName ## PotentialCycleNumber = vm->gc_potentialCycleNumber
#define MVM_GET_LOCAL(varName) (vm_checkValueAccess(vm, _ ## varName ## PotentialCycleNumber), varName ## Value)
#define MVM_SET_LOCAL(varName, value) varName ## Value = value; _ ## varName ## PotentialCycleNumber = vm->gc_potentialCycleNumber
#else
// Outside safe mode these are plain variable declaration, read and assignment
#define MVM_LOCAL(type, varName, initial) type varName = initial
#define MVM_GET_LOCAL(varName) (varName)
#define MVM_SET_LOCAL(varName, value) varName = value
#endif // MVM_SAFE_MODE
// Various things require the registers (vm->stack->reg) to be up to date.
// This asserts that either there is no stack or the registers are not
// currently cached in local variables.
// NOTE(review): `usingCachedRegisters` only exists when MVM_SAFE_MODE is set,
// so this relies on VM_ASSERT discarding its condition otherwise -- confirm
// against the definition of VM_ASSERT.
#define VM_ASSERT_NOT_USING_CACHED_REGISTERS(vm) \
VM_ASSERT(vm, !vm->stack || !vm->stack->reg.usingCachedRegisters)
/**
* Public API to call into the VM to run the given function with the given
* arguments (also contains the run loop).
*
* Control returns from `mvm_call` either when it hits an error or when it
* executes a RETURN instruction within the called function.
*
* If the return code is MVM_E_UNCAUGHT_EXCEPTION then `out_result` points to the exception.
*/
TeError mvm_call(VM* vm, Value targetFunc, Value* out_result, Value* args, uint8_t argCount) {
/*
Note: when microvium calls the host, only `mvm_call` is on the call stack.
This is for the objective of being lightweight. Each stack frame in an
embedded environment can be quite expensive in terms of memory because of all
the general-purpose registers that need to be preserved.
*/
// -------------------------------- Definitions -----------------------------
#define CACHE_REGISTERS() do { \
VM_ASSERT(vm, reg->usingCachedRegisters == false); \
VM_EXEC_SAFE_MODE(reg->usingCachedRegisters = true;) \
lpProgramCounter = reg->lpProgramCounter; \
pFrameBase = reg->pFrameBase; \
pStackPointer = reg->pStackPointer; \
} while (false)
#define FLUSH_REGISTER_CACHE() do { \
VM_ASSERT(vm, reg->usingCachedRegisters == true); \
VM_EXEC_SAFE_MODE(reg->usingCachedRegisters = false;) \
reg->lpProgramCounter = lpProgramCounter; \
reg->pFrameBase = pFrameBase; \
reg->pStackPointer = pStackPointer; \
} while (false)
#define READ_PGM_1(target) do { \
VM_ASSERT(vm, reg->usingCachedRegisters == true); \
target = LongPtr_read1(lpProgramCounter);\
lpProgramCounter = LongPtr_add(lpProgramCounter, 1); \
} while (false)
#define READ_PGM_2(target) do { \
VM_ASSERT(vm, reg->usingCachedRegisters == true); \
target = LongPtr_read2_unaligned(lpProgramCounter); \
lpProgramCounter = LongPtr_add(lpProgramCounter, 2); \
} while (false)
#define PUSH(v) do { \
VM_ASSERT(vm, reg->usingCachedRegisters == true); \
VM_ASSERT(vm, pStackPointer < getTopOfStackSpace(vm->stack)); \
*pStackPointer = v; \
pStackPointer++; \
} while (false)
#if MVM_SAFE_MODE
#define POP() vm_safePop(vm, --pStackPointer)
#else
#define POP() (*(--pStackPointer))
#endif
// Push the current registers onto the call stack
#define PUSH_REGISTERS(lpReturnAddress) do { \
VM_ASSERT(vm, VM_FRAME_BOUNDARY_VERSION == 2); \
PUSH((uint16_t)(uintptr_t)pStackPointer - (uint16_t)(uintptr_t)pFrameBase); \
PUSH(reg->scope); \
PUSH(reg->argCountAndFlags); \
PUSH((uint16_t)LongPtr_sub(lpReturnAddress, vm->lpBytecode)); \
} while (false)
// Inverse of PUSH_REGISTERS
#define POP_REGISTERS() do { \
VM_ASSERT(vm, VM_FRAME_BOUNDARY_VERSION == 2); \
lpProgramCounter = LongPtr_add(vm->lpBytecode, POP()); \
reg->argCountAndFlags = POP(); \
reg->scope = POP(); \
pStackPointer--; \
pFrameBase = (uint16_t*)((uint8_t*)pStackPointer - *pStackPointer); \
reg->pArgs = pFrameBase - VM_FRAME_BOUNDARY_SAVE_SIZE_WORDS - (uint8_t)reg->argCountAndFlags; \
} while (false)
// Reinterpret reg1 as 8-bit signed
#define SIGN_EXTEND_REG_1() reg1 = (uint16_t)((int16_t)((int8_t)reg1))
#define INSTRUCTION_RESERVED() VM_ASSERT(vm, false)
// ------------------------------ Common Variables --------------------------
VM_SAFE_CHECK_NOT_NULL(vm);
if (argCount) VM_SAFE_CHECK_NOT_NULL(args);
TeError err = MVM_E_SUCCESS;
// These are cached values of `vm->stack->reg`, for quick access. Note: I've
// chosen only the most important registers to be cached here, in the hope
// that the C compiler will promote these eagerly to the CPU registers,
// although it may choose not to.
register uint16_t* pFrameBase;
register uint16_t* pStackPointer;
register LongPtr lpProgramCounter;
// These are general-purpose scratch "registers". Note: probably the compiler
// would be fine at performing register allocation if we didn't have specific
// register variables, but having them explicit forces us to think about what
// state is being used and designing the code to minimize it.
register uint16_t reg1;
register uint16_t reg2;
register uint16_t reg3;
uint16_t* regP1 = 0;
LongPtr regLP1 = 0;
uint16_t* globals;
vm_TsRegisters* reg;
vm_TsRegisters registerValuesAtEntry;
#if MVM_DONT_TRUST_BYTECODE
LongPtr maxProgramCounter;
LongPtr minProgramCounter = getBytecodeSection(vm, BCS_ROM, &maxProgramCounter);
#endif
// Note: these initial values are not actually used, but some compilers give a
// warning if you omit them.
pFrameBase = 0;
pStackPointer = 0;
lpProgramCounter = 0;
reg1 = 0;
reg2 = 0;
reg3 = 0;
// ------------------------------ Initialization ---------------------------
CODE_COVERAGE(4); // Hit
// Create the call stack if it doesn't exist
if (!vm->stack) {
CODE_COVERAGE(230); // Hit
err = vm_createStackAndRegisters(vm);
if (err != MVM_E_SUCCESS) {
return err;
}
} else {
CODE_COVERAGE_UNTESTED(232); // Not hit
}
globals = vm->globals;
reg = &vm->stack->reg;
registerValuesAtEntry = *reg;
// Because we're coming from C-land, any exceptions that happen during
// mvm_call should register as host errors
reg->catchTarget = VM_VALUE_UNDEFINED;
// Copy the state of the VM registers into the logical variables for quick access
CACHE_REGISTERS();
// ---------------------- Push host arguments to the stack ------------------
// 254 is the maximum because we also push the `this` value implicitly
if (argCount > 254) {
CODE_COVERAGE_ERROR_PATH(220); // Not hit
return MVM_E_TOO_MANY_ARGUMENTS;
} else {
CODE_COVERAGE(15); // Hit
}
vm_requireStackSpace(vm, pStackPointer, argCount + 1);
PUSH(VM_VALUE_UNDEFINED); // Push `this` pointer of undefined
TABLE_COVERAGE(argCount ? 1 : 0, 2, 513); // Hit 2/2
reg1 = argCount;
while (reg1--) {
PUSH(*args++);
}
// ---------------------------- Call target function ------------------------
reg1 /* argCountAndFlags */ = (argCount + 1) | AF_CALLED_FROM_HOST; // +1 for the `this` value
reg2 /* target */ = targetFunc;
goto LBL_CALL;
// --------------------------------- Run Loop ------------------------------
// This forms the start of the run loop
//
// Some useful debug watches:
//
// - Program counter: /* pc */ (uint8_t*)lpProgramCounter - (uint8_t*)vm->lpBytecode
// /* pc */ (uint8_t*)vm->stack->reg.lpProgramCounter - (uint8_t*)vm->lpBytecode
//
// - Frame height (in words): /* fh */ (uint16_t*)pStackPointer - (uint16_t*)pFrameBase
// /* fh */ (uint16_t*)vm->stack->reg.pStackPointer - (uint16_t*)vm->stack->reg.pFrameBase
//
// - Frame: /* frame */ (uint16_t*)pFrameBase,10
// /* frame */ (uint16_t*)vm->stack->reg.pFrameBase,10
//
// - Stack height (in words): /* sp */ (uint16_t*)pStackPointer - (uint16_t*)(vm->stack + 1)
// /* sp */ (uint16_t*)vm->stack->reg.pStackPointer - (uint16_t*)(vm->stack + 1)
//
// - Frame base (in words): /* bp */ (uint16_t*)pFrameBase - (uint16_t*)(vm->stack + 1)
// /* bp */ (uint16_t*)vm->stack->reg.pFrameBase - (uint16_t*)(vm->stack + 1)
//
// - Arg count: /* argc */ (uint8_t)vm->stack->reg.argCountAndFlags
// - First 4 arg values: /* args */ vm->stack->reg.pArgs,4
//
// Notes:
//
// - The value of VM_VALUE_UNDEFINED is 0x001
// - If a value is _odd_, interpret it as a bytecode address by dividing by 2
//
LBL_DO_NEXT_INSTRUCTION:
CODE_COVERAGE(59); // Hit
// This is not required for execution but is intended for diagnostics,
// required by mvm_getCurrentAddress.
// TODO: If MVM_INCLUDE_DEBUG_CAPABILITY is not included, maybe this shouldn't be here, and `mvm_getCurrentAddress` should also not be available.
reg->lpProgramCounter = lpProgramCounter;
// Check we're within range
#if MVM_DONT_TRUST_BYTECODE
if ((lpProgramCounter < minProgramCounter) || (lpProgramCounter >= maxProgramCounter)) {
VM_INVALID_BYTECODE(vm);
}
#endif
// Check breakpoints
#if MVM_INCLUDE_DEBUG_CAPABILITY
if (vm->pBreakpoints) {
TsBreakpoint* pBreakpoint = vm->pBreakpoints;
uint16_t currentBytecodeAddress = LongPtr_sub(lpProgramCounter, vm->lpBytecode);
do {
if (pBreakpoint->bytecodeAddress == currentBytecodeAddress) {
FLUSH_REGISTER_CACHE();
mvm_TfBreakpointCallback breakpointCallback = vm->breakpointCallback;
if (breakpointCallback)
breakpointCallback(vm, currentBytecodeAddress);
CACHE_REGISTERS();
break;
}
pBreakpoint = pBreakpoint->next;
} while (pBreakpoint);
}
#endif // MVM_INCLUDE_DEBUG_CAPABILITY
// Instruction bytes are divided into two nibbles
READ_PGM_1(reg3);
reg1 = reg3 & 0xF;
reg3 = reg3 >> 4;
if (reg3 >= VM_OP_DIVIDER_1) {
CODE_COVERAGE(428); // Hit
reg2 = POP();
} else {
CODE_COVERAGE(429); // Hit
}
VM_ASSERT(vm, reg3 < VM_OP_END);
MVM_SWITCH(reg3, (VM_OP_END - 1)) {
/* ------------------------------------------------------------------------- */
/* VM_OP_LOAD_SMALL_LITERAL */
/* Expects: */
/* reg1: small literal ID */
/* ------------------------------------------------------------------------- */
MVM_CASE(VM_OP_LOAD_SMALL_LITERAL): {
CODE_COVERAGE(60); // Hit
TABLE_COVERAGE(reg1, smallLiteralsSize, 448); // Hit 11/12
#if MVM_DONT_TRUST_BYTECODE
if (reg1 >= smallLiteralsSize) {
err = vm_newError(vm, MVM_E_INVALID_BYTECODE);
goto LBL_EXIT;
}
#endif
reg1 = smallLiterals[reg1];
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_LOAD_VAR_1 */
/* Expects: */
/* reg1: variable index */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_LOAD_VAR_1):
CODE_COVERAGE(61); // Hit
LBL_OP_LOAD_VAR:
reg1 = pStackPointer[-reg1 - 1];
if (reg1 == VM_VALUE_DELETED) {
err = vm_newError(vm, MVM_E_TDZ_ERROR);
goto LBL_EXIT;
}
goto LBL_TAIL_POP_0_PUSH_REG1;
/* ------------------------------------------------------------------------- */
/* VM_OP_LOAD_SCOPED_1 */
/* Expects: */
/* reg1: variable index */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_LOAD_SCOPED_1):
CODE_COVERAGE(62); // Hit
LongPtr lpVar;
LBL_OP_LOAD_SCOPED:
lpVar = vm_findScopedVariable(vm, reg1);
reg1 = LongPtr_read2_aligned(lpVar);
goto LBL_TAIL_POP_0_PUSH_REG1;
/* ------------------------------------------------------------------------- */
/* VM_OP_LOAD_ARG_1 */
/* Expects: */
/* reg1: argument index */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_LOAD_ARG_1):
CODE_COVERAGE(63); // Hit
goto LBL_OP_LOAD_ARG;
/* ------------------------------------------------------------------------- */
/* VM_OP_CALL_1 */
/* Expects: */
/* reg1: index into short-call table */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_CALL_1): {
CODE_COVERAGE_UNTESTED(66); // Not hit
goto LBL_CALL_SHORT;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_FIXED_ARRAY_NEW_1 */
/* Expects: */
/* reg1: length of new fixed-length-array */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_FIXED_ARRAY_NEW_1): {
CODE_COVERAGE_UNTESTED(134); // Not hit
goto LBL_FIXED_ARRAY_NEW;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_EXTENDED_1 */
/* Expects: */
/* reg1: vm_TeOpcodeEx1 */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_EXTENDED_1):
CODE_COVERAGE(69); // Hit
goto LBL_OP_EXTENDED_1;
/* ------------------------------------------------------------------------- */
/* VM_OP_EXTENDED_2 */
/* Expects: */
/* reg1: vm_TeOpcodeEx2 */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_EXTENDED_2):
CODE_COVERAGE(70); // Hit
goto LBL_OP_EXTENDED_2;
/* ------------------------------------------------------------------------- */
/* VM_OP_EXTENDED_3 */
/* Expects: */
/* reg1: vm_TeOpcodeEx3 */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_EXTENDED_3):
CODE_COVERAGE(71); // Hit
goto LBL_OP_EXTENDED_3;
/* ------------------------------------------------------------------------- */
/* VM_OP_CALL_5 */
/* Expects: */
/* reg1: argCount */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_CALL_5): {
CODE_COVERAGE_UNTESTED(72); // Not hit
// Uses 16 bit literal for function offset
READ_PGM_2(reg2);
reg3 /* scope */ = VM_VALUE_UNDEFINED;
goto LBL_CALL_BYTECODE_FUNC;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_STORE_VAR_1 */
/* Expects: */
/* reg1: variable index relative to stack pointer */
/* reg2: value to store */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_STORE_VAR_1): {
CODE_COVERAGE(73); // Hit
LBL_OP_STORE_VAR:
// Note: the value to store has already been popped off the stack at this
// point. The index 0 refers to the slot currently at the top of the
// stack.
pStackPointer[-reg1 - 1] = reg2;
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_STORE_SCOPED_1 */
/* Expects: */
/* reg1: variable index */
/* reg2: value to store */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_STORE_SCOPED_1): {
CODE_COVERAGE(74); // Hit
LongPtr lpVar;
LBL_OP_STORE_SCOPED:
lpVar = vm_findScopedVariable(vm, reg1);
Value* pVar = (Value*)LongPtr_truncate(lpVar);
// It would be an illegal operation to write to a closure variable stored in ROM
VM_BYTECODE_ASSERT(vm, lpVar == LongPtr_new(pVar));
*pVar = reg2;
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_ARRAY_GET_1 */
/* Expects: */
/* reg1: item index (4-bit) */
/* reg2: reference to array */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_ARRAY_GET_1): {
  CODE_COVERAGE_UNTESTED(75); // Not hit
  // Reads the element at index `reg1` from the fixed-length array referenced
  // by `reg2` and pushes it onto the stack.
  // I think it makes sense for this instruction only to be an optimization for fixed-length arrays
  VM_ASSERT(vm, deepTypeOf(vm, reg2) == TC_REF_FIXED_LENGTH_ARRAY);
  regLP1 = DynamicPtr_decode_long(vm, reg2);
  // These indexes should be compiler-generated, so they should never be out of range
  VM_ASSERT(vm, reg1 < (vm_getAllocationSize_long(regLP1) >> 1));
  // Elements are 2 bytes wide, so the byte offset is the item index (reg1)
  // times two. Note that reg2 is the array reference, not the index, and must
  // not be used to compute the offset.
  regLP1 = LongPtr_add(regLP1, reg1 << 1);
  reg1 = LongPtr_read2_aligned(regLP1);
  goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_ARRAY_SET_1 */
/* Expects: */
/* reg1: item index (4-bit) */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_ARRAY_SET_1): {
  CODE_COVERAGE_UNTESTED(76); // Not hit
  // Stores `reg2` (the value to store) into element `reg1` of a fixed-length
  // array whose reference is popped from the stack here.
  //
  // The array reference goes into reg3 (no longer needed for dispatch at this
  // point) so that the value to store in reg2 is not clobbered.
  reg3 = POP(); // array reference
  // I think it makes sense for this instruction only to be an optimization for fixed-length arrays
  VM_ASSERT(vm, deepTypeOf(vm, reg3) == TC_REF_FIXED_LENGTH_ARRAY);
  // We can only write to it if it's in RAM, so it must be a short-pointer
  regP1 = (Value*)ShortPtr_decode(vm, reg3);
  // These indexes should be compiler-generated, so they should never be out of range
  VM_ASSERT(vm, reg1 < (vm_getAllocationSize(regP1) >> 1));
  regP1[reg1] = reg2;
  goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP_NUM_OP */
/* Expects: */
/* reg1: vm_TeNumberOp */
/* reg2: first popped operand */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_NUM_OP): {
CODE_COVERAGE(77); // Hit
goto LBL_OP_NUM_OP;
} // End of case VM_OP_NUM_OP
/* ------------------------------------------------------------------------- */
/* VM_OP_BIT_OP */
/* Expects: */
/* reg1: vm_TeBitwiseOp */
/* reg2: first popped operand */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP_BIT_OP): {
CODE_COVERAGE(92); // Hit
goto LBL_OP_BIT_OP;
}
} // End of primary switch
// All cases should loop explicitly back
VM_ASSERT_UNREACHABLE(vm);
/* ------------------------------------------------------------------------- */
/* LBL_OP_LOAD_ARG */
/* Expects: */
/* reg1: argument index */
/* ------------------------------------------------------------------------- */
LBL_OP_LOAD_ARG: {
CODE_COVERAGE(32); // Hit
reg2 /* argCountAndFlags */ = reg->argCountAndFlags;
if (reg1 /* argIndex */ < (uint8_t)reg2 /* argCount */) {
CODE_COVERAGE(64); // Hit
reg1 /* result */ = reg->pArgs[reg1 /* argIndex */];
} else {
CODE_COVERAGE_UNTESTED(65); // Not hit
reg1 = VM_VALUE_UNDEFINED;
}
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* LBL_CALL_SHORT */
/* Expects: */
/* reg1: index into short-call table */
/* ------------------------------------------------------------------------- */
LBL_CALL_SHORT: {
CODE_COVERAGE_UNTESTED(173); // Not hit
LongPtr lpShortCallTable = getBytecodeSection(vm, BCS_SHORT_CALL_TABLE, NULL);
LongPtr lpShortCallTableEntry = LongPtr_add(lpShortCallTable, reg1 * sizeof (vm_TsShortCallTableEntry));
#if MVM_SAFE_MODE
LongPtr lpShortCallTableEnd;
getBytecodeSection(vm, BCS_SHORT_CALL_TABLE, &lpShortCallTableEnd);
VM_ASSERT(vm, lpShortCallTableEntry < lpShortCallTableEnd);
#endif
reg2 /* target */ = LongPtr_read2_aligned(lpShortCallTableEntry);
lpShortCallTableEntry = LongPtr_add(lpShortCallTableEntry, 2);
// Note: reg1 holds the new argCountAndFlags, but the flags are zero in this situation
reg1 /* argCountAndFlags */ = LongPtr_read1(lpShortCallTableEntry);
reg3 /* scope */ = VM_VALUE_UNDEFINED;
// The low bit of the target indicates if this is a call to the host
bool isHostCall = reg2 & 1;
if (isHostCall) {
CODE_COVERAGE_UNTESTED(67); // Not hit
goto LBL_CALL_HOST_COMMON;
} else {
CODE_COVERAGE_UNTESTED(68); // Not hit
reg2 >>= 1;
goto LBL_CALL_BYTECODE_FUNC;
}
} // LBL_CALL_SHORT
/* ------------------------------------------------------------------------- */
/* LBL_OP_BIT_OP */
/* Expects: */
/* reg1: vm_TeBitwiseOp */
/* reg2: first popped operand */
/* ------------------------------------------------------------------------- */
LBL_OP_BIT_OP: {
int32_t reg1I = 0;
int32_t reg2I = 0;
int8_t reg2B = 0;
reg3 = reg1;
// Convert second operand to an int32
reg2I = mvm_toInt32(vm, reg2);
// If it's a binary operator, then we pop a second operand
if (reg3 < VM_BIT_OP_DIVIDER_2) {
CODE_COVERAGE(117); // Hit
reg1 = POP();
reg1I = mvm_toInt32(vm, reg1);
// If we're doing a shift operation, the shift amount is masked to the 0-31 range
if (reg3 < VM_BIT_OP_END_OF_SHIFT_OPERATORS) {
reg2B = reg2I & 0x1F;
}
} else {
CODE_COVERAGE(118); // Hit
}
VM_ASSERT(vm, reg3 < VM_BIT_OP_END);
MVM_SWITCH (reg3, (VM_BIT_OP_END - 1)) {
MVM_CASE(VM_BIT_OP_SHR_ARITHMETIC): {
CODE_COVERAGE(93); // Hit
reg1I = reg1I >> reg2B;
break;
}
MVM_CASE(VM_BIT_OP_SHR_LOGICAL): {
CODE_COVERAGE(94); // Hit
// Cast the number to unsigned int so that the C interprets the shift
// as unsigned/logical rather than signed/arithmetic.
reg1I = (int32_t)((uint32_t)reg1I >> reg2B);
#if MVM_SUPPORT_FLOAT && MVM_PORT_INT32_OVERFLOW_CHECKS
// This is a rather annoying edge case if you ask me, since all
// other bitwise operations yield signed int32 results every time.
// If the shift is by exactly zero units, then negative numbers
// become positive and overflow the signed-32 bit type. Since we
// don't have an unsigned 32 bit type, this means they need to be
// extended to floats.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Bitwise_Operators#Signed_32-bit_integers
if ((reg2B == 0) & (reg1I < 0)) {
FLUSH_REGISTER_CACHE();
reg1 = mvm_newNumber(vm, (MVM_FLOAT64)((uint32_t)reg1I));
CACHE_REGISTERS();
goto LBL_TAIL_POP_0_PUSH_REG1;
}
#endif // MVM_PORT_INT32_OVERFLOW_CHECKS
break;
}
MVM_CASE(VM_BIT_OP_SHL): {
  CODE_COVERAGE(95); // Hit
  // Perform the shift on the unsigned representation: in C, left-shifting a
  // negative signed value (or shifting a 1 into/through the sign bit) is
  // undefined behavior, whereas the unsigned shift is well defined and simply
  // discards the bits shifted out. The shift amount was already masked to
  // 5 bits above.
  reg1I = (int32_t)((uint32_t)reg1I << reg2B);
  break;
}
MVM_CASE(VM_BIT_OP_OR): {
CODE_COVERAGE(96); // Hit
reg1I = reg1I | reg2I;
break;
}
MVM_CASE(VM_BIT_OP_AND): {
CODE_COVERAGE(97); // Hit
reg1I = reg1I & reg2I;
break;
}
MVM_CASE(VM_BIT_OP_XOR): {
CODE_COVERAGE(98); // Hit
reg1I = reg1I ^ reg2I;
break;
}
MVM_CASE(VM_BIT_OP_NOT): {
CODE_COVERAGE(99); // Hit
reg1I = ~reg2I;
break;
}
}
CODE_COVERAGE(101); // Hit
// Convert the result from a 32-bit integer
if ((reg1I >= VM_MIN_INT14) && (reg1I <= VM_MAX_INT14)) {
CODE_COVERAGE(34); // Hit
reg1 = VirtualInt14_encode(vm, (uint16_t)reg1I);
} else {
CODE_COVERAGE(35); // Hit
FLUSH_REGISTER_CACHE();
reg1 = mvm_newInt32(vm, reg1I);
CACHE_REGISTERS();
}
goto LBL_TAIL_POP_0_PUSH_REG1;
} // End of LBL_OP_BIT_OP
/* ------------------------------------------------------------------------- */
/* LBL_OP_EXTENDED_1 */
/* Expects: */
/* reg1: vm_TeOpcodeEx1 */
/* ------------------------------------------------------------------------- */
LBL_OP_EXTENDED_1: {
CODE_COVERAGE(102); // Hit
reg3 = reg1;
VM_ASSERT(vm, reg3 <= VM_OP1_END);
MVM_SWITCH (reg3, VM_OP1_END - 1) {
/* ------------------------------------------------------------------------- */
/* VM_OP1_RETURN_x */
/* Expects: */
/* reg1: vm_TeOpcodeEx1 */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_RETURN): {
CODE_COVERAGE(107); // Hit
reg1 = POP();
goto LBL_RETURN;
}
MVM_CASE (VM_OP1_THROW): {
CODE_COVERAGE(106); // Hit
reg1 = POP(); // The exception value
// Find the closest catch block
reg2 = reg->catchTarget;
// If none, it's an uncaught exception
if (reg2 == VM_VALUE_UNDEFINED) {
CODE_COVERAGE(208); // Hit
*out_result = reg1;
err = MVM_E_UNCAUGHT_EXCEPTION;
goto LBL_EXIT;
} else {
CODE_COVERAGE(209); // Hit
}
VM_ASSERT(vm, ((intptr_t)reg2 & 1) == 1);
// Unwind the stack. regP1 is the stack pointer address we want to land up at
regP1 = (uint16_t*)(((intptr_t)getBottomOfStack(vm->stack) + (intptr_t)reg2) & ~1);
VM_ASSERT(vm, pStackPointer >= getBottomOfStack(vm->stack));
VM_ASSERT(vm, pStackPointer < getTopOfStackSpace(vm->stack));
while (pFrameBase > regP1) {
CODE_COVERAGE(211); // Hit
// Near the beginning of mvm_call, we set `catchTarget` to undefined
// (and then restore at the end), which should direct exceptions through
// the path of "uncaught exception" above, so no frame here should ever
// be a host frame.
VM_ASSERT(vm, !(reg->argCountAndFlags & AF_CALLED_FROM_HOST));
// In the current frame structure, the size of the preceding frame is
// saved 4 words ahead of the frame base
pStackPointer = pFrameBase;
POP_REGISTERS();
}
pStackPointer = regP1;
// The next catch target is the outer one
reg->catchTarget = pStackPointer[0];
// Jump to the catch block
reg2 = pStackPointer[1];
VM_ASSERT(vm, (reg2 & 1) == 1);
lpProgramCounter = LongPtr_add(vm->lpBytecode, reg2 & ~1);
// Push the exception to the stack for the catch block to use
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_CLOSURE_NEW */
/* Expects: */
/* reg3: vm_TeOpcodeEx1 */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_CLOSURE_NEW): {
CODE_COVERAGE(599); // Hit
FLUSH_REGISTER_CACHE();
TsClosure* pClosure = gc_allocateWithHeader(vm, sizeof (TsClosure), TC_REF_CLOSURE);
CACHE_REGISTERS();
pClosure->scope = reg->scope; // Capture the current scope
pClosure->target = POP();
reg1 = ShortPtr_encode(vm, pClosure);
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_NEW */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_NEW): {
  CODE_COVERAGE(347); // Hit
  // Implements `new Class(...args)`: allocates a fresh object whose prototype
  // is looked up from the class's `prototype` property, installs the object
  // as the `this` argument, and then calls the class constructor function.
  READ_PGM_1(reg1); // arg count
  regP1 = &pStackPointer[-reg1 - 1]; // Pointer to class
  reg1 /*argCountAndFlags*/ |= AF_PUSHED_FUNCTION;
  reg2 /*class*/ = regP1[0];
  // Can only `new` classes in Microvium
  if (deepTypeOf(vm, reg2) != TC_REF_CLASS) {
    err = MVM_E_USING_NEW_ON_NON_CLASS;
    goto LBL_EXIT;
  }
  regLP1 = DynamicPtr_decode_long(vm, reg2);
  // Note: using the stack as a temporary store because things can shift
  // during a GC collection and we need these temporaries to be GC-visible.
  // It's safe to trash these particular slots. The regP1[1] slot holds the
  // `this` value passed by the caller, which will always be undefined
  // because `new` doesn't allow passing a `this`, and `regP1[0]` holds the
  // class, which we've already read.
  regP1[1] /*props*/ = READ_FIELD_2(regLP1, TsClass, staticProps);
  regP1[0] /*func*/ = READ_FIELD_2(regLP1, TsClass, constructorFunc);
  // Using the stack just to root this in the GC graph
  PUSH(getBuiltin(vm, BIN_STR_PROTOTYPE));
  // We've already checked that the target of the `new` operation is a
  // class. A class cannot exist without a `prototype` property. If the
  // class was created at compile time, the "prototype" string will be
  // embedded in the bytecode because the class definition uses it. If the
  // class was created at runtime, the "prototype" string will *also* be
  // embedded in the bytecode because classes at runtime are only created by
  // sequences of instructions that also include a reference to the
  // "prototype" string. So either way, the fact that we're at this point in
  // the code means that the "prototype" string must exist as a builtin.
  VM_ASSERT(vm, pStackPointer[-1] != VM_VALUE_UNDEFINED);
  FLUSH_REGISTER_CACHE();
  TsPropertyList* pObject = GC_ALLOCATE_TYPE(vm, TsPropertyList, TC_REF_PROPERTY_LIST);
  pObject->dpNext = VM_VALUE_NULL;
  // Capture the result of the lookup so that a failure is reported below
  // (after the register cache and the stack have been restored) rather than
  // being silently ignored.
  err = getProperty(vm, &regP1[1], &pStackPointer[-1], &pObject->dpProto);
  // Only these types are accepted as a prototype; anything else is replaced
  // with null.
  TeTypeCode tc = deepTypeOf(vm, pObject->dpProto);
  if ((tc != TC_REF_PROPERTY_LIST) && (tc != TC_REF_CLASS) && (tc != TC_REF_ARRAY)) {
    pObject->dpProto = VM_VALUE_NULL;
  }
  CACHE_REGISTERS();
  POP(); // BIN_STR_PROTOTYPE
  if (err != MVM_E_SUCCESS) goto LBL_EXIT;
  // The first argument is the `this` value
  regP1[1] = ShortPtr_encode(vm, pObject);
  reg2 = regP1[0];
  goto LBL_CALL;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_SCOPE_PUSH */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_SCOPE_PUSH): {
CODE_COVERAGE(605); // Hit
READ_PGM_1(reg1); // Scope variable count
reg2 = (reg1 + 1) * 2; // Scope array size, including 1 slot for parent reference
FLUSH_REGISTER_CACHE();
uint16_t* newScope = gc_allocateWithHeader(vm, reg2, TC_REF_FIXED_LENGTH_ARRAY);
CACHE_REGISTERS();
uint16_t* p = newScope;
*p++ = reg->scope; // Reference to parent
while (reg1--)
*p++ = VM_VALUE_UNDEFINED; // Initial variable values
// Add to the scope chain
reg->scope = ShortPtr_encode(vm, newScope);
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_TYPE_CODE_OF */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_TYPE_CODE_OF): {
CODE_COVERAGE_UNTESTED(607); // Not hit
reg1 = POP();
reg1 = mvm_typeOf(vm, reg1);
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_POP */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_POP): {
CODE_COVERAGE(138); // Hit
pStackPointer--;
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_TYPEOF */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_TYPEOF): {
CODE_COVERAGE(167); // Hit
// TODO: This should really be done using some kind of built-in helper
// function, but we don't support those yet. The trouble with this
// implementation is that it's doing a string allocation every time. Also
// the new string is not an interned string so it's expensive to compare
// `typeof x === y`. Basically this is just a stop-gap.
reg1 = mvm_typeOf(vm, pStackPointer[-1]);
VM_ASSERT(vm, reg1 < sizeof typeStringOffsetByType);
reg1 = typeStringOffsetByType[reg1];
VM_ASSERT(vm, reg1 < sizeof(TYPE_STRINGS) - 1);
const char* str = &TYPE_STRINGS[reg1];
FLUSH_REGISTER_CACHE();
reg1 = vm_newStringFromCStrNT(vm, str);
CACHE_REGISTERS();
goto LBL_TAIL_POP_1_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_OBJECT_NEW */
/* Expects: */
/* (nothing) */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_OBJECT_NEW): {
CODE_COVERAGE(112); // Hit
FLUSH_REGISTER_CACHE();
TsPropertyList* pObject = GC_ALLOCATE_TYPE(vm, TsPropertyList, TC_REF_PROPERTY_LIST);
CACHE_REGISTERS();
reg1 = ShortPtr_encode(vm, pObject);
pObject->dpNext = VM_VALUE_NULL;
pObject->dpProto = VM_VALUE_NULL;
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_LOGICAL_NOT */
/* Expects: */
/* (nothing) */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_LOGICAL_NOT): {
CODE_COVERAGE(113); // Hit
reg2 = POP(); // value to negate
reg1 = mvm_toBool(vm, reg2) ? VM_VALUE_FALSE : VM_VALUE_TRUE;
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_OBJECT_GET_1 */
/* Expects: */
/* reg1: objectValue */
/* reg2: propertyName */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_OBJECT_GET_1): {
CODE_COVERAGE(114); // Hit
FLUSH_REGISTER_CACHE();
err = getProperty(vm, pStackPointer - 2, pStackPointer - 1, pStackPointer - 2);
CACHE_REGISTERS();
if (err != MVM_E_SUCCESS) goto LBL_EXIT;
goto LBL_TAIL_POP_1_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_ADD */
/* Expects: */
/* reg1: left operand */
/* reg2: right operand */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_ADD): {
CODE_COVERAGE(115); // Hit
reg1 = pStackPointer[-2];
reg2 = pStackPointer[-1];
// Special case for adding unsigned 12 bit numbers, for example in most
// loops. 12 bit unsigned addition does not require any overflow checks
if (Value_isVirtualUInt12(reg1) && Value_isVirtualUInt12(reg2)) {
CODE_COVERAGE(116); // Hit
reg1 = reg1 + reg2 - VirtualInt14_encode(vm, 0);
goto LBL_TAIL_POP_2_PUSH_REG1;
} else {
CODE_COVERAGE(119); // Hit
}
if (vm_isString(vm, reg1) || vm_isString(vm, reg2)) {
CODE_COVERAGE(120); // Hit
FLUSH_REGISTER_CACHE();
// Note: the intermediate values are saved back to the stack so that
// they're preserved if there is a GC collection. Even these conversions
// can trigger a GC collection
pStackPointer[-2] = vm_convertToString(vm, pStackPointer[-2]);
pStackPointer[-1] = vm_convertToString(vm, pStackPointer[-1]);
reg1 = vm_concat(vm, &pStackPointer[-2], &pStackPointer[-1]);
CACHE_REGISTERS();
goto LBL_TAIL_POP_2_PUSH_REG1;
} else {
CODE_COVERAGE(121); // Hit
// Interpret like any of the other numeric operations
// TODO: If VM_NUM_OP_ADD_NUM might cause a GC collection, then we shouldn't be popping here
POP();
reg1 = VM_NUM_OP_ADD_NUM;
goto LBL_OP_NUM_OP;
}
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_EQUAL */
/* Expects: */
/* reg1: left operand */
/* reg2: right operand */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_EQUAL): {
CODE_COVERAGE(122); // Hit
// TODO: This popping should be done on the egress rather than the ingress
reg2 = POP();
reg1 = POP();
FLUSH_REGISTER_CACHE();
bool eq = mvm_equal(vm, reg1, reg2);
CACHE_REGISTERS();
if (eq) {
CODE_COVERAGE(483); // Hit
reg1 = VM_VALUE_TRUE;
} else {
CODE_COVERAGE(484); // Hit
reg1 = VM_VALUE_FALSE;
}
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_NOT_EQUAL */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_NOT_EQUAL): {
reg1 = pStackPointer[-2];
reg2 = pStackPointer[-1];
// TODO: there seem to be so many places where we have to flush the
// register cache, that I'm wondering if it's actually a net benefit. It
// would be worth doing an experiment to see if the code size is smaller
// without the register cache. Also, is it strictly necessary to flush all
// the registers or can we maybe define a lightweight flush that just
// flushes the stack pointer?
FLUSH_REGISTER_CACHE();
bool eq = mvm_equal(vm, reg1, reg2);
CACHE_REGISTERS();
if(eq) {
CODE_COVERAGE(123); // Hit
reg1 = VM_VALUE_FALSE;
} else {
CODE_COVERAGE(485); // Hit
reg1 = VM_VALUE_TRUE;
}
goto LBL_TAIL_POP_2_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* VM_OP1_OBJECT_SET_1 */
/* Expects: */
/* Nothing */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP1_OBJECT_SET_1): {
CODE_COVERAGE(124); // Hit
FLUSH_REGISTER_CACHE();
err = setProperty(vm, pStackPointer - 3);
CACHE_REGISTERS();
if (err != MVM_E_SUCCESS) {
CODE_COVERAGE_UNTESTED(265); // Not hit
goto LBL_EXIT;
} else {
CODE_COVERAGE(322); // Hit
}
goto LBL_TAIL_POP_3_PUSH_0;
}
} // End of VM_OP_EXTENDED_1 switch
// All cases should jump to whatever tail they intend. Nothing should get here
VM_ASSERT_UNREACHABLE(vm);
} // End of LBL_OP_EXTENDED_1
/* ------------------------------------------------------------------------- */
/* VM_OP_NUM_OP */
/* Expects: */
/* reg1: vm_TeNumberOp */
/* reg2: first popped operand */
/* ------------------------------------------------------------------------- */
LBL_OP_NUM_OP: {
CODE_COVERAGE(25); // Hit
int32_t reg1I = 0;
int32_t reg2I = 0;
reg3 = reg1;
// If it's a binary operator, then we pop a second operand
if (reg3 < VM_NUM_OP_DIVIDER) {
CODE_COVERAGE(440); // Hit
reg1 = POP();
if (toInt32Internal(vm, reg1, &reg1I) != MVM_E_SUCCESS) {
CODE_COVERAGE(444); // Hit
#if MVM_SUPPORT_FLOAT
goto LBL_NUM_OP_FLOAT64;
#endif // MVM_SUPPORT_FLOAT
} else {
CODE_COVERAGE(445); // Hit
}
} else {
CODE_COVERAGE(441); // Hit
reg1 = 0;
}
// Convert second operand to a int32 (or the only operand if it's a unary op)
if (toInt32Internal(vm, reg2, &reg2I) != MVM_E_SUCCESS) {
CODE_COVERAGE(442); // Hit
// If we failed to convert to int32, then we need to process the operation as a float
#if MVM_SUPPORT_FLOAT
goto LBL_NUM_OP_FLOAT64;
#endif // MVM_SUPPORT_FLOAT
} else {
CODE_COVERAGE(443); // Hit
}
VM_ASSERT(vm, reg3 < VM_NUM_OP_END);
MVM_SWITCH (reg3, (VM_NUM_OP_END - 1)) {
MVM_CASE(VM_NUM_OP_LESS_THAN): {
CODE_COVERAGE(78); // Hit
reg1 = reg1I < reg2I;
goto LBL_TAIL_PUSH_REG1_BOOL;
}
MVM_CASE(VM_NUM_OP_GREATER_THAN): {
CODE_COVERAGE(79); // Hit
reg1 = reg1I > reg2I;
goto LBL_TAIL_PUSH_REG1_BOOL;
}
MVM_CASE(VM_NUM_OP_LESS_EQUAL): {
CODE_COVERAGE(80); // Hit
reg1 = reg1I <= reg2I;
goto LBL_TAIL_PUSH_REG1_BOOL;
}
MVM_CASE(VM_NUM_OP_GREATER_EQUAL): {
CODE_COVERAGE(81); // Hit
reg1 = reg1I >= reg2I;
goto LBL_TAIL_PUSH_REG1_BOOL;
}
MVM_CASE(VM_NUM_OP_ADD_NUM): {
CODE_COVERAGE(82); // Hit
#if MVM_SUPPORT_FLOAT && MVM_PORT_INT32_OVERFLOW_CHECKS
#if __has_builtin(__builtin_add_overflow)
if (__builtin_add_overflow(reg1I, reg2I, &reg1I)) {
goto LBL_NUM_OP_FLOAT64;
}
#else // No builtin overflow
int32_t result = reg1I + reg2I;
// Check overflow https://blog.regehr.org/archives/1139
if (((reg1I ^ result) & (reg2I ^ result)) < 0) goto LBL_NUM_OP_FLOAT64;
reg1I = result;
#endif // No builtin overflow
#else // No overflow checks
reg1I = reg1I + reg2I;
#endif
break;
}
MVM_CASE(VM_NUM_OP_SUBTRACT): {
CODE_COVERAGE(83); // Hit
#if MVM_SUPPORT_FLOAT && MVM_PORT_INT32_OVERFLOW_CHECKS
#if __has_builtin(__builtin_sub_overflow)
if (__builtin_sub_overflow(reg1I, reg2I, &reg1I)) {
goto LBL_NUM_OP_FLOAT64;
}
#else // No builtin overflow
reg2I = -reg2I;
int32_t result = reg1I + reg2I;
// Check overflow https://blog.regehr.org/archives/1139
if (((reg1I ^ result) & (reg2I ^ result)) < 0) goto LBL_NUM_OP_FLOAT64;
reg1I = result;
#endif // No builtin overflow
#else // No overflow checks
reg1I = reg1I - reg2I;
#endif
break;
}
MVM_CASE(VM_NUM_OP_MULTIPLY): {
CODE_COVERAGE(84); // Hit
#if MVM_SUPPORT_FLOAT && MVM_PORT_INT32_OVERFLOW_CHECKS
#if __has_builtin(__builtin_mul_overflow)
if (__builtin_mul_overflow(reg1I, reg2I, &reg1I)) {
goto LBL_NUM_OP_FLOAT64;
}
#else // No builtin overflow
// There isn't really an efficient way to determine multiplied
// overflow on embedded devices without accessing the hardware
// status registers. The fast shortcut here is to just assume that
// anything more than 14-bit multiplication could overflow a 32-bit
// integer.
if (Value_isVirtualInt14(reg1) && Value_isVirtualInt14(reg2)) {
reg1I = reg1I * reg2I;
} else {
goto LBL_NUM_OP_FLOAT64;
}
#endif // No builtin overflow
#else // No overflow checks
reg1I = reg1I * reg2I;
#endif
break;
}
MVM_CASE(VM_NUM_OP_DIVIDE): {
CODE_COVERAGE(85); // Hit
#if MVM_SUPPORT_FLOAT
// With division, we leave it up to the user to write code that
// performs integer division instead of floating point division, so
// this instruction is always the case where they're doing floating
// point division.
goto LBL_NUM_OP_FLOAT64;
#else // !MVM_SUPPORT_FLOAT
err = vm_newError(vm, MVM_E_OPERATION_REQUIRES_FLOAT_SUPPORT);
goto LBL_EXIT;
#endif
}
MVM_CASE(VM_NUM_OP_DIVIDE_AND_TRUNC): {
  CODE_COVERAGE(86); // Hit
  // Integer division of reg1I by reg2I. Division by zero yields 0 rather
  // than trapping.
  if (reg2I == 0) {
    reg1I = 0;
    break;
  }
  // INT32_MIN / -1 overflows int32, which is undefined behavior in C (and a
  // hardware trap on some targets). The mathematical quotient 2^31 wraps to
  // INT32_MIN when truncated to a signed 32-bit integer, which is the value
  // reg1I already holds, so leave it unchanged.
  // NOTE(review): presumably this matches the JS `(a / b) | 0` result the
  // instruction is meant to implement -- confirm against the compiler.
  if ((reg2I == -1) && (reg1I == INT32_MIN)) {
    break;
  }
  reg1I = reg1I / reg2I;
  break;
}
MVM_CASE(VM_NUM_OP_REMAINDER): {
  CODE_COVERAGE(87); // Hit
  // Integer remainder of reg1I by reg2I. A zero divisor produces NaN.
  if (reg2I == 0) {
    CODE_COVERAGE(26); // Hit
    reg1 = VM_VALUE_NAN;
    goto LBL_TAIL_POP_0_PUSH_REG1;
  }
  CODE_COVERAGE(90); // Hit
  // Any integer modulo -1 is 0. Handle it explicitly because
  // `INT32_MIN % -1` is undefined behavior in C (the implied quotient
  // overflows), while every other dividend already gives 0 here.
  if (reg2I == -1) {
    reg1I = 0;
    break;
  }
  reg1I = reg1I % reg2I;
  break;
}
MVM_CASE(VM_NUM_OP_POWER): {
CODE_COVERAGE(88); // Hit
#if MVM_SUPPORT_FLOAT
// Maybe in future we can we implement an integer version.
goto LBL_NUM_OP_FLOAT64;
#else // !MVM_SUPPORT_FLOAT
err = vm_newError(vm, MVM_E_OPERATION_REQUIRES_FLOAT_SUPPORT);
goto LBL_EXIT;
#endif
}
MVM_CASE(VM_NUM_OP_NEGATE): {
CODE_COVERAGE(89); // Hit
#if MVM_SUPPORT_FLOAT && MVM_PORT_INT32_OVERFLOW_CHECKS
// Note: Zero negates to negative zero, which is not representable as an int32
if ((reg2I == INT32_MIN) || (reg2I == 0)) goto LBL_NUM_OP_FLOAT64;
#endif
reg1I = -reg2I;
break;
}
MVM_CASE(VM_NUM_OP_UNARY_PLUS): {
reg1I = reg2I;
break;
}
} // End of switch vm_TeNumberOp for int32
// Convert the result from a 32-bit integer
if ((reg1I >= VM_MIN_INT14) && (reg1I <= VM_MAX_INT14)) {
CODE_COVERAGE(103); // Hit
reg1 = VirtualInt14_encode(vm, (uint16_t)reg1I);
} else {
CODE_COVERAGE(104); // Hit
FLUSH_REGISTER_CACHE();
reg1 = mvm_newInt32(vm, reg1I);
CACHE_REGISTERS();
}
goto LBL_TAIL_POP_0_PUSH_REG1;
} // End of case LBL_OP_NUM_OP
/* ------------------------------------------------------------------------- */
/* LBL_OP_EXTENDED_2 */
/* Expects: */
/* reg1: vm_TeOpcodeEx2 */
/* ------------------------------------------------------------------------- */
LBL_OP_EXTENDED_2: {
// Dispatcher for the "extended 2" instruction group. The ex-2 opcode arrives
// in reg1 and is moved into reg3; reg1 is then reloaded with the
// instruction's 8-bit literal. Every case either completes its work here or
// jumps to a shared handler label -- no case is expected to fall out of the
// bottom of the switch.
CODE_COVERAGE(127); // Hit
reg3 = reg1;
// All the ex-2 instructions have an 8-bit parameter. This is stored in
// reg1 for consistency with 4-bit and 16-bit literal modes
READ_PGM_1(reg1);
// Some operations pop an operand off the stack. This goes into reg2. These
// are the opcodes numbered below VM_OP2_DIVIDER_1.
if (reg3 < VM_OP2_DIVIDER_1) {
CODE_COVERAGE(128); // Hit
reg2 = POP();
} else {
CODE_COVERAGE(129); // Hit
}
VM_ASSERT(vm, reg3 < VM_OP2_END);
MVM_SWITCH (reg3, (VM_OP2_END - 1)) {
/* ------------------------------------------------------------------------- */
/* VM_OP2_BRANCH_1 */
/* Expects: */
/* reg1: signed 8-bit offset to branch to, encoded in 16-bit unsigned */
/* reg2: condition to branch on */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_BRANCH_1): {
CODE_COVERAGE(130); // Hit
// Widen the 8-bit signed offset held in reg1 before handing over to the
// shared branch handler
SIGN_EXTEND_REG_1();
goto LBL_BRANCH_COMMON;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_STORE_ARG */
/* Expects: */
/* reg1: unsigned index of argument in which to store */
/* reg2: value to store */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_STORE_ARG): {
CODE_COVERAGE_UNTESTED(131); // Not hit
#if MVM_DONT_TRUST_BYTECODE
// The ability to write to argument slots is intended as an optimization
// feature to elide the parameter variable slots and instead use the
// argument slots directly. But this only works if the optimizer can
// prove that unprovided parameters are never written to (or that all
// parameters are satisfied by arguments). If you don't trust the
// optimizer, it's possible the callee attempts to write to the
// caller-provided argument slots that don't exist.
//
// The index is checked against the low 8 bits of argCountAndFlags (the
// same low byte is used as the argument count in VM_OP2_CALL_3 below).
if (reg1 >= (uint8_t)reg->argCountAndFlags) {
err = vm_newError(vm, MVM_E_INVALID_BYTECODE);
goto LBL_EXIT;
}
#endif
reg->pArgs[reg1] = reg2;
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_STORE_SCOPED_2 */
/* Expects: */
/* reg1: unsigned index of scoped variable in which to store */
/* reg2: value to store */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_STORE_SCOPED_2): {
CODE_COVERAGE(132); // Hit
// NOTE(review): this banner previously described reg1 as the index of a
// "global"; reworded to mirror VM_OP2_LOAD_SCOPED_2 -- confirm against
// LBL_OP_STORE_SCOPED.
goto LBL_OP_STORE_SCOPED;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_STORE_VAR_2 */
/* Expects: */
/* reg1: unsigned index of variable in which to store, relative to SP */
/* reg2: value to store */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_STORE_VAR_2): {
CODE_COVERAGE_UNTESTED(133); // Not hit
goto LBL_OP_STORE_VAR;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_JUMP_1 */
/* Expects: */
/* reg1: signed 8-bit offset to branch to, encoded in 16-bit unsigned */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_JUMP_1): {
CODE_COVERAGE(136); // Hit
// Widen the 8-bit signed offset held in reg1 before handing over to the
// shared jump handler
SIGN_EXTEND_REG_1();
goto LBL_JUMP_COMMON;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_CALL_HOST */
/* Expects: */
/* reg1: arg count */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_CALL_HOST): {
CODE_COVERAGE_UNTESTED(137); // Not hit
// TODO: Unit tests for the host calling itself etc.
// Put function index into reg2. This is a second 8-bit literal, read from
// the program after the arg count that is already in reg1.
READ_PGM_1(reg2);
// Note: reg1 is the argCount and also argCountAndFlags, because the flags
// are all zero in this case. In particular, the target is specified as an
// instruction literal, so `AF_PUSHED_FUNCTION` is false.
goto LBL_CALL_HOST_COMMON;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_CALL_3 */
/* Expects: */
/* reg1: arg count */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_CALL_3): {
CODE_COVERAGE(142); // Hit
reg1 /* argCountAndFlags */ |= AF_PUSHED_FUNCTION;
// The (uint8_t) cast recovers the arg count from reg1 now that the flag
// has been OR'd in; the target sits one slot below the arguments.
reg2 /* target */ = pStackPointer[-(int16_t)(uint8_t)reg1 - 1]; // The function was pushed before the arguments
goto LBL_CALL;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_CALL_6 */
/* Expects: */
/* reg1: index into short-call table */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_CALL_6): {
CODE_COVERAGE_UNTESTED(145); // Not hit
goto LBL_CALL_SHORT;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_LOAD_SCOPED_2 */
/* Expects: */
/* reg1: unsigned closure scoped variable index */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_LOAD_SCOPED_2): {
CODE_COVERAGE(146); // Hit
goto LBL_OP_LOAD_SCOPED;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_LOAD_VAR_2 */
/* Expects: */
/* reg1: unsigned variable index relative to stack pointer */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_LOAD_VAR_2): {
CODE_COVERAGE_UNTESTED(147); // Not hit
goto LBL_OP_LOAD_VAR;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_LOAD_ARG_2 */
/* Expects: */
/* reg1: unsigned index of argument to load (presumably -- TODO confirm) */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_LOAD_ARG_2): {
CODE_COVERAGE_UNTESTED(148); // Not hit
// This instruction is not implemented: encountering it exits the run loop
// with a fatal error code.
VM_NOT_IMPLEMENTED(vm);
err = MVM_E_FATAL_ERROR_MUST_KILL_VM;
goto LBL_EXIT;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_EXTENDED_4 */
/* Expects: */
/* reg1: The Ex-4 instruction opcode */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_EXTENDED_4): {
CODE_COVERAGE(149); // Hit
goto LBL_OP_EXTENDED_4;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_ARRAY_NEW */
/* Expects: */
/* reg1: Array capacity */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_ARRAY_NEW): {
CODE_COVERAGE(100); // Hit
// Requested capacity, counted in 16-bit slots (the data allocation below
// asks for capacity * 2 and then fills `capacity` uint16_t entries)
uint16_t capacity = reg1;
TABLE_COVERAGE(capacity ? 1 : 0, 2, 371); // Hit 2/2
// The register cache is flushed before each allocation and reloaded
// afterwards
FLUSH_REGISTER_CACHE();
MVM_LOCAL(TsArray*, arr, GC_ALLOCATE_TYPE(vm, TsArray, TC_REF_ARRAY));
CACHE_REGISTERS();
reg1 = ShortPtr_encode(vm, MVM_GET_LOCAL(arr));
PUSH(reg1); // We need to push early to avoid the GC collecting it
// The array starts out empty with no data allocation
MVM_GET_LOCAL(arr)->viLength = VirtualInt14_encode(vm, 0);
MVM_GET_LOCAL(arr)->dpData = VM_VALUE_NULL;
if (capacity) {
FLUSH_REGISTER_CACHE();
uint16_t* pData = gc_allocateWithHeader(vm, capacity * 2, TC_REF_FIXED_LENGTH_ARRAY);
CACHE_REGISTERS();
// Re-read the array from the stack slot pushed above rather than trusting
// the pointer obtained before the second allocation
MVM_SET_LOCAL(arr, ShortPtr_decode(vm, pStackPointer[-1])); // arr may have moved during the collection
MVM_GET_LOCAL(arr)->dpData = ShortPtr_encode(vm, pData);
// Mark every slot of the new data allocation as deleted
uint16_t* p = pData;
uint16_t n = capacity;
while (n--)
*p++ = VM_VALUE_DELETED;
}
// The result was already pushed above, so the tail neither pops nor pushes
goto LBL_TAIL_POP_0_PUSH_0;
}
/* ------------------------------------------------------------------------- */
/* VM_OP2_FIXED_ARRAY_NEW_2 */
/* Expects: */
/* reg1: Fixed-array length (8-bit) */
/* ------------------------------------------------------------------------- */
MVM_CASE (VM_OP2_FIXED_ARRAY_NEW_2): {
CODE_COVERAGE_UNTESTED(135); // Not hit
goto LBL_FIXED_ARRAY_NEW;
}
} // End of vm_TeOpcodeEx2 switch
// All cases should jump to whatever tail they intend. Nothing should get here
VM_ASSERT_UNREACHABLE(vm);
} // End of LBL_OP_EXTENDED_2
/* ------------------------------------------------------------------------- */
/* LBL_FIXED_ARRAY_NEW */
/* Expects: */
/* reg1: length of fixed-array to create */
/* ------------------------------------------------------------------------- */
LBL_FIXED_ARRAY_NEW: {
// Allocate the backing storage for a fixed-length array of reg1 slots (two
// bytes are requested per slot). The register cache is flushed before the
// allocation and reloaded afterwards.
FLUSH_REGISTER_CACHE();
uint16_t* arr = gc_allocateWithHeader(vm, reg1 * 2, TC_REF_FIXED_LENGTH_ARRAY);
CACHE_REGISTERS();
// Fill every slot with the DELETED marker. A DELETED slot reads back as
// `undefined`; when a fixed-length array holds closure variables, DELETED
// can also stand for a variable that is still in its TDZ.
uint16_t* const arrEnd = arr + reg1;
for (uint16_t* slot = arr; slot != arrEnd; slot++) {
*slot = VM_VALUE_DELETED;
}
// The result of the instruction is a reference to the new allocation
reg1 = ShortPtr_encode(vm, arr);
goto LBL_TAIL_POP_0_PUSH_REG1;
}
/* ------------------------------------------------------------------------- */
/* LBL_OP_EXTENDED_3 */
/* Expects: */
/* reg1: vm_TeOpcodeEx3 */
/* ------------------------------------------------------------------------- */
LBL_OP_EXTENDED_3: {
CODE_COVERAGE(150); // Hit
reg3 = reg1;
// Most Ex-3 instructions have a 16-bit parameter
if (reg3 >= VM_OP3_DIVIDER_1) {
CODE_COVERAGE(603); // Hit
READ_PGM_2(reg1);
} else {
CODE_COVERAGE(606); // Hit
}
if (reg3 >= VM_OP3_DIVIDER_2) {
CODE_COVERAGE(151); // Hit
reg2 = POP();
} else {
CODE_COVERAGE(152); // Hit
}
VM_ASSERT(vm, reg3 < VM_OP3_END);
MVM_SWITCH (reg3, (VM_OP3_END - 1)) {
/* ------------------------------------------------------------------------- */
/* VM_OP3_POP_N */
/* Expects: */
/* Noth