mini_jit

class Brgemm

Public Types

enum class dtype_t : uint32_t

data type

Values:

enumerator fp32
enumerator fp64
enum class error_t : int32_t

error codes

Values:

enumerator success
enumerator err_wrong_dtype
enumerator err_wrong_dimension
enumerator err_row_major_order_not_supported
enumerator err_batch_reduce_size_not_supported
using kernel_t = void (*)(void const *a, void const *b, void *c, int64_t lda, int64_t ldb, int64_t ldc, int64_t br_stride_a, int64_t br_stride_b)

Public Functions

error_t generate(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size, uint32_t trans_a, uint32_t trans_b, uint32_t trans_c, dtype_t dtype)

Generate a kernel for batch-reduce matrix multiplication.

Parameters:
  • m – number of rows in A and C.

  • n – number of columns in B and C.

  • k – number of columns in A and rows in B.

  • br_size – batch-reduce size.

  • trans_a – 0 if A is stored in column-major order, 1 if A is stored in row-major order.

  • trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order.

  • trans_c – 0 if C is stored in column-major order, 1 if C is stored in row-major order.

  • dtype – data type of the matrices.

Returns:

error_t::success on success, another error_t value otherwise.

kernel_t get_kernel() const

Get the generated kernel: C += sum_i(A_i * B_i).

Returns:

pointer to the generated kernel.

void write_kernel_to_file(const char *path) const

Writes the current kernel into a file.

Parameters:

path – The file to write the kernel to.

Private Functions

void fill_with_matmuls_no_batch_dim_column_major_fp32(uint32_t m, uint32_t n, uint32_t k)

Fills the kernel with a suitable matmul with no batch size, column major format, and fp32 datatypes.

Parameters:
  • Kernel – The kernel to add instructions to.

  • m – number of rows in A and C.

  • n – number of columns in B and C.

  • k – number of columns in A and rows in B.

void fill_with_matmuls_batch_dim_column_major_fp32(uint32_t m, uint32_t n, uint32_t k, uint32_t br_size)

Fills the kernel with a suitable matmul with a batch dimension, column major format, and fp32 datatypes.

Parameters:
  • Kernel – The kernel to add instructions to.

  • m – number of rows in A and C.

  • n – number of columns in B and C.

  • k – number of columns in A and rows in B.

  • br_size – batch-reduce size.

Private Members

kernel_t kernel = nullptr
mini_jit::Kernel native_kernel
class EinsumTree

Public Types

enum class ErrorParse

Values:

enumerator None
enumerator ExpectedLeftBracket
enumerator ExpectedRightBracket
enumerator ExpectedArrow
enumerator ExpectedComma
enumerator ExpectedDimensionList
enumerator NotAllowedToParseAgain
enumerator UndefinedNode
enum class ErrorExecute

Values:

enumerator None
enumerator InvalidRoot
enumerator NotEnoughInputTensors
enumerator TooManyInputTensors
enumerator NullPtrAsInputTensor
enumerator err_wrong_dtype
enumerator err_wrong_dimension
enumerator err_wrong_primitive
enumerator err_wrong_first_touch_primitive
enumerator err_wrong_main_primitive
enumerator err_wrong_last_touch_primitive
enumerator err_execution_type_not_supported
enumerator err_invalid_primitive_configuration
enumerator err_invalid_first_touch_configuration
enumerator err_invalid_main_configuration
enumerator err_invalid_last_touch_configuration
enumerator err_invalid_execution_order
enumerator err_invalid_strides
enumerator err_k_dimension_must_not_be_shared
enumerator err_shared_required_for_parallel_execution
enum class NodeType

Values:

enumerator Leaf
enumerator Contraction
enumerator Transposition

Public Functions

EinsumTree(const std::string &tree_str)
EinsumTree(const std::string &tree_str, const std::vector<int64_t> &sorted_dim_sizes)
~EinsumTree()
void set_sorted_dim_sizes(const std::vector<int64_t> &sorted_dim_sizes)

Sets the sorted dimension sizes of the input tensors.

Parameters:

sorted_dim_sizes – The sorted dim sizes

ErrorParse parse_tree_no_optimization()

Parses the einsum tree string and builds the tree structure.

Returns:

ErrorParse indicating the result of the parsing operation.

ErrorParse parse_tree()

Parses the einsum tree string, builds the tree structure and optimizes the tree.

Returns:

ErrorParse indicating the result of the parsing operation.

EinsumNode *get_root() const

Returns the root node of the EinsumTree.

Returns:

Pointer to the root EinsumNode.

void optimize(EinsumNode *node)

Optimizes the einsum tree structure.

Parameters:

node – The node and its children to optimize.

void conditional_swap(mini_jit::EinsumTree::EinsumNode *node)

Ensures that the ‘m’ dimension is unit stride, by swapping if ‘n’ dimension is unit stride.

Parameters:

node – The node of type tensor contraction.

void reorder_left_node(EinsumNode *node)

Reorders left node of a contraction to ensure the ‘km’ dimensions are at the right. The ‘m’ dimension has unit-stride.

Parameters:

node – The EinsumNode representing the parent of the contraction.

void reorder_right_node(EinsumNode *node)

Reorders right node of a contraction to ensure the ‘nk’ dimensions are at the right. The ‘k’ dimension has unit-stride.

Parameters:

node – The EinsumNode representing the parent of the contraction.

ErrorExecute execute(const std::vector<void*> &tensors)

Executes the einsum operation defined by the tree.

Parameters:

tensors – A vector of pointers to the input tensors of the leaves.

Returns:

ErrorExecute indicating the result of the execution operation.

Private Functions

EinsumNode *parse_node(size_t &pos, const std::string &str)

Parses a node from the string starting at the given position in the einsum tree. The node can be a leaf, contraction, or transposition node.

Parameters:
  • pos – The position in the string to start parsing from.

  • str – The string containing the einsum tree representation.

Returns:

A pointer to the parsed EinsumNode.

TensorConfig lower_node(const EinsumNode *node)

Lowers the given EinsumNode to a TensorConfig.

Parameters:

node – The EinsumNode to lower.

Returns:

A TensorConfig representing the lowered node.

void get_config_dim_types_and_sizes(const mini_jit::EinsumTree::EinsumNode *node, std::map<int64_t, size_t> &id_map, std::vector<mini_jit::TensorConfig::dim_t> &dim_types, std::vector<int64_t> &dim_sizes, uint32_t &number_of_k)

Retrieves the dimension types and sizes for the given EinsumNode.

Parameters:
  • node – The EinsumNode for which to retrieve the dimension types and sizes.

  • id_map – A map that associates dimension IDs with a fixed index.

  • dim_types – A vector to store the dimension types.

  • dim_sizes – A vector to store the dimension sizes.

  • number_of_k – A reference to store the number of ‘k’ dimensions.

std::vector<int64_t> get_config_strides(const EinsumNode *node, std::map<int64_t, size_t> &id_map)

Retrieves the strides for the given EinsumNode based on the provided dimension ID map.

Parameters:
  • node – The EinsumNode for which to retrieve the strides.

  • id_map – A map that associates dimension IDs with a fixed index.

Returns:

A vector of strides corresponding to the dimensions of the node.

ErrorExecute execute_node(const std::vector<void*> &input_tensors, EinsumNode *node)

Executes the tensor operation for the given EinsumNode.

Parameters:
  • input_tensors – The tensors provided by the user.

  • node – The EinsumNode to execute.

Returns:

An ErrorExecute enum indicating the result of the execution.

void assign_tensor_indices(EinsumNode *node)

Assigns intermediate tensors to the given EinsumNode.

Parameters:

node – The EinsumNode to which the tensors will be assigned.

bool is_unit_stride_n(EinsumNode *node)

Checks if the given EinsumNode has unit stride in the ‘n’ dimension.

Parameters:

node – The EinsumNode to check.

Returns:

true if the node has unit stride in the ‘n’ dimension, false otherwise.

std::vector<int64_t> parse_dim_list(size_t &pos, const std::string &str)

Parses a dimension list from the string starting at the given position. The dimension list is expected to be a comma-separated list of integers.

Parameters:
  • pos – The position in the string to start parsing from.

  • str – The string containing the dimension list.

Returns:

A vector of integers representing the parsed dimensions.

std::vector<int64_t> compute_strides(const std::vector<int64_t> &dim_ids)

Computes the strides for the given dimension IDs based on the sorted dimension sizes.

Parameters:

dim_ids – A vector of dimension IDs for which to compute the strides.

Returns:

A vector of computed strides corresponding to the dimension IDs.

std::vector<int64_t> get_output_dims(const std::vector<int64_t> &dim_ids)

Retrieves the output dimensions for the given dimension IDs based on the sorted dimension sizes.

Parameters:

dim_ids – A vector of dimension IDs for which to retrieve the output dimensions.

Returns:

A vector of output dimensions corresponding to the provided dimension IDs.

ErrorExecute parse_setup_error(TensorOperation::error_t error)

Parses the setup error from a TensorOperation error code to an ErrorExecute enum.

Parameters:

error – The error code from TensorOperation.

Returns:

An ErrorExecute enum representing the parsed error.

void delete_tree(EinsumNode *node)

Recursively deletes the EinsumNode tree starting from the given node.

int32_t findKDim(EinsumNode *Node, bool getLeftIndex)

Finds the k-dimension of the left or right child of the given node.

Parameters:
  • Node – The node to check.

  • getLeftIndex – If true, finds the k-dimension in the left child; otherwise, in the right child.

Returns:

int k-dim index if found, otherwise -1.

int32_t findNDim(EinsumNode *Node)

Finds the n-dimension of the right child of the given node.

Parameters:

Node – The node to check.

Returns:

int n-dim index if found, otherwise -1.

int32_t findMDim(EinsumNode *Node)

Finds the m-dimension of the left child of the given node.

Parameters:

Node – The node to check.

Returns:

int m-dim index if found, otherwise -1.

Private Members

uint32_t tensorIndex = 0
EinsumNode *root = nullptr
const std::string tree_str
ErrorParse error_parse = ErrorParse::None
std::vector<int64_t> dim_sizes
struct EinsumNode

Public Functions

std::string to_string() const

Gets a string representation of the einsum tree.

std::string name() const

Gets the string representation of the dim ids of the node.

int64_t get_size(const std::vector<int64_t> dim_sizes) const

Get the size of the tensor represented by this node.

Parameters:

dim_sizes – A vector of dimension sizes corresponding to the output dimensions.

Public Members

NodeType type
int32_t input_tensor_index = -1
float *tensor = nullptr
std::vector<int64_t> output_dim_ids
EinsumNode *left = nullptr
EinsumNode *right = nullptr

Private Functions

std::string _to_string(uint depth, std::string connection, std::string depthString) const

This method recursively formats the node and its children into a string.

Parameters:
  • depth – The current depth in the tree, used for indentation.

  • connection – A string representing the connection type.

  • depthString – A string representation of the current depth.

Returns:

A formatted string representing the einsum tree.

class Kernel

Public Functions

inline Kernel()

Constructor

~Kernel() noexcept

Destructor

Kernel(Kernel const&) = delete
Kernel &operator=(Kernel const&) = delete
Kernel(Kernel&&) noexcept = delete
Kernel &operator=(Kernel&&) noexcept = delete
void add(uint32_t instruction)

Adds an instruction to the code buffer.

Parameters:

instruction – instruction which is added.

void add(std::vector<uint32_t> instructions)

Adds multiple instructions to the code buffer.

Parameters:

instructions – instructions which are added.

std::size_t get_size() const

Gets the size of the code buffer.

Returns:

size of the code buffer in bytes.

std::size_t get_instruction_count() const

Gets the number of instructions in the code buffer.

Returns:

number of instructions in the code buffer.

void set_kernel()

Sets the kernel based on the code buffer.

void const *get_kernel() const

Gets a pointer to the executable kernel.

void write(char const *path) const

Writes the code buffer to the given file.

Parameters:

path – path to the file.

Private Functions

void *allocate_mmap(std::size_t size_bytes) const

Allocates memory through POSIX mmap.

Parameters:

size_bytes – size in bytes.

void release_mmap(std::size_t size_bytes, void *memory) const

Release POSIX mmap allocated memory.

Parameters:
  • size_bytes – size in bytes.

  • memory – pointer to memory which is released.

void set_executable(std::size_t size_bytes, void *memory) const

Sets the given memory region executable.

Parameters:
  • size_bytes – number of bytes.

  • memory – pointer to the memory region.

void release_memory()

Release memory of the kernel if allocated.

Private Members

std::vector<uint32_t> buffer

high-level code buffer

std::size_t size_allocate = 0

size of the kernel

void *kernel = nullptr

executable kernel

struct TensorConfig

Public Types

enum class exec_t : uint32_t

Values:

enumerator seq
enumerator prim
enumerator shared
enum class prim_t : uint32_t

primitive type

Values:

enumerator none
enumerator zero
enumerator copy
enumerator relu
enumerator gemm
enumerator brgemm
enum class dim_t : uint32_t

dimension type

Values:

enumerator undefined
enumerator c
enumerator m
enumerator n
enumerator k
enum class dtype_t : uint32_t

data type

Values:

enumerator fp32
enumerator fp64

Public Functions

std::string to_string() const

Converts the config to a string.

Returns:

std::string The string representation

Public Members

prim_t first_touch

The first touch primitive to be executed.

prim_t main

The main primitive to be executed.

prim_t last_touch

The last touch primitive to be executed.

std::vector<dim_t> dim_types

The dimension type of each dimension.

std::vector<exec_t> exec_types

The execution types of each dimension.

std::vector<int64_t> dim_sizes

The dim_sizes that are supported.

std::vector<int64_t> strides_in0

The strides of the first input of each dimension.

std::vector<int64_t> strides_in1

The strides of the second input of each dimension.

std::vector<int64_t> strides_out

The strides of the output of each dimension.

dtype_t dtype

The data type to be used in the tensor operation.

Public Static Functions

static bool equals(const TensorConfig &config1, const TensorConfig config2)

Compares the two configurations and checks whether all values are equal.

Parameters:
  • config1 – The first configuration.

  • config2 – The second configuration.

Returns:

true Both configurations are equal.

Returns:

false Both configurations are NOT equal.

class TensorOperation

Public Types

enum class error_t : int32_t

execution type

error codes

Values:

enumerator success
enumerator err_wrong_dtype
enumerator err_wrong_dimension
enumerator err_wrong_primitive
enumerator err_wrong_first_touch_primitive
enumerator err_wrong_main_primitive
enumerator err_wrong_last_touch_primitive
enumerator err_execution_type_not_supported
enumerator err_invalid_primitive_configuration
enumerator err_invalid_first_touch_configuration
enumerator err_invalid_main_configuration
enumerator err_invalid_last_touch_configuration
enumerator err_invalid_execution_order
enumerator err_invalid_strides
enumerator err_k_dimension_must_not_be_shared
enumerator err_shared_required_for_parallel_execution
enum class stride_t : int32_t

Values:

enumerator in0
enumerator in1
enumerator out

Public Functions

error_t setup(const TensorConfig &config)

Setup for a binary tensor contraction or a unary tensor operation.

Parameters:

config – The configuration of the tensor dimension and primitives.

Returns:

error_t error_t::success on success, other error values otherwise.

error_t setup_no_optimization(TensorConfig::dtype_t dtype, TensorConfig::prim_t prim_first_touch, TensorConfig::prim_t prim_main, TensorConfig::prim_t prim_last_touch, std::span<const TensorConfig::dim_t> dim_types, std::span<const TensorConfig::exec_t> exec_types, std::span<const int64_t> dim_sizes, std::span<const int64_t> strides_in0, std::span<const int64_t> strides_in1, std::span<const int64_t> strides_out)

Setup for a binary tensor contraction or a unary tensor operation.

Parameters:
  • dtype – Datatype of all tensor elements.

  • prim_first_touch – Type of the first touch primitive.

  • prim_main – Type of the main primitive.

  • prim_last_touch – Type of the last touch primitive.

  • dim_types – Dimension type of the loops (c, m, n, or k).

  • exec_types – Execution type of the loops (seq, shared, or prim).

  • dim_sizes – Sizes of the dimensions.

  • strides_in0 – Strides of the first input tensor.

  • strides_in1 – Strides of the second input tensor (ignored if unary).

  • strides_out – Strides of the output tensor.

Returns:

error_t::success on success, another error_t value otherwise.

void execute(void const *tensor_in0, void const *tensor_in1, void *tensor_out)

Execute the tensor operation.

Parameters:
  • tensor_in0 – First input tensor.

  • tensor_in1 – Second input tensor (use nullptr if unary).

  • tensor_out – Output tensor.

void execute_dimension(int64_t index_dimension, char const *ptr_in0, char const *ptr_in1, char *ptr_out, bool first_access, bool last_access)

General-purpose loop implementation featuring first and last touch operations. No threading is applied.

Parameters:
  • index_dimension – Dimension index of the loop which is executed.

  • ptr_in0 – Pointer to the first input tensor’s data.

  • ptr_in1 – Pointer to the second input tensor’s data (use nullptr if unary).

  • ptr_out – Pointer to the output tensor’s data.

  • first_access – True if first time accessing data of output tensor.

  • last_access – True if last time accessing data of output tensor.

TensorConfig get_config()

Get the current configuration object.

Returns:

TensorConfig used by the Tensor operation.

void write_kernel_to_file(std::string path_no_extension) const

Writes the current kernel into a file.

Parameters:

path_no_extension – The path of the file to write the kernel to, without extension.

Public Static Functions

static bool isExpectedStride(int64_t expected, int index, const std::span<const int64_t> &strides)

Checks if the stride matches the given stride.

Parameters:
  • expected – The stride that is expected.

  • index – The index of the stride.

  • strides – The strides of the configuration.

Returns:

true The stride matches the expected.

Returns:

false The stride does NOT match the expected stride.

static bool isValidStride(const std::span<const TensorConfig::dim_t> &dim, const std::span<const int64_t> &strides, const stride_t strideType)

Checks if the strides are valid for the given dimension.

Parameters:
  • dim – The dimension types of the configuration.

  • strides – The strides of the configuration.

Returns:

true The strides are valid.

Returns:

false The strides are NOT valid.

static bool isUnary(TensorConfig::prim_t prim)

Indicates if a primitive fits the Unary generator.

                                         .:=+######*=:.
                                     .=*##%%%%%%%%%%%%%%*-....
                                 ..=###%%%%%%%%%%%%%%%%%%%%%+:...           ..........    ..
                               ..*######%%%%%%%%%%%%%%%%%%%%@@@@%%%%%%%%%%%%%%%%%%%%%%%%%#*=:..
                             ..:-=*######%%%%%%%%%%%%%%%%%%%%%@@@%%%%%@@@@@%%####%%##****##**##*
                             .:::--#######%%%%%%###%%%%%%@@@@%%##***++++++===+**+=--:....
                            .-::---*########%%%%%%%%%%%#*+++++++++======--:............
                           .+::::--*########%%%%%%%%%%#++++===--==-.........
                           --:::---*#########%%%%%%%%%%%%%%%*......
                          .*:::::--+##########%%%%%%%%%%%@%=..
                          .#=:::::-=###########%%%%%%%%%%%-.
                          :##::::::-*###########%%%%%%%%%#..
                          -###=:::::-*###########%%%%%%%%#..
                          =###%#+::::-+*##########%%%%%%%%=..
                         .+######%%+-::-=**########%%%%%%%%*..
                         .+#########%%%%+-:--*######%%%#*====....
                         .############%%@@@@@@@@@@*===========:..
                         -############%%%@@@@@@#+=====---=--==-..
                        .#%###########%@@@@@@#====-=------------.
                       .############%%%@@@@%+=------------------.
                      :############%%%%%%%%=--------------------:.
                     -############%%%%%%%%+=::::::::::::::::----:..
                    .#############%%%%%%@#=-:::::::::::::::::::::..
                   .#############%%%%%%%@--::::::::::::::::::::::..
                   -###########%%%%%%%%@#-:::::::::::::::::::::::..
                   =########%%%##%%%%%@@=:..:::::::::::::::::::::..
                  .*######%@####*%%%%%@%:.....................:::..
                  :######%@#####*%%%%%@*........................:..
                 .*#####%@#####*#%%%%@@-.....................:::::.
                 :#####%@%#####*%@%%@@#:.......::::::::::::::::::*...
                 =#####@%%%####*%@%@@%-..........................*+..
                .*####@%%%%####*%@@@@#...........................:#*.
               .=####%@%%%%####*%@@@@:............................=#=..
               :####%@%%%%%####*%@@@=.............................:-#=.
              .=###%@@%%%%%####+%@@*..............................::-#-...
              .*###%@%%%%%%####+*@%:..............................::.:#:..
              =###%@@#%%%%%%###**%-.............................:::...=#..
             .*##%%@@#%%%%%%###*+=.............................::::....++...
            .-###%%@@#%%%%%%%###+=...........................:::::::....*-..
            .=###%%@@#%%%%%%%###*=..........................::::::::....=%..
            .+###%@@@%%%%%%%%###*+:.......................:::::::::::..::#+.
            .*##%%@@@@*%%%%%%####+:......................::::::::::::::::-#.
            .*%%%%%%@@%#%%%%%####*+......................:::::::::::::::::*=..
            .#%%%%%%@@@#%%%%%%####+....................:::::::::::::::::::#%..
            :#%%%%%%%@@%%%%%%%####*-.................:::::::::::::::::::::#%-.
            :#%%%%%%%%@@%%%%%%#####=.................:::::::::::::::::::::=%+.
            :#%%%%%%%%@@@%%%%%#####+:.............:::::::::::::::::-:::::::##.
            -#%%%%%%%%%@@%%%%%%####*:............::::::::::::::::::-:::::::-%:
            -#%%%%%%%%%@@@%%%%%#####-............:::::::::::::::::::::::::::%=
            -#%%%%%%%%%%@@%%%%%#####+..........::::::::::::::::::::.::::::::#*
            :#%%%%%%%%%%#@@%%%%#####*.........:::::::::::::::::::::..:::::::##.
            -%%%%%%%%%%%-@@%%%%%####*.......::::::::::::::::::::::...:::::::##.
            -%%%%%%%%%%*-%@%%%%%#####......:::::::::::::::::::::::...:::::::##.
            -%%%%%%%%%%+-*@@%%%%#####.....::::::::::::::::::::::::. ..-:::::%+.
            =%%%%%%%%%%--*@@%%%%####+....:::::::::::::::::::::::::. ..-::::*%-.
           .#%%%%%%%%%+--+@@%%%%%###-..:::::::::::::::::::::::::::. ..+:::*%%:.
           .%%%%%%%%%%---=@@%%%%%###:.:::::::::::::::::::::::::::.. ..*::%%%*..
           :%%%%%%%%%*--.=@%%%%%%##+.:::::::::::::::::::::::::--:.. ..%=+%%%:.
          .+%%%%%%%%%=-..+@%%%%%%##..::::::::::::::::::::::::---... ..%%%%%=..
          .*%%%%%%%%#-...#@%%%%%%#:::::::::::::::::::::::::----:.   ..#%%%+...
          :%%%%%%%%%:...:@%%%%%%%-::::::::::::::::::::::::-----..   ..=%%-..
         .*%%%%%%%%+....:@@%%%%%:::::::::::::::::::::::-------...     ....
         :%%%%%%%%%:....-%@@@@%:::::::::::::::::::::---------:...
        .+%%%%%%%%+.....:#@@@+::::::::::::::::::::----------:.
        :%%%%%%%%#.......+%*:::::::::::::::::::------------:..
       .%%%%%%%%@-.........:::::::::::::::----------------:.
      .*%%%%%%%@=........:::::::::::::------------------=...
     .+%%%%%%@%#:.......:::::::::::::....---------------....
    .+%%%%%@@@@-:......::::::::::::..    .=------------...
   .+%%@%@@@@@=::....:::::::::::::...    .-=--------==:..
  .#@@@@@@@@@*.::..:::::::::::::-:.       .=-=---=-=:=.
.*@@@@@@@@=%=...:..::::::::::::-:..       .=+**=++**-..
.:@%%%**+:.-…..:::::::::::–.. .%#######-:…. -%++-……. .. …::&#8212;:::—&#8212;::.. .=##*************+=:… …##**=+#*-… .*#************#####*##*-. -#######**+=-:… .+##********#####*###%%#*+. :##*********###****+=:….-+++**####*#####**+-.:+*. .+#******####*****#%###*+:.. .+##**-..=#. .*##**********#%####%%%#*=… .. … …:::-+*####*#%###=…-. .-==+=…-.

Parameters:

prim – The primitive to check.

Returns:

true The primitive is a unary.

Returns:

false The primitive is NOT a unary.

static bool isBrgemm(TensorConfig::prim_t prim)

Indicates if a primitive fits the Brgemm generator.

Parameters:

prim – The primitive to check.

Returns:

true The primitive is a brgemm.

Returns:

false The primitive is NOT a brgemm.

static int32_t findMatch(const std::span<const TensorConfig::dim_t> &dim, const std::span<const TensorConfig::exec_t> &exec, TensorConfig::dim_t searchDim, TensorConfig::exec_t searchExec, uint32_t startIndex = 0)

Finds the matching index of the given pair of dim and exec types.

Parameters:
  • dim – The dimension types to search through.

  • exec – The execution types to search through.

  • searchDim – The acceptable dimension type.

  • searchExec – The acceptable execution type.

  • startIndex – The optional start index for the search.

Returns:

int32_t The index of the found match, or -1 if no match was found.

Private Functions

bool isValidPrimConfig(const std::span<const TensorConfig::dim_t> &dim, const std::span<const TensorConfig::exec_t> &exec)

Validates that exactly one m primitive dimension and one n primitive dimension exist.

Parameters:
  • dim – The dimension types to search through.

  • exec – The execution types to search through.

Returns:

true The configuration is a valid primitive setup.

Returns:

false The configuration is NOT a valid primitive setup.

bool isValidPrimStrides(const std::span<const TensorConfig::dim_t> &dim, const std::span<const TensorConfig::exec_t> &exec, const std::span<const int64_t> &strides_in0, const std::span<const int64_t> &strides_out, const TensorConfig::prim_t main_prim)

Validates that the strides of the m primitives and n primitives dimension are unit strides.

Parameters:
  • dim – The dimension types to search through.

  • exec – The execution types to search through.

  • main_prim – The main primitive of the tensor operation.

Returns:

true The configuration has valid strides.

Returns:

false The configuration does not have valid unit strides.

bool isValidKDim(const std::span<const TensorConfig::dim_t> &dim, const std::span<const TensorConfig::exec_t> &exec, const std::span<const int64_t> &strides_in1, const TensorConfig::prim_t prim)

Checks if the K dimension is valid for the given primitive.

Parameters:
  • dim – The dimension types to search through.

  • exec – The execution types to search through.

  • strides_in1 – The strides of the second input.

  • prim – The primitive i.e. Gemm or Brgemm to be executed.

Returns:

true The configuration is a valid setup.

Returns:

false The configuration is NOT a valid setup.

bool isSortedConfiguration(const std::span<const TensorConfig::exec_t> &exec)

Checks if the configuration is sorted such that the primitives are last.

Parameters:

exec – The execution types of the configuration.

Returns:

true The configuration aligns with the requirement.

Returns:

false The configuration does NOT align with the requirement.

Unary::error_t generateUnary(Unary &unary, TensorConfig::prim_t prim, const std::span<const int64_t> &dim_sizes, bool isTranspose)

Generates the unary kernel.

Parameters:
  • unary – The unary used for generation.

  • prim – The primitive that is generated.

  • dim_sizes – The sizes of each dimension.

  • isTranspose – Indicates whether the unary executes a transpose operation.

Returns:

Unary::error_t

Private Members

TensorConfig config
TensorConfig::dtype_t dtype
TensorConfig::prim_t prim_first = TensorConfig::prim_t::none
TensorConfig::prim_t prim_main = TensorConfig::prim_t::none
TensorConfig::prim_t prim_last = TensorConfig::prim_t::none
std::span<const TensorConfig::dim_t> dim_types
std::span<const TensorConfig::exec_t> exec_types
std::span<const int64_t> dim_sizes
std::span<const int64_t> strides_in0
std::span<const int64_t> strides_in1
std::span<const int64_t> strides_out
int32_t indexPrimM = -1
int32_t indexPrimN = -1
int32_t indexPrimK = -1
int32_t indexPrimBatch = -1
std::variant<Brgemm, Unary> first_touch
std::variant<Brgemm, Unary> main_kernel
std::variant<Brgemm, Unary> last_touch
bool isParallel = false
bool isTranspose = false
bool hasSetupError = false
class TensorOptimization

Public Functions

TensorConfig optimize(TensorConfig config)

Optimize the given configuration.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_primitive_identification(TensorConfig config)

Optimizes the config by identifying the primitive dimension.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_shared_identification(TensorConfig config)

Optimizes the config by identifying the shared dimension.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_dimension_reordering_shared(TensorConfig config)

Optimizes the config by dimension reordering favoring the shared optimization.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_dimension_reordering_fusing(TensorConfig config)

Optimizes the config by dimension reordering favoring the dimension fusing optimization.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_dimension_splitting(TensorConfig config)

Optimizes the config by splitting the dimensions.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

TensorConfig optimize_dimension_fusing(TensorConfig config)

Optimizes the config by fusing the dimensions.

Parameters:

config – The configuration to be optimized.

Returns:

TensorConfig The optimized configuration.

Private Functions

void _reorder_helper_adjust_index(int32_t index, int32_t adjust_index, int32_t &primitive_m, int32_t &primitive_n, int32_t &primitive_k1, int32_t &primitive_k2)

Adjusts the primitive index based on the new index.

Parameters:
  • index – The index to adjust.

  • adjust_index – The index set for adjustment.

  • primitive_m – The m primitive index.

  • primitive_n – The n primitive index.

  • primitive_k1 – The k1 primitive index.

  • primitive_k2 – The k2 primitive index.

void _primitive_identification(TensorConfig &config)

Runs the optimization primitive identification.

Parameters:

config – The configuration object to use.

void _shared_identification(TensorConfig &config)

Runs the optimization shared identification.

Parameters:

config – The configuration object to use.

void _dimension_reordering_shared(TensorConfig &config)

Runs the optimization dimension reordering favoring the shared optimization.

Parameters:

config – The configuration object to use.

void _dimension_reordering_fusing(TensorConfig &config)

Runs the optimization dimension reordering favoring the dimension fusing optimization.

Parameters:

config – The configuration object to use.

void _swap_elements(TensorConfig &config, size_t index1, size_t index2)

Swaps two elements in the vectors of the config.

Parameters:
  • config – The configuration object to use.

  • index1 – The index of element 1, to be set at the position of index2.

  • index2 – The index of element 2, to be set at the position of index1.

void _move_elements(TensorConfig &config, size_t old_index, size_t new_index)

Moves an element from the old index to the new index position.

Parameters:
  • config – The configuration object to use.

  • old_index – The index of the element's current position.

  • new_index – The index that should be the new position.

void _dimension_splitting(TensorConfig &config)

Runs the optimization dimension splitting.

Parameters:

config – The configuration object to use.

void _dimension_fusing(TensorConfig &config)

Runs the optimization dimension fusing.

Parameters:

config – The configuration object to use.

Private Members

const int thread_count = 1

The number of processors to use for parallel work.

const double maximum_inbalanced_parallel_precentage = 1.0 / 100

The imbalanced percentage of parallelism that can be achieved.

const uint32_t fuse_split_dimension_size = 256

The dimension size at which fusing or splitting is applied.

class Unary

Public Types

enum class dtype_t : uint32_t

data type

Values:

enumerator fp32
enumerator fp64
enum class ptype_t : uint32_t

primitive type

Values:

enumerator zero
enumerator identity
enumerator relu
enum class error_t : int32_t

error codes

Values:

enumerator success
enumerator err_wrong_dtype
enumerator err_wrong_dimension
using kernel_t = void (*)(void const *a, void *b, int64_t ld_a, int64_t ld_b)

Public Functions

error_t generate(uint32_t m, uint32_t n, uint32_t trans_b, dtype_t dtype, ptype_t ptype)

Generate a kernel for a unary primitive.

Parameters:
  • m – Number of rows in A and B.

  • n – Number of columns in A and B.

  • trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order.

  • dtype – Data type of the matrices.

  • ptype – Primitive type.

Returns:

error_t::success on success, another error_t value otherwise.

kernel_t get_kernel() const

Get the generated kernel: B := op(A).

Returns:

pointer to the generated kernel.

void write_kernel_to_file(const char *path) const

Writes the current kernel into a file.

Parameters:

path – The file to write the kernel to.

Private Functions

void fill_with_zero_unary_column_major_fp32(uint32_t m, uint32_t n)

Fills the kernel with a suitable zero unary in column major format, and fp32 datatype.

Parameters:
  • m – number of rows in A and B.

  • n – number of columns in A and B.

void identity_unary_fp32(uint32_t m, uint32_t n, uint32_t trans_b)

Does an identity unary on a matrix in column major format, and fp32 datatype.

Parameters:
  • m – number of rows in A and B.

  • n – number of columns in A and B.

  • trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order (i.e. the result is transposed).

void relu_unary_fp32(uint32_t m, uint32_t n, uint32_t trans_b)

Does a relu unary on a matrix in column major format, and fp32 datatype.

Parameters:
  • m – number of rows in A and B.

  • n – number of columns in A and B.

  • trans_b – 0 if B is stored in column-major order, 1 if B is stored in row-major order (i.e. the result is transposed).

Private Members

kernel_t kernel = nullptr
mini_jit::Kernel native_kernel

arm_instructions

enum class mini_jit::arm_instructions::R32Bit : uint32_t

32 bit sized general purpose register

Values:

enumerator w0

32 bit parameter/result register (caller-saved)

enumerator w1

32 bit parameter/result register (caller-saved)

enumerator w2

32 bit parameter/result register (caller-saved)

enumerator w3

32 bit parameter/result register (caller-saved)

enumerator w4

32 bit parameter/result register (caller-saved)

enumerator w5

32 bit parameter/result register (caller-saved)

enumerator w6

32 bit parameter/result register (caller-saved)

enumerator w7

32 bit parameter/result register (caller-saved)

enumerator w8

32 bit scratch register (caller-saved)

enumerator w9

32 bit scratch register (caller-saved)

enumerator w10

32 bit scratch register (caller-saved)

enumerator w11

32 bit scratch register (caller-saved)

enumerator w12

32 bit scratch register (caller-saved)

enumerator w13

32 bit scratch register (caller-saved)

enumerator w14

32 bit scratch register (caller-saved)

enumerator w15

32 bit scratch register (caller-saved)

enumerator w16

32 bit scratch register (caller-saved)

enumerator w17

32 bit scratch register (caller-saved)

enumerator w19

32 bit scratch register (callee-saved)

enumerator w20

32 bit scratch register (callee-saved)

enumerator w21

32 bit scratch register (callee-saved)

enumerator w22

32 bit scratch register (callee-saved)

enumerator w23

32 bit scratch register (callee-saved)

enumerator w24

32 bit scratch register (callee-saved)

enumerator w25

32 bit scratch register (callee-saved)

enumerator w26

32 bit scratch register (callee-saved)

enumerator w27

32 bit scratch register (callee-saved)

enumerator w28

32 bit scratch register (callee-saved)

enumerator w29

32 bit scratch register (callee-saved)

enumerator w30

32 bit scratch register (callee-saved)

enumerator wsp

32 bit stack pointer

enumerator wzr

32 bit zero register

enum class mini_jit::arm_instructions::R64Bit : uint32_t

64 bit sized general purpose register, including stack pointer

Values:

enumerator x0

64 bit parameter/result register (caller-saved)

enumerator x1

64 bit parameter/result register (caller-saved)

enumerator x2

64 bit parameter/result register (caller-saved)

enumerator x3

64 bit parameter/result register (caller-saved)

enumerator x4

64 bit parameter/result register (caller-saved)

enumerator x5

64 bit parameter/result register (caller-saved)

enumerator x6

64 bit parameter/result register (caller-saved)

enumerator x7

64 bit parameter/result register (caller-saved)

enumerator x8

64 bit scratch register (caller-saved)

enumerator x9

64 bit scratch register (caller-saved)

enumerator x10

64 bit scratch register (caller-saved)

enumerator x11

64 bit scratch register (caller-saved)

enumerator x12

64 bit scratch register (caller-saved)

enumerator x13

64 bit scratch register (caller-saved)

enumerator x14

64 bit scratch register (caller-saved)

enumerator x15

64 bit scratch register (caller-saved)

enumerator x16

64 bit scratch register (caller-saved)

enumerator x17

64 bit scratch register (caller-saved)

enumerator x19

64 bit scratch register (callee-saved)

enumerator x20

64 bit scratch register (callee-saved)

enumerator x21

64 bit scratch register (callee-saved)

enumerator x22

64 bit scratch register (callee-saved)

enumerator x23

64 bit scratch register (callee-saved)

enumerator x24

64 bit scratch register (callee-saved)

enumerator x25

64 bit scratch register (callee-saved)

enumerator x26

64 bit scratch register (callee-saved)

enumerator x27

64 bit scratch register (callee-saved)

enumerator x28

64 bit scratch register (callee-saved)

enumerator x29

frame pointer register (callee-saved)

enumerator x30

link register (callee-saved)

enumerator fp

frame pointer register (callee-saved)

enumerator lr

link register (callee-saved)

enumerator sp

stack pointer register

enumerator xzr

64 bit zero register

enum class mini_jit::arm_instructions::ShiftLSL

Represents the Logical Shift Left option.

Values:

enumerator LSL

Represents the Logical Shift Left option.

enum class mini_jit::arm_instructions::ShiftLSR

Represents the Logical Shift Right option.

Values:

enumerator LSR

Represents the Logical Shift Right option.

enum class mini_jit::arm_instructions::ShiftASR

Represents the Arithmetic Shift Right option.

Values:

enumerator ASR

Represents the Arithmetic Shift Right option.

enum class mini_jit::arm_instructions::ShiftROR

Represents the ROtate Right option.

Values:

enumerator ROR

Represents the ROtate Right option.

enum class mini_jit::arm_instructions::V8Bit : uint32_t

Byte sized vector register B0 - B31.

Values:

enumerator b0

8 bit parameter/result register (caller-saved)

enumerator b1

8 bit parameter/result register (caller-saved)

enumerator b2

8 bit parameter/result register (caller-saved)

enumerator b3

8 bit parameter/result register (caller-saved)

enumerator b4

8 bit parameter/result register (caller-saved)

enumerator b5

8 bit parameter/result register (caller-saved)

enumerator b6

8 bit parameter/result register (caller-saved)

enumerator b7

8 bit parameter/result register (caller-saved)

enumerator b8

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b9

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b10

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b11

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b12

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b13

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b14

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b15

8 bit scratch register (callee-saved, lower 64 bits)

enumerator b16

8 bit scratch register (caller-saved)

enumerator b17

8 bit scratch register (caller-saved)

enumerator b18

8 bit scratch register (caller-saved)

enumerator b19

8 bit scratch register (caller-saved)

enumerator b20

8 bit scratch register (caller-saved)

enumerator b21

8 bit scratch register (caller-saved)

enumerator b22

8 bit scratch register (caller-saved)

enumerator b23

8 bit scratch register (caller-saved)

enumerator b24

8 bit scratch register (caller-saved)

enumerator b25

8 bit scratch register (caller-saved)

enumerator b26

8 bit scratch register (caller-saved)

enumerator b27

8 bit scratch register (caller-saved)

enumerator b28

8 bit scratch register (caller-saved)

enumerator b29

8 bit scratch register (caller-saved)

enumerator b30

8 bit scratch register (caller-saved)

enumerator b31

8 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::V16Bit : uint32_t

Half word sized vector register H0 - H31.

Values:

enumerator h0

16 bit parameter/result register (caller-saved)

enumerator h1

16 bit parameter/result register (caller-saved)

enumerator h2

16 bit parameter/result register (caller-saved)

enumerator h3

16 bit parameter/result register (caller-saved)

enumerator h4

16 bit parameter/result register (caller-saved)

enumerator h5

16 bit parameter/result register (caller-saved)

enumerator h6

16 bit parameter/result register (caller-saved)

enumerator h7

16 bit parameter/result register (caller-saved)

enumerator h8

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h9

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h10

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h11

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h12

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h13

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h14

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h15

16 bit scratch register (callee-saved, lower 64 bits)

enumerator h16

16 bit scratch register (caller-saved)

enumerator h17

16 bit scratch register (caller-saved)

enumerator h18

16 bit scratch register (caller-saved)

enumerator h19

16 bit scratch register (caller-saved)

enumerator h20

16 bit scratch register (caller-saved)

enumerator h21

16 bit scratch register (caller-saved)

enumerator h22

16 bit scratch register (caller-saved)

enumerator h23

16 bit scratch register (caller-saved)

enumerator h24

16 bit scratch register (caller-saved)

enumerator h25

16 bit scratch register (caller-saved)

enumerator h26

16 bit scratch register (caller-saved)

enumerator h27

16 bit scratch register (caller-saved)

enumerator h28

16 bit scratch register (caller-saved)

enumerator h29

16 bit scratch register (caller-saved)

enumerator h30

16 bit scratch register (caller-saved)

enumerator h31

16 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::V32Bit : uint32_t

Word sized vector register S0 - S31.

Values:

enumerator s0

32 bit parameter/result register (caller-saved)

enumerator s1

32 bit parameter/result register (caller-saved)

enumerator s2

32 bit parameter/result register (caller-saved)

enumerator s3

32 bit parameter/result register (caller-saved)

enumerator s4

32 bit parameter/result register (caller-saved)

enumerator s5

32 bit parameter/result register (caller-saved)

enumerator s6

32 bit parameter/result register (caller-saved)

enumerator s7

32 bit parameter/result register (caller-saved)

enumerator s8

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s9

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s10

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s11

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s12

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s13

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s14

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s15

32 bit scratch register (callee-saved, lower 64 bits)

enumerator s16

32 bit scratch register (caller-saved)

enumerator s17

32 bit scratch register (caller-saved)

enumerator s18

32 bit scratch register (caller-saved)

enumerator s19

32 bit scratch register (caller-saved)

enumerator s20

32 bit scratch register (caller-saved)

enumerator s21

32 bit scratch register (caller-saved)

enumerator s22

32 bit scratch register (caller-saved)

enumerator s23

32 bit scratch register (caller-saved)

enumerator s24

32 bit scratch register (caller-saved)

enumerator s25

32 bit scratch register (caller-saved)

enumerator s26

32 bit scratch register (caller-saved)

enumerator s27

32 bit scratch register (caller-saved)

enumerator s28

32 bit scratch register (caller-saved)

enumerator s29

32 bit scratch register (caller-saved)

enumerator s30

32 bit scratch register (caller-saved)

enumerator s31

32 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::V64Bit : uint32_t

Double word sized vector register D0 - D31.

Values:

enumerator d0

64 bit parameter/result register (caller-saved)

enumerator d1

64 bit parameter/result register (caller-saved)

enumerator d2

64 bit parameter/result register (caller-saved)

enumerator d3

64 bit parameter/result register (caller-saved)

enumerator d4

64 bit parameter/result register (caller-saved)

enumerator d5

64 bit parameter/result register (caller-saved)

enumerator d6

64 bit parameter/result register (caller-saved)

enumerator d7

64 bit parameter/result register (caller-saved)

enumerator d8

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d9

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d10

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d11

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d12

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d13

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d14

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d15

64 bit scratch register (callee-saved, lower 64 bits)

enumerator d16

64 bit scratch register (caller-saved)

enumerator d17

64 bit scratch register (caller-saved)

enumerator d18

64 bit scratch register (caller-saved)

enumerator d19

64 bit scratch register (caller-saved)

enumerator d20

64 bit scratch register (caller-saved)

enumerator d21

64 bit scratch register (caller-saved)

enumerator d22

64 bit scratch register (caller-saved)

enumerator d23

64 bit scratch register (caller-saved)

enumerator d24

64 bit scratch register (caller-saved)

enumerator d25

64 bit scratch register (caller-saved)

enumerator d26

64 bit scratch register (caller-saved)

enumerator d27

64 bit scratch register (caller-saved)

enumerator d28

64 bit scratch register (caller-saved)

enumerator d29

64 bit scratch register (caller-saved)

enumerator d30

64 bit scratch register (caller-saved)

enumerator d31

64 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::V128Bit : uint32_t

Quad word sized vector register Q0 - Q31.

Values:

enumerator q0

128 bit parameter/result register (caller-saved)

enumerator q1

128 bit parameter/result register (caller-saved)

enumerator q2

128 bit parameter/result register (caller-saved)

enumerator q3

128 bit parameter/result register (caller-saved)

enumerator q4

128 bit parameter/result register (caller-saved)

enumerator q5

128 bit parameter/result register (caller-saved)

enumerator q6

128 bit parameter/result register (caller-saved)

enumerator q7

128 bit parameter/result register (caller-saved)

enumerator q8

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q9

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q10

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q11

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q12

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q13

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q14

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q15

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator q16

128 bit scratch register (caller-saved)

enumerator q17

128 bit scratch register (caller-saved)

enumerator q18

128 bit scratch register (caller-saved)

enumerator q19

128 bit scratch register (caller-saved)

enumerator q20

128 bit scratch register (caller-saved)

enumerator q21

128 bit scratch register (caller-saved)

enumerator q22

128 bit scratch register (caller-saved)

enumerator q23

128 bit scratch register (caller-saved)

enumerator q24

128 bit scratch register (caller-saved)

enumerator q25

128 bit scratch register (caller-saved)

enumerator q26

128 bit scratch register (caller-saved)

enumerator q27

128 bit scratch register (caller-saved)

enumerator q28

128 bit scratch register (caller-saved)

enumerator q29

128 bit scratch register (caller-saved)

enumerator q30

128 bit scratch register (caller-saved)

enumerator q31

128 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::VGeneral

General vector register of V0 - V31.

Values:

enumerator v0

128 bit parameter/result register (caller-saved)

enumerator v1

128 bit parameter/result register (caller-saved)

enumerator v2

128 bit parameter/result register (caller-saved)

enumerator v3

128 bit parameter/result register (caller-saved)

enumerator v4

128 bit parameter/result register (caller-saved)

enumerator v5

128 bit parameter/result register (caller-saved)

enumerator v6

128 bit parameter/result register (caller-saved)

enumerator v7

128 bit parameter/result register (caller-saved)

enumerator v8

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v9

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v10

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v11

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v12

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v13

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v14

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v15

128 bit scratch register (callee-saved, only the lower 64 bits are preserved)

enumerator v16

128 bit scratch register (caller-saved)

enumerator v17

128 bit scratch register (caller-saved)

enumerator v18

128 bit scratch register (caller-saved)

enumerator v19

128 bit scratch register (caller-saved)

enumerator v20

128 bit scratch register (caller-saved)

enumerator v21

128 bit scratch register (caller-saved)

enumerator v22

128 bit scratch register (caller-saved)

enumerator v23

128 bit scratch register (caller-saved)

enumerator v24

128 bit scratch register (caller-saved)

enumerator v25

128 bit scratch register (caller-saved)

enumerator v26

128 bit scratch register (caller-saved)

enumerator v27

128 bit scratch register (caller-saved)

enumerator v28

128 bit scratch register (caller-saved)

enumerator v29

128 bit scratch register (caller-saved)

enumerator v30

128 bit scratch register (caller-saved)

enumerator v31

128 bit scratch register (caller-saved)

enum class mini_jit::arm_instructions::VType8x8Bit : uint32_t

Use 8 Byte sized vectors.

Values:

enumerator t8B

Use 8 Byte sized vectors.

enum class mini_jit::arm_instructions::VType16x8Bit : uint32_t

Use 16 Byte sized vectors.

Values:

enumerator t16B

Use 16 Byte sized vectors.

enum class mini_jit::arm_instructions::VType4x16Bit : uint32_t

Use 4 half word (16 Bit) sized vectors.

Values:

enumerator t4H

Use 4 half word (16 Bit) sized vectors.

enum class mini_jit::arm_instructions::VType8x16Bit : uint32_t

Use 8 half word (16 Bit) sized vectors.

Values:

enumerator t8H

Use 8 half word (16 Bit) sized vectors.

enum class mini_jit::arm_instructions::VType2x32Bit : uint32_t

Use 2 word (32 Bit) sized vectors.

Values:

enumerator t2S

Use 2 word (32 Bit) sized vectors.

enum class mini_jit::arm_instructions::VType4x32Bit : uint32_t

Use 4 word (32 Bit) sized vectors.

Values:

enumerator t4S

Use 4 word (32 Bit) sized vectors.

enum class mini_jit::arm_instructions::VType1x64Bit : uint32_t

Use 1 double word (64 Bit) sized vector.

Values:

enumerator t1D

Use 1 double word (64 Bit) sized vector.

enum class mini_jit::arm_instructions::VType2x64Bit : uint32_t

Use 2 double word (64 Bit) sized vector.

Values:

enumerator t2D

Use 2 double word (64 Bit) sized vector.

const uint32_t mini_jit::arm_instructions::mask1 = 0b1
const uint32_t mini_jit::arm_instructions::mask2 = 0b11
const uint32_t mini_jit::arm_instructions::mask3 = 0b111
const uint32_t mini_jit::arm_instructions::mask4 = 0b1111
const uint32_t mini_jit::arm_instructions::mask5 = 0b1'1111
const uint32_t mini_jit::arm_instructions::mask6 = 0b11'1111
const uint32_t mini_jit::arm_instructions::mask7 = 0b111'1111
const uint32_t mini_jit::arm_instructions::mask8 = 0b1111'1111
const uint32_t mini_jit::arm_instructions::mask9 = 0b1'1111'1111
const uint32_t mini_jit::arm_instructions::mask10 = 0b11'1111'1111
const uint32_t mini_jit::arm_instructions::mask11 = 0b111'1111'1111
const uint32_t mini_jit::arm_instructions::mask12 = 0b1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask13 = 0b1'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask14 = 0b11'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask15 = 0b111'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask16 = 0b1111'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask17 = 0b1'1111'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask18 = 0b11'1111'1111'1111'1111
const uint32_t mini_jit::arm_instructions::mask19 = 0b111'1111'1111'1111'1111
const R32Bit mini_jit::arm_instructions::w0 = R32Bit::w0

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w1 = R32Bit::w1

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w2 = R32Bit::w2

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w3 = R32Bit::w3

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w4 = R32Bit::w4

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w5 = R32Bit::w5

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w6 = R32Bit::w6

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w7 = R32Bit::w7

32 bit parameter/result register (caller-saved)

const R32Bit mini_jit::arm_instructions::w8 = R32Bit::w8

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w9 = R32Bit::w9

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w10 = R32Bit::w10

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w11 = R32Bit::w11

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w12 = R32Bit::w12

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w13 = R32Bit::w13

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w14 = R32Bit::w14

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w15 = R32Bit::w15

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w16 = R32Bit::w16

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w17 = R32Bit::w17

32 bit scratch register (caller-saved)

const R32Bit mini_jit::arm_instructions::w19 = R32Bit::w19

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w20 = R32Bit::w20

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w21 = R32Bit::w21

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w22 = R32Bit::w22

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w23 = R32Bit::w23

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w24 = R32Bit::w24

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w25 = R32Bit::w25

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w26 = R32Bit::w26

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w27 = R32Bit::w27

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w28 = R32Bit::w28

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w29 = R32Bit::w29

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::w30 = R32Bit::w30

32 bit scratch register (callee-saved)

const R32Bit mini_jit::arm_instructions::wsp = R32Bit::wsp

32 bit stack pointer

const R32Bit mini_jit::arm_instructions::wzr = R32Bit::wzr

32 bit zero register

const R64Bit mini_jit::arm_instructions::x0 = R64Bit::x0

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x1 = R64Bit::x1

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x2 = R64Bit::x2

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x3 = R64Bit::x3

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x4 = R64Bit::x4

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x5 = R64Bit::x5

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x6 = R64Bit::x6

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x7 = R64Bit::x7

64 bit parameter/result register (caller-saved)

const R64Bit mini_jit::arm_instructions::x8 = R64Bit::x8

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x9 = R64Bit::x9

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x10 = R64Bit::x10

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x11 = R64Bit::x11

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x12 = R64Bit::x12

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x13 = R64Bit::x13

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x14 = R64Bit::x14

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x15 = R64Bit::x15

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x16 = R64Bit::x16

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x17 = R64Bit::x17

64 bit scratch register (caller-saved)

const R64Bit mini_jit::arm_instructions::x19 = R64Bit::x19

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x20 = R64Bit::x20

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x21 = R64Bit::x21

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x22 = R64Bit::x22

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x23 = R64Bit::x23

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x24 = R64Bit::x24

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x25 = R64Bit::x25

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x26 = R64Bit::x26

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x27 = R64Bit::x27

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x28 = R64Bit::x28

64 bit scratch register (callee-saved)

const R64Bit mini_jit::arm_instructions::x29 = R64Bit::x29

frame pointer register (callee-saved)

const R64Bit mini_jit::arm_instructions::x30 = R64Bit::x30

link register (callee-saved)

const R64Bit mini_jit::arm_instructions::fp = R64Bit::fp

frame pointer register (callee-saved)

const R64Bit mini_jit::arm_instructions::lr = R64Bit::lr

link register (callee-saved)

const R64Bit mini_jit::arm_instructions::sp = R64Bit::sp

stack pointer register

const R64Bit mini_jit::arm_instructions::xzr = R64Bit::xzr

64 bit zero register

const ShiftLSL mini_jit::arm_instructions::LSL = ShiftLSL::LSL

Logical Shift Left.

const ShiftLSR mini_jit::arm_instructions::LSR = ShiftLSR::LSR

Logical Shift Right.

const ShiftASR mini_jit::arm_instructions::ASR = ShiftASR::ASR

Arithmetic Shift Right.

const ShiftROR mini_jit::arm_instructions::ROR = ShiftROR::ROR

ROtate Right.

const V8Bit mini_jit::arm_instructions::b0 = V8Bit::b0

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b1 = V8Bit::b1

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b2 = V8Bit::b2

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b3 = V8Bit::b3

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b4 = V8Bit::b4

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b5 = V8Bit::b5

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b6 = V8Bit::b6

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b7 = V8Bit::b7

8 bit parameter/result register (caller-saved)

const V8Bit mini_jit::arm_instructions::b8 = V8Bit::b8

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b9 = V8Bit::b9

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b10 = V8Bit::b10

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b11 = V8Bit::b11

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b12 = V8Bit::b12

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b13 = V8Bit::b13

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b14 = V8Bit::b14

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b15 = V8Bit::b15

8 bit scratch register (callee-saved, lower 64 bit)

const V8Bit mini_jit::arm_instructions::b16 = V8Bit::b16

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b17 = V8Bit::b17

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b18 = V8Bit::b18

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b19 = V8Bit::b19

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b20 = V8Bit::b20

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b21 = V8Bit::b21

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b22 = V8Bit::b22

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b23 = V8Bit::b23

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b24 = V8Bit::b24

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b25 = V8Bit::b25

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b26 = V8Bit::b26

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b27 = V8Bit::b27

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b28 = V8Bit::b28

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b29 = V8Bit::b29

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b30 = V8Bit::b30

8 bit scratch register (caller-saved)

const V8Bit mini_jit::arm_instructions::b31 = V8Bit::b31

8 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h0 = V16Bit::h0

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h1 = V16Bit::h1

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h2 = V16Bit::h2

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h3 = V16Bit::h3

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h4 = V16Bit::h4

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h5 = V16Bit::h5

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h6 = V16Bit::h6

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h7 = V16Bit::h7

16 bit parameter/result register (caller-saved)

const V16Bit mini_jit::arm_instructions::h8 = V16Bit::h8

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h9 = V16Bit::h9

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h10 = V16Bit::h10

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h11 = V16Bit::h11

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h12 = V16Bit::h12

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h13 = V16Bit::h13

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h14 = V16Bit::h14

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h15 = V16Bit::h15

16 bit scratch register (callee-saved, lower 64 bit)

const V16Bit mini_jit::arm_instructions::h16 = V16Bit::h16

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h17 = V16Bit::h17

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h18 = V16Bit::h18

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h19 = V16Bit::h19

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h20 = V16Bit::h20

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h21 = V16Bit::h21

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h22 = V16Bit::h22

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h23 = V16Bit::h23

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h24 = V16Bit::h24

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h25 = V16Bit::h25

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h26 = V16Bit::h26

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h27 = V16Bit::h27

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h28 = V16Bit::h28

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h29 = V16Bit::h29

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h30 = V16Bit::h30

16 bit scratch register (caller-saved)

const V16Bit mini_jit::arm_instructions::h31 = V16Bit::h31

16 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s0 = V32Bit::s0

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s1 = V32Bit::s1

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s2 = V32Bit::s2

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s3 = V32Bit::s3

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s4 = V32Bit::s4

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s5 = V32Bit::s5

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s6 = V32Bit::s6

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s7 = V32Bit::s7

32 bit parameter/result register (caller-saved)

const V32Bit mini_jit::arm_instructions::s8 = V32Bit::s8

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s9 = V32Bit::s9

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s10 = V32Bit::s10

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s11 = V32Bit::s11

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s12 = V32Bit::s12

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s13 = V32Bit::s13

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s14 = V32Bit::s14

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s15 = V32Bit::s15

32 bit scratch register (callee-saved, lower 64 bit)

const V32Bit mini_jit::arm_instructions::s16 = V32Bit::s16

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s17 = V32Bit::s17

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s18 = V32Bit::s18

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s19 = V32Bit::s19

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s20 = V32Bit::s20

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s21 = V32Bit::s21

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s22 = V32Bit::s22

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s23 = V32Bit::s23

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s24 = V32Bit::s24

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s25 = V32Bit::s25

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s26 = V32Bit::s26

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s27 = V32Bit::s27

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s28 = V32Bit::s28

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s29 = V32Bit::s29

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s30 = V32Bit::s30

32 bit scratch register (caller-saved)

const V32Bit mini_jit::arm_instructions::s31 = V32Bit::s31

32 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d0 = V64Bit::d0

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d1 = V64Bit::d1

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d2 = V64Bit::d2

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d3 = V64Bit::d3

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d4 = V64Bit::d4

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d5 = V64Bit::d5

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d6 = V64Bit::d6

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d7 = V64Bit::d7

64 bit parameter/result register (caller-saved)

const V64Bit mini_jit::arm_instructions::d8 = V64Bit::d8

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d9 = V64Bit::d9

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d10 = V64Bit::d10

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d11 = V64Bit::d11

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d12 = V64Bit::d12

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d13 = V64Bit::d13

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d14 = V64Bit::d14

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d15 = V64Bit::d15

64 bit scratch register (callee-saved, lower 64 bit)

const V64Bit mini_jit::arm_instructions::d16 = V64Bit::d16

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d17 = V64Bit::d17

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d18 = V64Bit::d18

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d19 = V64Bit::d19

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d20 = V64Bit::d20

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d21 = V64Bit::d21

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d22 = V64Bit::d22

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d23 = V64Bit::d23

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d24 = V64Bit::d24

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d25 = V64Bit::d25

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d26 = V64Bit::d26

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d27 = V64Bit::d27

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d28 = V64Bit::d28

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d29 = V64Bit::d29

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d30 = V64Bit::d30

64 bit scratch register (caller-saved)

const V64Bit mini_jit::arm_instructions::d31 = V64Bit::d31

64 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q0 = V128Bit::q0

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q1 = V128Bit::q1

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q2 = V128Bit::q2

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q3 = V128Bit::q3

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q4 = V128Bit::q4

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q5 = V128Bit::q5

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q6 = V128Bit::q6

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q7 = V128Bit::q7

128 bit parameter/result register (caller-saved)

const V128Bit mini_jit::arm_instructions::q8 = V128Bit::q8

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q9 = V128Bit::q9

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q10 = V128Bit::q10

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q11 = V128Bit::q11

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q12 = V128Bit::q12

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q13 = V128Bit::q13

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q14 = V128Bit::q14

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q15 = V128Bit::q15

128 bit scratch register (callee-saved, lower 64 bit)

const V128Bit mini_jit::arm_instructions::q16 = V128Bit::q16

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q17 = V128Bit::q17

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q18 = V128Bit::q18

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q19 = V128Bit::q19

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q20 = V128Bit::q20

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q21 = V128Bit::q21

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q22 = V128Bit::q22

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q23 = V128Bit::q23

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q24 = V128Bit::q24

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q25 = V128Bit::q25

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q26 = V128Bit::q26

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q27 = V128Bit::q27

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q28 = V128Bit::q28

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q29 = V128Bit::q29

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q30 = V128Bit::q30

128 bit scratch register (caller-saved)

const V128Bit mini_jit::arm_instructions::q31 = V128Bit::q31

128 bit scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v0 = VGeneral::v0

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v1 = VGeneral::v1

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v2 = VGeneral::v2

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v3 = VGeneral::v3

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v4 = VGeneral::v4

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v5 = VGeneral::v5

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v6 = VGeneral::v6

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v7 = VGeneral::v7

general parameter/result register (caller-saved)

const VGeneral mini_jit::arm_instructions::v8 = VGeneral::v8

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v9 = VGeneral::v9

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v10 = VGeneral::v10

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v11 = VGeneral::v11

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v12 = VGeneral::v12

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v13 = VGeneral::v13

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v14 = VGeneral::v14

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v15 = VGeneral::v15

general scratch register (callee-saved, lower 64 bit)

const VGeneral mini_jit::arm_instructions::v16 = VGeneral::v16

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v17 = VGeneral::v17

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v18 = VGeneral::v18

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v19 = VGeneral::v19

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v20 = VGeneral::v20

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v21 = VGeneral::v21

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v22 = VGeneral::v22

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v23 = VGeneral::v23

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v24 = VGeneral::v24

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v25 = VGeneral::v25

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v26 = VGeneral::v26

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v27 = VGeneral::v27

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v28 = VGeneral::v28

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v29 = VGeneral::v29

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v30 = VGeneral::v30

general scratch register (caller-saved)

const VGeneral mini_jit::arm_instructions::v31 = VGeneral::v31

general scratch register (caller-saved)

const VType8x8Bit mini_jit::arm_instructions::t8b = VType8x8Bit::t8B

Use a vector with 8 lanes of one byte (8 bit) each.

const VType16x8Bit mini_jit::arm_instructions::t16b = VType16x8Bit::t16B

Use a vector with 16 lanes of one byte (8 bit) each.

const VType4x16Bit mini_jit::arm_instructions::t4h = VType4x16Bit::t4H

Use a vector with 4 lanes of one half word (16 bit) each.

const VType8x16Bit mini_jit::arm_instructions::t8h = VType8x16Bit::t8H

Use a vector with 8 lanes of one half word (16 bit) each.

const VType2x32Bit mini_jit::arm_instructions::t2s = VType2x32Bit::t2S

Use a vector with 2 lanes of one word (32 bit) each.

const VType4x32Bit mini_jit::arm_instructions::t4s = VType4x32Bit::t4S

Use a vector with 4 lanes of one word (32 bit) each.

const VType1x64Bit mini_jit::arm_instructions::t1d = VType1x64Bit::t1D

Use a vector with 1 lane of one double word (64 bit).

const VType2x64Bit mini_jit::arm_instructions::t2d = VType2x64Bit::t2D

Use a vector with 2 lanes of one double word (64 bit) each.

constexpr uint32_t mini_jit::arm_instructions::add(const R32Bit Wd, const R32Bit Wn, const R32Bit Wm)
constexpr uint32_t mini_jit::arm_instructions::add(const R64Bit Rd, const R64Bit Rn, const R64Bit Rm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::add(const R32Bit Wd, const R32Bit Wn, const R32Bit Wm, const T, uint32_t amount)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::add(const R64Bit Rd, const R64Bit Rn, const R64Bit Rm, const T, uint32_t amount)
constexpr uint32_t mini_jit::arm_instructions::add(const R32Bit Wd, const R32Bit Wn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::add(const R64Bit Rd, const R64Bit Rn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::add(const R32Bit Wd, const R32Bit Wn, const uint32_t imm, bool shift12)
constexpr uint32_t mini_jit::arm_instructions::add(const R64Bit Rd, const R64Bit Rn, const uint32_t imm, bool shift12)
constexpr uint32_t mini_jit::arm_instructions::cbnz(const R32Bit Wt, const int32_t offset)
constexpr uint32_t mini_jit::arm_instructions::cbnz(const R64Bit Xt, const int32_t offset)
constexpr uint32_t mini_jit::arm_instructions::ldpPost(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPost(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPre(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPre(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldp(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldp(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldpOffset(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpOffset(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const R32Bit Wt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const R64Bit Xt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const R32Bit Wt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const R64Bit Xt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldr(const R32Bit Wt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldr(const R64Bit Xt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const R32Bit Wt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const R64Bit Xt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::lsl(const R32Bit Wd, const R32Bit Wn, const uint32_t shift)
constexpr uint32_t mini_jit::arm_instructions::lsl(const R64Bit Xd, const R64Bit Xn, const uint32_t shift)
constexpr uint32_t mini_jit::arm_instructions::madd(const R32Bit Wd, const R32Bit Wn, const R32Bit Wm, const R32Bit Wa)
constexpr uint32_t mini_jit::arm_instructions::madd(const R64Bit Xd, const R64Bit Xn, const R64Bit Xm, const R64Bit Xa)
constexpr uint32_t mini_jit::arm_instructions::mov(const R32Bit Wd, const R32Bit Wm)
constexpr uint32_t mini_jit::arm_instructions::mov(const R64Bit Xd, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::mov(const R32Bit Wd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::mov(const R64Bit Xd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::mov(const R32Bit Wd, const int32_t imm)
constexpr uint32_t mini_jit::arm_instructions::mov(const R64Bit Xd, const int32_t imm)
constexpr uint32_t mini_jit::arm_instructions::movSp(const R32Bit Wd, const R32Bit Wn)
constexpr uint32_t mini_jit::arm_instructions::movSp(const R64Bit Xd, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::movn(const R32Bit Wd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::movn(const R64Bit Xd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::movn(const R32Bit Wd, const uint32_t imm, const uint32_t lslShift)
constexpr uint32_t mini_jit::arm_instructions::movn(const R64Bit Xd, const uint32_t imm, const uint32_t lslShift)
constexpr uint32_t mini_jit::arm_instructions::movz(const R32Bit Wd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::movz(const R64Bit Xd, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::movz(const R32Bit Wd, const uint32_t imm, const uint32_t lslShift)
constexpr uint32_t mini_jit::arm_instructions::movz(const R64Bit Xd, const uint32_t imm, const uint32_t lslShift)
constexpr uint32_t mini_jit::arm_instructions::orr(const R32Bit Wd, const R32Bit Wn, const R32Bit Wm)
constexpr uint32_t mini_jit::arm_instructions::orr(const R64Bit Rd, const R64Bit Rn, const R64Bit Rm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::orr(const R32Bit Wd, const R32Bit Wn, const R32Bit Wm, const T, uint32_t amount)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::orr(const R64Bit Rd, const R64Bit Rn, const R64Bit Rm, const T, uint32_t amount)
constexpr uint32_t mini_jit::arm_instructions::ret()
constexpr uint32_t mini_jit::arm_instructions::ret(const R64Bit Rn)
constexpr uint32_t mini_jit::arm_instructions::stpPost(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPost(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPre(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPre(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stp(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::stp(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::stpOffset(const R32Bit Wt1, const R32Bit Wt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpOffset(const R64Bit Xt1, const R64Bit Xt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::sub(const R32Bit Wd, const R32Bit Wn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::sub(const R64Bit Xd, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::sub(const R32Bit Wd, const R32Bit Wn, const uint32_t imm12, const bool leftShift12)
constexpr uint32_t mini_jit::arm_instructions::sub(const R64Bit Xd, const R64Bit Xn, const uint32_t imm12, const bool leftShift12)
constexpr uint32_t mini_jit::arm_instructions::eor(const VGeneral Vd, const VType16x8Bit, const VGeneral Vn, const VType16x8Bit, const VGeneral Vm, const VType16x8Bit)
constexpr uint32_t mini_jit::arm_instructions::eor(const VGeneral Vd, const VType8x8Bit, const VGeneral Vn, const VType8x8Bit, const VGeneral Vm, const VType8x8Bit)
constexpr uint32_t mini_jit::arm_instructions::fmax(const VGeneral Vd, const VType2x32Bit, const VGeneral Vn, const VType2x32Bit, const VGeneral Vm, const VType2x32Bit)
constexpr uint32_t mini_jit::arm_instructions::fmax(const VGeneral Vd, const VType4x32Bit, const VGeneral Vn, const VType4x32Bit, const VGeneral Vm, const VType4x32Bit)
constexpr uint32_t mini_jit::arm_instructions::fmax(const VGeneral Vd, const VType2x64Bit, const VGeneral Vn, const VType2x64Bit, const VGeneral Vm, const VType2x64Bit)
constexpr uint32_t mini_jit::arm_instructions::fmax(const V16Bit Vd, const V16Bit Vn, const V16Bit Vm)
constexpr uint32_t mini_jit::arm_instructions::fmax(const V32Bit Vd, const V32Bit Vn, const V32Bit Vm)
constexpr uint32_t mini_jit::arm_instructions::fmax(const V64Bit Vd, const V64Bit Vn, const V64Bit Vm)
constexpr uint32_t mini_jit::arm_instructions::fmla(const V16Bit Hd, const V16Bit Hn, const VGeneral Vm, const uint32_t index)
constexpr uint32_t mini_jit::arm_instructions::fmla(const V32Bit Sd, const V32Bit Sn, const VGeneral Vm, const uint32_t index)
constexpr uint32_t mini_jit::arm_instructions::fmla(const V64Bit Dd, const V64Bit Dn, const VGeneral Vm, const uint32_t index)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::fmla(const VGeneral Vd, const T, const VGeneral Vn, const T, const VGeneral Vm, const uint32_t index)
template<>
constexpr uint32_t mini_jit::arm_instructions::fmla<VType4x16Bit>(const VGeneral Vd, const VType4x16Bit, const VGeneral Vn, const VType4x16Bit, const VGeneral Vm, const uint32_t index)
template<>
constexpr uint32_t mini_jit::arm_instructions::fmla<VType8x16Bit>(const VGeneral Vd, const VType8x16Bit, const VGeneral Vn, const VType8x16Bit, const VGeneral Vm, const uint32_t index)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1(const VGeneral Vt, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::ld1(const V8Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ld1(const V16Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ld1(const V32Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ld1(const V64Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V8Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V16Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V32Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V64Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V8Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V16Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V32Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::ld1Post(const V64Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::ldpPost(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPost(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPost(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPre(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPre(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpPre(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldp(const V32Bit St1, const V32Bit St2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldp(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldp(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldpOffset(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpOffset(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldpOffset(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const V8Bit Bt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const V16Bit Ht, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const V32Bit St, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const V64Bit Dt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPost(const V128Bit Qt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const V8Bit Bt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const V16Bit Ht, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const V32Bit St, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const V64Bit Dt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldrPre(const V128Bit Qt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::ldr(const V8Bit Bt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldr(const V16Bit Ht, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldr(const V32Bit St, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldr(const V64Bit Dt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldr(const V128Bit Qt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const V8Bit Bt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const V16Bit Ht, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const V32Bit St, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const V64Bit Dt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::ldrOffset(const V128Bit Qt, const R64Bit Xn, const uint32_t imm12)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1(const VGeneral Vt, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn, const uint32_t imm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const R64Bit Xn, const R64Bit Xm)
template<typename T>
constexpr uint32_t mini_jit::arm_instructions::st1Post(const VGeneral Vt, const T, const VGeneral Vt2, const T, const VGeneral Vt3, const T, const VGeneral Vt4, const T, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::st1(const V8Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::st1(const V16Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::st1(const V32Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::st1(const V64Bit bt, const uint32_t index, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V8Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V16Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V32Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V64Bit bt, const uint32_t index, const R64Bit Xn, const uint32_t imm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V8Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V16Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V32Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::st1Post(const V64Bit bt, const uint32_t index, const R64Bit Xn, const R64Bit Xm)
constexpr uint32_t mini_jit::arm_instructions::stpPost(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPost(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPost(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPre(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPre(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpPre(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stp(const V32Bit St1, const V32Bit St2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::stp(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::stp(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::stpOffset(const V32Bit St1, const V32Bit St2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpOffset(const V64Bit Dt1, const V64Bit Dt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::stpOffset(const V128Bit Qt1, const V128Bit Qt2, const R64Bit Xn, const int32_t imm7)
constexpr uint32_t mini_jit::arm_instructions::strPost(const V8Bit Bt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPost(const V16Bit Ht, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPost(const V32Bit St, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPost(const V64Bit Dt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPost(const V128Bit Qt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPre(const V8Bit Bt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPre(const V16Bit Ht, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPre(const V32Bit St, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPre(const V64Bit Dt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::strPre(const V128Bit Qt, const R64Bit Xn, const int32_t imm9)
constexpr uint32_t mini_jit::arm_instructions::str(const V8Bit Bt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::str(const V16Bit Ht, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::str(const V32Bit St, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::str(const V64Bit Dt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::str(const V128Bit Qt, const R64Bit Xn)
constexpr uint32_t mini_jit::arm_instructions::strOffset(const V8Bit Bt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::strOffset(const V16Bit Ht, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::strOffset(const V32Bit St, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::strOffset(const V64Bit Dt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::strOffset(const V128Bit Qt, const R64Bit Xn, const uint32_t imm12)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType8x8Bit, const VGeneral Vn, const VType8x8Bit, const VGeneral Vm, const VType8x8Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType16x8Bit, const VGeneral Vn, const VType16x8Bit, const VGeneral Vm, const VType16x8Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType4x16Bit, const VGeneral Vn, const VType4x16Bit, const VGeneral Vm, const VType4x16Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType8x16Bit, const VGeneral Vn, const VType8x16Bit, const VGeneral Vm, const VType8x16Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType2x32Bit, const VGeneral Vn, const VType2x32Bit, const VGeneral Vm, const VType2x32Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType4x32Bit, const VGeneral Vn, const VType4x32Bit, const VGeneral Vm, const VType4x32Bit)
constexpr uint32_t mini_jit::arm_instructions::trn1(const VGeneral Vd, const VType2x64Bit, const VGeneral Vn, const VType2x64Bit, const VGeneral Vm, const VType2x64Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType8x8Bit, const VGeneral Vn, const VType8x8Bit, const VGeneral Vm, const VType8x8Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType16x8Bit, const VGeneral Vn, const VType16x8Bit, const VGeneral Vm, const VType16x8Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType4x16Bit, const VGeneral Vn, const VType4x16Bit, const VGeneral Vm, const VType4x16Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType8x16Bit, const VGeneral Vn, const VType8x16Bit, const VGeneral Vm, const VType8x16Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType2x32Bit, const VGeneral Vn, const VType2x32Bit, const VGeneral Vm, const VType2x32Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType4x32Bit, const VGeneral Vn, const VType4x32Bit, const VGeneral Vm, const VType4x32Bit)
constexpr uint32_t mini_jit::arm_instructions::trn2(const VGeneral Vd, const VType2x64Bit, const VGeneral Vn, const VType2x64Bit, const VGeneral Vm, const VType2x64Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType8x8Bit, const VGeneral Vn, const VType8x8Bit, const VGeneral Vm, const VType8x8Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType16x8Bit, const VGeneral Vn, const VType16x8Bit, const VGeneral Vm, const VType16x8Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType4x16Bit, const VGeneral Vn, const VType4x16Bit, const VGeneral Vm, const VType4x16Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType8x16Bit, const VGeneral Vn, const VType8x16Bit, const VGeneral Vm, const VType8x16Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType2x32Bit, const VGeneral Vn, const VType2x32Bit, const VGeneral Vm, const VType2x32Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType4x32Bit, const VGeneral Vn, const VType4x32Bit, const VGeneral Vm, const VType4x32Bit)
constexpr uint32_t mini_jit::arm_instructions::zip1(const VGeneral Vd, const VType2x64Bit, const VGeneral Vn, const VType2x64Bit, const VGeneral Vm, const VType2x64Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType8x8Bit, const VGeneral Vn, const VType8x8Bit, const VGeneral Vm, const VType8x8Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType16x8Bit, const VGeneral Vn, const VType16x8Bit, const VGeneral Vm, const VType16x8Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType4x16Bit, const VGeneral Vn, const VType4x16Bit, const VGeneral Vm, const VType4x16Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType8x16Bit, const VGeneral Vn, const VType8x16Bit, const VGeneral Vm, const VType8x16Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType2x32Bit, const VGeneral Vn, const VType2x32Bit, const VGeneral Vm, const VType2x32Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType4x32Bit, const VGeneral Vn, const VType4x32Bit, const VGeneral Vm, const VType4x32Bit)
constexpr uint32_t mini_jit::arm_instructions::zip2(const VGeneral Vd, const VType2x64Bit, const VGeneral Vn, const VType2x64Bit, const VGeneral Vm, const VType2x64Bit)

internal

enum class mini_jit::arm_instructions::internal::addShiftType : uint32_t

Values:

enumerator DEFAULT
enumerator LSL
enumerator LSR
enumerator ASR
enum class mini_jit::arm_instructions::internal::orrShiftType : uint32_t

Values:

enumerator DEFAULT
enumerator LSL
enumerator LSR
enumerator ASR
enumerator ROR
enum class mini_jit::arm_instructions::internal::subShiftType : uint32_t

Values:

enumerator DEFAULT
enumerator LSL0
enumerator LSL12
enum class mini_jit::arm_instructions::internal::eorSimdTypes : uint32_t

Values:

enumerator t8B
enumerator t16b
enum class mini_jit::arm_instructions::internal::fmaxSzType : uint32_t

Values:

enumerator sz0
enumerator sz1
enum class mini_jit::arm_instructions::internal::fmaxQType : uint32_t

Values:

enumerator q0
enumerator q1
enum class mini_jit::arm_instructions::internal::fmaxFType : uint32_t

Values:

enumerator ftype00
enumerator ftype01
enumerator ftype11
enum class mini_jit::arm_instructions::internal::fmlaHalfPrecisionTypes

Values:

enumerator t4H
enumerator t8H
enum class mini_jit::arm_instructions::internal::fmlaSingleDoublePrecisionTypes

Values:

enumerator t2s
enumerator t4s
enumerator t2d
enum class mini_jit::arm_instructions::internal::ld1DataTypes

Values:

enumerator v8bit
enumerator v16bit
enumerator v32bit
enumerator v64bit
enum class mini_jit::arm_instructions::internal::ld1Types

Values:

enumerator t8b
enumerator t16b
enumerator t4h
enumerator t8h
enumerator t2s
enumerator t4s
enumerator t1d
enumerator t2d
enum class mini_jit::arm_instructions::internal::ldpSimdFpDataTypes : uint32_t

Values:

enumerator v32bit
enumerator v64bit
enumerator v128bit
enum class mini_jit::arm_instructions::internal::ldrSimdFpDataTypes : uint32_t

Values:

enumerator v8bit
enumerator v16bit
enumerator v32bit
enumerator v64bit
enumerator v128bit
enum class mini_jit::arm_instructions::internal::st1DataTypes

Values:

enumerator v8bit
enumerator v16bit
enumerator v32bit
enumerator v64bit
enum class mini_jit::arm_instructions::internal::st1Types

Values:

enumerator t8b
enumerator t16b
enumerator t4h
enumerator t8h
enumerator t2s
enumerator t4s
enumerator t1d
enumerator t2d
enum class mini_jit::arm_instructions::internal::stpSimdFpDataTypes : uint32_t

Values:

enumerator v32bit
enumerator v64bit
enumerator v128bit
enum class mini_jit::arm_instructions::internal::strSimdFpDataTypes : uint32_t

Values:

enumerator v8bit
enumerator v16bit
enumerator v32bit
enumerator v64bit
enumerator v128bit
enum class mini_jit::arm_instructions::internal::trn1SizeType : uint32_t

Values:

enumerator size00
enumerator size01
enumerator size10
enumerator size11
enum class mini_jit::arm_instructions::internal::trn1QType : uint32_t

Values:

enumerator q0
enumerator q1
enum class mini_jit::arm_instructions::internal::trn2SizeType : uint32_t

Values:

enumerator size00
enumerator size01
enumerator size10
enumerator size11
enum class mini_jit::arm_instructions::internal::trn2QType : uint32_t

Values:

enumerator q0
enumerator q1
enum class mini_jit::arm_instructions::internal::zip1SizeType : uint32_t

Values:

enumerator size00
enumerator size01
enumerator size10
enumerator size11
enum class mini_jit::arm_instructions::internal::zip1QType : uint32_t

Values:

enumerator q0
enumerator q1
enum class mini_jit::arm_instructions::internal::zip2SizeType : uint32_t

Values:

enumerator size00
enumerator size01
enumerator size10
enumerator size11
enum class mini_jit::arm_instructions::internal::zip2QType : uint32_t

Values:

enumerator q0
enumerator q1
const uint32_t mini_jit::arm_instructions::internal::ld1ImmediateRm = 0b11111
const uint32_t mini_jit::arm_instructions::internal::st1ImmediateRm = 0b11111
template<typename T>
constexpr addShiftType mini_jit::arm_instructions::internal::_addParseShiftType()
template<>
constexpr addShiftType mini_jit::arm_instructions::internal::_addParseShiftType<ShiftLSL>()
template<>
constexpr addShiftType mini_jit::arm_instructions::internal::_addParseShiftType<ShiftLSR>()
template<>
constexpr addShiftType mini_jit::arm_instructions::internal::_addParseShiftType<ShiftASR>()
constexpr uint32_t mini_jit::arm_instructions::internal::addShiftedRegister(const uint32_t Rd, const uint32_t Rn, const uint32_t Rm, const addShiftType shift, const uint32_t imm6, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::addImmediate(const uint32_t Rd, const uint32_t Rn, const uint32_t imm12, const bool shift12, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::cbnz(const uint32_t Rt, const int32_t imm19, bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::_ldpPostPreOffset(const uint32_t opcode, const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpPost(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpPre(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpOffset(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrImmediatePost(const uint32_t Rt, const uint32_t Rn, const int32_t imm9, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrImmediatePre(const uint32_t Rt, const uint32_t Rn, const int32_t imm9, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrImmediateOffset(const uint32_t Rt, const uint32_t Rn, const uint32_t imm12, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::lslImmediate(const uint32_t Rd, const uint32_t Rn, const uint32_t shift, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::madd(const uint32_t Rd, const uint32_t Rn, const uint32_t Rm, const uint32_t Ra, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::movn(const uint32_t Rd, const uint32_t imm16, const uint32_t shift, bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::movz(const uint32_t Rd, const uint32_t imm16, const uint32_t shift, bool is64bit)
template<typename T>
constexpr orrShiftType mini_jit::arm_instructions::internal::_orrParseShiftType()
template<>
constexpr orrShiftType mini_jit::arm_instructions::internal::_orrParseShiftType<ShiftLSL>()
template<>
constexpr orrShiftType mini_jit::arm_instructions::internal::_orrParseShiftType<ShiftLSR>()
template<>
constexpr orrShiftType mini_jit::arm_instructions::internal::_orrParseShiftType<ShiftASR>()
template<>
constexpr orrShiftType mini_jit::arm_instructions::internal::_orrParseShiftType<ShiftROR>()
constexpr uint32_t mini_jit::arm_instructions::internal::orrShiftedRegister(uint32_t Rd, uint32_t Rn, uint32_t Rm, orrShiftType shift, uint32_t imm6, bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::ret(const uint32_t Rn)
constexpr uint32_t mini_jit::arm_instructions::internal::_stpPostPreOffset(const uint32_t opcode, const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::stpPost(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::stpPre(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::stpOffset(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::subImmediate(const uint32_t Rd, const uint32_t Rn, const uint32_t imm12, const subShiftType shift, bool is64bit)
constexpr uint32_t mini_jit::arm_instructions::internal::eorVector(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const eorSimdTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::fmaxVector(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const fmaxSzType sz_type, const fmaxQType q_type)
constexpr uint32_t mini_jit::arm_instructions::internal::fmaxScalar(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const fmaxFType f_type)
template<typename T>
constexpr bool mini_jit::arm_instructions::internal::_fmlaIsDouble()
template<>
constexpr bool mini_jit::arm_instructions::internal::_fmlaIsDouble<VType2x32Bit>()
template<>
constexpr bool mini_jit::arm_instructions::internal::_fmlaIsDouble<VType4x32Bit>()
template<>
constexpr bool mini_jit::arm_instructions::internal::_fmlaIsDouble<VType2x64Bit>()
template<typename T>
constexpr fmlaSingleDoublePrecisionTypes mini_jit::arm_instructions::internal::_fmlaParseSingleDoubleType()
template<>
constexpr fmlaSingleDoublePrecisionTypes mini_jit::arm_instructions::internal::_fmlaParseSingleDoubleType<VType2x32Bit>()
template<>
constexpr fmlaSingleDoublePrecisionTypes mini_jit::arm_instructions::internal::_fmlaParseSingleDoubleType<VType4x32Bit>()
template<>
constexpr fmlaSingleDoublePrecisionTypes mini_jit::arm_instructions::internal::_fmlaParseSingleDoubleType<VType2x64Bit>()
constexpr uint32_t mini_jit::arm_instructions::internal::fmlaByElementScalarHalfPrecision(const uint32_t Hd, const uint32_t Hn, const uint32_t Vm, const uint32_t index)
constexpr uint32_t mini_jit::arm_instructions::internal::fmlaByElementScalarSingleDoublePrecision(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const uint32_t index, bool isDoublePrecision)
constexpr uint32_t mini_jit::arm_instructions::internal::fmlaByElementVectorHalfPrecision(const fmlaHalfPrecisionTypes T, const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const uint32_t index)
constexpr uint32_t mini_jit::arm_instructions::internal::fmlaByElementVectorSingleDoublePrecision(const fmlaSingleDoublePrecisionTypes T, const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const uint32_t index, bool isDoublePrecision)
template<typename T>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType8x8Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType16x8Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType4x16Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType8x16Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType2x32Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType4x32Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType1x64Bit>()
template<>
constexpr ld1Types mini_jit::arm_instructions::internal::_ld1ParseType<VType2x64Bit>()
constexpr void mini_jit::arm_instructions::internal::_ld1GetQAndSize(const ld1Types type, uint32_t &out_q, uint32_t &out_size)
constexpr uint32_t mini_jit::arm_instructions::internal::_ld1GetOpCode(uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::ld1MultipleStructures(const uint32_t Vt, const ld1Types Tt, const uint32_t Xn, const uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::ld1MultipleStructuresPost(const uint32_t Vt, const ld1Types Tt, const uint32_t Xn, const uint32_t imm, const uint32_t Xm, const uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::ld1SingleStructures(const uint32_t Vt, const ld1DataTypes type, const uint32_t index, const uint32_t Xn)
constexpr uint32_t mini_jit::arm_instructions::internal::ld1SingleStructuresPost(const uint32_t Vt, const ld1DataTypes type, const uint32_t index, const uint32_t Xn, const uint32_t imm, const uint32_t Xm)
constexpr uint32_t mini_jit::arm_instructions::internal::_ldpSimdFpPostPreOffset(const uint32_t opcode, const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const ldpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpPost(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const ldpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpPre(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const ldpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldpOffset(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const ldpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::_ldrSimdFpGetOpCode(const ldrSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrSimdFpImmediatePost(const uint32_t Vt, const uint32_t Rn, const int32_t imm9, const ldrSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrSimdFpImmediatePre(const uint32_t Vt, const uint32_t Rn, const int32_t imm9, const ldrSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::ldrSimdFpImmediateOffset(const uint32_t Vt, const uint32_t Rn, const uint32_t imm12, const ldrSimdFpDataTypes type)
template<typename T>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType8x8Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType16x8Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType4x16Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType8x16Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType2x32Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType4x32Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType1x64Bit>()
template<>
constexpr st1Types mini_jit::arm_instructions::internal::_st1ParseType<VType2x64Bit>()
constexpr void mini_jit::arm_instructions::internal::_st1GetQAndSize(const st1Types type, uint32_t &out_q, uint32_t &out_size)
constexpr uint32_t mini_jit::arm_instructions::internal::_st1GetOpCode(uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::st1MultipleStructures(const uint32_t Vt, const st1Types Tt, const uint32_t Xn, const uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::st1MultipleStructuresPost(const uint32_t Vt, const st1Types Tt, const uint32_t Xn, const uint32_t imm, const uint32_t Xm, const uint32_t registerAmount)
constexpr uint32_t mini_jit::arm_instructions::internal::st1SingleStructures(const uint32_t Vt, const st1DataTypes type, const uint32_t index, const uint32_t Xn)
constexpr uint32_t mini_jit::arm_instructions::internal::st1SingleStructuresPost(const uint32_t Vt, const st1DataTypes type, const uint32_t index, const uint32_t Xn, const uint32_t imm, const uint32_t Xm)
constexpr uint32_t mini_jit::arm_instructions::internal::_stpSimdFpPostPreOffset(const uint32_t opcode, const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const stpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::stpPost(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const stpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::stpPre(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const stpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::stpOffset(const uint32_t Rt1, const uint32_t Rt2, const uint32_t Rn, const int32_t imm7, const stpSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::_strSimdFpGetOpCode(const strSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::strSimdFpImmediatePost(const uint32_t Vt, const uint32_t Rn, const int32_t imm9, const strSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::strSimdFpImmediatePre(const uint32_t Vt, const uint32_t Rn, const int32_t imm9, const strSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::strSimdFpImmediateOffset(const uint32_t Vt, const uint32_t Rn, const uint32_t imm12, const strSimdFpDataTypes type)
constexpr uint32_t mini_jit::arm_instructions::internal::_trn1(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const trn1SizeType size_type, const trn1QType q_type)
constexpr uint32_t mini_jit::arm_instructions::internal::_trn2(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const trn2SizeType size_type, const trn2QType q_type)
constexpr uint32_t mini_jit::arm_instructions::internal::_zip1(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const zip1SizeType size_type, const zip1QType q_type)
constexpr uint32_t mini_jit::arm_instructions::internal::_zip2(const uint32_t Vd, const uint32_t Vn, const uint32_t Vm, const zip2SizeType size_type, const zip2QType q_type)

kernels

void mini_jit::kernels::br_matmul_16m_4n_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size)

Generates a 16*M x 4*N x k batch-reduce matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

void mini_jit::kernels::br_matmul_16m_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size, const uint32_t n_loop_rest, const bool use_init_and_end = true)

Generates a 16*M x (4*N + Rest) x k batch-reduce matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::br_matmul_16mRest_4n_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size, const uint32_t m_loop_rest, const bool use_init_and_end = true)

Generates a (16*M + Rest) x 4*N x k batch-reduce matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::br_matmul_16mRest_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size, const uint32_t m_loop_rest, const uint32_t n_loop_rest)

Generates a (16*M + Rest) x (4*N + Rest) x k batch-reduce matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

  • k_loop – The loops in the k dimensions.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

void mini_jit::kernels::br_matmul_lt16_4n_k(mini_jit::Kernel &kernel, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size, const uint32_t m_loop_rest, const bool use_init_and_end = true)

Generates a Rest x 4*N x k batch-reduce matmul kernel, where Rest (given by m_loop_rest) is less than 16.

Parameters:
  • kernel – The kernel to add instructions to.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::br_matmul_lt16_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t br_size, const uint32_t m_loop_rest, const uint32_t n_loop_rest)

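Generates a Rest_M x (4*N + Rest_N) x k batch-reduce matmul kernel, where Rest_M (given by m_loop_rest) is less than 16 and Rest_N is given by n_loop_rest.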

Parameters:
  • kernel – The kernel to add instructions to.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • br_size – The batch-reduce size, i.e. the number of A/B matrix pairs whose products are accumulated into C.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

void mini_jit::kernels::matmul_16_6_1(mini_jit::Kernel &kernel)

Generates a 16 x 6 x 1 matmul kernel.

Parameters:

kernel – The kernel to add instructions to.

void mini_jit::kernels::matmul_16_6_k(mini_jit::Kernel &kernel, const uint32_t k_loop)

Generates a 16 x 6 x k matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • k_loop – The loop over the k dimension.

void mini_jit::kernels::matmul_16m_4n_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop)

Generates a 16*M x 4*N x k matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

void mini_jit::kernels::matmul_16m_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t n_loop_rest, const bool use_init_and_end = true)

Generates a 16*M x (4*N + Rest) x k matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::matmul_16mRest_4n_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t m_loop_rest, const bool use_init_and_end = true)

Generates a (16*M + Rest) x 4*N x k matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::matmul_16mRest_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t m_loop_rest, const uint32_t n_loop_rest)

Generates a (16*M + Rest) x (4*N + Rest) x k matmul kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

void mini_jit::kernels::matmul_lt16_4n_k(mini_jit::Kernel &kernel, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t m_loop_rest, const bool use_init_and_end = true)

Generates a Rest x 4*N x k matmul kernel, where Rest (given by m_loop_rest) is less than 16.

Parameters:
  • kernel – The kernel to add instructions to.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.

void mini_jit::kernels::matmul_lt16_lt4nRest_k(mini_jit::Kernel &kernel, const uint32_t n_loop_4, const uint32_t k_loop, const uint32_t m_loop_rest, const uint32_t n_loop_rest)

Generates a Rest_M x (4*N + Rest_N) x k matmul kernel, where Rest_M (given by m_loop_rest) is less than 16 and Rest_N is given by n_loop_rest.

Parameters:
  • kernel – The kernel to add instructions to.

  • n_loop_4 – The repetitions of the n block of size 4.

  • k_loop – The loops in the k dimensions.

  • m_loop_rest – The rest/remainder of the m loop that is not divisible by 16.

  • n_loop_rest – The rest/remainder of the n loop that is not divisible by 4.

void mini_jit::kernels::unary_identity(mini_jit::Kernel &kernel, const uint32_t m_loop, const uint32_t n_loop)

Generates a M x N unary identity kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop – The repetitions of the m dimensions.

  • n_loop – The repetitions of the n dimensions.

void mini_jit::kernels::unary_identity_transpose(mini_jit::Kernel &kernel, const uint32_t m_loop, const uint32_t n_loop)

Generates a M x N unary identity transpose kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop – The repetitions of the m dimensions.

  • n_loop – The repetitions of the n dimensions.

void mini_jit::kernels::unary_relu(mini_jit::Kernel &kernel, const uint32_t m_loop, const uint32_t n_loop)

Generates an M x N unary ReLU kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop – The repetitions of the m dimensions.

  • n_loop – The repetitions of the n dimensions.

void mini_jit::kernels::unary_relu_transpose(mini_jit::Kernel &kernel, const uint32_t m_loop, const uint32_t n_loop)

Generates an M x N unary ReLU transpose kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop – The repetitions of the m dimensions.

  • n_loop – The repetitions of the n dimensions.

void mini_jit::kernels::unary_zero(mini_jit::Kernel &kernel, const uint32_t m_loop_4, const uint32_t n_loop, const uint32_t m_rest)

Generates a M x 4*N unary zero kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_4 – The repetitions of the m block of size 4.

  • n_loop – The repetitions of the n block of size 4.

  • m_rest – The remainder of the m repetitions.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to

void mini_jit::kernels::unary_zero_16m_n(mini_jit::Kernel &kernel, const uint32_t m_loop_16, const uint32_t n_loop, const bool use_init_and_end = true)

Generates a M x 4*N unary zero kernel.

Parameters:
  • kernel – The kernel to add instructions to.

  • m_loop_16 – The repetitions of the m block of size 16.

  • n_loop – The repetitions of the n block of size 4.

  • use_init_and_end – Indicates if the procedural call standard, initializing setup and the ret instruction are used. Defaults to true.