From b239c3304c66767077956140f8671299d0d8e144 Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Fri, 27 Dec 2024 15:17:39 +0900 Subject: [PATCH 1/4] [onert-micro] Enable UnidirectionalSequenceLSTM ONE-DCO-1.0-Signed-off-by: Chunseok Lee --- .../include/execute/OMRuntimeKernel.h | 2 +- .../kernels/UnidirectionalSequenceLSTM.h | 180 ++++++ .../PALUnidirectionalSequenceLSTMCommon.h | 597 ++++++++++++++++++ .../include/pal/mcu/KernelsToBuild.lst | 2 +- .../pal/mcu/PALUnidirectionalSequenceLSTM.h | 23 + .../kernels/UnidirectionalSequenceLSTM.cpp | 389 ++++++++++++ .../tests/UnidirectionalSequenceLSTM.test.cpp | 0 .../kernels/UnidirectionalSequenceLSTM.cpp | 49 ++ 8 files changed, 1240 insertions(+), 2 deletions(-) create mode 100644 onert-micro/onert-micro/include/execute/kernels/UnidirectionalSequenceLSTM.h create mode 100644 onert-micro/onert-micro/include/pal/common/PALUnidirectionalSequenceLSTMCommon.h create mode 100644 onert-micro/onert-micro/include/pal/mcu/PALUnidirectionalSequenceLSTM.h create mode 100644 onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp create mode 100644 onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp create mode 100644 onert-micro/onert-micro/src/import/kernels/UnidirectionalSequenceLSTM.cpp diff --git a/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h b/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h index e33239f7256..50814267d89 100644 --- a/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h +++ b/onert-micro/onert-micro/include/execute/OMRuntimeKernel.h @@ -23,7 +23,7 @@ #include -constexpr static uint32_t maxInputSize = 6; +constexpr static uint32_t maxInputSize = 24; // was 6, but lstm takes 24 inputs constexpr static uint32_t maxOutputSize = 5; namespace onert_micro diff --git a/onert-micro/onert-micro/include/execute/kernels/UnidirectionalSequenceLSTM.h b/onert-micro/onert-micro/include/execute/kernels/UnidirectionalSequenceLSTM.h new file mode 100644 index 00000000000..ff3672a62ea --- /dev/null +++ b/onert-micro/onert-micro/include/execute/kernels/UnidirectionalSequenceLSTM.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H +#define ONERT_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H + +#include "OMStatus.h" + +#include "core/OMUtils.h" +#include "core/OMKernelData.h" +#include "core/OMDataType.h" + +#include "execute/OMKernelExecutionBuilder.h" +#include "execute/OMUtils.h" +#include "execute/OMRuntimeKernel.h" + +using namespace onert_micro::core; +using namespace onert_micro::execute; + +namespace +{ + +int dim(const circle::Tensor *x, int index) +{ + onert_micro::core::OMRuntimeShape shape(x); + return shape.dims(index); +} + +int num_elements(const circle::Tensor *x) +{ + onert_micro::core::OMRuntimeShape shape(x); + return shape.flatSize(); +} + +int num_dims(const circle::Tensor *x) +{ + onert_micro::core::OMRuntimeShape shape(x); + return shape.dimensionsCount(); +} + +} // namespace +namespace onert_micro +{ +namespace lstm +{ + +struct LSTMStruct +{ + LSTMStruct() = delete; + LSTMStruct(const LSTMStruct &) = delete; + + explicit LSTMStruct(const OMExecuteArgs &execute_args) + { + core::OMRuntimeContext &runtime_context = execute_args.runtime_context; + core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage; + uint16_t op_index = execute_args.kernel_index; + + execute::OMRuntimeKernel runtime_kernel; + runtime_kernel.readKernel(op_index, runtime_context); + + for (int i; i < 24; i++) + { + internal_tensors[i] = runtime_kernel.inputs[i]; + } + + output_internal = runtime_kernel.outputs[0]; + + options = runtime_kernel.first_operator->builtin_options_as_UnidirectionalSequenceLSTMOptions(); + } + + void validateTensorTypes() + { + assert(input()->type() == (output_state()->type())); + assert(output()->type() == (input()->type())); + + for (int32_t i = 1; i < 9; ++i) + { + assert(internal_tensors[i] == nullptr or + (input_to_forget_weights()->type()) == (internal_tensors[i])->type()); + } + + for (int32_t i = 12; i < 16; ++i) + { + assert(internal_tensors[i] == nullptr or + (forget_gate_bias()->type()) == (internal_tensors[i]->type())); + } + } + + const circle::Tensor *input() { return internal_tensors[0]; }; + + const circle::Tensor *input_to_input_weights() { return internal_tensors[1]; }; + const circle::Tensor *input_to_forget_weights() { return internal_tensors[2]; }; + const circle::Tensor *input_to_cell_weights() { return internal_tensors[3]; }; + const circle::Tensor *input_to_output_weights() { return internal_tensors[4]; }; + + const circle::Tensor *recurrent_to_input_weights() { return internal_tensors[5]; }; + const circle::Tensor *recurrent_to_forget_weights() { return internal_tensors[6]; }; + const circle::Tensor *recurrent_to_cell_weights() { return internal_tensors[7]; }; + const circle::Tensor *recurrent_to_output_weights() { return internal_tensors[8]; }; + + const circle::Tensor *cell_to_input_weights() { return internal_tensors[9]; }; + const circle::Tensor *cell_to_forget_weights() { return internal_tensors[10]; }; + const circle::Tensor *cell_to_output_weights() { return internal_tensors[11]; }; + + const circle::Tensor *input_gate_bias() { return internal_tensors[12]; }; + const circle::Tensor *forget_gate_bias() { return internal_tensors[13]; }; + const circle::Tensor *cell_gate_bias() { return internal_tensors[14]; }; + const circle::Tensor *output_gate_bias() { return internal_tensors[15]; }; + + const circle::Tensor *projection_weights() { return internal_tensors[16]; }; + const circle::Tensor *projection_bias() { return internal_tensors[17]; }; + + const circle::Tensor *output_state() { return internal_tensors[18]; }; + const circle::Tensor *cell_state() { return internal_tensors[19]; }; + + const circle::Tensor *input_layer_norm_coefficients() { return internal_tensors[20]; }; + const circle::Tensor *forget_layer_norm_coefficients() { return internal_tensors[21]; }; + const circle::Tensor *cell_layer_norm_coefficients() { return internal_tensors[22]; }; + const circle::Tensor *output_layer_norm_coefficients() { return internal_tensors[23]; }; + const circle::Tensor *output() { return output_internal; }; + + const circle::UnidirectionalSequenceLSTMOptions *options; + + const circle::Tensor *get_internal_tensor(int i) { return internal_tensors[i]; } + +private: + const circle::Tensor *output_internal; + const circle::Tensor *internal_tensors[24]; +}; + +struct GateParameters +{ + FullyConnectedParams input_fc_params; + FullyConnectedParams recurrent_fc_params; +}; + +struct InterGateParameters +{ + BinaryArithmeticBroadcastParams forget_cell_mul_params; + BinaryArithmeticBroadcastParams input_mul_params; + BinaryArithmeticBroadcastParams output_mul_params; +}; + +struct CellStateInfo +{ + float cell_clip; + // clipping range for cell state only 16 bits cell is supported (could be + // generalized through templatation) + int16_t quantized_cell_clip; + // 2^-cell_state_scale_power = cell state scale, required by integer tanh + // computation + int32_t cell_state_scale_power; +}; + +struct LSTMParameters +{ + GateParameters forget_gate_parameters; + GateParameters input_gate_parameters; + GateParameters cell_gate_parameters; + GateParameters output_gate_parameters; + InterGateParameters inter_gate_parameters; +}; + +} // namespace lstm +} // namespace onert_micro + +#endif // ONERT_MICRO_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H diff --git a/onert-micro/onert-micro/include/pal/common/PALUnidirectionalSequenceLSTMCommon.h b/onert-micro/onert-micro/include/pal/common/PALUnidirectionalSequenceLSTMCommon.h new file mode 100644 index 00000000000..399b97984c5 --- /dev/null +++ b/onert-micro/onert-micro/include/pal/common/PALUnidirectionalSequenceLSTMCommon.h @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_PAL_COMMON_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H +#define ONERT_MICRO_PAL_COMMON_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H + +#include "core/OMUtils.h" +#include "execute/kernels/UnidirectionalSequenceLSTM.h" +#include "PALTanh.h" +#include "PALLogistic.h" +#include "PALFullyConnected.h" +#include "PALMul.h" +#include "PALUtils.h" + +using namespace onert_micro::core::utils; +using namespace onert_micro::execute::pal; + +namespace onert_micro +{ +namespace lstm_internal +{ +namespace +{ +// Possible fused activation functions. +typedef enum +{ + kTfLiteActNone = 0, + kTfLiteActRelu, + kTfLiteActReluN1To1, // min(max(-1, x), 1) + kTfLiteActRelu6, // min(max(0, x), 6) + kTfLiteActTanh, + kTfLiteActSignBit, + kTfLiteActSigmoid, +} FusedActivation; + +} // namespace + +// #ifndef DIS_QUANT + +// template +// void mulElementwise(int size, const ArithmeticQuantParams *params, const InputType *input1_data, +// const InputType *input2_data, OutputType *output_data) +// { +// for (int i = 0; i < size; ++i) +// { +// const int32_t input1_val = params->input1_offset + input1_data[i]; +// const int32_t input2_val = params->input2_offset + input2_data[i]; +// const int32_t unclamped_result = +// params->output_offset + multiplyByQuantizedMultiplier(input1_val * input2_val, +// params->output_multiplier, +// params->output_shift); +// const int32_t clamped_output = +// std::min(params->quantized_activation_max, +// std::max(params->quantized_activation_min, unclamped_result)); +// output_data[i] = static_cast(clamped_output); +// } +// } + +// // Input and output have the same shape in LSTM +// void mul(const onert_micro::RuntimeShape &shape, const ArithmeticQuantParams *params, +// const int16_t *input1_data, const int16_t *input2_data, int8_t *output_data) +// { +// return mulElementwise(shape.flatSize(), params, input1_data, input2_data, +// output_data); +// } + +// // Input and output have the same shape in LSTM +// void mul(const onert_micro::RuntimeShape &shape, const ArithmeticQuantParams *params, +// const int16_t *input1_data, const int16_t *input2_data, int16_t *output_data) +// { +// return mulElementwise(shape.flatSize(), params, input1_data, input2_data, output_data); +// } + +// void addElementWise(const int16_t *input_1, const int16_t *input_2, int n_batch, int n_input, +// int16_t *output) +// { +// for (int batch = 0; batch < n_batch; ++batch) +// { +// for (int i = 0; i < n_input; ++i) +// { +// const int index = batch * n_input + i; +// int32_t sum = input_1[index] + input_2[index]; +// const int32_t sum_clamped = +// std::min(static_cast(std::numeric_limits::max()), +// std::max(static_cast(std::numeric_limits::min()), sum)); +// output[index] = static_cast(sum_clamped); +// } +// } +// } + +// void tanh(int32_t cell_state_scale_power, const onert_micro::RuntimeShape &input_data_shape, +// int16_t *input_data, const onert_micro::RuntimeShape &output_data_shape, +// int16_t *output_data) +// { +// int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3; +// int32_t input_multiplier = 0; +// if (tanh_input_left_shift < 0) /* handling negative shift value */ +// { +// tanh_input_left_shift = -tanh_input_left_shift; +// input_multiplier = 3; +// } +// const int flat_size = input_data_shape.flatSize(); +// onert_micro_pal::Tanh(input_multiplier, tanh_input_left_shift, flat_size, input_data, +// output_data); +// } + +// void sigmoid(const onert_micro::RuntimeShape &data_shape, int16_t *data) +// { +// onert_micro_pal::Logistic(0, 0, data_shape.flatSize(), data, data); +// } + +// void clipping(const int v_size, const onert_micro::lstm::CellStateInfo *cell_state_info, +// int16_t *vector) +// { +// for (int i = 0; i < v_size; i++) +// { +// vector[i] = std::max(std::min(cell_state_info->quantized_cell_clip, vector[i]), +// static_cast(-cell_state_info->quantized_cell_clip)); +// } +// } +// #endif // DIS_QUANT + +#ifndef DIS_FLOAT +// Input and output have the same shape in LSTM +void mul(const onert_micro::core::OMRuntimeShape &shape, + const BinaryArithmeticBroadcastParams *params, const float *input1_data, + const float *input2_data, float *output_data) +{ + const int flat_size = shape.flatSize(); + onert_micro::execute::pal::Mul(*params, flat_size, input1_data, input2_data, output_data); +} + +void addElementWise(const float *input_1, const float *input_2, int n_batch, int n_input, + float *output) +{ + for (int batch = 0; batch < n_batch; ++batch) + { + for (int i = 0; i < n_input; ++i) + { + const int index = batch * n_input + i; + output[index] = input_1[index] + input_2[index]; + } + } +} + +void tanh(int32_t, const onert_micro::core::OMRuntimeShape &input_data_shape, float *input_data, + const onert_micro::core::OMRuntimeShape &output_data_shape, float *output_data) +{ + onert_micro::execute::pal::Tanh(input_data_shape, input_data, output_data_shape, output_data); +} + +void sigmoid(const onert_micro::core::OMRuntimeShape &data_shape, float *data) +{ + const int flat_size = data_shape.flatSize(); + onert_micro::execute::pal::Logistic(flat_size, data, data); +} + +void clipping(const int v_size, const onert_micro::lstm::CellStateInfo *cell_state_info, + float *vector) +{ + for (int i = 0; i < v_size; i++) + { + vector[i] = + std::max(std::min(cell_state_info->cell_clip, vector[i]), -cell_state_info->cell_clip); + } +} +#endif // DIS_FLOAT + +// Size information about the LSTM kernel, which is deduced from tensors stored +// in the flat buffer file. +struct LstmSizeInfo +{ + bool time_major; + int32_t batch_size; + int32_t time_steps; + int32_t input_dimension; + int32_t state_dimension; +}; + +class LstmStepManager +{ +public: + LstmStepManager() = delete; + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. + explicit LstmStepManager(const LstmSizeInfo &size_info) : size_info_(size_info) {} + + void updateTime() + { + current_time_ += 1; + // default as one batch per inference + int input_step = size_info_.input_dimension; + int output_step = size_info_.state_dimension; + // time major: batch inference + if (size_info_.time_major) + { + input_step = input_step * size_info_.batch_size; + output_step = output_step * size_info_.batch_size; + } + + input_offset_ += input_step; + output_offset_ += output_step; + } + + void updateBatch() + { + current_batch_ += 1; + // batch inference for time major: no action needed + if (size_info_.time_major) + { + return; + } + // otherwise: singe batch inference, go to the next batch + hidden_state_offset_ += size_info_.state_dimension; + cell_state_offset_ += size_info_.state_dimension; + } + + void resetTime() { current_time_ = 0; } + + onert_micro::core::OMRuntimeShape inputShape() const + { + int batch_size = 1; + if (size_info_.time_major) + { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.input_dimension}; + const int32_t *dims_data = reinterpret_cast(dims); + return onert_micro::core::OMRuntimeShape(2, dims_data); + } + + onert_micro::core::OMRuntimeShape stateShape() const + { + int batch_size = 1; + if (size_info_.time_major) + { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.state_dimension}; + const int32_t *dims_data = reinterpret_cast(dims); + return onert_micro::core::OMRuntimeShape(2, dims_data); + } + + int inputOffset() const { return input_offset_; } + + int outputOffset() const { return output_offset_; } + + int hiddenStateOffset() const { return hidden_state_offset_; } + + int cellStateOffset() const { return cell_state_offset_; } + +private: + int32_t current_time_ = 0; + int32_t current_batch_ = 0; + int32_t input_offset_ = 0; + int32_t output_offset_ = 0; + int32_t hidden_state_offset_ = 0; + int32_t cell_state_offset_ = 0; + + const LstmSizeInfo &size_info_; +}; + +// Calculates a single LSTM gate. +// Implements the following formula: +// gate = activate(FC(input) + FC(recurrent)) +// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) +template +void calculateLstmGate(const LstmStepManager *step_info, + const onert_micro::lstm::GateParameters *gate_params, + // Input FC + ActivationType *input_data, const WeightType *input_weight_data, + const OMRuntimeShape &input_weight_shape, const BiasType *input_bias_data, + // Recurrent FC + ActivationType *recurrent_data, const WeightType *recurrent_weight_data, + const OMRuntimeShape &recurrent_weight_shape, + const BiasType *recurrent_bias_data, + // Output + CellType *gate_output, + // Scratch arrays + CellType *fc_output_buffer, const FusedActivation activation) +{ + // Input FC + const auto gate_output_shape = step_info->stateShape(); + { + FullyConnectedParams op_params{}; + // op_params.input_offset = gate_params->input_fc_params.input_offset; + // op_params.weights_offset = gate_params->input_fc_params.weights_offset; + // op_params.output_offset = gate_params->input_fc_params.output_offset; + // op_params.output_multiplier = gate_params->input_fc_params.output_multiplier; + // op_params.output_shift = gate_params->input_fc_params.output_shift; + // op_params.quantized_activation_min = gate_params->input_fc_params.quantized_activation_min; + // op_params.quantized_activation_max = gate_params->input_fc_params.quantized_activation_max; + op_params.float_activation_max = gate_params->input_fc_params.float_activation_max; + op_params.float_activation_min = gate_params->input_fc_params.float_activation_min; + + FullyConnected(op_params, input_data + step_info->inputOffset(), input_weight_shape, + input_weight_data, input_bias_data, gate_output_shape, gate_output); + } + + // Recurrent FC + { + FullyConnectedParams op_params{}; + // op_params.input_offset = gate_params->recurrent_fc_params.input_offset; + // op_params.weights_offset = gate_params->recurrent_fc_params.weights_offset; + // op_params.output_offset = gate_params->recurrent_fc_params.output_offset; + // op_params.output_multiplier = gate_params->recurrent_fc_params.output_multiplier; + // op_params.output_shift = gate_params->recurrent_fc_params.output_shift; + // op_params.quantized_activation_min = + // gate_params->recurrent_fc_params.quantized_activation_min; op_params.quantized_activation_max + // = gate_params->recurrent_fc_params.quantized_activation_max; + op_params.float_activation_max = gate_params->recurrent_fc_params.float_activation_max; + op_params.float_activation_min = gate_params->recurrent_fc_params.float_activation_min; + + FullyConnected(op_params, recurrent_data + step_info->hiddenStateOffset(), + recurrent_weight_shape, recurrent_weight_data, recurrent_bias_data, + gate_output_shape, fc_output_buffer); + + addElementWise(gate_output, fc_output_buffer, /*n_batch=*/gate_output_shape.dimsData()[0], + /*n_state=*/gate_output_shape.dimsData()[1], gate_output); + + switch (activation) + { + case FusedActivation::kTfLiteActSigmoid: + sigmoid(gate_output_shape, gate_output); + break; + case FusedActivation::kTfLiteActTanh: + { + // Set the scale power to -12 to avoid shift + tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, gate_output_shape, + gate_output); + } + break; + default: + // Only Sigmoid or Tanh is used. + assert(false && "Only Sigmoid or Tanh is used"); + } + } +} + +// Update the hidden state of the LSTM kernel using the following formula: +// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means +// element wise multiplication +template +void updateLstmHidden(const LstmStepManager *step_info, CellType *cell_state_data_base, + ActivationType *hidden_state_data, const CellType *output_gate_output, + const BinaryArithmeticBroadcastParams *mul_params, + int32_t cell_state_scale_power, CellType *buffer) +{ + auto cell_state_shape = step_info->stateShape(); + CellType *cell_state_data = cell_state_data_base + step_info->cellStateOffset(); + // Tanh(cell_state) + tanh(cell_state_scale_power, cell_state_shape, cell_state_data, cell_state_shape, buffer); + // Update the hidden state + mul(cell_state_shape, mul_params, buffer, output_gate_output, + hidden_state_data + step_info->hiddenStateOffset()); +} + +// Update the cell state using the output from the forget gate, input gate, and +// cell gate Formula: updated_cell_state = forget_gate_output*cell_state + +// input_gate_output * cell_gate_output, where * denotes element wise +// multiplication +template +void updateLstmCell(const LstmStepManager *step_info, CellType *cell_state_data, + // Gate outputs + CellType *forget_gate_output, const CellType *input_gate_output, + const CellType *cell_gate_output, + // Mul parameters + const BinaryArithmeticBroadcastParams &forget_cell_mul_params, + const BinaryArithmeticBroadcastParams &input_mul_params, + const onert_micro::lstm::CellStateInfo *cell_state_info, CellType *buffer) +{ + auto cell_state_shape = step_info->stateShape(); + // Forget Gate x Cell State + mul(cell_state_shape, &forget_cell_mul_params, forget_gate_output, + cell_state_data + step_info->cellStateOffset(), + cell_state_data + step_info->cellStateOffset()); + // Input Gate x Cell Gate + mul(cell_state_shape, &input_mul_params, input_gate_output, cell_gate_output, buffer); + + // Update the cell state + addElementWise(cell_state_data + step_info->cellStateOffset(), buffer, + /*n_batch=*/cell_state_shape.dimsData()[0], + /*n_state=*/cell_state_shape.dimsData()[1], + cell_state_data + step_info->cellStateOffset()); + + if (cell_state_info->cell_clip > 0) + { + clipping(cell_state_shape.flatSize(), cell_state_info, + cell_state_data + step_info->cellStateOffset()); + } +} + +template +void lstmStep(onert_micro::lstm::LSTMStruct *lstm_struct, + onert_micro::lstm::LSTMParameters *lstm_params, LstmStepManager *step_info, + onert_micro::lstm::CellStateInfo *cell_state_info, ActivationType *output_state_data, + CellType *cell_state_data, CellType *scratch0, CellType *scratch1, CellType *scratch2, + CellType *scratch3, const OMExecuteArgs &execute_args) +{ + core::OMRuntimeContext &runtime_context = execute_args.runtime_context; + core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage; + uint16_t op_index = execute_args.kernel_index; + execute::OMRuntimeKernel runtime_kernel; + runtime_kernel.readKernel(op_index, runtime_context); + runtime_kernel.getDataFromStorage(op_index, runtime_storage, runtime_context); + + const WeightType *input_to_forget_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[2]); + const OMRuntimeShape input_to_forget_weights_shape(runtime_kernel.inputs[2]); + const BiasType *forget_gate_bias_data = + core::utils::castInputData(runtime_kernel.inputs_data[13]); + const WeightType *recurrent_to_forget_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[6]); + const OMRuntimeShape recurrent_to_forget_weights_shape(runtime_kernel.inputs[6]); + + /*Step1: Calculate gate outputs to prepare cell state update*/ + CellType *gate_internal_buffer = scratch3; + CellType *forget_gate_output = scratch0; + + ActivationType *input_data = + core::utils::castOutputData(runtime_kernel.inputs_data[0]); + + calculateLstmGate( + step_info, &lstm_params->forget_gate_parameters, + // Input FC + input_data, input_to_forget_weights_data, input_to_forget_weights_shape, forget_gate_bias_data, + // Recurrent FC + output_state_data, recurrent_to_forget_weights_data, recurrent_to_forget_weights_shape, nullptr, + // Output + forget_gate_output, gate_internal_buffer, FusedActivation::kTfLiteActSigmoid); + + // Input Gate calculation; + const WeightType *input_to_input_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[1]); + const OMRuntimeShape input_to_input_weights_shape(runtime_kernel.inputs[1]); + const BiasType *input_gate_bias_data = + core::utils::castInputData(runtime_kernel.inputs_data[12]); + const WeightType *recurrent_to_input_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[5]); + const OMRuntimeShape recurrent_to_input_weights_shape(runtime_kernel.inputs[5]); + + CellType *input_gate_output = scratch1; + calculateLstmGate( + step_info, &lstm_params->input_gate_parameters, + // Input FC + input_data, input_to_input_weights_data, input_to_input_weights_shape, input_gate_bias_data, + // Recurrent FC + output_state_data, recurrent_to_input_weights_data, recurrent_to_input_weights_shape, + /*recurrent_bias*/ nullptr, + // Output + input_gate_output, + // Scratch arrays + gate_internal_buffer, FusedActivation::kTfLiteActSigmoid); + + // Cell Gate calculation + const WeightType *input_to_cell_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[3]); + const OMRuntimeShape input_to_cell_weights_shape(runtime_kernel.inputs[3]); + const BiasType *cell_gate_bias_data = + core::utils::castInputData(runtime_kernel.inputs_data[14]); + const WeightType *recurrent_to_cell_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[7]); + const OMRuntimeShape recurrent_to_cell_weights_shape(runtime_kernel.inputs[7]); + + CellType *cell_gate_output = scratch2; + calculateLstmGate( + step_info, &lstm_params->cell_gate_parameters, + // Input FC + input_data, input_to_cell_weights_data, input_to_cell_weights_shape, cell_gate_bias_data, + // Recurrent FC + output_state_data, recurrent_to_cell_weights_data, recurrent_to_cell_weights_shape, + /*recurrent_bias*/ nullptr, + // Output + cell_gate_output, + // Scratch arrays + gate_internal_buffer, FusedActivation::kTfLiteActTanh); + + /*Step2: update the cell state */ + { + // const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters; + CellType *updated_input_buffer = scratch1; // reuse buffer + + updateLstmCell( + step_info, cell_state_data, forget_gate_output, input_gate_output, cell_gate_output, + lstm_params->inter_gate_parameters.forget_cell_mul_params, + lstm_params->inter_gate_parameters.input_mul_params, cell_state_info, updated_input_buffer); + } + + { + /*Step3: update the hidden state */ + const WeightType *input_to_output_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[4]); + const OMRuntimeShape input_to_output_weights_shape(runtime_kernel.inputs[4]); + const BiasType *output_gate_bias_data = + core::utils::castInputData(runtime_kernel.inputs_data[15]); + const WeightType *recurrent_to_output_weights_data = + core::utils::castInputData(runtime_kernel.inputs_data[8]); + const OMRuntimeShape recurrent_to_output_weights_shape(runtime_kernel.inputs[8]); + + CellType *output_gate_output = scratch1; // reuse buffer + calculateLstmGate( + step_info, &lstm_params->output_gate_parameters, + // Input FC + input_data, input_to_output_weights_data, input_to_output_weights_shape, + output_gate_bias_data, + // Recurrent FC + output_state_data, recurrent_to_output_weights_data, recurrent_to_output_weights_shape, + nullptr, + // Output + output_gate_output, + // Scratch arrays + gate_internal_buffer, FusedActivation::kTfLiteActSigmoid); + CellType *tanh_activated_cell_buffer = scratch0; // reuse buffer + updateLstmHidden( + step_info, cell_state_data, output_state_data, output_gate_output, + &lstm_params->inter_gate_parameters.output_mul_params, + cell_state_info->cell_state_scale_power, tanh_activated_cell_buffer); + + ActivationType *output_ptr = + core::utils::castOutputData(runtime_kernel.outputs_data[0]); + std::memcpy(output_ptr + step_info->outputOffset(), + output_state_data + step_info->hiddenStateOffset(), + step_info->stateShape().flatSize() * sizeof(ActivationType)); + } +} + +} // namespace lstm_internal + +// Evaluate the LSTM kernel with (potential) multi-steps and multi-batch input +template +void evalLSTM(onert_micro::lstm::LSTMStruct *lstm_struct, + onert_micro::lstm::LSTMParameters *lstm_params, + onert_micro::lstm::CellStateInfo *cell_state_info, ActivationType *output_state_data, + CellType *cell_state_data, CellType *scratch0, CellType *scratch1, CellType *scratch2, + CellType *scratch3, const OMExecuteArgs &execute_args) +{ + lstm_internal::LstmSizeInfo size_info; + + size_info.time_major = lstm_struct->options->time_major(); + size_info.batch_size = + size_info.time_major ? dim(lstm_struct->input(), 1) : dim(lstm_struct->input(), 0); + size_info.time_steps = + size_info.time_major ? dim(lstm_struct->input(), 0) : dim(lstm_struct->input(), 1); + size_info.input_dimension = dim(lstm_struct->input(), 2); + size_info.state_dimension = dim(lstm_struct->output_state(), 1); + + lstm_internal::LstmStepManager step_info(size_info); + + // time is the first dimention, enable batch computation + if (size_info.time_major) + { + for (int t = 0; t < size_info.time_steps; t++) + { + lstm_internal::lstmStep( + lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data, + scratch0, scratch1, scratch2, scratch3, execute_args); + // prepare for the next time step + step_info.updateTime(); + } + } + else + { + // batch first, unable to size the input data. single batch inference + for (int b = 0; b < size_info.batch_size; b++) + { + for (int t = 0; t < size_info.time_steps; t++) + { + lstm_internal::lstmStep( + lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data, + scratch0, scratch1, scratch2, scratch3, execute_args); + // prepare for the next time step + step_info.updateTime(); + } + // prepare for the next batch + step_info.updateBatch(); + step_info.resetTime(); + } + } +} + +} // namespace onert_micro + +#endif // ONERT_MICRO_PAL_COMMON_UNIDIRECTIONAL_SEQUENCE_LSTM_COMMON_H diff --git a/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst b/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst index ecbe79f5289..3036858b92a 100644 --- a/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst +++ b/onert-micro/onert-micro/include/pal/mcu/KernelsToBuild.lst @@ -78,7 +78,7 @@ REGISTER_KERNEL(SOFTMAX, Softmax) #/*REGISTER_KERNEL(SELECT_V2, SelectV2)*/ REGISTER_KERNEL(SVDF, SVDF) REGISTER_KERNEL(WHILE, While) -#/*REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)*/ +REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM) #/*REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)*/ #/*REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)*/ REGISTER_KERNEL(RSQRT, Rsqrt) diff --git a/onert-micro/onert-micro/include/pal/mcu/PALUnidirectionalSequenceLSTM.h b/onert-micro/onert-micro/include/pal/mcu/PALUnidirectionalSequenceLSTM.h new file mode 100644 index 00000000000..474df3f87f2 --- /dev/null +++ b/onert-micro/onert-micro/include/pal/mcu/PALUnidirectionalSequenceLSTM.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_PAL_MCU_UNIDIRECTIONAL_SEQUENCE_LSTM_H +#define ONERT_MICRO_PAL_MCU_UNIDIRECTIONAL_SEQUENCE_LSTM_H + +#include "PALUnidirectionalSequenceLSTMCommon.h" + +#endif diff --git a/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp new file mode 100644 index 00000000000..f36090b16e8 --- /dev/null +++ b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OMStatus.h" + +#include "core/OMUtils.h" +#include "core/OMKernelData.h" +#include "core/OMDataType.h" + +#include "execute/OMKernelExecutionBuilder.h" +#include "execute/OMUtils.h" +#include "execute/OMRuntimeKernel.h" + +#include "execute/kernels/UnidirectionalSequenceLSTM.h" +#include "PALUnidirectionalSequenceLSTM.h" + +using namespace onert_micro; +using namespace onert_micro::core; +using namespace onert_micro::core::utils; +using namespace onert_micro::execute; + +namespace onert_micro +{ +namespace +{ + +bool checkedLog2(const float x, int *log2_result) +{ + // Using TfLiteRound instead of std::round and std::log instead of + // std::log2 to work around these functions being missing in a toolchain + // used in some TensorFlow tests as of May 2018. + const float x_log2 = std::log(x) * (1.0f / std::log(2.0f)); + const float x_log2_rounded = std::round(x_log2); + const float x_log2_fracpart = x_log2 - x_log2_rounded; + + *log2_result = static_cast(x_log2_rounded); + return std::abs(x_log2_fracpart) < 1e-3f; +} + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. Thus +// output may require zero point +// onert_micro::core::BinaryArithmeticBroadcastParams +// createInterGateParams(const float input1_scale, const float input2_scale, const float +// output_scale, +// const OMDataType output_type, const int output_zp) +// { +// onert_micro::core::BinaryArithmeticBroadcastParams op_params; +// if (output_type == OMDataType::S16) +// { +// op_params.quantized_activation_min = std::numeric_limits::min(); +// op_params.quantized_activation_max = std::numeric_limits::max(); +// } +// else if (output_type == OMDataType::S8) +// { +// op_params.quantized_activation_min = std::numeric_limits::min(); +// op_params.quantized_activation_max = std::numeric_limits::max(); +// } + +// op_params.input1_offset = 0; // symmetric +// op_params.input2_offset = 0; // symmetric +// op_params.output_offset = output_zp; + +// const double input_product_scale = +// static_cast(input1_scale) * static_cast(input2_scale); +// double effective_scale = input_product_scale / static_cast(output_scale); +// auto output_shift = static_cast(op_params.output_shift); +// onert_micro::execute::quantizeMultiplier(effective_scale, &op_params.output_multiplier, +// &output_shift); +// op_params.output_shift = output_shift; +// return op_params; +// } + +// void createGateParams(const circle::Tensor *input, const circle::Tensor *input_weight, +// const circle::Tensor *input_bias, const circle::Tensor *hidden_state, +// const circle::Tensor *hidden_state_weight, +// const float nonlinear_activation_input_scale, const OMDataType cell_type, +// lstm::GateParameters *gate_params) +// { +// // Input CalculateOpDataFullyConnected +// { +// FullyConnectedParams input_gate_params; +// double real_multiplier = 0.0; +// int output_shift; +// int32_t output_activation_min; +// int32_t output_activation_max; +// int32_t output_multiplier; +// float input_scale = input->quantization()->scale()->data()[0]; +// float input_weight_scale = input_weight->quantization()->scale()->data()[0]; +// real_multiplier = getQuantizedConvolutionMultipler( +// input_scale, input_weight_scale, nonlinear_activation_input_scale); +// quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); +// calculateActivationRangeQuantized(FusedActFunc::NONE, 0, nonlinear_activation_input_scale, +// cell_type, &output_activation_min, &output_activation_max); + +// input_gate_params.output_shift = output_shift; +// input_gate_params.output_multiplier = output_multiplier; +// input_gate_params.quantized_activation_max = output_activation_max; +// input_gate_params.quantized_activation_min = output_activation_min; +// input_gate_params.input_offset = (-1)*(input->quantization()->zero_point()->data()[0]); +// input_gate_params.weights_offset = +// (-1)*(input_weight->quantization()->zero_point()->data()[0]); input_gate_params.output_offset +// = 0; + +// gate_params->input_fc_params = input_gate_params; +// } + +// // Recurrent CalculateOpDataFullyConnected +// { +// FullyConnectedParams recurrent_gate_params; +// double real_multiplier = 0.0; +// int output_shift; +// int32_t output_activation_min; +// int32_t output_activation_max; +// int32_t output_multiplier; +// float hidden_state_scale = hidden_state->quantization()->scale()->data()[0]; +// float hidden_state_weight_scale = hidden_state_weight->quantization()->scale()->data()[0]; +// float hidden_state_zeropoint = hidden_state->quantization()->zero_point()->data()[0]; +// float hidden_state_weight_zeropoint = +// hidden_state_weight->quantization()->zero_point()->data()[0]; real_multiplier = +// getQuantizedConvolutionMultipler(hidden_state_scale, +// hidden_state_weight_scale, +// nonlinear_activation_input_scale); +// quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); +// calculateActivationRangeQuantized(FusedActFunc::NONE, 0, nonlinear_activation_input_scale, +// cell_type, &output_activation_min, &output_activation_max); + +// recurrent_gate_params.output_shift = output_shift; +// recurrent_gate_params.output_multiplier = output_multiplier; +// recurrent_gate_params.quantized_activation_max = output_activation_max; +// recurrent_gate_params.quantized_activation_min = output_activation_min; +// recurrent_gate_params.input_offset = -(hidden_state_zeropoint); +// recurrent_gate_params.weights_offset = -(hidden_state_weight_zeropoint); +// recurrent_gate_params.output_offset = 0; + +// gate_params->recurrent_fc_params = recurrent_gate_params; +// } +// } + +// void prepareGateParamsInteger(lstm::LSTMStruct *lstm_struct, +// lstm::LSTMParameters *quant_lstm_params) +// { +// float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15 + +// createGateParams(lstm_struct->input(), lstm_struct->input_to_forget_weights(), +// lstm_struct->forget_gate_bias(), lstm_struct->output_state(), +// lstm_struct->recurrent_to_forget_weights(), nonlinear_input_scale, +// OMDataType::S16, &quant_lstm_params->forget_gate_parameters); + +// createGateParams(lstm_struct->input(), lstm_struct->input_to_input_weights(), +// lstm_struct->input_gate_bias(), lstm_struct->output_state(), +// lstm_struct->recurrent_to_input_weights(), nonlinear_input_scale, +// OMDataType::S16, &quant_lstm_params->input_gate_parameters); + +// // lstm::GateParameters cell_gate_parameters; +// createGateParams(lstm_struct->input(), lstm_struct->input_to_cell_weights(), +// lstm_struct->cell_gate_bias(), lstm_struct->output_state(), +// lstm_struct->recurrent_to_cell_weights(), nonlinear_input_scale, +// OMDataType::S16, &quant_lstm_params->cell_gate_parameters); + +// // lstm::GateParameters output_gate_parameters; +// createGateParams(lstm_struct->input(), lstm_struct->input_to_output_weights(), +// lstm_struct->output_gate_bias(), lstm_struct->output_state(), +// lstm_struct->recurrent_to_output_weights(), nonlinear_input_scale, +// OMDataType::S16, &quant_lstm_params->output_gate_parameters); + +// // Inter gate multiplication parameters +// float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15 +// float cell_state_scale = +// Tensor::scale(lstm_struct->cell_state()); // lstm_tensors.CellStateTensor()->params.scale; +// // forget gate output (nonlinear output) x cell state -> cell state +// quant_lstm_params->inter_gate_parameters.forget_cell_mul_params = createInterGateParams( +// nonlinear_output_scale, cell_state_scale, cell_state_scale, OMDataType::S16, 0); + +// // input gate output x cell gate output -> cell state +// quant_lstm_params->inter_gate_parameters.input_mul_params = createInterGateParams( +// nonlinear_output_scale, nonlinear_output_scale, cell_state_scale, OMDataType::S16, 0); + +// // tanh output x output gate output -> hidden state (potentially asymmetric) +// quant_lstm_params->inter_gate_parameters.output_mul_params = createInterGateParams( +// nonlinear_output_scale, nonlinear_output_scale, Tensor::scale(lstm_struct->output_state()), +// Tensor::element_type(lstm_struct->output_state()), +// Tensor::zero_point(lstm_struct->output_state())); +// } + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +lstm::CellStateInfo createLstmCellStateInfo(const float cell_state_scale, const float cell_clip) +{ + lstm::CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + checkedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast(std::min( + std::max(static_cast(cell_clip) / static_cast(cell_state_scale), -32768.0), + 32767.0)); + return cell_state_info; +} + +#ifndef DIS_FLOAT +FullyConnectedParams createFcParamsFloat() +{ + FullyConnectedParams op_params; + calculateActivationRange(circle::ActivationFunctionType::ActivationFunctionType_NONE, + &op_params.float_activation_min, &op_params.float_activation_max); + op_params.quantized_activation_max = op_params.float_activation_max; + op_params.quantized_activation_min = op_params.float_activation_min; + return op_params; +} + +lstm::GateParameters createGateParamsFloat() +{ + lstm::GateParameters gate_params; + + gate_params.input_fc_params = createFcParamsFloat(); + gate_params.recurrent_fc_params = createFcParamsFloat(); + + return gate_params; +} + +lstm::CellStateInfo createLstmCellStateInfoFloat(const float cell_clip) +{ + lstm::CellStateInfo cell_state_info; + cell_state_info.cell_clip = cell_clip; + cell_state_info.cell_state_scale_power = 0; // no quantization + cell_state_info.quantized_cell_clip = 0; // no quantization + return cell_state_info; +} + +void prepareGateParamsFloat(lstm::LSTMParameters *float_lstm_params) +{ + // Gate Parameters + float_lstm_params->forget_gate_parameters = createGateParamsFloat(); + float_lstm_params->input_gate_parameters = createGateParamsFloat(); + float_lstm_params->cell_gate_parameters = createGateParamsFloat(); + float_lstm_params->output_gate_parameters = createGateParamsFloat(); + + // Inter gate multiplication parameters + BinaryArithmeticBroadcastParams op_params; + calculateActivationRange(circle::ActivationFunctionType::ActivationFunctionType_NONE, + &op_params.float_activation_min, &op_params.float_activation_max); + // op_params.quantized_activation_max = op_params.float_activation_max; + // op_params.quantized_activation_min = op_params.float_activation_min; + float_lstm_params->inter_gate_parameters.forget_cell_mul_params = op_params; + float_lstm_params->inter_gate_parameters.input_mul_params = op_params; + float_lstm_params->inter_gate_parameters.output_mul_params = op_params; +} + +void evalFloat(const OMExecuteArgs &execute_args) +{ + lstm::LSTMStruct lstm_struct(execute_args); + + lstm::CellStateInfo cell_state_info = + createLstmCellStateInfoFloat(lstm_struct.options->cell_clip()); + + lstm::LSTMParameters lstm_params; + prepareGateParamsFloat(&lstm_params); + + const bool time_major = lstm_struct.options->time_major(); + const auto batch_size = time_major ? dim(lstm_struct.input(), 1) : dim(lstm_struct.input(), 0); + const auto state_dimension = dim(lstm_struct.output_state(), 1); + const auto cell_state_type_size = + getOMDataTypeSize(onertMicroDatatype(lstm_struct.cell_state()->type())); + + auto scratch_0_data = + std::make_unique(batch_size * state_dimension * cell_state_type_size); + auto scratch_1_data = + std::make_unique(batch_size * state_dimension * cell_state_type_size); + auto scratch_2_data = + std::make_unique(batch_size * state_dimension * cell_state_type_size); + auto scratch_3_data = + std::make_unique(batch_size * state_dimension * cell_state_type_size); + + // Create and fill with 0 output state tensor + auto output_state_data = std::make_unique(num_elements(lstm_struct.output_state())); + std::fill_n(output_state_data.get(), num_elements(lstm_struct.output_state()), 0); + + // Create and fill with 0 cell state tensor + auto cell_state_data = std::make_unique(num_elements(lstm_struct.cell_state())); + std::fill_n(cell_state_data.get(), num_elements(lstm_struct.cell_state()), 0); + + evalLSTM( + &lstm_struct, &lstm_params, &cell_state_info, output_state_data.get(), cell_state_data.get(), + core::utils::castOutputData(scratch_0_data.get()), + core::utils::castOutputData(scratch_1_data.get()), + core::utils::castOutputData(scratch_2_data.get()), + core::utils::castOutputData(scratch_3_data.get()), execute_args); +} +#endif // DIS_FLOAT + +void validateWeightTensorSize(const circle::Tensor *weight_tensor, int dim1_size, int dim2_size) +{ + assert(num_dims(weight_tensor) == 2); + assert(dim(weight_tensor, 0) == dim1_size); + assert(dim(weight_tensor, 1) == dim2_size); +} + +void validateTensorsSize(lstm::LSTMStruct *lstm_struct, const bool time_major) +{ + const auto batch_size = time_major ? dim(lstm_struct->input(), 1) : dim(lstm_struct->input(), 0); + + const auto input_dimension = dim(lstm_struct->input(), 2); + const auto state_dimension = dim(lstm_struct->output_state(), 1); + + // Input FC weights + for (int32_t i = 1; i < 5; i++) + { + validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, input_dimension); + } + + // Recurrent FC weights + for (int32_t i = 5; i < 9; i++) + { + validateWeightTensorSize(lstm_struct->get_internal_tensor(i), state_dimension, state_dimension); + } + + // Biases + for (int32_t i = 12; i < 16; i++) + { + assert(num_dims(lstm_struct->get_internal_tensor(i)) == 1); + assert(dim(lstm_struct->get_internal_tensor(i), 0) == state_dimension); + } + + // Check the shape of input state tensors. + // These tensor may be 1D or 2D. It's fine as long as the total size is + // correct. + assert(num_elements(lstm_struct->output_state()) == batch_size * state_dimension); + assert(num_elements(lstm_struct->cell_state()) == batch_size * state_dimension); + + // Check the shape of output tensor against that of input tensor + assert(num_dims(lstm_struct->output()) == 3); + assert(dim(lstm_struct->input(), 0) == dim(lstm_struct->output(), 0)); + assert(dim(lstm_struct->input(), 1) == dim(lstm_struct->output(), 1)); + assert(dim(lstm_struct->output(), 2) == state_dimension); +} + +} // namespace + +namespace execute +{ + +OMStatus execute_kernel_CircleUnidirectionalSequenceLSTM(const OMExecuteArgs &execute_args) +{ + core::OMRuntimeContext &runtime_context = execute_args.runtime_context; + core::OMRuntimeStorage &runtime_storage = execute_args.runtime_storage; + uint16_t op_index = execute_args.kernel_index; + + const circle::Tensor *input; + + execute::OMRuntimeKernel runtime_kernel; + runtime_kernel.readKernel(op_index, runtime_context); + input = runtime_kernel.inputs[0]; + + switch (input->type()) + { +#ifndef DIS_FLOAT + case circle::TensorType_FLOAT32: + evalFloat(execute_args); + break; +#endif // DIS_FLOAT + default: + assert(false && "Unsupported type."); + } + return OMStatus::Ok; +} + +} // namespace execute +} // namespace onert_micro diff --git a/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp b/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/onert-micro/onert-micro/src/import/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/onert-micro/src/import/kernels/UnidirectionalSequenceLSTM.cpp new file mode 100644 index 00000000000..7b69729c0de --- /dev/null +++ b/onert-micro/onert-micro/src/import/kernels/UnidirectionalSequenceLSTM.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OMStatus.h" + +#include "import/OMKernelConfigureBuilder.h" + +#include "core/OMUtils.h" +#include "core/OMKernelData.h" + +#include "execute/OMRuntimeKernel.h" + +using namespace onert_micro; +using namespace onert_micro::core; + +namespace +{ + +constexpr uint32_t inputTensorIdx = 0; +constexpr uint32_t outputTensorIdx = 0; + +} // namespace + +namespace onert_micro +{ +namespace import +{ + +OMStatus configure_kernel_CircleUnidirectionalSequenceLSTM(const OMConfigureArgs &config_args) +{ + OMStatus status = OMStatus::Ok; + return status; +} + +} // namespace import +} // namespace onert_micro From e9b491eeb29bf4b51e299ab87e031240e8b82eff Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Tue, 7 Jan 2025 13:46:33 +0900 Subject: [PATCH 2/4] lstm f32 test --- .../FloatUnidirectionalLSTMKernel.h | 164 ++++++++++++++++++ .../TestDataUnidirectionalLSTMBase.h | 60 +++++++ .../tests/UnidirectionalSequenceLSTM.test.cpp | 67 +++++++ 3 files changed, 291 insertions(+) create mode 100644 onert-micro/onert-micro/include/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h create mode 100644 onert-micro/onert-micro/include/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h diff --git a/onert-micro/onert-micro/include/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h b/onert-micro/onert-micro/include/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h new file mode 100644 index 00000000000..88a9a17b2f5 --- /dev/null +++ b/onert-micro/onert-micro/include/test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H +#define ONERT_MICRO_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H + +#include "TestDataUnidirectionalLSTMBase.h" + +namespace onert_micro +{ +namespace test_model +{ +namespace unidir_lstm_float +{ +/* + * UnidirectionalLSTM Kernel: + * + * Input(1, 4, 4) + * | + * UnidirectionalLSTM + * | + * Output(1, 4, 2) + */ +const unsigned char test_kernel_model_circle[] = { + 0x18, 0x00, 0x00, 0x00, 0x43, 0x49, 0x52, 0x30, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x08, 0x00, 0x10, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0xb4, 0x01, 0x00, 0x00, 0xd0, 0x05, 0x00, 0x00, 0xec, 0x05, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0xa0, 0x01, 0x00, 0x00, 0x94, 0x01, 0x00, 0x00, 0x8c, 0x01, 0x00, 0x00, 0x84, 0x01, 0x00, 0x00, + 0x64, 0x01, 0x00, 0x00, 0x3c, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, + 0xe4, 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x9c, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x3c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa0, 0xfe, 0xff, 0xff, + 0xd2, 0xfe, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x84, 0x9b, 0x3c, 0xbe, + 0x48, 0xfc, 0x22, 0xbf, 0x35, 0x43, 0xba, 0x3e, 0x18, 0x5c, 0xdb, 0x3e, 0x4b, 0x05, 0xdd, 0xbe, + 0xf5, 0x0f, 0x1e, 0xbf, 0x1f, 0x2e, 0x09, 0x3f, 0x9e, 0xb5, 0x2f, 0x3f, 0xfe, 0xfe, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0xca, 0xfe, 0x05, 0x3f, 0xea, 0x91, 0x06, 0xbe, + 0x77, 0xf4, 0xa7, 0xbe, 0x3f, 0x02, 0x23, 0xbf, 0xd1, 0xdc, 0x94, 0xbd, 0xc2, 0xdd, 0xb1, 0xbe, + 0x45, 0x13, 0xc8, 0x3e, 0x7f, 0x6b, 0xef, 0x3e, 0x2a, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0xec, 0xde, 0xe2, 0xbe, 0xf6, 0x46, 0x01, 0xbf, 0xed, 0x4d, 0x97, 0xbd, + 0xf2, 0xed, 0x09, 0xbf, 0x88, 0x4c, 0xe1, 0x3e, 0x60, 0x74, 0x89, 0x3e, 0x29, 0x79, 0x75, 0x3c, + 0x9b, 0x8f, 0xdb, 0xbe, 0x56, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0xe7, 0xc8, 0x6a, 0x3e, 0x16, 0x06, 0x8b, 0xbd, 0x49, 0xf5, 0xe5, 0x3e, 0x01, 0xfb, 0xf0, 0x3e, + 0x7c, 0x48, 0x10, 0xbf, 0x12, 0xd8, 0x94, 0xbe, 0x9a, 0xec, 0xaf, 0x3e, 0x4c, 0x1a, 0xdb, 0xbe, + 0x82, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, + 0x00, 0x00, 0x80, 0x3f, 0x96, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x8d, 0xd4, 0xdb, 0xbd, 0xfe, 0x63, 0xd1, 0x3e, 0x9f, 0x60, 0x1a, 0x3d, + 0xa1, 0x48, 0x0b, 0xbf, 0xc6, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x3a, 0xb2, 0xca, 0x3e, 0x58, 0x1a, 0x04, 0xbf, 0xe6, 0x76, 0x9f, 0x3e, 0x61, 0xa7, 0xd8, 0x3e, + 0xe2, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xf1, 0x48, 0x5c, 0xbe, + 0xcd, 0x54, 0xad, 0x3c, 0x9f, 0x8e, 0xbf, 0x3e, 0x69, 0xac, 0xfd, 0x3d, 0x00, 0x00, 0x06, 0x00, + 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x93, 0x70, 0x21, 0xbf, 0x76, 0x27, 0x8e, 0x3c, 0x97, 0xe3, 0xc5, 0x3e, 0xe5, 0x7d, 0x8c, 0x3e, + 0xf4, 0xff, 0xff, 0xff, 0xf8, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0x04, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, + 0x18, 0x00, 0x14, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, + 0xcc, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x08, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, + 0x14, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, + 0x0b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x41, 0x00, 0x00, 0x00, 0x04, + 0x01, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0xe4, 0x02, 0x00, 0x00, 0x98, 0x02, 0x00, 0x00, 0x54, 0x02, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, + 0xec, 0x01, 0x00, 0x00, 0xb8, 0x01, 0x00, 0x00, 0x88, 0x01, 0x00, 0x00, 0x58, 0x01, 0x00, 0x00, + 0x24, 0x01, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xbc, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x60, 0xfd, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x66, 0x75, 0x6c, 0x50, 0x61, 0x72, 0x74, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x65, 0x64, 0x43, + 0x61, 0x6c, 0x6c, 0x3a, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xec, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x0c, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x2f, 0x6c, 0x73, 0x74, 0x6d, 0x2f, + 0x7a, 0x65, 0x72, 0x6f, 0x73, 0x31, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0xdc, 0xfd, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x39, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x38, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x3c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x37, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x36, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x9c, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x35, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xc8, 0xfe, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, + 0x6e, 0x74, 0x34, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xf4, 0xfe, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x33, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x24, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x32, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x54, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x31, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, + 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x61, 0x72, 0x69, 0x74, 0x68, 0x2e, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x2f, 0x6c, + 0x73, 0x74, 0x6d, 0x2f, 0x7a, 0x65, 0x72, 0x6f, 0x73, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x5f, + 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x31, 0x3a, + 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x2c, 0x11, 0x00, 0x00, 0x00, 0x4f, 0x4e, 0x45, 0x2d, 0x74, 0x66, 0x6c, 0x69, + 0x74, 0x65, 0x32, 0x63, 0x69, 0x72, 0x63, 0x6c, 0x65, 0x00, 0x00, 0x00}; + +const std::vector input_data = { + -3.509163, -18.256927, 6.4799614, 10.296598, 30.371328, 18.692572, 10.12867, -26.44944, + 25.324795, 3.8303719, 20.93112, 22.603086, -4.308655, 2.3276749, -5.9565907, 25.611776}; + +const std::vector reference_output_data = {0.7613201, -0.7570043, 0.0480366, + -4.3364323e-11, -0.7613433, 1.3437739e-08, + -0.7613537, -7.000451e-08}; + +} // namespace unidir_lstm_float + +class TestDataFloatUnidirectionalLSTM : public TestDataUnidirectionalLSTMBase +{ +public: + TestDataFloatUnidirectionalLSTM() + { + _input_data = unidir_lstm_float::input_data; + _reference_output_data = unidir_lstm_float::reference_output_data; + _test_kernel_model_circle = unidir_lstm_float::test_kernel_model_circle; + } + + ~TestDataFloatUnidirectionalLSTM() override = default; +}; + +} // namespace test_model +} // namespace onert_micro + +#endif // ONERT_MICRO_TEST_MODELS_FLOAT_UNIDIRECTIONAL_LSTM_KERNEL_H diff --git a/onert-micro/onert-micro/include/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h b/onert-micro/onert-micro/include/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h new file mode 100644 index 00000000000..921eda3d7d8 --- /dev/null +++ b/onert-micro/onert-micro/include/test_models/unidirectional_lstm/TestDataUnidirectionalLSTMBase.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ONERT_MICRO_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H +#define ONERT_MICRO_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H + +#include "test_models/TestDataBase.h" + +namespace onert_micro +{ +namespace test_model +{ + +template class TestDataUnidirectionalLSTMBase : public TestDataBase +{ +public: + TestDataUnidirectionalLSTMBase() = default; + + const unsigned char *get_model_ptr() override final { return _test_kernel_model_circle; } + + const std::vector &get_input_data_by_index(int i) override final + { + switch (i) + { + case 0: + return _input_data; + default: + assert(false && "Wrong input index"); + } + } + + const std::vector &get_output_data_by_index(int i) override final + { + assert(i == 0); + return _reference_output_data; + } + +protected: + std::vector _input_data; + std::vector _reference_output_data; + const unsigned char *_test_kernel_model_circle; +}; + +} // namespace test_model +} // namespace onert_micro + +#endif // ONERT_MICRO_TEST_MODELS_UNIDIRECTIONAL_LSTM_KERNEL_BASE_H diff --git a/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp b/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp index e69de29bb2d..bae0fbf670e 100644 --- a/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/tests/UnidirectionalSequenceLSTM.test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "execute/OMTestUtils.h" +#include "test_models/unidirectional_lstm/FloatUnidirectionalLSTMKernel.h" + +namespace onert_micro +{ +namespace execute +{ +namespace testing +{ + +using namespace testing; + +template +std::vector checkLSTMKernel(test_model::TestDataUnidirectionalLSTMBase *test_data_base) +{ + onert_micro::OMInterpreter interpreter; + onert_micro::OMConfig config; + + interpreter.importModel(reinterpret_cast(test_data_base->get_model_ptr()), config); + + interpreter.reset(); + interpreter.allocateInputs(); + + T *input_data = reinterpret_cast(interpreter.getInputDataAt(0)); + + std::copy(test_data_base->get_input_data_by_index(0).begin(), + test_data_base->get_input_data_by_index(0).end(), input_data); + interpreter.run(config); + + T *output_data = reinterpret_cast(interpreter.getOutputDataAt(0)); + const size_t num_elements = interpreter.getOutputSizeAt(0); + std::vector output_data_vector(output_data, output_data + num_elements); + return output_data_vector; +} + +class UnidirectionalLSTMTest : public ::testing::Test +{ + // Do nothing +}; + +TEST_F(UnidirectionalLSTMTest, Float_P) +{ + onert_micro::test_model::TestDataFloatUnidirectionalLSTM test_data_kernel; + std::vector output_data_vector = checkLSTMKernel(&test_data_kernel); + EXPECT_THAT(output_data_vector, + FloatArrayNear(test_data_kernel.get_output_data_by_index(0), 0.0000001f)); +} + +} // namespace testing +} // namespace execute +} // namespace onert_micro From 842903257c428bde7c443f2fd4308d19bc2aa0a3 Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Tue, 7 Jan 2025 15:58:12 +0900 Subject: [PATCH 3/4] fix svace overflow --- .../src/execute/kernels/UnidirectionalSequenceLSTM.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp index f36090b16e8..88a76e647ae 100644 --- a/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp @@ -283,14 +283,16 @@ void evalFloat(const OMExecuteArgs &execute_args) const auto cell_state_type_size = getOMDataTypeSize(onertMicroDatatype(lstm_struct.cell_state()->type())); + size_t scratch_buf_size = (long)batch_size * (long)state_dimension * (long)cell_state_type_size; + auto scratch_0_data = - std::make_unique(batch_size * state_dimension * cell_state_type_size); + std::make_unique(scratch_buf_size); auto scratch_1_data = - std::make_unique(batch_size * state_dimension * cell_state_type_size); + std::make_unique(scratch_buf_size); auto scratch_2_data = - std::make_unique(batch_size * state_dimension * cell_state_type_size); + std::make_unique(scratch_buf_size); auto scratch_3_data = - std::make_unique(batch_size * state_dimension * cell_state_type_size); + std::make_unique(scratch_buf_size); // Create and fill with 0 output state tensor auto output_state_data = std::make_unique(num_elements(lstm_struct.output_state())); From 2e8294ca6518a7dc76b9260d6180b46c4c2152a0 Mon Sep 17 00:00:00 2001 From: Chunseok Lee Date: Mon, 3 Feb 2025 14:45:19 +0900 Subject: [PATCH 4/4] fix format --- .../execute/kernels/UnidirectionalSequenceLSTM.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp index 88a76e647ae..548269aa51c 100644 --- a/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp +++ b/onert-micro/onert-micro/src/execute/kernels/UnidirectionalSequenceLSTM.cpp @@ -285,14 +285,10 @@ void evalFloat(const OMExecuteArgs &execute_args) size_t scratch_buf_size = (long)batch_size * (long)state_dimension * (long)cell_state_type_size; - auto scratch_0_data = - std::make_unique(scratch_buf_size); - auto scratch_1_data = - std::make_unique(scratch_buf_size); - auto scratch_2_data = - std::make_unique(scratch_buf_size); - auto scratch_3_data = - std::make_unique(scratch_buf_size); + auto scratch_0_data = std::make_unique(scratch_buf_size); + auto scratch_1_data = std::make_unique(scratch_buf_size); + auto scratch_2_data = std::make_unique(scratch_buf_size); + auto scratch_3_data = std::make_unique(scratch_buf_size); // Create and fill with 0 output state tensor auto output_state_data = std::make_unique(num_elements(lstm_struct.output_state()));