From 12ef21e88972b5d75fb7f93be022ebee4938b8ac Mon Sep 17 00:00:00 2001 From: tingchen Date: Wed, 29 Apr 2026 04:00:33 +0000 Subject: [PATCH 01/43] fmha f16 aiter integration --- aiter/__init__.py | 9 + aiter/fused_fmha_fwd_f16.py | 124 ++++++ aiter/jit/optCompilerConfig.json | 16 + aiter/ops/fmha_fwd_f16_asm.py | 82 ++++ csrc/include/rocm_ops.hpp | 14 + csrc/include/torch/fmha_fwd_f16.h | 38 ++ csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 409 ++++++++++++++++++ csrc/pybind/fmha_fwd_f16_asm_pybind.cu | 9 + ...HA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co | Bin 0 -> 68776 bytes ...WD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co | Bin 0 -> 83256 bytes ...HA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co | Bin 0 -> 68776 bytes ...6_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co | Bin 0 -> 54176 bytes ...WD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co | Bin 0 -> 58040 bytes ...64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co | Bin 0 -> 72512 bytes ...WD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co | Bin 0 -> 57912 bytes ...HA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co | Bin 0 -> 43432 bytes hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv | 9 + op_tests/test_fmha_fwd_f16_asm.py | 387 +++++++++++++++++ 18 files changed, 1097 insertions(+) create mode 100644 aiter/fused_fmha_fwd_f16.py create mode 100644 aiter/ops/fmha_fwd_f16_asm.py create mode 100644 csrc/include/torch/fmha_fwd_f16.h create mode 100644 csrc/py_itfs_cu/asm_fmha_fwd_f16.cu create mode 100644 csrc/pybind/fmha_fwd_f16_asm_pybind.cu create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co create mode 100755 
hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co create mode 100644 hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv create mode 100644 op_tests/test_fmha_fwd_f16_asm.py diff --git a/aiter/__init__.py b/aiter/__init__.py index b6dcb34a2f..9d5e6443f7 100644 --- a/aiter/__init__.py +++ b/aiter/__init__.py @@ -116,6 +116,15 @@ def getLogger(): e, ) + # FMHA fwd f16 ASM (independent of CK) — kept in a separate try block so + # it stays importable even when `module_aiter_core` fails to build on + # e.g. gfx1250. + try: + from .ops.fmha_fwd_f16_asm import fmha_fwd_f16_asm # noqa: F401,E402 + from .fused_fmha_fwd_f16 import fmha_fwd_f16 # noqa: F401,E402 + except (ImportError, RuntimeError, OSError, KeyError) as e: + logger.warning("aiter.fmha_fwd_f16 unavailable: %s", e) + # Import Triton-based communication primitives from ops.triton.comms (optional, only if Iris is available) try: from .ops.triton.comms import ( diff --git a/aiter/fused_fmha_fwd_f16.py b/aiter/fused_fmha_fwd_f16.py new file mode 100644 index 0000000000..cf9d734f24 --- /dev/null +++ b/aiter/fused_fmha_fwd_f16.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. + +""" +fused_fmha_fwd_f16 +================== + +Customer-facing API for the ASM-based FMHA forward kernel (BF16, gfx1250). + +Layout convention +----------------- +Tensor shapes and their physical memory ordering are controlled by ``i_perm`` +(input) and ``o_perm`` (output): + + 0 = bshd — [batch, seq, head, dim] + 1 = bhsd — [batch, head, seq, dim] + 2 = sbhd — [seq, batch, head, dim] ← default input (i_perm=2) + +Default output is ``o_perm=0`` (bshd → [batch, seq_q, head_q, dim_v]). 
+
+Each tensor **must be contiguous** and its physical layout must match the
+declared perm (e.g. for ``i_perm=2`` the tensor shape must be ``[s,b,h,d]``
+with natural strides ``[b*h*d, h*d, d, 1]``).
+
+Sink convention
+---------------
+``sink`` is an optional per-Q-head f32 tensor of shape ``[q_head_num]``.
+Values are in the **AITER / CK-Tile post-scale domain** (same domain as the
+softmax logit ``Q·Kᵀ / sqrt(d)``).  The kernel uses pre-scale internally;
+the C++ entry point performs the conversion: ``sink_raw = sink_user * sqrt(d)``.
+The factor is always ``sqrt(d)``; it does not follow a custom
+``softmax_scale``.
+
+Supported shapes
+----------------
+- ``q.shape`` determined by ``i_perm`` and ``(batch, q_head_num, q_seq_len, d)``
+- ``d ∈ {64, 128}``
+- dtype: bf16
+- GQA: ``q_head_num % kv_head_num == 0``
+
+The border variant (_brd) is selected automatically when ``q_seq_len`` is not
+a multiple of 128 or ``kv_seq_len`` is not a multiple of 256.
+
+Environment
+-----------
+Set ``AITER_ASM_DIR`` to ``{AITER_ROOT}/hsa`` and ``AITER_GPU_ARCHS=gfx1250``
+so the compiled kernel objects (``*.co``) can be located at runtime.
+"""
+
+from __future__ import annotations
+
+import math
+from typing import Optional, Tuple, Union
+
+import torch
+
+from .ops.fmha_fwd_f16_asm import fmha_fwd_f16_asm
+
+
+def fmha_fwd_f16(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    softmax_scale: Optional[float] = None,
+    is_causal: bool = False,
+    return_lse: bool = False,
+    i_perm: int = 2,
+    o_perm: int = 0,
+    sink: Optional[torch.Tensor] = None,
+    out: Optional[torch.Tensor] = None,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    """BF16 fused multi-head attention forward (ASM path, gfx1250).
+
+    Parameters
+    ----------
+    q, k, v : torch.Tensor
+        BF16 tensors.  Physical shape determined by ``i_perm``
+        (default 2 = sbhd → ``[seq, batch, head, dim]``).
+        All must be **contiguous**.
+    softmax_scale : float, optional
+        Defaults to ``1 / sqrt(head_dim)``.
+    is_causal : bool
+        Apply causal (lower-triangular) masking.
+    return_lse : bool
+        If True, also return LSE with shape ``[batch, q_head_num, q_seq_len]``
+        in fp32.
+    i_perm : int
+        Input layout code: 0=bshd, 1=bhsd, 2=sbhd (default).
+    o_perm : int
+        Output layout code: 0=bshd (default), 1=bhsd, 2=sbhd.
+    sink : torch.Tensor, optional
+        Per-Q-head sink logits, shape ``[q_head_num]``, fp32, **post-scale**
+        (AITER convention).  Converted to pre-scale by the C++ entry point.
+        **Required for D64 (head_dim=64)** — D64 `_rxy_sink` kernels always
+        run the sink code path.  Pass
+        ``torch.zeros(q_head_num, dtype=torch.float32, device=q.device)``
+        for a zero-logit sink.  A tensor living on another device is moved
+        to ``q.device`` before the call.
+        Optional for D128 (head_dim=128) — D128 kernels ignore this field.
+    out : torch.Tensor, optional
+        Pre-allocated output buffer matching ``o_perm`` shape.
+
+    Returns
+    -------
+    torch.Tensor or (torch.Tensor, torch.Tensor)
+        ``out`` alone if ``return_lse=False``, otherwise ``(out, lse)``.
+
+    Raises
+    ------
+    RuntimeError
+        If ``return_lse=True`` but the op did not return exactly two tensors.
+    """
+    if softmax_scale is None:
+        # head_dim is always the last dimension regardless of perm
+        softmax_scale = 1.0 / math.sqrt(q.size(-1))
+
+    if sink is not None and sink.device != q.device:
+        # The C++ entry point hands ``sink.data_ptr()`` to the kernel without
+        # a device check, so a host tensor has to be moved next to ``q``.
+        sink = sink.to(q.device)
+
+    results = fmha_fwd_f16_asm(
+        q,
+        k,
+        v,
+        float(softmax_scale),
+        bool(is_causal),
+        bool(return_lse),
+        int(i_perm),
+        int(o_perm),
+        sink,
+        out,
+    )
+
+    if not return_lse:
+        return results[0]
+
+    # Explicit check instead of ``assert`` so it survives ``python -O``.
+    if len(results) != 2:
+        raise RuntimeError(
+            f"fmha_fwd_f16_asm returned {len(results)} tensors, expected 2"
+        )
+    return results[0], results[1]
diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json
index 8cfa3b6ef8..81d48cc213 100644
--- a/aiter/jit/optCompilerConfig.json
+++ b/aiter/jit/optCompilerConfig.json
@@ -1087,6 +1087,22 @@
         "verbose": "False",
         "blob_gen_cmd": "''"
     },
+    "module_fmha_fwd_f16_asm": {
+        "srcs": [
+            "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_f16.cu'",
+            "f'{AITER_CSRC_DIR}/pybind/fmha_fwd_f16_asm_pybind.cu'"
+        ],
+        "flags_extra_cc": [
+            "'-DENABLE_CK=0'"
+        ],
+        "flags_extra_hip": [],
+        "extra_ldflags": "['-L/opt/rocm/lib', '-Wl,-rpath,/opt/rocm/lib']",
+        "extra_include": [],
+        "verbose": "False",
+        "blob_gen_cmd": [
+            "f'{AITER_META_DIR}/hsa/codegen.py -m fmha_fwd_f16 --output_dir {{}}'"
+        ]
+    },
     "module_fmha_v3_fwd": {
"srcs": [ "f'{AITER_CSRC_DIR}/kernels/mha_common.cu'",
diff --git a/aiter/ops/fmha_fwd_f16_asm.py b/aiter/ops/fmha_fwd_f16_asm.py
new file mode 100644
index 0000000000..908957fe67
--- /dev/null
+++ b/aiter/ops/fmha_fwd_f16_asm.py
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: MIT
+# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.
+
+"""Python stub for the ASM FMHA-forward (BF16) op.
+
+The real implementation lives in C++ (`csrc/py_itfs_cu/asm_fmha_fwd_f16.cu`)
+and is exposed through the pybind module ``module_fmha_fwd_f16_asm``.
+"""
+
+from typing import List, Optional
+
+import torch
+from torch import Tensor
+
+from ..jit.core import compile_ops
+
+
+def _shape_from_perm(perm: int, batch: int, heads: int, seqlen: int, dim: int):
+    """Return the expected tensor shape for the given perm code.
+
+    Any ``perm`` other than 0 or 1 is treated as sbhd; no range check is
+    done here.
+    """
+    if perm == 0:  # bshd
+        return (batch, seqlen, heads, dim)
+    elif perm == 1:  # bhsd
+        return (batch, heads, seqlen, dim)
+    else:  # sbhd
+        return (seqlen, batch, heads, dim)
+
+
+def _dims_from_perm(t: Tensor, perm: int):
+    """Extract (batch, heads, seqlen, dim) from tensor shape given perm.
+
+    Reads ``t.size(0)`` … ``t.size(3)``; any ``perm`` other than 0 or 1 is
+    read as sbhd.
+    """
+    if perm == 0:  # bshd [b,s,h,d]
+        return t.size(0), t.size(2), t.size(1), t.size(3)
+    elif perm == 1:  # bhsd [b,h,s,d]
+        return t.size(0), t.size(1), t.size(2), t.size(3)
+    else:  # sbhd [s,b,h,d]
+        return t.size(1), t.size(2), t.size(0), t.size(3)
+
+
+def gen_fmha_fwd_f16_asm_fake_tensors(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    softmax_scale: float,
+    is_causal: bool,
+    return_lse: bool,
+    i_perm: int = 2,
+    o_perm: int = 0,
+    sink: Optional[Tensor] = None,
+    out: Optional[Tensor] = None,
+) -> List[Tensor]:
+    """Build placeholder outputs with the op's output shapes and dtypes.
+
+    Returns ``[out]`` or, when ``return_lse`` is true, ``[out, lse]``.
+    ``out`` is the caller's buffer when one is given, otherwise an
+    uninitialised tensor shaped by ``o_perm`` with ``q``'s dtype and device.
+    ``lse`` is an uninitialised fp32 ``[batch, q_head_num, q_seq_len]``
+    tensor.  ``k``, ``softmax_scale``, ``is_causal`` and ``sink`` are not
+    read.
+    """
+    # Batch / head / seq come from q; only the value head dim is read from v.
+    batch, q_head_num, q_seq_len, _ = _dims_from_perm(q, i_perm)
+    _, _, _, d_v = _dims_from_perm(v, i_perm)
+
+    fake_out_shape = _shape_from_perm(o_perm, batch, q_head_num, q_seq_len, d_v)
+    fake_out = (
+        out if out is not None
+        else torch.empty(fake_out_shape, dtype=q.dtype, device=q.device)
+    )
+    if return_lse:
+        fake_lse = torch.empty(
+            (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device
+        )
+        return [fake_out, fake_lse]
+    return [fake_out]
+
+
+# Stub only: the body is ``...``.  The decorator names the JIT module and the
+# bound C++ function; presumably it swaps in the compiled op at call time —
+# confirm against ``compile_ops``.
+@compile_ops(
+    "module_fmha_fwd_f16_asm",
+    fc_name="fmha_fwd_f16_asm",
+    gen_fake=gen_fmha_fwd_f16_asm_fake_tensors,
+)
+def fmha_fwd_f16_asm(
+    q: Tensor,
+    k: Tensor,
+    v: Tensor,
+    softmax_scale: float,
+    is_causal: bool,
+    return_lse: bool,
+    i_perm: int = 2,
+    o_perm: int = 0,
+    sink: Optional[Tensor] = None,
+    out: Optional[Tensor] = None,
+) -> List[Tensor]: ...
diff --git a/csrc/include/rocm_ops.hpp b/csrc/include/rocm_ops.hpp
index 43a9a96cc6..88cf9bffe9 100644
--- a/csrc/include/rocm_ops.hpp
+++ b/csrc/include/rocm_ops.hpp
@@ -822,6 +822,20 @@ namespace py = pybind11;
           py::arg("sink") = std::nullopt,               \
           py::arg("d_sink") = std::nullopt);
 
+#define FMHA_FWD_F16_ASM_PYBIND                          \
+    m.def("fmha_fwd_f16_asm",                            \
+          &aiter::torch_itfs::fmha_fwd_f16,              \
+          py::arg("q"),                                  \
+          py::arg("k"),                                  \
+          py::arg("v"),                                  \
+          py::arg("softmax_scale"),                      \
+          py::arg("is_causal"),                          \
+          py::arg("return_lse"),                         \
+          py::arg("i_perm") = 2,                         \
+          py::arg("o_perm") = 0,                         \
+          py::arg("sink") = std::nullopt,                \
+          py::arg("out") = std::nullopt);
+
 #define MHA_FWD_ASM_PYBIND                               \
     m.def("fmha_v3_fwd",                                 \
           &aiter::torch_itfs::fmha_v3_fwd,               \
diff --git a/csrc/include/torch/fmha_fwd_f16.h b/csrc/include/torch/fmha_fwd_f16.h
new file mode 100644
index 0000000000..582b05b648
--- /dev/null
+++ b/csrc/include/torch/fmha_fwd_f16.h
@@ -0,0 +1,38 @@
+#pragma once
+// SPDX-License-Identifier: MIT
+// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.
+#include
+
+namespace aiter {
+namespace torch_itfs {
+
+// ASM FMHA forward (BF16, gfx1250).
+//
+// Layout conventions (i_perm / o_perm):
+//   0 = bshd  [batch, seq, head, dim]   (default o_perm)
+//   1 = bhsd  [batch, head, seq, dim]
+//   2 = sbhd  [seq, batch, head, dim]   (default i_perm)
+//
+// q/k/v shapes are fully determined by i_perm:
+//   i_perm=2: q [s,b,hq,d], k [s,b,hk,d], v [s,b,hk,d_v]
+//   i_perm=1: q [b,hq,s,d], k [b,hk,s,d], v [b,hk,s,d_v]
+//   i_perm=0: q [b,s,hq,d], k [b,s,hk,d], v [b,s,hk,d_v]
+//
+// out shape is determined by o_perm (default 0 → bshd [b,s,hq,d_v]).
+//
+// sink: optional per-head f32 tensor [q_head_num], post-scale AITER convention.
+//       Internally converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim).
+//
+// Returns {out}, or {out, lse} when return_lse is true.
+std::vector<at::Tensor> fmha_fwd_f16(
+    at::Tensor& q,
+    const at::Tensor& k,
+    const at::Tensor& v,
+    float softmax_scale,
+    bool is_causal,
+    bool return_lse,
+    int i_perm = 2,
+    int o_perm = 0,
+    std::optional<at::Tensor> sink_ = std::nullopt,
+    std::optional<at::Tensor> out_ = std::nullopt);
+
+} // namespace torch_itfs
+} // namespace aiter
diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu
new file mode 100644
index 0000000000..9daf019cb6
--- /dev/null
+++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: MIT
+// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.
+//
+// ASM FMHA forward (BF16, gfx1250 / MI4xx) — ported from poc_kl/mi400/fmha_fwd_f16.
+//
+// Layout convention (i_perm / o_perm):
+//   0 = bshd  [batch, seq, head, dim]
+//   1 = bhsd  [batch, head, seq, dim]
+//   2 = sbhd  [seq, batch, head, dim]  ← default input  (i_perm=2)
+//                                      ← default output (o_perm=0 → bshd)
+//
+// sink convention (AITER / CK-Tile post-scale):
+//   The user passes sink in the same domain as Q*K^T * softmax_scale (post-scale).
+//   The kernel expects pre-scale raw logits.
This file converts:
+//     sink_raw = sink_user * sqrt(qk_head_dim)
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "aiter_hip_common.h"
+#include "asm_fmha_fwd_f16_configs.hpp"
+
+namespace aiter {
+namespace torch_itfs {
+
+// Kernel argument block (ABI = FmhaFwdKernelArgsBase in fmha_fwd_f16.cpp).
+// kernarg_size = 528 B (33 slots × 16 B, including ptr_SINK at the end).
+// Every field is followed by a p2/p3 pad member so it starts a new slot;
+// field order is part of the ABI and must not be changed.
+struct __attribute__((packed)) KernelArgs
+{
+    void* ptr_O;          p2 _padO;
+    void* ptr_Q;          p2 _padQ;
+    void* ptr_K;          p2 _padK;
+    void* ptr_V;          p2 _padV;
+    void* ptr_LSE;        p2 _padLSE;
+    float scalar_f;       p3 _padSc;
+    int   q_seq_len;      p3 _p0;
+    int   stride_q_seq;   p3 _p1;
+    int   stride_q_tg;    p3 _p2;
+    int   stride_q_head;  p3 _p3;
+    int   stride_q_batch; p3 _p4;
+    int   gqa;            p3 _p5;
+    int   stride_k_seq;   p3 _p6;
+    int   stride_k_head;  p3 _p7;
+    int   stride_k_batch; p3 _p8;
+    int   opt;            p3 _p9;
+    int   lse;            p3 _p10;
+    int   kv_seq_len;     p3 _p11;
+    int   qk_head_dim;    p3 _p12;
+    int   v_head_dim;     p3 _p13;
+    int   q_head_num;     p3 _p14;
+    int   stride_v_seq;   p3 _p15;
+    int   stride_v_head;  p3 _p16;
+    int   stride_v_batch; p3 _p17;
+    int   stride_o_seq;   p3 _p18;
+    int   stride_o_head;  p3 _p19;
+    int   stride_o_batch; p3 _p20;
+    void* ptr_QSeq;       p2 _padQSeq;
+    void* ptr_KSeq;       p2 _padKSeq;
+    int   stride_lse_head;p3 _p21;
+    void* ptr_QSeqPad;    p2 _padQSeqPad;
+    void* ptr_KSeqPad;    p2 _padKSeqPad;
+    // per-Q-head f32 sink logits (pre-scale raw domain).
+    // D64 `_rxy_sink` kernels: ENABLE_SINK reads this at UCONST offset 0x200.
+    // D128 `_rxy` kernels: slot must exist for kernarg_size=528 but is unused.
+    void* ptr_SINK;       p2 _padSINK;
+};
+
+// Enforce the documented kernarg size at compile time. This assumes p2/p3
+// (declared outside this file) pad an 8-byte / 4-byte field to 16 bytes.
+static_assert(sizeof(KernelArgs) == 528,
+              "KernelArgs must match the 528-byte kernarg block");
+
+// ---- helpers ---------------------------------------------------------------
+
+// Return the key of the first entry in *cfgs whose key starts with arch_id
+// and whose dtype, hdim_q, hdim_v, mask and border fields all match.
+// Raises through TORCH_CHECK when no entry matches.
+static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype,
+                                                     int hdim_q,
+                                                     int hdim_v,
+                                                     int mask_flag,
+                                                     int border_flag,
+                                                     const std::string& arch_id,
+                                                     CFG* cfgs)
+{
+    for (const auto& el : *cfgs)
+    {
+        // find(...) != 0 means the key does not begin with arch_id.
+        if (el.first.find(arch_id) != 0) continue;
+        const auto& cfg = el.second;
+        if (cfg.dtype  != dtype)       continue;
+        if (cfg.hdim_q != hdim_q)      continue;
+        if (cfg.hdim_v != hdim_v)      continue;
+        if (cfg.mask   != mask_flag)   continue;
+        if (cfg.border != border_flag) continue;
+        return el.first;
+    }
+    TORCH_CHECK(false,
+                "fmha_fwd_f16_asm: no kernel for dtype=", dtype,
+                " hdim_q=", hdim_q, " hdim_v=", hdim_v,
+                " mask=", mask_flag, " border=", border_flag,
+                " arch=", arch_id);
+    return ""; // not reached: TORCH_CHECK(false, ...) throws
+}
+
+// Extract logical dimensions from tensor shape given the perm code.
+// perm: 0=bshd [b,s,h,d], 1=bhsd [b,h,s,d], 2=sbhd [s,b,h,d]
+// Any other perm value is read as sbhd; callers validate the range.
+// Sizes are narrowed from int64_t to int without a range check.
+static void dims_from_perm(const at::Tensor& t, int perm,
+                           int& batch, int& heads, int& seqlen, int& dim)
+{
+    switch (perm) {
+    case 0: // bshd
+        batch = t.size(0); seqlen = t.size(1); heads = t.size(2); dim = t.size(3);
+        break;
+    case 1: // bhsd
+        batch = t.size(0); heads = t.size(1); seqlen = t.size(2); dim = t.size(3);
+        break;
+    default: // sbhd
+        seqlen = t.size(0); batch = t.size(1); heads = t.size(2); dim = t.size(3);
+        break;
+    }
+}
+
+// Stride (in bytes) of tensor t along its [batch, head, seq] logical dimensions
+// given perm (the physical dimension ordering stored in t.shape).
+static void strides_from_perm(const at::Tensor& t, int perm, int elem_size,
+                              int& s_batch, int& s_head, int& s_seq)
+{
+    // The kernel's stride fields are 32-bit, so widen, range-check and only
+    // then narrow; the previous `(int)stride * elem_size` wrapped silently.
+    const auto byte_stride = [&](int d) -> int {
+        const int64_t bytes = t.stride(d) * static_cast<int64_t>(elem_size);
+        TORCH_CHECK(bytes >= 0 && bytes <= 0x7fffffffLL,
+                    "fmha_fwd_f16_asm: byte stride ", bytes, " of dim ", d,
+                    " does not fit a 32-bit stride field");
+        return static_cast<int>(bytes);
+    };
+
+    switch (perm) {
+    case 0: // bshd: dim0=b, dim1=s, dim2=h, dim3=d
+        s_batch = byte_stride(0);
+        s_seq   = byte_stride(1);
+        s_head  = byte_stride(2);
+        break;
+    case 1: // bhsd: dim0=b, dim1=h, dim2=s, dim3=d
+        s_batch = byte_stride(0);
+        s_head  = byte_stride(1);
+        s_seq   = byte_stride(2);
+        break;
+    default: // sbhd: dim0=s, dim1=b, dim2=h, dim3=d
+        s_seq   = byte_stride(0);
+        s_batch = byte_stride(1);
+        s_head  = byte_stride(2);
+        break;
+    }
+}
+
+// Build the expected shape vector for a tensor given logical dims and perm.
+// Any perm other than 0 or 1 yields the sbhd ordering.
+static std::vector<int64_t> shape_from_perm(int perm,
+                                            int batch, int heads,
+                                            int seqlen, int dim)
+{
+    switch (perm) {
+    case 0: return {batch, seqlen, heads, dim}; // bshd
+    case 1: return {batch, heads, seqlen, dim}; // bhsd
+    default:return {seqlen, batch, heads, dim}; // sbhd
+    }
+}
+
+// ---- main entry ------------------------------------------------------------
+
+// q/k/v layouts are determined by i_perm (default sbhd=2).
+// Output layout is determined by o_perm (default bshd=0).
+// sink: optional [q_head_num] fp32 tensor in AITER post-scale convention.
+//       Internally converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim).
+std::vector<at::Tensor> fmha_fwd_f16(at::Tensor& q,
+                                     const at::Tensor& k,
+                                     const at::Tensor& v,
+                                     float softmax_scale,
+                                     bool is_causal,
+                                     bool return_lse,
+                                     int i_perm,
+                                     int o_perm,
+                                     std::optional<at::Tensor> sink_,
+                                     std::optional<at::Tensor> out_)
+{
+    // ---- basic validation --------------------------------------------------
+    // Perm codes first: every later step is keyed on them.
+    TORCH_CHECK(i_perm >= 0 && i_perm <= 2, "i_perm must be 0, 1, or 2");
+    TORCH_CHECK(o_perm >= 0 && o_perm <= 2, "o_perm must be 0, 1, or 2");
+    TORCH_CHECK(q.dim() == 4 && k.dim() == 4 && v.dim() == 4,
+                "fmha_fwd_f16_asm: q/k/v must be 4-D tensors");
+    // Raw data pointers go to the kernel, so all inputs must be on one GPU.
+    TORCH_CHECK(q.is_cuda(), "fmha_fwd_f16_asm: q must be a GPU tensor");
+    TORCH_CHECK(k.device() == q.device() && v.device() == q.device(),
+                "fmha_fwd_f16_asm: q/k/v must be on the same device");
+    TORCH_CHECK(q.is_contiguous() && k.is_contiguous() && v.is_contiguous(),
+                "fmha_fwd_f16_asm: q/k/v must be contiguous "
+                "(physical layout must match i_perm=", i_perm, ")");
+    TORCH_CHECK(q.scalar_type() == at::kBFloat16,
+                "fmha_fwd_f16_asm: only bf16 is supported");
+    TORCH_CHECK(k.scalar_type() == at::kBFloat16 && v.scalar_type() == at::kBFloat16,
+                "fmha_fwd_f16_asm: k/v must also be bf16");
+
+    // ---- dimension extraction ----------------------------------------------
+    int batch, q_head_num, q_seq_len, qk_head_dim;
+    dims_from_perm(q, i_perm, batch, q_head_num, q_seq_len, qk_head_dim);
+
+    int kv_batch, kv_head_num, kv_seq_len, kv_head_dim_check;
+    dims_from_perm(k, i_perm, kv_batch, kv_head_num, kv_seq_len, kv_head_dim_check);
+
+    int v_batch, v_heads_check, v_seq_check, v_head_dim;
+    dims_from_perm(v, i_perm, v_batch, v_heads_check, v_seq_check, v_head_dim);
+
+    // An empty problem would produce a zero-sized launch grid and a modulo
+    // by zero in the GQA check below.
+    TORCH_CHECK(batch > 0 && q_head_num > 0 && kv_head_num > 0 &&
+                q_seq_len > 0 && kv_seq_len > 0,
+                "fmha_fwd_f16_asm: batch, head counts and sequence lengths "
+                "must be non-zero");
+    TORCH_CHECK(kv_batch == batch, "k batch mismatch");
+    TORCH_CHECK(v_batch == batch, "v batch mismatch");
+    TORCH_CHECK(kv_head_dim_check == qk_head_dim, "k head_dim mismatch");
+    TORCH_CHECK(v_heads_check == kv_head_num, "v head_num mismatch with k");
+    TORCH_CHECK(v_seq_check == kv_seq_len, "v seq_len mismatch with k");
+    TORCH_CHECK(q_head_num % kv_head_num == 0, "q_head_num must be a multiple of kv_head_num");
+    TORCH_CHECK(qk_head_dim == 64 || qk_head_dim == 128,
+                "fmha_fwd_f16_asm: only head_dim 64 or 128 supported, got ", qk_head_dim);
+    TORCH_CHECK(v_head_dim == qk_head_dim,
+                "fmha_fwd_f16_asm: v_head_dim must equal qk_head_dim");
+
+    const int gqa       = q_head_num / kv_head_num;
+    const int mask_flag = is_causal ? 1 : 0;
+
+    // ---- stride extraction (in bytes) from tensor's actual strides --------
+    const int elem_size = q.element_size(); // 2 for bf16
+
+    int stride_q_batch, stride_q_head, stride_q_seq;
+    strides_from_perm(q, i_perm, elem_size, stride_q_batch, stride_q_head, stride_q_seq);
+
+    int stride_k_batch, stride_k_head, stride_k_seq;
+    strides_from_perm(k, i_perm, elem_size, stride_k_batch, stride_k_head, stride_k_seq);
+
+    int stride_v_batch, stride_v_head, stride_v_seq;
+    strides_from_perm(v, i_perm, elem_size, stride_v_batch, stride_v_head, stride_v_seq);
+
+    const int sub_Q = 128; // ts_qo: Q-tile size used by all kernels
+    // The per-tile stride is a 32-bit kernel argument; reject wrap-around.
+    TORCH_CHECK(stride_q_seq <= 0x7fffffff / sub_Q,
+                "fmha_fwd_f16_asm: Q tile stride does not fit a 32-bit field");
+    const int stride_q_tg     = sub_Q * stride_q_seq;
+    const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout
+
+    // ---- output allocation -------------------------------------------------
+    at::Tensor out;
+    if (out_.has_value())
+    {
+        out = out_.value();
+        auto expected = shape_from_perm(o_perm, batch, q_head_num, q_seq_len, v_head_dim);
+        TORCH_CHECK(out.sizes() == at::IntArrayRef(expected),
+                    "fmha_fwd_f16_asm: pre-allocated out shape mismatch");
+        TORCH_CHECK(out.is_contiguous() && out.scalar_type() == q.scalar_type(),
+                    "fmha_fwd_f16_asm: out must be contiguous bf16");
+        TORCH_CHECK(out.device() == q.device(),
+                    "fmha_fwd_f16_asm: out must be on the same device as q");
+    }
+    else
+    {
+        auto shape = shape_from_perm(o_perm, batch, q_head_num, q_seq_len, v_head_dim);
+        out = at::empty(at::IntArrayRef(shape), q.options());
+    }
+
+    int stride_o_batch, stride_o_head, stride_o_seq;
+    strides_from_perm(out, o_perm, elem_size, stride_o_batch, stride_o_head, stride_o_seq);
+
+    // ---- LSE allocation (fixed layout [batch, q_head_num, q_seq_len] fp32) -
+    // Always allocate even when not returned: the kernel may access ptr_LSE.
+    at::Tensor lse = at::empty({batch, q_head_num, q_seq_len},
+                               q.options().dtype(at::kFloat));
+
+    // ---- sink buffer -------------------------------------------------------
+    // D64 `_rxy_sink` kernels (ENABLE_SINK=1): ptr_SINK is actively read, so
+    // an explicit sink is REQUIRED for D64 and callers know the path is live.
+    //
+    // D128 `_rxy` kernels (ENABLE_SINK=0): ptr_SINK is compiled out; the slot
+    // must still be a valid non-null pointer, but values are irrelevant.
+    // Zeros are used when no sink is supplied for D128.
+    //
+    // sink_ is in AITER post-scale convention (same domain as Q·K^T * scale).
+    // Convert to pre-scale for kernel: sink_raw = sink_user * sqrt(qk_head_dim).
+    // NOTE(review): the factor is sqrt(qk_head_dim) even when softmax_scale is
+    // not 1/sqrt(qk_head_dim) — confirm this is the intended mapping.
+    at::Tensor sink;
+    if (sink_.has_value())
+    {
+        TORCH_CHECK(sink_.value().dim() == 1 && sink_.value().size(0) == q_head_num,
+                    "fmha_fwd_f16_asm: sink must be 1-D with size q_head_num (", q_head_num, ")");
+        TORCH_CHECK(sink_.value().scalar_type() == at::kFloat,
+                    "fmha_fwd_f16_asm: sink must be fp32");
+        // AITER post-scale → pre-scale: multiply by sqrt(qk_head_dim)
+        float pre_scale = std::sqrt(static_cast<float>(qk_head_dim));
+        // The error text below suggests a bare torch.zeros(...), which is a
+        // host tensor; move it to q's device so the kernel gets a GPU pointer.
+        sink = (sink_.value().to(q.device()) * pre_scale).contiguous();
+    }
+    else if (qk_head_dim == 64)
+    {
+        // D64 _rxy_sink kernels always compute the sink path (ENABLE_SINK=1).
+        // Require an explicit sink so callers know it is active.
+        TORCH_CHECK(false,
+                    "fmha_fwd_f16_asm: D64 (_rxy_sink) kernels require an explicit `sink` "
+                    "tensor of shape [q_head_num]=", q_head_num, " fp32 (AITER post-scale "
+                    "convention).  Pass `sink=torch.zeros(q_head_num, dtype=torch.float32)` "
+                    "if you want a zero-logit sink.");
+    }
+    else
+    {
+        // D128 _rxy kernels: ENABLE_SINK=0, ptr_SINK is ignored by the kernel.
+        sink = at::zeros({q_head_num}, q.options().dtype(at::kFloat));
+    }
+
+    // ---- kernel args -------------------------------------------------------
+    KernelArgs args = {};
+    args.ptr_O           = out.data_ptr();
+    args.ptr_Q           = q.data_ptr();
+    args.ptr_K           = k.data_ptr();
+    args.ptr_V           = v.data_ptr();
+    args.ptr_LSE         = lse.data_ptr();
+    args.scalar_f        = softmax_scale;
+    args.q_seq_len       = q_seq_len;
+    args.stride_q_seq    = stride_q_seq;
+    args.stride_q_tg     = stride_q_tg;
+    args.stride_q_head   = stride_q_head;
+    args.stride_q_batch  = stride_q_batch;
+    args.gqa             = gqa;
+    args.stride_k_seq    = stride_k_seq;
+    args.stride_k_head   = stride_k_head;
+    args.stride_k_batch  = stride_k_batch;
+    args.opt             = 0;
+    args.lse             = return_lse ? 1 : 0;
+    args.kv_seq_len      = kv_seq_len;
+    args.qk_head_dim     = qk_head_dim;
+    args.v_head_dim      = v_head_dim;
+    args.q_head_num      = q_head_num;
+    args.stride_v_seq    = stride_v_seq;
+    args.stride_v_head   = stride_v_head;
+    args.stride_v_batch  = stride_v_batch;
+    args.stride_o_seq    = stride_o_seq;
+    args.stride_o_head   = stride_o_head;
+    args.stride_o_batch  = stride_o_batch;
+    args.ptr_QSeq        = nullptr;
+    args.ptr_KSeq        = nullptr;
+    args.stride_lse_head = stride_lse_head;
+    args.ptr_QSeqPad     = nullptr;
+    args.ptr_KSeqPad     = nullptr;
+    args.ptr_SINK        = sink.data_ptr();
+
+    size_t arg_size = sizeof(args);
+
+    // ---- kernel selection --------------------------------------------------
+    // border_flag: automatically detected from seq-len alignment.
+    //   q_seq_len must be a multiple of sub_Q (128) and
+    //   kv_seq_len a multiple of 256 for the non-border variants.
+    const int border_flag = ((q_seq_len % 128) != 0 || (kv_seq_len % 256) != 0) ? 1 : 0;
+
+    const std::string dtype   = "bf16";
+    const std::string arch_id = get_gpu_arch();
+    CFG* cfg_map              = &cfg_fmha_fwd_f16;
+    // NOTE(review): template arguments were missing from the reviewed text
+    // and are restored from how the cache is used below — confirm.
+    static SynchronizedCache<std::string_view, AiterAsmKernel> impl_ptr_map;
+
+    const std::string kernel_key = get_heuristic_kernel_fmha_fwd_f16(
+        dtype, qk_head_dim, v_head_dim, mask_flag, border_flag, arch_id, cfg_map);
+    auto it = cfg_map->find(kernel_key);
+    TORCH_CHECK(it != cfg_map->end(),
+                "fmha_fwd_f16_asm: kernel not found in CFG: ", kernel_key);
+
+    const char* name    = it->second.knl_name.c_str();
+    const char* co_name = it->second.co_name.c_str();
+    AiterAsmKernel* impl_ptr = &impl_ptr_map.get_or_create(
+        name, [&]() { return AiterAsmKernel(name, co_name); });
+
+    // ---- launch ------------------------------------------------------------
+    const int wv_tg = 4;
+    const int bdx   = (wv_tg == 4) ? 128 : 256;
+    const int gdx   = (q_seq_len + sub_Q - 1) / sub_Q; // Q-tile count
+    const int gdy   = q_head_num;
+    const int gdz   = batch;
+
+    // All _rxy kernels use remap_xy=1: swap gdx↔gdy at launch so that
+    // bid.x indexes heads and bid.y indexes Q-tiles.
+    auto stream = at::hip::getCurrentHIPStream().stream();
+
+    // The unconditional stderr dump of every kernel argument that used to
+    // sit here has been removed: it ran on every call.
+    impl_ptr->launch_kernel({&args,
+                             &arg_size,
+                             gdy, // launch_gdx = head count (swapped)
+                             gdx, // launch_gdy = Q-tile count (swapped)
+                             gdz,
+                             bdx,
+                             1,
+                             1,
+                             stream});
+
+    std::vector<at::Tensor> ret;
+    ret.push_back(out);
+    if (return_lse) ret.push_back(lse);
+    return ret;
+}
+
+} // namespace torch_itfs
+} // namespace aiter
diff --git a/csrc/pybind/fmha_fwd_f16_asm_pybind.cu
b/csrc/pybind/fmha_fwd_f16_asm_pybind.cu new file mode 100644 index 0000000000..62257c87ac --- /dev/null +++ b/csrc/pybind/fmha_fwd_f16_asm_pybind.cu @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +#include "rocm_ops.hpp" +#include "torch/fmha_fwd_f16.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + FMHA_FWD_F16_ASM_PYBIND; +} diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co new file mode 100755 index 0000000000000000000000000000000000000000..03ea794cb2e8d2148c05bdcd24be103a0c012d4b GIT binary patch literal 68776 zcmeHw3wTu3wg1VPGb37C0j)+DU_eBkA%yTo7@h(L5s-(dH3<+N@-zfQj3*Nah)69e zVrsE7TC9hZ{rpJjTRM^+os<3HuiO?SG|fU^Z)(!I(s-X2_fKZ`E2F; z!a8g3wbxl|owe3JbIw_NmyVk}-pt4dTZa7e8Ffa6drS~^k1JbA>qI}>1{j}GhH*UKnf~Lslszdh|E0mh z7fQ2++VfTOM*d(G6DhOyz9f@Sj;tt!Y`QdqRSaOtZ1?|UMvplIQ$qekaV z&tw(MD_&i&G_PRZyuwwhb{7>E%+I^MXmN2NdgjkBLR;Rdl?C$(zrAqjin#?#pU7HX zu&nTjmBmGQ*FBlFV!?t{g~i4bS*sSWD{S?htThEoR~P0jS-gDy(?Xs%clCk=g+=M7 zc}ksT#qyvxNSZXPKqXl6}T!Y2k%=k{4$3_2cx3!lymaM!BMu ztL7CfEhx%cphr%Xk(T$OR?fX6PhDc&SbWFI!f!3^pW`z5&TVTDUx~K=a8z z3qH@RDlS?)zc5eOC0VUJ3sz4jSrsolDb%6JS+M$UlGP#xNm8ss+s=YnRg&4C4U46#3Qw|lIm0*$9($IoX&fpqXS6;GR!`sNntA@>WnQsDXTfXF8o6R=tg_FB z)osFR`RWv@!e_xsyQY{d>6$NROg#%`+BaQIJ>2rof}3{EHR;aza>mNDV5c=*ks5?A zXGG3|nbvegS`@yVvF$9lX-!u&)--k|?9T31ppx_2wCf9R)6>e!8C7!D|MR;Jbt*gM zRPocE%AEk8b+bA%L~81LIivP(Z>8^OD@`q!f6`EOr&H|h0XJJl98%+{?`+v{0B zeD;4T0_8rm^N;^X+en17H@b25Lvg*7M5p^t{hBsfYoYb;>L!LAyT~v`8W+aPLDNeA zfNX;9v($8cI8v37$hbF=fYf~rK0Wxz=wrMOH~7fNGTsjXMgX%3(H{m3`;7Oe5^rt1 zpHIAv@%~Ccqpk6Ngzdn$0mcDi#6e$0i2gX+0c!z`i;ef|2*KwVU>Fb%^idqJHDKg& zoDVPzx~aqg^Vtu$641B==VL$MHueL?h?75Hal(A?a^x*D5;zbv9y^c`h&)ICG_EP% zA2l}ZHzOAHy->l{E)2Oa+l65lPIY0v3s<@@;=*k%jJdFiFmT|)wDU&vdH1<=hg^8n zg?}{gG~#Cm+O>*2CoUN|U>KVYL=9u$YGcFxRf$&WTCGZ)|6pqv&S#YGZ-sWt*u4Mz 
zYpflu#`||zek)?-=0+j|2CUCDcI+QuJiOmGA%91{+lHqyil;9T*b#vNMhD8vtb}zB zkH=XS+BWVVkQjjTBr>e84=f)raKKjU?tvTqUmsLHaL~Z5{<{Zl9CTpNy@L{iwi;o^ zd;8anP8oo`-!ShrjB+E8<1@bSny)k3&a)<&F>8l4&e~xbSH5PRk8^CZc7&nRapH*# zgXfKeT^#){MCACm+h@G6=|IE`IMItXA1I0Y54Ktq3DY(_zL3WuId0A4upD2)<9s~R z!FeL)hoyR~f%fBu&{#YU$uW5zcgK?Fag1F*>g>nn0AnCF>dXB@XuZMY5{kfQ<@OfIupP5ROWJ4(KEL0}4b znCDxj|GW##GAp3kVf_Opa37%GzZrZZ2hN{t?HFsjel&P+Br;^k`tz?t+Z4BL=+H=H z*s%5V@5|udTHeH%@+}7PqKRv7lx1KxnRuAjJ7Qko@cLQwk8iLa@FBzCH3=9e_74qT z_GerY4mAe5$GpDJqc39o0)L11$0p6(5xaKQj>M$dI}8MI;@TS&gd+DQA`2^wY|}_A zY#%XN!{^GZjaI_H!{`E@@NuIno|!IO7zwm*A1O0-g5RU~`{w?H5#5i|L>4MMhPKD> z_c;E(g}+_+djfyo#^00sJ107;xa*SW0uN6lfN#avj4qKbTdzo5q51$-Th~NaMbkCb zbt_|O68QKNenrzQ(M|Pr^LN|2y=`oNC=o*Y$P<7V8_p0sK}cSKGNTgbe+qxQA@6sT zyzLdf2W?N|?-~4k7k^dwdlrA+!{7J!_eu0o=gUfDsq+C>vSmfGw)Rc*RegY}Ejy8| zXtHD3O5SV1N6VX&$WeVc{v61=XMgWRZzV6Fl2`CVhTwQ04*y}kiWqN0-lO|@ts=&+ zaQwCX!**AXOc>ktx6YZdCSlyazv4$nPb7>7kh?CtFkw8n-|`z731d5SMCLIn_7lWR z|Bjd$*fGZc^|CR6N6N-*zGdSrn{O!}W4sAH-`al>{OXIQ92K6|ErGA|AJ%Av{a0J({Hsl{loik>^FVAOF5R1V+r-x z$k2FcWavTs)!^?C{%Y}e7=K6bcNBkh`1=k1>hbqS{2jyJ`}q3^e+{KdwxLxx-iyC# z{O!Zv8S;c|kx$7(+2m95&^FOf&j*|b9z`x-jz?G`SIM04=y^kJ>i-b(|D))`ji<{0 zNgT2KJnT*UA@&1joLk5>=JRNOX=v#fldxyd{*I9$iB9X8 zqpWRR%4dP59cgBqq#5e(85x%7<t|nI))M)zG-53~$KP45 z4CAE(Cv714%P<<=e$+5xA%VNX0xRafJS6a7SYS=Qz(Wy%wK0K*;{uP= z2|TLofn(UCIwWvkSYSL~;K7K%nwY>tae=jU0uL*D$njb8Q1a9o+O>A)py&oJ9PRVg$TI5wV=)m$9{K;{*9mmr@arNpvJ*eqp&_&b7z+cl-{#h=4CFGnv>!h3~W(yy- zCr+Gfp9OnTo+|Ls^psQ6vt84(o%Zhy`nYTVTpU-s_UE;TyY?qdwQGMK*L41#vOnqL zonKYc+~UDSR6A~bG~3M((?;x^!&o( z%Fgep{6_j%=e8X*eI;@c^PBK|PvtirS7p4X@*C+ZGun3G{6hX!$aTzDiYI88-*{Zz z<|KVpo0Ig_;IHYcK*Rh-K9rgHieq`qe5Kmm{HEI7{HF9mPS*4>=%VRk;LrR<`WW~# zzmdKYax!1By%KUVU+H${E8Wig#`Y@k(e#v4)3aUEvzGXWEmt((?;x z^!&o(n$9^LIKPuV-g&yRKj~{=E81Up=5*lv&g0sQoDQ6?NnevOUD==XwYV0vr{W13 z+MmZq+MJ}XZF7?T2>5IITF}t`S@>{%nuKXEc2rx@h`1_|yKR zkApw$Px>0jNqe%r26ED#x}Ema?X*AJYr#j;Q%+6Kc1_Rr^xrbwdzX~V>bd*wUXfnw 
zvl7p!-!gcfv86qK%S_<>@>HYWGR=t|X5Xj`V^o;OSHAHv=*ErObl|U~ea{Elk4i~fKF*~@|K{?>{WY6U@_MP}B(I;p z)R@-z`KAMwG3s#S0@DZ@8LwGG+8V^mfOjB1Oz`!i=a^RW7`w6}g_~pd=3%&_>JK_TbzZ-aa;)B{6TVT_3EyK^ZoO>CxKWxMG=?dFe z6UA2d1HY2EE%*b#yAi)u@CSikMf^I!w*&7^e45}70q;Tldck)9?@4^7;12`uMSS*2 zf7piWL!WGLee!?6+Gcwub@#n zLqW52rh-=KECmClvjL3@Px@3Yukm8J=Jcrn#Fq$8pBhMfnc(!PLBv-GPM;b~{5HYq zQ$vWa5}ZCYl=y1F=~Kgq-=6AIDSi#xrua4R6u$D_={pBm0@PzZA zf=20f1BE3tpBl?+e2-jn`qVh$_XK-%;@7z5DSiz+#jk;<_%-kpzXqP-*T7T!8o2gr zuTPyT8N5Dqu4K?Y6<0oT+<9KX2ImC@PdGnO&?r5qpjrA;1+CJ5Q!r5aGeECTP3ATJ zx?FSm)V0LFAvk?%3h_q;r%zo+JSsSSYAW%^1gB3;BmOPH=~L5*KOs1M>U!c&rutNh zU*npm_%-kpzXqP-*T7T!8hDCd15fd5;M%XfK6S2S@cPubl0o|vUpzBUy`o@)^QwX; zoL?wtlpa>lEPYKutMqjR1Ep^OdVOjJukllI&FND!iGN3M`qV7qPYX_;noaz>g43sN zApWf2^r<<-zb`m_>PF%}5S%`B6Y(FW`c#Tv@VHSiR_2A<;Az*GDhc#2;GPw{Ku z+ONGnb*^Oa`qa6SK|jADEHQe%9vrsLx(l$Y{RlqT>G^u^n|@@K1LiJy%i0Jy{LbUn zCcv@xw)1ZVocxWU{;vYg_|A=fj_)+;JwL}^HSI$`$M-Eb?&tVHx3vv${K&FP0~~+N z{p|uAf8Ap_0gj*j?7#rWcOEC>d(4*c{R(CLklSVasEsmy;=?k2+7mK<&JSh$6*J#c z=k34xxS!`6^U$U0JlFpqC%|{)#b~>v*NxU{K#b>e{>yH zT;h2?{j%hff1!O~Yx}?UKG}Bj>m{AX;(O0luYD7fK~&MMOZs{7AKp@a%JJo2 z_)~A%OxkXKy%^(;W84mmdswyKnDm`99jC9ynU2eae$l=o`|Qu%bl~B>efyN`2ky%% zJy8N2(7?Nmh?(o`M%+fsuFgzccftgGoRII2qQ2p!)ISP6+S$4IzE+Mcqx&J)B(t4w zpwq5jc5BA|!}z|CIzJ5Bwsu$N6Zg0Qw6H^FTVFS)ZRb#H#!yRXse4Ol^)(HS?Q|&k zN~@h60XqVQ07HOX0K2@>dS_R_uE2x#V5b|7yWzNlJ=E!rZ6weUJ95Am9pJ>!XY>(X&3v_!2#j zMHyeBXML3MC3@CJX&2G6K1#cYp2woJi|AP&rCmhN`Y7!pde%p27t!-rly;%de-`DY zuYVThPJgfd`8UF+|Mptt1yG z`KGcP=U4~wIzQIL_uKN_Lu-7edo#W{;Wt$Jn?ruD!S8eCqn+O)8Tg*Z*e&0l;mz=& z;MHbuu)Wd=N!!7&z?z`3?2X_s(;8wIIh7dC?{)YMmVs}2@`dJ52hemJX<9?=VkaVP zwK0K*E$|MFGlRqIHO^BQPr3AWTXCT|(h=iBV@zwfz1FFdwxgZU*5w-0y4qgna16d5 z?x4P>Mn8O}`Z)Mr(#H&PUI|ItzOcafB^Y~2FVo^YlYu--o$<|ahndhhUuX_~88nyn zH7(9R5oxQ52|UyQ-kAf;Am^h>jHk}{PPxNO=o}ZC+DkD$v%hI^o~o0!!xN%oco-WAP1ihu!!_Dl9bde1Qie0&BcF_%Vih z;#=tsyYVesOlVSciA!5;oxsCh9rj>6^~Cqp9TNEd$`G0s+xcnS_p9p!nG^U1D__R9 z*v^(a9^Y|yNI+*Tt5-%Xx7W^ojHBN8=DP#GlQN)t0N1bUwPtXHUE+AZn+oCjbhS-u 
zr0qC+Twlib;T`xrl>wbQqrcmArWLj$j{3gph_~`C8gQM*A&eN_B<6GtI*U+dGF%A2FG3B!ME@o zc0)$5{9buKas3+K$#>Wd8NKp*<(=j7!?*GscHLx;5VyR ze&&3yjM{I#vU+9XH+(8KpkD&=cx6qI&GljWMxr^{y!_H+Q*n>+%o+SGy%fI9Z!y#J zcrn`fU8Bn5hh#49itE?=Ces>kZ*@Y_wks^Kl0KXTAHK%EPvvpO48P@6d0c4rT#4~n zd8ReN-sVK4ttuvPFMT^3zCF=?K;?1DrN94-3r%%5jL)8DT9fSUPMx&vqi^THw{3fe z!!f8skUWll&i$NU7?X_EkhJX#3yjgu-QdHFSLSi*jM@ds<3h8GbB~VMh_qG41n!}4 zbcb&EpTZ@oVizRGwDX3G%A^4xPJW zT<#Lnnqu#C+DBMx@zj3$^+5Rbb@n$^&gQuoSNty4Kz{6jF#}ha)>J#@=&XvwHOX9`4gY7XbAHn?&ACYNV+=uF##avbF}69s>1(3pLp_el^Iq8*(^^*MdGbT8 zj>_|1*~weW%A8O6P{*TkzE^g}s+N^GKgkby)hoMKR_6aCKlqndcJ_JYW$q_W)Bz=P zzn0%C?`+VKCu)R}xnIklBCnU9oBOr=UU@YyuiTsyys~nBD8V_^eg2>2UY@==9KJcz z-tB~>?O<47O>bPI;dh$WEPIc`-vZ?rtL{&DUo13-hGYDQGBY^ae#VJNTWw6>;XW8Y zqSUl*u&dM@!g$ud1>!<;WCX^KzQ+vCvA^fkN!!sZj0OKXz)Bp|d z=Xo7-;d6}LkhJX!3ygDK({XFS2RR<~9%>%R7n*~)7(ehfiQ|a0)x-oI;=HC~nDYqr zMvX{E{vL=6P3=I8A9{zxbe*&v=6t5(nDYnqMLkJ}-S{0~2u+LSehPji*Xq3v7vnl| z9wCl86g7|J3%?fY!FhywpmwDr@Aq4(2j>s!fjSqt&p$&wc%QHA54qKS{u#>6d4%Ip zPb2sFGXAvo=lnr^QG3&o_xNF4ukrVr!JF)VQTN@GkXPyZli~Zkx2KM%&FR4VdiwMf zTwD6))Vs_e$4-ZjbH12?`GWWK>RLc1-qSNr&vN3XH>!MTFGQTL?gigdlw_28UA{-}*ob4I#e>3VSPNb*NsrhT zrR%`CgFI1VrRI)wo%D0HbX_=?@EoYeQgcbVZt1$5@SX>Gc+j*AAo?o%qkYoDu8$)&IFD?{c;5f;w+QJ# zzb6-TlfQvyA%Ih&_)RV2s|eA*cOcrQeha^20jED{T70&|_)h*N5&CKsONiOQpqud> z)5@3e`-T9Y^8?+H^PMdJ9p)F85*HO(iGCoBYL&ok5NnayfIN!*1&FQG; z8{(+D>&RzH9M_a?Vc}Ov*pzOZcc>d`_SC$SFZ7LcV|!D&asHuhsPpT{=S&e9pQ0Pv zF!q$!iF1&;PSAyOP)z90_&RYeA|KQicH}dsI+3N3E^Kc~7tTqv1?myioHPvA@5UdR zmd|em>hS)$-SfWSJAl?teQHjk-l$jX$T{gZ?0?Z0{4c_neZfqBcA%ceulUq_q^=d@ z;(YXb@_5Y`{3H9{^acM+_#eLDF~VOXr@OJEVh}m~J<|Wy7kr=n@A`rt68^~-{D|-` zJ~bbazoN%{be!~Ru0X%a?SO~}#sK4;@yXmy`Bi)&w}0}%2j?BZxVF#wf^Pvf{J+u3g%~F0@Ho*n~D{ zfDJewkvD2IJMy^{ZPM1YNn6(@ZDAAIpaC|}a-)v3BY(5dCT(4tw1rLhEKBQ1oj5Nk z{;)mgrBmssb>h54`B4wrkbkxN+Rl{>x~}hB%HXZ@oDbVrFUs{|oo6BOmjq{>=K|s{3(h*vg~Wd@IO{wY5r0K+ z)_E=_{tLlb=Xopf*92#s=Mv&?ob(6QH_|5?T%Tl}Cw;QP^-0!w(kB~SpJbgUeX_yz zN!EGNCmUR!WSu8{vcdI9)_Kw=8(g1cohN;=0Y0hy;atj~{o!27Aa$N=QRlhNt@A8# 
z>pUH|&NJdjo#%Q->O9LGsq@_Ac(gQ2UY{NX-0Rb$fqQ*=3~=q!`Wy0dDTDro{9MW)bqQd9jWu&umtK9%CvxaKK-4LrrKfv5O2@D#rWp5oWQQ~VmZ_G_U|1&U3vZb)Myp)Ol`lymg-RsS;k}kL8-v zr|u&DiQx1phxlIwr%#mO9xEb)F?|ou}j0 zc}5(m^IY%XU4hC|Qs=qJ@z#0Lr^tuZO^&zDlRmYT*Z6X| z=Jctr5^pa!ed<2qc&o!aM4$Q^@lJx%r?wIAEI57Ye&SaMPM>;!_?3dwryeAJRjN;= z_%*J1ieCdy@oV5IehoauuYsrdHSiR_2Cn_u>r>}S2Cq+@D;cy;NuB39x6ZS~t@Cu; zI?sqBb)M^;*W5fMb)K6XZ=EN7YCErS54q;_sfUR75}ZD@gLogo=~E9A?<+Wcs)Bew z!Rb?9C*EIh`qU%D2MA7|`UdeqsXmqB*SO{>ehoauuYsrdHSiR_2A<;Az*GDhxb|zW zPn|0nygqfVWYFJN*WtNZY{>df`WvrcsHCJGZIweF?__jU?_J`h-$Cc8otvITpV1 zmPI*M{Q5nM)hW@zkj&jApdOmue#o_XBVs)&++tyf}e`-FMi`gORXdCuA@soyzRH* z|9AXvTR-1p(hf1$f!|j0++}___dS=j_w#)BUn%GN`k+CIp88foU()kj^Im(;BHvXX z9Je@r!4G@`B2f9W_+UJ@a{Gj`#J7woiFqAT_W|VhCZaH z-_QAh_&}i5dLN73IBV(Ed1}k>M}p+_4Oh> z`QP-@R~6qSzkN&Pi#z@QtK_=(%8M1fzCOEM{>{n%jO*W3^lz7cbMk)-dM*F%k54Z@ z>9zc%Kb`!f*YYP^{>{n%G5k^c6X{Q9e=R$&YjbwjGM`RfEvuG+IgHgeU#$(Xz3QJq1S2jIQ#Bndcair1m=L>xmA=*-8)bm0O z?g3O@7uwU>FaY1+b*jbO&v7x~eOepp{9lcEU*&CAwo})U^uSZ{L z)%6*7exRM0&)TB>;aBRfVxChmNqM~O&{NwX;ca*GN-g=LJ-z1sW6yaE0?pmEnb6)IddgH z3jIORxjxG8&f_w+h7i0Ci5=>r{GM3nSK3SENujM3n?MeHS6nCK4vT$`Mfn}E&abo+ zbEP5U8p+Rg${&*c#`42AwB7hkQ}SLM`CjEz8K0t0Sa{VCrt70~YYcgxx5^D z{5to>IR7JeE`^^hh41Ctac4P>m*cp=egW@*+1bqy2 zUfI2}Chv10AL+cZdu2`D-$Fj-S+DF~S(Ep*j1ACvW%tUOyq~3fp!3SkTV9Kt8YB`de48yu8NrL9g6iIknxq@_OadKBsa#_wsV*^!Q@<4fd1bURDl# z$ARxGwqHeUjB48z7FZcUUj%)(+J_xAw=90VQ@-@=S&zQ;@W&rO=4s$v57mZPs6 z@LOg7m$Mzm+d)@s z*C+FJ9P%;`??B%U^sTmk?^NKp0>^9YKPK~a4dnyfBj|es{BF1Ztn#(Q(y^$@?QTA- zg^bL@5=-?_=5~pl`Y2;n=j$Vok#rJ6^-;#E=zlEA*wp!&Hf0_bd(=l6o1%Yxl(DGu zHEl>bu}6KBF)8{Vi_)GtU(<%n!(Q3Fvg&+I8;+fXiM;AFe$a$cN8t1NPS2Z)JD=@Hs9rAgc^#C{js~KEtA9F&|b}%fk z=4O1WeDjY@>kj)p<&&3?FTO#hPv#5Fp|61EmIJ1>&VJvCNLzCMKGyx@|I`fLX@98d z)F>D0aJYwETxgHng7Nu3F|88&Bd1QAxC3NxNI zgN)ITv?cceoPn{dYhaABZh$&tt%{&pt3qfG&IAo>9T>Y2X-n=G$l8$`eqaU}yNns? 
z%surOGjXAMN!#JsXydo)jAhn1%QYwps^=Ndm$eF7W<$nQhXn5P%FMMY zIF|eM?Z!9sVWDZXzoAzL)(}un?(3)4s*o{itqQG6T-uWRAbNFR4FUD!UVv(?3ZZSW zomoGit`lTdHEc4z#dc=h0LOFBLA6$eE2~$=I%!L>GwTJYH`b~Mde^F$|FRi$?1q$D z=6qbA`7dEXYx{)5dN%6Iy$tnQ6$R*D@N?9(*hZ;$FCfOpBGj~%*=8y0+SIk-zJaJC zn2RxseqmbcZL5^Nek*F!HrRnu)(?=siZy(LKM!=bzF}JBcB@j>u91i8!}s>Av0I9| zpN;kf*c(9BQD8iB9^kk3iVwg0$G7%3fDhx5^S~S&Gwv9-tXWe$-TDCfT0ZIz7`Hcq zj`79#WNiTXVXcawUaMj*>Ju2BH-U~Z$2eq70QqrWM!ia{Ag{9buk2cUSucB}*N%I}qz-~W>z_jTm=|6cj|y+3(it%{&ttD-6S`JI1~2fmeO zt)G^kIi0wcQSXnV(^pls`H_~0JqAA<*o`Sr%rzG!1m`9qF&C4%MHa(s>gL%9+ z_%*dwMK0P^tqJou_RQmb`FvczrEicRa#nWyt*e2ev9o~9n$GgPfr(Lz1AR)w-ZW#H3f@-ab7Rt@FDyT2_JXQG}bzTc`&5C|!T3hYRQtw(s^zmZ&_*doi^}Ibmm1@)S8*%GtoNAl0Ova$@67k) z$9-URzF&zLV7zmV({auDNbzGFVLoDv6@ee)n{%ALE?PeBAIn_tm7Q^|Wo51>KkhBd zT9KhOJ#68Ppr zcF$7oJ&LyE9;i!kjY>W-tsQpnQq~!d#aMM8f_WrgXb+WOyp!PzK5S=|Mx-sdSL!m1 zM?r;EVP~s(gfXpu55$G`h=cL#&-VqtZs(NNN!!uo7#H#RtVirzHGfcM{tjSWu>l(7 zUA68*F?^2k8iP^RD@e#BoI0lKZoAZqqT$xrBOS zy$ZGNQCw(i*I@j*k0qw-r0p>0G#$sBL#QwJKUFovLepZoSwAe->b;<@A12OyQ#qHM zsUDn5s0a62RqIu>P!G-_)Pwu7s(bx2)Pwi>%KngB-RsNv7Tce53CDB)S9P!7Lb*AI zP+zQ9(Shq#L~y+}webbNVGmUI-dm7Y>HAyZ`@FxWj@(O@wZZi1`*3aPo7?_s208Wt z_&Dc`?U*xoU$3qO_iJ_6tJvZ4s{lWae+1)Kz#rD1=TqxdkiUu@+~2Q6d-)f9)@^{B zF7l{iSP%pdo;zo_N zpL^(XZXn;}dKIo5j61zvh2qT^f$UzrD2G=M{vJ?#VROzA>3XH>!MTFGxlb?Wigdlw z_28UA{@lx#b4I#e>3VSPNb*NsrhTrR%`CgFMxqz>qmzC;bdRT^G(J zJO}p==3J7lTe>c+4Njg1vZw3j)kX8=T%y;jn2d9&-x=HFI~A^1QH1Ni`C^}CpzU&g zqauBktI)pXGS|lu8=OaGU_9@C)Or;{zo!^s2tmlfO%ZUe>EX%&rC9_Fg_KU&imd1Ne@*~x&M~wJ)~ir+Or6lnItYka<_p?}?HIq8d4o2h&1nM($`2A&Np&hbCT;L5$8V9oNqX;Dc!=tuadAS-8k=1H|{a5e!~cTBi-_4 zd?VdB|4=vXORd(cknt(HMP%$LuM_7Wb)BG#TCYOr&-glVE+QZ9g{{`B5Lp`OQYZWy z>B2dQw&4ERoRjXv^?PKH@09CRP;c(9-IDbx)U|?KEnTmIyj2V$r?+Ii3i4OKu$OAf^5&lEoRiY8WBPUDoRs7Z*-xdT z)=BeD*NyX()=%s6SzoUr$l8%1Qg`|JzT~bQu!g~xVVA$h-snHyBKz%fy^2eC&K`e% z8NBONbfrH`6d69>m)x}ju1yDCn_+_gE%!#({_y#}?@MmwiJx+>`xL(h zp5oWQQ~VmZ_G_Pn|0nv`)MJK>_R@I1O36fKh&t<`yNvrD(d+PNn==+_=$-ebHW`F2c>s5^9_hXOlr~mgWls)W+ 
z+}<{jT(1Il;CJ#o_o$6>?uie#3-El?o{;m+`QboCuWIw4FX?%%D`vjuC*S_7kNY`( z%tM!|ao7JKN6}NiD(VM%#>1f7K2-E0%S4WA?r$65xa%IfG{AR>)TbKyke+^j`G8l| zp6Q*hdEHNc?=kZ&KhN85;d^R534I+|*FpY6Rtx`8cMJcC4+;NiPhJ|(bs#awPkQoi zKkik!*;zo~`Zw+B+%T-t{V8oGWt(X!tIlx^k_e)vjC-v?Istj7k5=^(vs3nlq$-SNRwI z)H^OTm4q0xhieOHetM3e?rM$@T3N3`=#?I;Az!g~y$a|~d$NX4^U|^z!lQzaGBVHd znpI;yFVljDS7svP?!)~pb^LtW9{75zbbuoIRswS?etxEuPqp|68I zRMxC`6Khro&5`ct>yEyT_Hc)7pg-CZZ9Q?0PWA{@>syX%?KKf<5 zXR_uw279SoDYUg>6UbrrcIsr@VX@D#=w)_RvgWxGc2YIZGOm&QhK$`8lK#f>!#C7i z2D^D{pDU2_RbG|xDf)zkR}EphK03d~ko!44UuaYGVH?J!=%e#%CFTH?Uu9e)eIhdU zl-EP&+bYb9D&Goyiu`fm-Q@MqdA9;{rOLZPn<9Um@Jx|k=ieCb0gyj?!#{e%_d<5q z$-;3Kjyv0uOJM2I6=i zj<2*QC2OE#ke4}eF!~0gubVyD8H(ef;CGchC0PSqN%=rG9DTz<*WI3~YM>>?>Z6%< zwyM))3|4Xe2Cc+cee`lW$B~$V|J%9A8t7`sNIHqFW6}2ZKu7eik1}R;4K!`aJS_I8 zk1}RO|N1CnGl=jp$(6k}*y;pXxtjT*_$VWP_>|R-u_qdRc zdDbhtSJvdcEn@?8UfI2}Chut}ALzWYGZ%a1P2SJa??LO8-z%@WkA-|*x%Kz1UU}7h zEadab?UhsIR>nJ+ZEBzBHP8H*8VtBeWUBe7E-Wh`>Mwhzb0g{DeuR3BxG)=Ar5u}yuHv6;L_ zhJ9527MhmJ?dI)T*vXr>t1*wLyes@#tcT9qan2{m+bz{Y=j|HIAI!JL8S0_)buH#$ zj?X_sxpls-#{A2C-BP)AzK-)g1>+{cpC-XK``WiSlW{y5zLjm~C-Zd;>_WOJ=$nGR ze)e2vDvqb(ILDr!%-6NB1L>xtZ#ww(w--1wa6AKax%Q%DzK%m)=HXfBn}xms_N~qh zIKBbL1MQ{Bd|gBNKzAehZUnzU_Hvc4C6uU9^GUk-bD<@Cy>?#t^xz;+L@lW+ZUn zLL6rVBF|;;A^0`r`=iFD{bs~^AJ~Nowsv92h1tf1a}ri00Rn5YwGlgz-#gF$IM$)r za^U$Xk^O2uFe1Pa=YXvD(Q93J*S7GkVc}h~!n-zwcTEb`8R<0!yz2^h*AU>j^tu73 zS|`A}EqU6ihw!fZz`7B=zJqtI1aEyl>xOi#`>AT&d9J2v+R4Ak8g{hn zdU~!0@L7?SvGG}vRlOU+Z)9&>KYdEq@q6p_z4iItI(+(+uD|!zO_PtVmG;&}laH>2 z_SQXDQk{xwVhezjV#Aw??@H?Yb7(TYJpGg)`^@XJO z)_$I@j*#(ns@hJ*#y^@)jGKQnopeljYdF(uHq&b}(`zv~KbW4H%k)~y=IY#72Xv}^ zm`}Iw^6Bc2nik|1imXQc(PtNU+^kI^5C_LNZVP=(&%My; zBlV8$olf3wvD9DeN<2d% zP0XdKbZPjV3i1e#)IH+e6FNz$sFN$9L4nf_Z@14o7)}=WD z8f~BE_-OkyN2Bf29F2-8yq8xw22kY~!7D?8@8P!w=udvj!S4)!R}&_4431Tf0fc`i zzr~0NzAG;M$?q=e1g|0lz2q2aOMWX65`>)~CTMjG@f|IFp6^;!j&Wm=?>==5@tvFEFc2FZ`_{Yb<2 zdOC)h+g)md}7yjsdN<51-YOhR@Dx&CzK4G)FVnA8Ynm zZ18F`IM`n41hK|MaF}Tgv5TAzIPN&ow1(Qn4u6w}#+kuk_8O-n+CyVZYq-7E>4f7h 
z*O=DT_Bw~p4!T}z21nQ>P6+K?ZPOZQJ5FaDce~EC!gj>r@0spc=V_F^-syt&?$?{v zXuDkT?}>Gu#@L&jE70C^wrP#EH!GgKv9`@Pdy9H^!QTg$^f7~czj6uaF6m`je4p}V z9ADbkwD>-SzqK<5m_fcjxfJc0{Y{JSPcFl8yFsSK_a*$TNuBw=Bopn_neRs~$1!!L zEqJaF)@`CK+M%5~(-!341?xD`7VXha{b>vG?25H(Xbaw-d{$&-j{2;~s(({=h24AK zWLo3xt;(me;8WMw_c>RhJuA<&CfM7QPi4cWCfW};-O!#r&$K4l+m%n{z^80`hjSI$ za~7J`WV=G~$pxQl?MIyMXwO|@T2t(uiqAmsxz7Hk(*x}TSD4mRJEr&y2A^s6<4#Ys z4_;+j)9qb~ekka#x4-T5Li^C$O^f@_@V=Mv#kgRM(B|FXUyKpPnvN0rS9kaqV}!A$ zV}$w4M+QkGBe2ihx(v>M5$?UznBqdAAOG* z%KVIH3Z-z4wJ;7wjr?m*54oO+iTB<*zYW}cpbSRnl@ zm;MHq{zlOE1^>}^n^rd9*n5$K0mpyMOkHo!>lho1cJgJ6(Qik?Z^?Tsc$3%o#mMb|6P96K1DpijjFZXkI($=H z`l&AcbkH*wFjg7+v^8U$w&l3wvsjleW#u}1jE#RZoftR&XgcYb;+ub6(JsxcKi1rOW9jQu{a-zc{b$jaamr`0|1A2d=K_ZNoCHukClS0d zB>0}N;8ld^-x`oCFZR(RaoKj}Zdj6&Jjc5cr-t!K(;?%X1QGt1zT( zXGriEA$f!auOuXoe8H;-$%EG}B5f6fXggy$ls&Llw0ceg2>p_KriTPS6qf$vUg-IP zA0Y(2Jd4G#dQJieIg7cuV9xfl3w$i2LzRvnN0QbyR&q-Vw?s2W2lYmA)2WXCueh$zajeZW$ z98K~$$wbkM`vR+H78Bj)Boj~ChkF#O=NnDXYWq~Xw&NbYcc$|$%lI!svHA5Yx{7IH^s-LbD4<1&zDVaw6l(%gou9!#3>Y-1~p&8mpSUVV-9i=6Tj66UX_gt@j(|y@rt=xL|Ub)oRkX3wD%@yC5)y zJk0Yg(|_ItW|OiPnzuEznLBT3;ftBZ6;y&6s~jz#oVN$^$zB7U)Neh(tz?T(5OAjrOk@ zoidC^jW4`rCNhlH_>j2VuyRb|oihw`*f2X`ltf9T6AAuVQ zr{W1?<9^clao%`D`t*6%8-x3$@MX|ZvK4Tnb;T81+3xm( zU+avniLQ#SYpmJdxXC<=K zxq(Ag%fNm=&FW5i>ee^WSB+D;XJv`*efw@@yW6k$Whb%~U3M&cYk5-lXOp_;Byv=L zjz0&w?}4uJy}1Q{%IusSFi|s}iJEZ(Yp!Sdvjgk5B}A0FOzF7TmRw9 z@khXq{>Nwh3Gn#;`iwsVHed~){{}pf;JRphhtTS^hhd}E*L_&pdge#?J6<|%)TRR; z!%m<4r2ahCIB~t}hG%`5?l)VRM~Oe@%j9!{hJU3VfUX@Ie(Fp2G0(p;o-)FoD_|^}7##?|#f9x~f2K?m#pK%nB zcKi=O+VPiwwBxS;X-C?bcBGwYN7|Woq@8I;+L?BwooPqfnRcX|X~!Mvy-;I2eS61A zJMI4V=XUx7ejhjGPoMnZq@5DjtK^^EpS<-o$M~(9QS$bqb%qfl{0-+)zc4;>MjhmH!(Ll{GvhcM>&2dAe=VBZ0Q^v&=Y0eL=t&~%~~Z9b4ag7mpZcH%z3 zE3*+kxz3h8?%unkgf#fv1%J`c|2{)D2=Maa-;pl~{DJ@@(}jK)&c8?wE^zU6F8ts` zVg(OFZolQWk9BbuO@fg)@*AzN4lAbhjEqmd^hF~B|A65S2e<{rKVL>C^CEx8j9x_k zdg+LtUHj;u?B@pbu6pQ${$2VmTVC9E<%)TEOP1y>D=b>*9`{|gI5#`H?}BBE3i1}* 
zK0j|kzkxN|RTzkzwmi1k_bj`9pD~nd#S~#yb3!S0{rl#z@P<#2j&jW={0rKj4>0&jU76_-=KK|3JS6d`xh4E44F5- zFgH7A!GOUFat0OlA2N4fcK_VM1q%vt2KAs(PW4Y+7qgWSxl$Q2Nk*)#?>xikoNQ7D z!|{I`176Tt+QR}2BWwAJ;zA>9{vFF#-LcHbTDW|5)}n${iwyUFwig!}Sw$=67Zev5 zS;d8GixqXjvc>bzT)4DoMc&GS`STYqUx@a?xvLlEEnU2v9Fxa+iUTaTtZ+F7<2=O$ za{(8jU5*4AXLj-O>~YhljGLS{e&&=hpJHg%lKDp7*f~>1T|04%HpdE$E$`DTtdbx&(+GsbsRGsKTIT6=wZ1nZ8q>+ z2yuJz{5rlh47r4Qyp~_XPU!b}#_N2dq0T2R8>NFjCcFxN)JyZ%{;FXZecpdzFM{K~ z_~Uq;?~-wV->du=d0$}ie-eBC4ZtzSQV0Ii@fGn4;Yw*%HsYbSpBq+KGg#X6_+xH- z9&^XH)``{SlXiNn?J4rR-ASOwr@LtS_&M(Q?Bqx}=pPWu1!?ViyoQTUG5+iBcsy@R z`KQNeScKl>wewI8K2Ip}-8T9u>9qZ{|I$CS?Oaj?D%|n-!I|<;pI0}QVMxj8AVh#a&va- zarXCl+LLeOiIXQH-Z+twm3c^voixr3h2qMQ|H4k46AF$g;=%Etky1J#!m>e5GyK1~ z^KGXH=ULb2HI|vcn32t(%l}wxuBi(e{V(2J3XH_X^na`e7aZq84IQUrBiSpsppjNR z`sxf1&fkRqcA&0je0?o~_IEW(+E3s<3R-s+X}Z(j7LOO~xz zFn`$##mnd4zUYOOYgWy>_Qm2Aix;n6w8nX%c=gh?i<3P{2eop+z4P=Co5D5su3Yr}rB{{&Mt*R|yw!{D zn73@v@_F-(ozkf?SUvu*c^2kOOIPGi!nZ=nvoK~_x}wo6d@Ho|EUcN9u4vlQ^qH7DyIX-uu4_}LFS^4l zD{qCWyoV!zd%XLn9sn?2^tXgsBO3bDuw=Y`0X5QkZ_bggCk2kCT zo4bzxrSFJwE1Xl9zijD}<;{NDq?}dWa+_Bd-#-7Id5f3LUo-FS6{~IwjPsm=GrTkY zVECwUeFw}Ncg^^b^Ty2{GjB}al0ozO&bWGBzuEJyEV=!je)CHD4_JOr-+4FBUp;TZ zs)h4b-E;3eZ|t2gR$GtXwtk<`s7?Uvuk= z#jEeVeZh)lAB_BZS1i75VRiAH%a<-*vFdi+s(Hto75Yi>ovRnEp11H`v~B6l^H#5! 
zfAei`;O;Ny1{(gu;=AYHwP^9G73g60Q|z_kCV?zdWmvHR(m ze<}`mA$NSi{*RddQQ!QvVFB$oSIt=VFV)$9pYWeWZ-#NqKb~TFlmEV6$2(uII>xPu zLoCp{X{WjAFSD;cf$rf89A|`cex@8c&CCY!1bW}3qI2WPs!%rca5f9Cdpn=v|DQX( zoKG?QpF73Qr!mMRWFJbLk3+`8&Zkp|n>(MD5?|>P%SLxMP;XCRwHCR^irka6fuA%-mFdB~NJPD@;m=OMT9JY<@f z_953~-6vXOeBETxzNquuzEDx}HOVtdcmvUwn5f$S5=cp#?)vNVt@ z1DOou)|LF0rpI!1_S`3$qhrHP`QBzISI*|W=U%P0Hyan(t|BU_`t?gD z`}bep&)L4Wzw^}I@c7d0r9qkFh|O@eXnPVJG^(h)Ol8%BJRYkQ$~NxppY4z9WJBt^ z1Iqgk=)XlhFkoZky93Jy3>>f}^1#521NRMlcwlzm7AMYl(Dq%YLkP0x`|iVzQ|=U% zgq?4_8}5j*bJRpPt+uPNYP;)P{;qp2uCY~ZkE2aTh_fMw*Ghm*bW^F2yq+Tqo&%mN0c4lpk?KNAWl& z$FzAQs7sqiPvv#_b9?%lS=+M{XK!~f$+Op7rzINla5lN5!s+8W*(Gh0PIGXsOl?%z$abd_Y=YxX zXRHECmL!YXwoR5fJ7Diw{C#h4)=BNfWs*xYPNVEO{5_Aq@8fSL{$9Y}pW^Suy&bb1 z^}Oqp?F5Erv%p(WH`FQFY0G8V%k()&UDi3%WA*UGVn@+TXSs??&0n_FX6|i}m$@wQt4A;w`yv=NcVsn|=Ab|c%!k+mI?8zly(V z{O!Tt8QO$z$**Zc_sQ3^Vfw^Kvu<&{dKNK@YZ1Z+VydiN&z>{HWBZ@PI`u5h;l^37 zQ#p*7!|^75lIMXncMucZ*HQkfA&Jp0W%t~8zmxHuE@v2{{C6qz1LVm*40^J!3|eR6QN!+H{4ZS7P( z3p#D6Gvm0KqR zh^-GT$*MAE4A+Kd_r5+kxi_2Lc6xO-=htc7s&(1)+0)BT$N$Tn^yyvW5A0XS`PIJT zeIRkxaT7WryMWObd$Ju|^?2gg{BD`Fyd#6_k{MeaJn8^Kcku{|v4SobIOYeMhn{-=H|#)AE+Z9<3r&*MWE9@nqE@VNdV*f;vM z&|&}64t-{SvM!I=pSnEge_bB*Kg%=VjM2}aEk-{B`|N+}XJDWGPyHJB$^K+{4g6$( znsWB1DQEw)ycTwhKK(TMEI0Zr=UQ|5>8>>g^4E=1u1`taC0c9KnXEINPj{X91N!8i zXpI>YtT7^Y#zj_^irk$PS(O&~YDQ#rok&?@aIDuDkvrld)1@MJCPh}JMefdstf~_! zYm6LM#6+I44INXzrVYKuEDqKfk^AE!Yf42PNQ$gYi#(VSd8kgLtT8y&YmCS}agmu) zk^7S(YtkYQWJK21iIg=)j;muL&)A0UkFRONtS`ebAI>K?m=rF#~4t-|4VqG3HUg`26zUlHHzUg)$CL8@U+G6z6u+R8L{WR<| zzEQstellLMyb^vgUYT;nD^t$+#_}rIG5Yk==(F7Dvz(lq1bwr<%)l|@Gy9W$W!4w! 
znDvFnH62UZaeb$LrsFi-|J1KRU$OtiW=T7)?>w#zm9*n}P5qkCG~NHyufW~{=)&XrwHF@OKLq|f7+qX>`&I^G5b@O2mP`zn9{xs$6f0oz6j?t%|MxW(IpXH7BGF^MFTQ{rwt_OM~ zd#o?czM}Usc<%C0Tkd7XbA5TK$zJB9b++c$Y167r*^KLEl%38#%ihbJ^csI)zp%f< zz3HZX*Wal2U3)I~fr7_hUtSgop2K9_X54pX$3pKY%D%v}nXLPHf@KelZCdv0hnki- zzuvS@IU^57LLn#Wgx-yw+rrTp`|5VU?F^2Doc6%&4L&F2bO7#P@VOx;1{^c^TOp?- za7TmxB!tI}p?5nOjJ^G3z?T8je_rz_d}xWXmS0??eK-mqE(X5XVEAwe@FfPrhf9Gk zH5fj$0&Zn6d}s~a+FR z#Rg|x{2Uv4x3|Heu+s;)kHO)v(-*j}!I7|20$gJ7Ibr8Y;46Vg zn)m`AY)&7A4>qTd!UvnvN8y9b>7($$=JZkcU~~E?e6Trv6h0VE^S3fRw+rY4e{(Ce z58+>I+J`mDpQ1T(#Gk5VgFj8nWBzn4ox}_+-NZ~SRbrNwMTyyvPWU*d`eBUwnK8$B z_6P27FvfEL@Bo7`o&$jg8jSHA1U$%KjOSqB!3JYIhX4;T7~?qMzq@w^K7DuXee!-0nzjPV=+Ji=g%XB;?gFvfEv z@JNF(o}++A8I1894Llmy=9JBAa?0j4Ic4*joU(aMPT9OBr)*x6Q#P;3DZ^`G7~@H1%rTzV0AFJ;#&a_8WP>rD*8*Q_FvfEV@DzhFo>PIR z8jSIr20YDRjOTRV>A*IpY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3kUfY~1ln*wi z3gv_06n`XUoO)Z!2LBx`kNJPCrIR?QrJHzHOO<#}%c8{lkT$1gV2o#&F~@k$1fFRy z#&Z_%EQ2wgvw>$DjPbk<_&S3zo^ycb7>x0p3q03gjOX>h*8|&}vUyET*}NvFY+jR7 zHm}Jko7d!&&1-VX<~2EGcx`j4P(Ik4DwGe`-=y|cspHR_MqgiEX5IyI%OVd)^gBOY z-gohCPXzDyC>QUR1<#`RJ_z6SaYu&o-T@km{m)VAeiDYRJfkYdGZ(E3JDba!p0C+_ z+}5vZcr5P|{rXo;bXY`GlAbs^1F!%h*6Z|+Z_x$P_!A}5l&#$f(ybYLpel=C_lfc~btLcKb19Q)> zW(s}^n0tOT`#2xAVtk?~YX~^`7%(~65ODHwU~;k{;N%m)p9lCZ^?5kHAlw#eqiqR)%AjXVD9(T zje--v-0!P-f|J19@2mNO%YeDxS2qh@56u0(S|oS_F!%dvNejH@CC@$9eHqT<^!>m# zr+r|X(+Oak(@9{P(`CRmr`H49oZbLzIK6pH;e9xOb%D>Z3fT|mANV~y$2vb~2gaED zT(wljoSfo*U)?5{oZ^08-7c7%;(lMP5KK;Szpw5POippXuT~2tr?}r&cM2w_xZhWI z=X0tj%M7nE=6SpZ&f_(39` zV@^(Szpow^OippXuf8LgoZ^08Jt~--;(lK}CYYS!eqTKyn4IE%Up*<9oZ^08J(bU? 
zJYHkW^LP!M$7|p`UIXXx8aR*FzLRVr_Sm*mc=RVbJcfc z%*iS4_ti6k$tmvl)w6=hDem`GN-#Oa{l0omFgeBjzWTmka*F$X^@3n>iu--_Vm_zx zc#Sd7<27&|uYvP;4V=eo;5=Rf=kXfY@Y?27p?t78RVW_}r@q#6EQ?b!IL0r@n3Gd8 ziGLuNoZ@pV^|D}cYBuo?1(Q>Jj-`Gin4IEsEcFw?Rk)r=oH z#P_Qo6W_0W?vf(bZ{1(&cf4Bacbh5o`!13CgYT63BOj3Z6P}d%Q(u(&ZO6W&uiInM zhq|waZv0I5vG+oYB7Q&e$KFQ1d;G`VMm;a_E-87r?#m;3oXfw}wrES+qdkw8-SFPJ zj$`mmbhCHAhaWdmT6b-Z=WG7?L(NmxFaOrR_GF()*$wZlLA@iWw;lDK(&ck=+&R;F zW<1WcUO%)i)q6y*z5O=rd#ZQuUhDP(hl>-()&WC0c&|3;_VagP-X`77{!ENJWfmN# zVAVx*RK~>jCsht2W^Qew)61nse?yg>PHg=BJ?B!t3mR85}o2 z7yZzxMYxOKqGPN%=g{fuvhAmro(~g+s$^ghinfSgN#9Tg6#Bm^BtWbI|E0( zL4Fq;cfoNxZ;0O&$6ayU-W%q3$8mS)cJPM#J#pL<FG^ zuh^IN!j9J`Xm5S0wO10f7j`kO(%!?Vw%&lCz4a;fkF>Ww#r~1@)~DD%(%!=<_K&o; zKE?jw*nX4!C*Quw{+l^499$U=PIdF9`EeY_aopXT;g7=cDDbX_H_IP`<1r}j>0RfK z!|^yA_wwfY<8eG5$Hm?a{zM#4gnn=DCVvu+C!xHLSL#p3@njtL^%nS3a6ARaCEh}R z8jhzy|4MJMKLf`zP~OkG#h-=aSvcF6?#q0sPjP<9 ze5p@ye#v|}oZ|eF`BIJ{@irkqLS(z5O zJ0r5HPNaN8D92}QgM4e zatg@4vN^sTzX9Jt@w-0r9W1{g3So>FqMYBzIrzrR*(KjY;+NwC(W~6(AaA7~ld}DB zku_20_V=ShT{YNSqduq zclj@&KK(M^5@tl_PMC!o&pP;~y`BEXALqfP>Lak* zvX>j>dJ~hfJ#mqlmZ;mZhpV{$gb;t(W_$FsS%Ispi|_K=?ZS6^anZ??irk+RS!1^$f;wy`zTa=R3*T3!MJKN< z87Zr+6M4{X!*0}PJMqnbyX2?M*EsEdRzZE_7HF46n~krw zkH&AipChQpb|Vi#JAS+8pzTE%zs}dV(c#`Y-~N^_hVkj_xoU*x`?~`!BWFQ7e&gq$ z%^h*R%eAhGdr4n^^LI$T-{bt~8gZi|z4iXf0f&(vpq+zuc0!%5(_J;nEBDFu?&rGE z(cUI}Hz56~=Ni6^ybOBXXS-^Qw;A6O1oMFN5Z_AD-=2TsM#p*&>HC~xz?_e~2Rah) zk8@@wzKv`JJDiWaCu)sj&O6TAF9W`5J$yGwuEl;Aj&k0%fga}<=jXJ*UK-zx&V*m& zdZ#bKdVaQr9_JkA;dH}a&O7)>pE@56M>!vPPh*e4k$^kM&(Llce6;Pp@w-wz zH_*N;{IPw_^DW>oxsg4YZ??TgzUg_7`ivReV=n`@`F(KXIKBqu{MJ^-@dL6(cgFbj zyxvvgye)oA%67&@R+7WT;PBPnBRYLUnRHu zfZG$i$8{X1U*?+%?xb1n{ebzy442k17&wB!{FPxK{ zt1&6t5f_c{JCNr?$LUh3f7HPX#H`F|w&P{g)BO*>8K2M8Z^mcVNA3|FpU>29#%Z=2`AXVl5u;Dn@7<`! 
z_9C~5#CNp$3XECF5?4*~D)QnxIo=N(zs7r7$7wxI@K?uow7Dzl^}Eeglf4~&+a&XU zytJ3R9spin>wQnhY+j4=ir;EGh>zV+XTS8lbO*ImjGa4&r7H)EjcQt7dqWzMXT6_dUEH zGV2Fpy}l;bgdo=U#602p!8Ojr0LFS=1NmHXvA!7dfiZwE?K^E7Um5f1 zA9Bm+m~Z>exoUi6%+J|FT(y0-eP#U5*#p09-+9jVm$9EVk((wL`;Gs$zq6r7o5)*} zi~Yv`Jb!I_LF_mF+x{9`w%=S6Y+t!Ptiv_+eg416y*#-&4BVXQ?eb$%wm&YirYFW| z*nO^=mj(2dUVe(cvt*+<2w7vJEjx!$XK?V!*4J_WJVfe!C8Igb6nInLdfl1I{W*f{(w7V9w()&CN1&+*EKVTxsI^i$Xn8$`+%Sbfs&B8(%~5n|*<(d$U5*gNfZ za2;VgkWZyO@Apr)9b7-y4&+{u`}{L(2k-NB|HE&6pMQpaa~)xQybEDUL|4ZL@PefcL_a}k-ytii?kuRnl@9W9w$rxL5bISd0ly#?p z<6JLhV7=fyy&enr#Cv+i=~;oj>tK)d=c0aZ@L|*gi0grUJ$JB1^g;QUhq2#;9QUY; zOupR?xIz6Q)Z_g=?Q4Ck5A=g`tWCI?zTY1dV7)E`JTv$Eod2T`V>v&@V1AJ2~T%y+QT)HF@ozJ z=Q`I0+DD!oy*AM9k-*-uzz@#57U9nmTpzS;&JnB+cDow4gL?sO7k$pXK;w2bZU@&2 z+D0xOy;d}CSL1eY&7ghc>(OgQ<90P}2iK0AeZ*z>KilX>3OxGh{ucn#zT(rZcMwl!|cG5b33y>Z*@wivryOLR_7)ZKz>$sXhIZ+0Lj zW!vz1ehJ2Z{1dKnoWs9q?KnGQkT_R)CCVp06>uDLgX_pP)aU&V_afAT{_cLzoAeBx zg+NYDVNWgft0-~))d47<@_p=MA*a3QDn47{{Lbx3M88_kCCu4D(3|lCSCvZrJ%fQ~ z{SbRv$m@RMDn46cJGdvAiP)Ni9=?ICxF~!i!TYf|)*qH#=r}vlaxTsLEzW1%Uzi(Q zTWACMmD=;3i+a%C$#s+So9iFvcSh=0QsVq>u5aus#s~Hd*BJH*ax>{QrcU&$^mQ<2 z886s3EJyuU88_HR>~r=B*BbT(@;SBFYmL;;Yl|awcf{meleTcZk#Q|)Z`K=Pij6JMINh2QUo@>(I^Zc9P=>Ma9D;#YV=~GnC_r&N+m7Eq)Y^ehAs{({S`7 z;@84@P15#*HR=Db{LjPD=D+Kx`=v$DFAeAyu1B@PhPCH&DfUT=pif!^ebNGb!ais~9~i%pTdY0zEbNmOL7%ihpYU0hX(QXj zbxGSt-*a91sy3Q7ab2SS$Whjw&#F$;M$;y)Nwkd|X6^axierDGHkvkZP0HCuT<5r- zsEwvgT&HsO!A*`k+sJaejiyappJ*R>(c1I5R^xFrZDgCcCeb$XrnTp@E%MtQN7F{r zCay_2+wi^dIGQ$^HW}OGv^{=Yr%d}yd;XBQ$8Ny;lQ*0z?^8yQ_v=RB8x2O@ubY5x zG8lQk<^j($7Y3Gf)9LW zbf&@Zf$xmYG8jJaozd9_!w0@IdY!@Wf$xmYF&IAZozb}l!w0@IdOfiDMvwW=3gv_O zMz4@Qm~S>C9YfAS^vyyuUYCWOMZk*;M&7T*z>5t=-mfLVOAJQduUmj`F&KHjmI5y| z7L1-{i_9xM09>2DG?`FXj= z?sc7I7~f@Pyzg_J+ktO4_qTd!UvnvN8y9b>7($$=JZkcU~~E?e6Trv6h0VE=kgj9 z(1%=Jg97>>xianva*wSIa*wSGa*z2z?y;mVxyRPynE`WZl>3r%gE5}>0pAB~bIRs5 zIc4*joU(aMPT9OBr)*x6Q#P;3DVx{il;O3_sY3Z+bE;527)~WMSJno($JY7lHTL}t 
zS|)wTJ+|JL++*ebCSAVC&&xdqPOZZjuQOwg@w^}SeuFWdKCo{v#xnt&Fc{;R1Wp=^ z@hk%_GZ^E!9(cXM7|#vB8-Q(2*}NvFY+jR7Hm}Jko7d!&&1-VX<~2EG^O~G8ytX-2 zC?9N270L(0Dfwn|ZIF9xU66as4|0zseaStx-p6x&9j7Gs*d{+O_ZT=;jxjDbV~+9M z2)xl?jOQlcO$K8;9{_&9V2tNx;LQeOJRbyp&|r+`L%x0J1o#nyF`kbCKWZ?>b1U#xgE5|u z0Y7Fi#`AID#|_4KJ^}m$u+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT(J*Hm3^Z zgUzWz`CvFD-)yc8a*wSGa*z2z?y;mVxyRP~?`kfU`;vQXlb@G+44m4AG2Ujz9OL;U z@RJ5(JhuaHHyGpj6!239V>~N>D-6bXei!(=24g&*27cOLjOR1J&j8z;vUyET*}NvF zY+jR7Hm}Jko7d!&&1-VX<~2EGcx`j4P(Ik4DwGf3^xR`NfK%$V;Jabug5om+^=H9% z!@zuIp#D7gZWx%)4Aefs%ss|u2I_Ue%ss|u2I?1rnR|@S4Ad_LGxr#u8K}QJ{@j8& zw`QQM;U&>w?lC^cY6!l4X6`XQ$7%?^eP-@4KF4YZzI|ryF+Rs?2)=!0?lC^cY6!l4 zX6`XQ$7%?^eP-@4KF4anx6kIeMIn7K&n*h+gL#g{++z#TH|k9p3+5i-&g;5Jg>npEXaOr$+$WoQ?zAoE`~m zb9xl8&FRs=hSMgmK_PuGc?}BbgXGHin$NM$m)v9I6!*F6V;OUDiu--_&w|M*?)TNd z3MQwx-&g-Gn4IE%U;T$*a*F$X^`C;tDem{x?*x-m-0!RZ%I8!buQBF%yavwWHEztd#r?jzG@ny>yvCU4@ftXf*T8wa z2F~L(a2~IL^LPzxcx`j4P(Ik4DwGd~Q<)%6N$#;Te2!&tY757>wTwAA#r?i&E0~<( zeqZ5DV&)qqr?}r&9R!n8-0!Q7g2^fF_tj;B$tmvl)#ZZ8Dem{x75SXX<2A-SkJrF? 
zyavwWHEvw)L!;qushCf>yxe9XOubz&u9PjY%kIDI(;V57Bo1aI<=v+o0Mh>HX)!;We@I3Sm zj72%~AdQ1$9;LX{tEI&GgOgCcrR~t7YaqAvxW4Ff)UO`=+YY6ocZk=Y_xdM_*I)AX zXNu*h_YtqV;_r?qUia?*Xc3u;_Z1JJZpGjQ9cE75v@Z!g{1#Q+59gU9tv@8&!E(IM zxhE~>GXqf0cJVrF$5gx%X!?bAs0aQ1ygu_R@w#kJM(Wp4;`{;n!S=DeY!B_2zNDQx z(XXXHvw!ps-i>Cv-cu|`{e$#l$=e?)`my5gK2fZ1`jYR#710LU*KK`=T}7MrIbyF# z`&iEQ#pHaG_7&lMK>C&L_Pn-3y%V)BF7<0DPt-ozp?{U#-q3bRMZZb=Sbn1Ru|CJT zs@rcnWYM0a)Nj%rma{!+Ip3r`d^b^#d$$E0_--QgPSl=^)c?xyq8&Z%=wJPgqv$tj zPo3CrGG45&#~b}Rv;FrTaNrBMkO*C>x&Oe0Z6h6$9oBcsK3|6U=aBc0cDyn|-aplM zU}QG1BCPjl$4eu8kDcwQMti7F-p~1|@P7VZzNg6d zrSA&g@A$`0)GN?8<2dsbv48LH#D3X1V*kO5#r|VgTpFRi887P7{`J3nN87#aA3oIa z;=ag#X}=!6{35My#%EVx|78BZ67ai<{0{7&%>OT-Z~Wi&#i{+LzVV;>r}Cfr#{X0M#^_jdS+eExSXq^gzl?O$nlMx&!K;40@f7KspdHzLvIg{ zuX1j9Z&=4M$Ym;HSUn+FL!g5-q=Ic_Zc4^%`jtdEV$6=ToKKrIgYD95hMeCy=HX>M zGon*Ti8{M^J!8+T5p26&BSg1~I(4G2+tDBX^`n2moMPMApS!Ro7+c0SM{HD3(nrQw z_IWkdd9E8VIiIF)JciEBxRh1ezL|9*gL?tiqmJ>vRP?JTQI_YUSr=+>51`|^=$^6< zi@*(zQ!Unh)=P`+Q}&^W|J7Lcb=(eo`>H-PaX!QQ9>iEXhvTsO))p}ile;x&_e`9( zb2aw6ZEn$QGX`iM-{Wt5zA}gd8Tx~`*W`RK2WoJAt`nW1!(5E^+Q9g~B&o_?>5LqQ z$j?}b_(Pv+;gfQn-Fpk`AWviKR;|mX&z_F$y5s-TEzE~loT&UKnfHg_ALDu(=c4yg zZ#V1wis3f=Vmxbs@~7Uezk+d2&q?}Ym!qAgAF_6N5LaqxALWhnKz?Ce$3firs=ScY z%Uu_GjdMAEVO@VF&VNo|iV?URGE9r*fPQf=*2ZsK5i4mh|>?z3+|zPlTc`|c*ln;=<^__RMJ z=W9w)UW)PskPDC(k3Ln-dAY}TD&{PZ_$d1OrOovz<~+zq-5N^RIw1W}pW?UdCcd)2 zbet63TImz`fp6ODq~1a4pTjAB!*1d$`-!pAk$O%1XF2_k$@!-KgBzyb_&r_jUL5gW z$5g4G*Pgi8s-bM$9ur&Bi2JNxD!O^?VHxV>wa3KPO2mI1Tcuu;_9Uh5SB{5?wN+RX zb*vTrJpVId`-J0RVr~W2NgZ=VH_!h%v6<(;iM?sA|A?K-z_VrGUa6Pym*aRjj^}%C z;GO2+UWfMt&|8UfD{*dtx8Gld<5l41&E8*VJv}by59+PKxi!#R=+*dl;rK2bFY^9M z=PMxp((sot@gAJJ2j>=h2mG}-UJH9my#KBB7y~NlAN1DY+&bvp;??SJwPlXgr*sa4 zAU;)b?S`(*vHBEqAIO{n|B>?`cOMKNsV8&maEjlqOZ)3noU^(6U*3Blu1bH@r#NS& z{q-r%(cFD6?=_$&{c$+Oxhn0iPq9C#r(*;;20hz%+t=KEF8rgO?Yr%3?*113G0xh) z+rH-RYdJTdXZvpZn!BH+f6%jiXDqh;&E3nA_t3TdxBb;I2L9Q8oA0b`e>ukFpzXKq zr|CD_U)w*!IUVD(+m7GYE(LEK$;ae_J3hFx)O!bcS#;UXxX8*R&Lwf~R_~y%*OsNv 
zcPN!}yVv8~dhqc!?>#>$WmRdBua@InIqEI*4(T{vyY!6?897(I3FkK9-0j|new~!< z*^IKyC|mA*q~rLZrN7m2+`)OgpIaewq&~$t7n8Cbagk}6BlRiHITOFDmx7y!qoty= zQ|8X$6z5`6$|}<$cgx(VPjN1q_?kRKxQ12n|=^=1)h4+8_EjZo+Zmsm{a`Czb{X)G*aPASDyTkjf zzZJ(@alFd=`&_)PML$sQah!V`_EvlU&)=^x{(?Yr%(iP!XxdbaPjuO?n=|Db34&UkJ6YvMJ1 zWW2WhxBX?jrhm5Iwx9Zb9DdpU+WzU-iT<+vwEbe7)?7pGRK{wPKlPf6pVKir%5~rb z`Sm)ZT<2>+P79}#@88ME zQqehZBXn-s=c={dr+!k(YSSWhp48G`y3zZ*&vcF?`o$bD4suy!MCZ^=s9*XESFQ6t z_v@tWqj@M>@RzQ--#g;74)Ubx@Ae&>M|{_LQfIvEMma}gQnn{9GBX2pnNx#vlsPNd zX5>kY>O85UvwtRZm}i4?Hz{Q`X^}d&=ygAJqnx{(Gi)>Rq((VsGNMyE3-y_wgLAr0 z$_~y(8S^A?E;GNLjKQj?ex8B$GEau_*^xTcF_C+0pP46>b&)4Ex(oRm;-Zr&6}dkt zvc_%$^H;E)$dek?c~V6uuPqrVtF04x&~5|sSFoMPlN!}|Qbp&q`Nez6peKdaC{mfhyY&Y_xM(sSQ3*U01zSoeKN3Rs)v+!50N_fY7 z=2BvNkta23@}$nk`Nh9RJ{8YN=sc<8kLO9f1^JZ9JU79dN_uSICv#XVK%HCu+Ewd4 zm0*6QTah1WgIAPb?h4x1a}D_)Zie2i@4KqpYnEW%B-+sDkn4f@ot7cz%tr6L1oxuM ziNyKHb%1#uv>oQ0K%R%|V2AUO>%bfwbKY^@GGCImNxdL^9_JV5C-YL! z9`d9{O`g;R$c@4Ic|G(v=Qt0UkAn8noOke%J}pG<49-X9maxZwc~Z4a^rgv@YW%nT zWey5$6Me@V6t@4I8^&Meo}fMCNsTi1gzZ0bP0$ANq()7i)D!v791}Sk$kSl^&zNra zv9V|SYWv1~60}u?_L)4X#@9UG0uCcjYLxdP%zt!3-)wu0e4{U{&zQkyDLsKDhf!bt zT<(~B*Zokg^PDh_kCFTGg&4nOzj4(j@BBne%67&@>O85-!QltIZzuSB3g=8NCz0sv zUWEF~|JGHTz2=FelvSlg>O83{!R-gV776AgqF>CJqT{&eR4+#TmH*dO4|y#UbyBAD zq^<(DANDRuunzL1N*u>|-lubY;hf}LjY-*#xJaERbul>1`N}xXHX~1}#BtHt$+gGK z*`$R zZFHR0;{<Jcw0@f|P9nOv!Nf!mLI?RDJd^*FaouGFQdch|qWYO5DZ z*tt^g0k0qPI_Y@MYw0>cuGCvm=bry`)#G00MB`kkYjLe7yeJfb*SeoSD~LkF-6`BdkY^ajRgD^P6j&85iRpa;556Z~M+zZ+vB}r#<9K z)v@08owkjyjQjKtxl(o9w|!?UH@-6N=jSqat+AY1IKLNdCvBi@t-!4D>WDY zjsLd4+*8sfa;4_tzwtlMU)x>~|Be5)zs8pBH`fK*SFRDma7}%$zgX_)c|WlZ+!j=;*EBPaxrVU4$dww^`RhdIwEbr8IvJ}^q9%78F>+Al9-}+wv4E^R>!urUSD);(Q z|CIgDHH7U&uGDtSm72tOZMrZVea0K0@4X*FTqXCnfcw0^XB&|#HOjnnBJ$9sO-*ZR1hrXQSRCRge`0oLn6z%xBRf*89VF_!bA9P@)b-}Eu| z2aucpRdS_j+re5uTMuC#FwSnln!xz+NKk((>T^w??`hsk!FSqw9QL@ba_!+d$r!

Q_<1-m7<^eCuUlbvNYWSA=!0RL<|*zC`pTS1RW0J zxo&cPbN%D|&Pe@AO4!@Y^^JYS_`ts58pA$8u2j9o)QP_2;KZC|ykOt39Q9vi++ZKE z&)FwjYuFdal`46;rG8#p9I3k_Cg+;8h3k!sYe_CQH!*Uh>fGF{hif-!TU_i_Ql6-7 zTzA+up}FJw5?R?H)$K!AGQs-QgyEGq}0!A8_RI*uN)_?L3*6f7M&|q^qaIL zBeuVCoVXUz4sxaHyxn!8-=r-pKhZdGO=4dlSE^o5RjoZd`%Cyh4=MR}H zHHz;&qR3mqZ<3fRwHz6>~*8#5GgTB59_$z2H_E(X5XVEAwe@FfPrhf9GkH5fj$0&Zn6d}s~a+F-3&&a((b_B4Mv{Q9>6_-b9{ggnv-U{;DhF*!SF$I z(qQSEY zm_J%eCox7#HzB!FRYG#57A5j>rGDW$Ju$vL&3J$5I=z5<8GO`rih+v_&bm%-;NAv@ z!cHIHJ_d)wPG8`@21mk932=$Q=Y*Xrfv*J4W-3Lgxob9tBw=tC|KQvrRD++LsiQ#5Ce1i4Zhf?TP`f?TOiVn)EJ znOdrZUAv}f?TP`f?TOiV!tkT6Ot=cB_vmBQ6eu_DmXO>V?4=>ImYuE;A;%VcuoeM zY%s?2THtF9#&}Kvo?CF`l!5XB&+1ybky}gE5|Sfae&D@tg}h*I6PR6fUQ=pmT7Qu!RKp_gFhO67B`hTejiE0xc&8u|)m zu2eq9YPeD`bEWb*RzrWm%$3ULSPcV@=f*6g59YZ=A$>5KPI14l?hs5)alfxt3nr(y-&c1ECa1XHS9j-gDv#F~^E_Sy=kXdikJrF? zyavwWHES8`|3M_ z$tmvl)uV#RDem{xV}i*k?)TLbg2^fF_tlev$tmvl)l>PL%HuW0JdfAFdAtVB<27&| zuYvP;4V=eoV8d&hQ-$)u=2W44Fr3N+aq6s|V_BS<#4-M^j5#^Q{l0oeFgeBjzIs+L zImP|HN(m;XxZhXL2_~nw-&fxkOippXuU-&LPI14lUd-oI9R5W2v9zb1IM581p<{1LyG?IFHxBdAtVB z<27&|uYnD(ZB7-+2b)uc@&T!ukf#CPT?A6+40#QC1|0duFd7STrM~uj2j^AE;j|9< z;i5y=s{0{3drvEt;~n1pF*#r3qrB(!KUd|D%qOSwh`fe8cA|G+Bg&ayZWAQ)-o>R} zEhX$6+=B8+&kTut2a-8WKgWA!)vqD1o#-9n^*fCGB*N=oG4-#kOFT!%v zKS)2WnEGLae)OLIiLP(@lJCLg@-w}JoCeIdA@-WIkL7G%OwKoHpUL-;e(8B_hk7S! 
zUtH?fP@brLCjUp}OK(Wt5z%kbzEY{*qPJ5hTwQvWN*%j5+~qknb&C(&=xo;tDLq&=*!bL^mBO|DdOVMN&_Mdbd~kF_bX z^Wl;A*FINLMBdN-(Ey#JPmeeH)8tAe7dnm=?suF0Sw!bb#mhy^mq#Ac?6X@w(SOcS#Z7V`qD+(H`oP_pSR2?>k=o zUW9z_HdFZCcgZI@m#NNm1pk>+g!TvDDfUM`AoeFbDfXwncuA4TLBjE(KJB+1`;NBT zW6_5?UJTv%nT`+M3oW$186V~tIhFsffS+bOsDCQ|O>PqMkU2+A<-f^MLjQHHs8jfF za+Oq~|CxW}RQ{WMB~|Eu>YvJg)Ay(HTdx`L`BeRF`r7m%W0=Vin0d45np5*8)|^A~ z4GOu)eAM)X`KMWqn7=b7=OtI_n^tG?y=xV}bKum8&$D zFO_3*0z3Q$g7J+UIjwxD@TajarXRk_2h$&CoG~+uO+e@Rr zbgUHJTImz`;q~j7h5%yjoV}5YZ|ei^-D!JuRSb7y}b6A_*#iIK*v|9*Q7m3sr!}VVd8BS z)Q?+R~nF8^L7{e#{xoErwcuHF=#e^2IEeX5n$N9PFR z9IWE{4PBXI^{Ligi7#^s{P+6h^6yo{N9xJkI-F|j4e+J?^(oF-lYftW$~Y|jQJ>|*K+pEw_BD4;OaGu}`_7nc`R9V(`M#l@Mg2*Wf~6d3Ga9Ot@w zGbCT?$H0KxJQa?t%R5@2Y4(G<+3D>r{H)Bj!V3Sxp-ZRexTkooSO!FS9**6891H+y?)*;xpQkJn(*DCK&P@}q*{6)d(jWCH&P{25eTs9@#B25;^`t-QQ=F61{=+Hu zr-|3>L&jm-ciUGJui1yxvwgRHHSwDMF}~Wq+rFB3P5-E8`)>Pc;83XL)v*D$yt;C(ar-$J15HfMms0W z^M>*h<#nU}iE`#K55+gpZ}R$^=(piG`DOCu+4=F-q1@!bv-93DcbCa;XXmv$Sw1q_ zI$8cPa`|NW#!i)Aj6T`<#K>WjOV7@o$2?LdN1mM%&(3{k=e)CXUXizU{;tNkt{UgI zqQ0GX>{L0dIKRFsuNddXA6uI^Z~oZYWagBeKdf=Su*P}88s`Dy`rummz8dHGI$4gT zljT(UC#UNH^=>%I+&XPO2uGQBrp@1kqs%wc=I_GMQ{_xLS&k$-$5`W>VvTc$HO>{L zd2i(eYn%)0M0tBomZ#@bd3jEiho^D=oyNI$I2ZpXwU_hse^Pt-`>ca}XFD7xO^M$D za=HIvf_KN|d@h$?so-i#=<&#!a2+|q(~X5@S>H(Q7xm_XpnhK!0Q$NVG5a-cwBQfw-K{z0XuC z_z)$3N74O(WBz86goJOocUjVc_h;mM?%k9+!L^joKdAdd%BmeH%e~(d6TCkz=X3At zlnSn;qz&C4Qg+sUIM#3Yd_PJ*BYo}p4*FT>Yj1S;T>r-Q+Ya`-So%5Z33ToEnP`Xa zBhe1uC!!s`7c?8sx%hm5?^I|zF9p{Obop*WC7vNsCmrZi20DCCh4%RV$t!qLEw@zgAr2dX*a;Z-xsD!(0$+mCS75O0moJ$Y=b8y)K1<+n$9Y_zL}dH47oaNOx?S6$_;_4(|e z^EGaCxVO%ap}e!_su7;=cf@g*Yh4xhl0Nq{U6Esaq_^Jhgz~P_T{X%p*Y>+3$M|S( zlYbe?yU%vj7;m$-*%Nt{$9fOx-!8a6XxYn+^7oaN&}-SlRs22WVjN%6+g1EMg?sH* z{oN>kKe+_ut*&$xe?Pet$883>iocg|ugNy^_mWm9XPf!^NNXIk&Fl+aD~6om?29%i zXPemt0s9B+D<>% zxyF0i?~3w%x4CMvw?o?*06W)u-}Aene837OJpwNBN-DuA1iU)cQl9 zKi&INzX!^P-0dp<+sC}`<^1Bj;2dF}cLBdRM>yBa93j8Df?u2?oNH!|kYC-wFU}Fp zJ2OXUrzh-ij&RT 
z--hFir^CQ6{@d6+F&@M2a~1y$>|rP$Ugk#mZ(sL9`S66R_-_>tNBO7+-6;QU;$oD? zH@J%bHgO!~;~sOP{I`f1$0vfDv^@#7X>0Pmh}n=+?sucqod(;C(=#v^s6Q*vzb??9 z3;o`(Kk5Nj^?@AoFk&#|xJTXmf9oNzOJB{u?LhAJ1NS(`IET#~>xc3IcgP&$95!=o z0Lq8J4(Ax>u$g0nP)@s?W9029@Rqj6z&34-TZ-5YIsSI6YmgIRoAYE6<_YyD2l`V2 z{b|r=EZ|(_+-F~NuCs4hFZV1quuEV0Z#-~r{IRu(^X8AOO=eEq7?S^P<`78!`{W2Ua^6wY175YAo34E77>Hv2{EVg7KA zvtKyJ*)Qxr_6z%u{c^J0_b1DB-#AD5@9SCY|CIJ}p7L4j|CILX=K@afoCH!oClOp3 z6TCYvxQY_zUo90}O$jW|VsWgWlR#pRz9TI-O$od+Be;?hcz2!PDoS8^P9kL$j+E_) z2~JbeMqF?uC2f=nuA-z3j$Km9DkxEQ#(wDj=o~yJfkgZE#{}0<0w0JAuB8M%SSt7s zC9ph;#j$=)0tr9%qy=Xvf%j(w*H8ij z17SG7=s7h$h%u0uBRgY)D=C3@#|2kW0>4@+xSA4J zVhoOTjDZBdcccZUDS>xp1Xof5@2(SEMF}i1M#?H2DccbfoTj9WxZp}k+9(xVMM)ca zPDxosQp(QQ58WTjf*1pd_U(@euAu}z5Eooa34E|r@F7ZIi7`0VF$NO8?MVyHPy+AI z2(F<7K2RsPmJ(QEjFeS7Qnn{1I73Mralti|v{5R!mXbDfe@Iz%Qp(QQkGvQ&Cx|g~ zf*3O=h%s}H&!LJrL2NjIu9-s>!Td@GbFng*i&dD5W)4*Zb1@z0RtCCN&^2=?9q3dB zI#q#AHFV4zI+-0ahfbzr=FrJ>axrFU5M!1GF=lBHW0oH8pX#MSj5&d>>7VML?=nF@ z*984si+(o!QyuhkCeW=3bZen&`X>|U)C4-UfzBc5nEpAL9n(K2(=q*XG9A4a!Ly*T z=PN%sw!F-pjT8!aH#VzO^JCe>weAJV4?8{ISJ+J>!`l1qIj-ZLqb4GY;QOVztmpgg z!;Vv0bl#*g)okL}^R}-WdtT9G+HlWRZseTv+%i?9%R?*!4nsyF8Q4wkE1goh{oHA4 zd$XyL?JA;@YS^%3@~W%WyJ@vuO~GI3@v_Ae)b?`~bSH>zI9s$miFbV6bYy!`q$pWb zUbMYPL4Wx0WOBra^`>pE)An7bL&$m7`PRE`HsmzNi^S!QDshSNZ+2uE*o$yZbput|5 zvjg>>#ozb#W}OtS?cjZ7%LkkrhI3bCuX;F}&4xN;JD``HV^?+PuqElFQSUkYJ&(Wd z<8LSaUcld<;_t=19kU%#$8kDkJE2ZC3%mt&L&#gOrE#B)D%w<5lrDM@z4s9HS?{v! 
zWx8I`R#mP#b=m^CQC)V~7M2I+VXt|pbGEbA>zwYqWeR*xqAwKuh-4#LuS>RzKHnwM zWy`ka>Ahu6C3IfG-!8QI2Tj{-`Ug70p)(>o0(P>Y9@!qS;~X2&qX*i)8}(nt-z)h0 zA^xiH_aprM7=J(6+bi2kU%NP4tgj6WUzLM@Yx<;Zr#{=(JKI~=(`_#F1{IWg>S|k1sv&9 zM4q(@$NWdYujUu8^5j*d3h(dIhZTu1u~b9A)1jyCEziN`y6jKLYivtKeei#Yu?gDJ%G#~Dl` zl8)yPNyqbuq~ird(vfr~9Z6@>k#r^+_pB74bc}|M&h>{{5OonyUt*QyfSx>Yoq+oe{*WR@nj_EqL@F4f`CeyK{ zjbfGxjlACDhM|}H+%3UwEY(0Qt{;oH0994%+aG}{H1u%80^Y8ov8smZx|%1ec9OH> z)YLbjvBy7o@1wcf;g6CTFsi0kS@lDU6sP4`hbqq*h1*|4DHx4{|=MYnBGGWei(Rwq~Z{ z7&g-RteVqwJ(U^2xDgu~P}8Xl?Xgr=ojst6rjbE1wqt2A#3aVul8f-~Ponx_*WUiF zUbTCFf9E~YpF(-9Nl6SZWt?xOP|3p zG0xiD#2gQc>!;9<4=-H%*gYqLpX2--=f7XkUWJQml)A^yn<;_dS9Z}#{z1<7m6&ef z7;pbjMO^R&rv8Ed#BMj^$X)n(tS_CrvfLA#|0<7BKczgOpXk4DU`%MoQyuWOyY1t{ Y>J}5nErM@j;qUmLaXvo8Zm~-KU(EH`EdT%j diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co new file mode 100755 index 0000000000000000000000000000000000000000..442a88cec2f5bc2c7c85d91b9be3c717dd402f8e GIT binary patch literal 67528 zcmeHw3wTu3wg1VPlZa>)5H-R86HLNG3=l$i3Ip;!5fOO^+9rX7hw?H6MT#dA42a0J ziW;en3EijUiydfVRE*QMTS6;tN_`|Wl1 zY|Zyg)>(V6z0TTeueJ7>Gkfh_F|}ZtnUWH;4EggJO-71)O%Qai8+=kaGnHkdjV}1# z&A7-&Lp|GSx?~u2nQ7PswESbSzNack{&%5UpkX(`*$yHYhA#I@x#7alp9R+tLaINu zqpTsfccFj?rr|mvGeQ@;7j8SVlkIeS-M_iNE-zvKI(t8n#xQO`JIZ;cf4mnz1_kC{ z60C9s(@@cZ>^ywdp)LS8>AES?=gc>Zd%jt`x@5)r;+)$`%hr^xTK~`kPvsPsEnok( z(R=GRbBdRiZzx_>SiE#;>H78i%1Vn%3hyjiSzd~ol9Do%6|P@bytMS|%U7*kQoQOL zIct|KTVGmkJe9M4<)+du|CDn_@v04_g}1F-Q}T?E6fW7YY*}epGV7-m>$PiE-PJMY zF8^zsyZx_mPCsMLomO<?R zQ@pzLjcX=P8=hY{?YbG03a2fcQaELJ?&!kd^QIT(Ei4?7yLw|@VeZKMH5-Q)mTkPN za7kH7;T@w3?WuETPc0~%Hh=cD-_KcBw(^eR^3u3Ktr8pml(T;My0XHhYd5SZzx5kA z>+f2yid!^It?W$Rb2U9&4iKdImer0pM{i+cF*l%k*WT#|`^ z-SI>hy+8_rZaL}orx&iT@Zekivgw}634iY9Lm5Bq-I#5SPo(uv)*53*3C`e8Jq;UG zZi}G%+}a+4<9N26Xj~YLb~lVJR$oPOelS#<5=*&176aPF`13}?_^>>d@$ePuDLZt? 
zFt!~E8%F*HPMpJ#N@yq&N zT|&=Nzairn^_NwC?$G6+n*?4T8pW>{VT1^aO@V2a!1K}scIhJU{H_Ac3kCWv64cF2#ql(f*ZYK*7vtq&Vt zukR7E_>G}J!EP=LxNw*YgD#xo!Xg*0b79DZ54teo!djz%_q{Obz9D_zgD%|>7ryPn z-x~!3yFXPuuzO~8W>HZntGIZ}mEE4IzOq{@-v8pKs{3Euif2T!tFte93ZK@pO5<>S z$an#tAJxZ9}pW*Z45Jwj7sPymRW7|hpq->R^qzSfNnFy_D&?EA7q**i#28?w~_Svvu@X4>{n zmb>+eUqP%u(G^4rAk$Oz7eR)sSeBxh9hW`&R74WOExqX25$w2wI(X%?EKfjUns(&W#1bgqvYw%*f zL%~)Y!OM6)BzRA^K78?Z_?yV`-0V<&EcOWS;f-i&OqsEHbB_xxC)8tarMb{@NEfrL zZYN_E*P9nxj?WnKdZuL}u3i1Qd7f#Q=UKB%TwkfmhP-ZO8OD`qT?;C$F0*=ct=`azoFjs&DAo ziVyUo-VeM>mr&OE=Wn^8H|l%0;`D(4d_eatmcI8s2453;H72DP*~ZMHCiE3DyE-8= z&9TrIb0q5l2N(wRQfZv5PYYiDN9uxcrHgxQ;W|61E<5Jj4^v8@c@8xC{vqQP*y_*g zBbUspj`W&e9lK;fH7AW&uZ0T2FYb?pN_W#oVx^-(Gz9ULEb$fCF`UH6DlH9Vj~W%K zH0r?bS$w_+Kl{G&vr>g0MA;AUsmJFKK2dz0!{>Q?epr7^>>6dS6JisTy#iOZJs~t< z=fv1VRR^fbg0Y~Y2}Xj--Y)?kZSRv~lU3d1tjVzVgZ1NMi{BymP zYNW)BTIfC&n*$rV(6pR6@DJboG$&?`uZYDeVj**wL!S(p0f)Xw7=;fKYTGbVk2q%P zzd63t2FIK)e)f;4RyXsSYp!P7fAkHhU|T}ACDd&-3>k0KryIvG#&5zN-l|u&WgMu7 zXt78}R&^vJyE;AVJC*6#|5};8{qtKtzy0%7p|R15(AdNHG~jatpGJI+;&Tk2xAAGh z=U4bN{_8 zxnD)hFx}Yf!?h1_`K#xRwb}ni5o^DSI>uIxQ5=UKKZmg*{wV8Z?z`VYOfsKC`AcIf zt~Chi@t;68hJAH z&g-zv_QWqVDoNMLP5;DM;Xx+Z}K z=ezfO)G&4jPzJasC@@kaaBoOpO+?`SsKDALfd}@@7kzvZ9;Iz*lLjU`7S-OXMp80$=2@5Xga3ZFSqo?`ZTx<+Vr(}%V^hMK0z+cl-{sk_5 z4dh(7pjFP33xyBMV<%h77eJqsrxtuPJ>}H&EZ6ibr~Ze4KI-Z}57%|B{y7#=SO27` zbM?>bhKz43{gXbL@ru$v=^LOc>R)(%Tj`(IjVa$&`Xqfr$}39$q;JGnP@jq?XsCZ) zAM4&q-`KsC{uuac`bN-D|KvlNsZX}$HT9{=UHz+aSN|-J!e%sm6n)Y3QShh!NgoA& z>Ywxtkdyjkc?0C6K6N?usmrNaJ<(-;$jwH?@r)?9$cVy2zb*6KU znK0+b_dH{Yid&I@z`a3%HAMpVhXmF}1RjVAtZNb|aU9nw1`FI16c{NIxHlxQCL(Zu zRA6nBK#AjWy*nWAjCrWI^$B^XIKIq{;{p!{1vV53JQ5Pv7!i0hD)3m7K#AkHR&iY5 z!Jxosk-)T6m+QKKz%%Bd^zjLK=r}$ecCX|36kIbVGA`5SbsQ&+ zj^n(p$#_r25zCukT) zcwN`MmA3IOkiG_TGA^*Z268ek=yJvdUCubd@>=lG^psQ6vs}}&oHkhidL75-;hJ%f`lPOO z94C#A)9&Jv4pG ztk_S~_XoUF+%byZA7(I)ztHCULx*i1Ol*^%mtT3~!W%1Z4Bfb+bKh$uelPyKb*R>Q z65k61PhY&ZvI^e`w_8?^jLNM0v$kee;SJa8=5<`RlJ$C~iFZn77k*ER@jL5BtiwC7 
zNkLv`z5WsCrcT;+=&z)G&j;EM`JFUYHPxj>{r0N1^$puwdA-!o%Il{uwWT$F%DHsX z(NI?t?>bXnwZ`m%{w5u*1m1`EIKj67?@Rm|!S4aykN8BvtAGcHPZE49@C@SD3cd|^ zf8tXGzZdwG#Ha5vrmAOB$Z*1E^h8<9Afc%Qekt**1m6PuGU8VYeh=`=i4PUL3V1s4 z;eu}keg*Lnf^P%fi}*;v?*-nQ_^4gR4(L?r0cGb(hM(>@_cCaEco5^03EfyTMOP02 z&mwLMz6*FZ@#_SC82A9<*9-m#@PWi{5d2Z#gNWZKcs20B#ODkC81SozFKo4k2Qe13 z$rjfp9|BIBY;kRJ7jW8Si))h)1E)>4xHkC+aN1;xYm<)xr%krFHdzgvHre9Z?ys|;LSus^Xt74jh zX%*7}jf=&mH(!7^_|UDjNXGka;8zpBS#Sq<4)MUo8AK4Yt#1t z*EYR<%DI=}2<8RGxXqBu>)ZdAdbse&wnLc5oH@!yPB=Fx*y7Ap@T7C2f=0zW1=V~DR8 zoHjL<_y)mgQ{#x=nP^i9b`9Mo*fsD3y9SY5 zl0n;4NZH5<2M>fQPVvQa%slDbqo7fN7tktBZB@{!*rs4w#l3)Dn;OqCzDvfOHgyg0 zFA7eZnn3(+!D&+yiB||tn+g)I6r46SiTFK&)21d9-zqq5>RRIWCfZbjU1Q7>>>7B2 zT?0?BYv2iX4LrfFfhX8CaBbIKn>trAcx~!j$)IftUkzY0C!B{AY;krec+z=TL8Iak z1XZqRK{2IL|5A;ykb5N#}6f@u{$2K3rg0mt~uGUl|Y z>xh3vaN5*t;$Ib@vjL^o0?1fDZyz|HxmCwqD>{(HO4%_ zu7M}mHSh$x2A*Koz!U5mc!FI6*LLl-sdFWR*QU;u4BDo+Fo<#LX9~7BuPAuZ`MH8d z#Zd*#idPl1Dqd4Ct>Se+uT9P47(XpzPMexf{GSA;O)Vh)jNr7Xg~b0^aN5*O#J?pt zZE6wme-WHEwV3#K1gA~iO#HivHkDx481n?X2A*Koz!U5mc!FI6Pq1s?33d%!+qKuG z&Xo*an>trA==WEIn~lM*`NwUt?gq@XAIBn?!LJRu`FmCsVBT$ST3Z3ffANI14RFf+ zy;64q7JOxF>OTO^`=`aJY~OFvd#P+c;D!%U*?ws8iBz^9b$gFAwx3veSsL3sr%gco!m73)?1FUo>wilHUP4(cL1`lyf^zw+1Xdh&c0H1_LaQYSGK== z_$#Sw-*3`uscb*shBsB;hZc+Mqiz@3CsvB=(;pJquYcmQG`7EDq_oeNF6{@+m-fS# zOZzb!r2V9OrTxrDrTq=xkoLW&zM}4XRq2~b*WH*#9v4(aX-mx>dQhjDbg>lk{G` ze5cp&VcqEak73Oo`}`Pad)S%I$L@6tXrYJn9=>d+M@FC;Co>Cl}Ybpuf_d=H%fz59OKm3@0Dg`MA!qXE~#B zJsQ{9c7Zb%*JD9Hz@F`l$MtxW547hv)MHQJJzs7PUj}#?U^-wrU@yR4FCPy}|C+-+ z?SYQ;u{nIHJ=l?cqMbd&k-i=eUvB3((qHhghq?W24qsvCy8Q)TjH~qbc(}Kn@AkJj zO#MiIo5R$P^tU-o{YZb0hp8XwZ*!RX;n;qf<)^)Un&sC%FafqQ0X8+rp6dj09mMru zd!92H*OOs)SJ?}kDY%}3@*(z3&NN(4!}ZnnVrK@fXW%-=zQvh^>sg>5YJb5gz;yx2 zhuKBWY+TRA^>BNMGY8jmaGh(HICF757xW|SWzIZY&qH~hy~0_5>jk(TY2WJHgzKAd zoo}yl7UOy`=ttRWl7S5;xEl%`j`02@i6^M{G~Ze|B`+-hv{F^&*m`wOZs^{ zO#hO8Hizk7($D5F^&<&UZkJL!_*6H{?jNo zZT-_IxBl(A!*#6ZVC}2&J;ZOTz4b`j-StQU_XY*l6balP5?C7%cpxgUu1TP*N5VDU 
zjqMHy+!GWSDH6ChB(NqTaDP-_ZIeJ*k0jS;&4V^$d_o?~TS>i_MWF(c)T?=HT1 zapj_$tV2sPp75+ocGrlxYYVzzUFR)WJHfR;+P1jHf$L04P|kHV2G*q*`(zCjetkXS zzsB^Bw%0iUDLWh#*x)x-zwRGrT4U@orv~l0?u2WJ46IEl5}G4@K+|`kX^pkZosg6@ zMg$(Uz&kM2^pCUeaGpkc%B9yIMTO>AU$hTgYg*&&jZTx4z10z*&1FMP zi!nYVWepL5M_Rx;eWdAU%&$Rv_8IG1`^-n5qe9bo8QQ0hFfGmnO;UFBa+LKNWm=pQ zo<=(vgR(vxk9}Cb|ut~PXn%O@4u+}FiG|?h~heHAzynRST8}<`x zYy0fOnyiS>B=jXJWsOY&k9zyCAMM#stjFyW!#W~EXgaOuXYg#T#tAaVu$HJu+IL#d zmfIfde*47GXDzE&MlH8jp~eq)VrAFlf`(DyWqU*>hDe}cW)@vZ|3V0ZzZHjK5qeGK%oKiUku z(X=MpRSs=^@cE|yT6-IQvlM+QU&DH;D?vASp=nLAx8rwA*AM82SYJhXhkVZTPqlZb zXNHr&>5qJ#IN{nKeP%w^Q>B9s{gKZXSKyj{N5B2pwKqkFwN|vX!23Qw{k9kA=wI~D zxh}s5)|$4ncCdE3&pycL zmESAx$F5ytU3H&*kkKo@SKb9KKdi0pvkz;2hz zpc9g^+K9jdwC!QA?V0v26~`%;UdI>}n!0SXAGXxAX4#K8O;UD{ww(*xw(V+%ZLn@# z;yCI#_j7)sPtsQdQnn{3FhV_N!-navjN|Mx*1Ag^7n;4Cd$iAnq^u?)a6fHhAZ(j{ z%Q((HV|~2DaiOW@+@t-sNy-k;HU`7i>Bo%YG6vsPW5RWq#Ic@U#b!g=bh;mm)9eS< z;Hx-YB<(xh55{Ts1MBwt>_g0ocCsJOpqF!gH*yL3 z#1Nx9EBAi1V}Fr%An_f29)K~+U2a+h_U?rEP8-jIjbCR!uHv*BC&;VfJNi5j?ecCj zt=aY-XHv>9_LAIIk&J`d^ngRx%S6YVn}oK-d7jrT*~E1W|(&*?b8c+Yzw-y$CGbKnz<1Dx-) zzcb#GCvq|3@qQS5fd0-oPWv_IBgK#70h#Gzx!_0t<{YQTMazgB4i)RY^3$)itc>;K zhx`r|>%Fp*x0aQ0pYkF1L&bfs?DSVHE8~8gA7ZOlcCW0Q1LFK(V_w-==arZ7pFEK} z5|95{ey_X>K}Vj*D~ZQ{Eq{W%UVd);*YbPi)x5lNb6)Vu$~j^)?x~*j|0vJ%w9oOd z&-wN~Cm>~qg900dV2s9p(X4{)P58osg6@Mg$(c z8to@knAS~pt(r&Z)B5{BRA`P(K>Nw}nf^ugx1A;_dpig1g7=u#V*7xaKPa<$RzW;9 zK*MKxj$9pKW!>(kk-s^J- z#*uRgapY8}xui(=b-Ev%OV|(OVf5uQe`ou_IfVT{&W1eepJ6}vtgrMBxz)4&8OqJM zgzb?VBG3BL{BDDx z+VpIUEp2no-KL*y=fcK0XUxN#!RLB47LbY0^^DgGTz)r!AKNcR`=PLh$@e0*yZn{! 
zV4fI;@+tS@J15|@FPYd)W6%-PZJ&mAeC8*AMUOdxa?rVm z$WDH9!H@GQ=O50Kj1ips=Gsqu#K5EWL?pJa@ICsSPBQ8_Fx?h|V zynWGpllzq12hJVjiF_e7cO>^ozgtW03+EEv1Gz`ho^kxGJ;)dD?d$5~V*F=3Y+8nK{4MN{wKo8Wx|$IvpY@n)(yy;&|i|?1%4}LG1kJuUl9oFFOzQlK;g3n|@%s(u{ zwhwzEQWxPf7yVi33%e}>$ z^NoyaZeKm$5JwJKU%ppjyY~7P6n-^??e&fG4*Q1uGd1rN34NQsvAn&$asFZ7kh9j8 z@0vo=KA~?cL)%l16Xzf`PUs8gpoq|)@i=iVA|K?*_2qk~CXuC0Us&E=UpOaG7s#zs zbJ94B-{S9@mM_&xYr^luy`J;=-vP8<@Tob8{YLIxU(QLtV*QIg|8EFi_W9FOhov?1 z`e!~hAE~i|T%3=7OCGQK{J&@Y8$SOZ3IE;aKTh~d#B|qpln)}NzeoCC`}}`q{kuN@ z2ZVp{`9CE5lTXb@XZLdsS)x%rfvoh=G?oO=`uIaLa7aU`Fz%8`7^ZH_me zk~X!OWBiefIc@50;vWl6n{tT%RdCu=1@V~Rw5bsBl!uYCJn1NHs*<=*aN5)s;;Dku zrtTqr-ox>{Z~A9~T?0?BYv2iX4LrfFfhX8C@C3UCo?zF&wOxB{>Rid-wW)I@gSIKj zr`+V`Q*L(iDIGVTGUP};VD#vC)!kkU1Q7>>>7B2T?0?BYv2iX z4LrfFfhX8CaBbIKn>trAcx~!j$)Ig2s%&MGn@_pf&8KwSe9Dk>*o{+?Pg&(iKIJyY zn@>rb+Q~7#LdKjn^$)~*3r?GQfOsFlX;WV!-cNAa)Puw`1gA|sMEpv@X;Zt1X9-T5 zdYJftM4L*mYm9k0d(%#;0$O!d1u!XaUN1_8ymZ9pEEZ-JJFz+SiSFr(coK9pn8ApL@^Z{g?mj1B>No_crgl_LnCt z-uKQw^+>$|?~IS3?d~y4`pv%q@ApB6xw|e8_00Pm3CMo19PiW*Mx;KPk8<{l_hCP7 zz*`KJiZy-ed5z4{-vA^sO`Dk5|Pm|C$Ql5q1dIi5vv0txQ zEJypJlw|a3Y+w?CD?_?=g zWYCj}59!+LUr^dN5VqGp@}Yb+gPvD>iiEyR|5)B$|Ja^mT|4NVelheXB< zyZge@4sZXp*#BLB>5yzC3D_qGx|=&|lK? 
zUQ1tn&m!OT@1L;P{+>%OSM44iIYQC1f3@fz*QC=Q?)=jSioPP{W95hUcE2Q*?Y@+8 zc`AQPWPj?=AJWtA7kx+UzWB$lS+w_+uZq3j{$KA|-vqrL$NAqB{&&4E{438B{`Xxf z{2v-{c`E7kc#)p`Z+_tw#rL*fzp3KI7gPVDv#>igS@`RxaGC+iwjMqA zKj(=|&@jK;J2$AaCqkK(-^@hbHge8u5Pv9BBV^(p-z(S#x#xP1Z}#=KE12V*Q&IC< z@%v+tk8!=1amj1pm%C(sQ`6%Kd=m07j>gN;PpyZTx7>{@jpUE= z)W^AK?V%Mqgv2c*8C2<1g6 zUjn!U`M#8^X3>-VSP!}*B`7aJ{VjH#6O=ZMgs3~Z40X#;_j&t!j*7{Pp6^#AG{;t; zZUySTU>|f?2Ku*eMcJ*mN1o#DaqnsgFv2 za&uU6laGfPyEqQJ7cG$(DfEY>&&^@3KaWb=214*UB6?^Jb4{_1vDBA}l|tJnI)NNm zTihh=j*5Pchq;zm$5`r#@zRiXZRBS;L z9y;#s#$2i5uFxjP-y}Q}3?&W`Xrr-5wtPTd1d#?8h_S>e5CWr?v*wEj0^b~YrV32WsN`E(lWfzB&C-DZ)c{#?kL9g6iIkn!r@_OadHpe)x z*R!hmVscMxjsx3SX}^N}7*)16D6l4kx)ADawU0V#ep&fszapvIzXf$$ zV2`)iuQ?$ptBnXeP=&fGv|D8#Q*pd;<@5cbQdhSPb=y$4+J4h%lCp!_QMMgrYwWjG z96z@59Tmq7)Z;ge-Qpw7VftJ^%Ju{WM#M*&!}K{Fzw1`QHW5dQgl4b!&hapPF(hR* z5rO-~cbdcWMIFDR&=1>3g{D??)EuUdHc8n5(M@xhzNzDP1N5Wfx6pK6ZZ~dIPu{p) zhj~QBUE$a1e(1Oz<$Qv;-PwNVxZQwxm+{s(!+z*^-H3UZ?TgM(ZXK`dF#j@McUEp4 zucLfULAxEWrya1(we~NZow(i!+gfKg#p87Y^g_A^Q1<}pZnxiY9>n#7xGuAQ6_3}A z&;#jqp>7xWt+#*UJc8>-Kv!-z$K!Pr@-hxrqplit8|>dYyK%i6*LT>zkH_l<$_Ki~ zQTI6b-D&?(#cT1U<6#xs-FVmt85xJgmzu+j?czJlVfw0$*T*0u>BNVc!}L|@|M4(= zQ^#xSlyO+}(Hy34O8=X~^hF)7sYB9#%s#wmD?+) zim8yxE3a2R6+59Xubf`F7^ju3VIK>|YQ2|5Z8x0xitht{&I8OL@SJ7K+=5J$tGpTH z78p454Dme5oPb+?VEQ-O$DM$b#m{oP1?!b>`Mzo0WxuCvvM2ds9WrgQNNA6I9yDJ# zWLlf-KRY2Qi=X+%+@GQsO#c_{4^*xi3U^PVyN^wEHn#m|nLhqlaZppP(04;p7C&Q-`6D-d$Mn;8=`-vz&tjv`M1{6-0opT9f;Lr(2kA+(*=Gjj;kI6-EW$0qGNt!L&AuszSJQ+rmpvU+7~ zlClInGiQMP#-0^^@17MUFPnbHZb`^#F2eYfyoAl8?UN33+Sp&7fv5MZC`SFVpCYft zHY&Vl+|WN(Ag`^`HY=FhrpAWn%^{ay3EHgqxoK^&tqR)ut;kcm$4;wY4gvWqU&A{5 zrJ%d@b55MfEK!4;s!1eZu57+)e}vRxCk|0{!!5(9!4Uhs+Bg zKc45O_pB&E4g>v>Yy7>o&OIv>Pv}zbS)t|k%FA2;#S^+?E`V2lue{6wAU~c5$Q%H# z{9N--9@w+OulKBIPkyfTkMqEKdFK0R`5DuRYZ)~^udH6#xUQeP)OmyMo)zusmd_OA z#j^NZNP0AB@xN2hW;R zdscL^AKbG-=^t{dJuA*oZpLS}=UJAF&u1z(<23t?JuCcb&x%gU%{?pFU!Ikz;yZG^ 
zmSfDy{>!v>+UXVEJ&9=J<*@O8uzRUEt;PxR>OCt~pk4XzOzQ#LFYz6V3u6!qTL<;ZCVf7feP=w6&qpK583@yJm6~<`u$EHE-w|A%te9DO(-W$|-BIlpQDWo|Ls^UO`o zCq+VgxE%Mr<4@wtAt{TWIm-D>`!VMe_8a?Fs6CLPLfd!;+Hd+u{JKfXj&fepzRY=q z{pFdSDwkMjIxRQzh-Iwa^Xoif;yeqK^U0a^gYyad!Sh4az7?J92j>y?gJ+GZ=lwJ6 z2cP$q{vo$|-k0{B)<5SHw&$6q>UqDDa&sPGf3a^xAMRTb!gy`#?(=`e&R5UhI}l%K z`#WL#e8y)Vc|Iz0glW?cU~Fld5B}Bkv+XX}IOmN=FmLeLUX2CMAa(bxsCN1720ylc z9PQV_9=4q4Q~Oqszw#YChEr_vnJKzV5vpX>-Fn&DXwto=q zIVVu|2%o1QJNfMbKhCY3e>hh%MsV(BN9LhDpMliA z6+*wi9CSN-`*7yN>enCW=d*&+zLpUD4%~tA2e0&5cLMGj;8Xin(7)r~O@vv<<60N)Tft{C&OafP;rGWq5vhyt znT!6c^abDG+(I5aPndHH=|I1i^Ctb9^B?^?D(!0sQNN$_8+FC_K;3YTp-!-Gg_>iU zgkJVTfX^~sP&X_``vZ&{)Dd+~op7$9E_i;inrozeLSGDNyC)!ZZTiCbM#eQazE2Wy zo_);uhV9zxTTuAb5VqGh&O7WI&rMd}V}!m<--@Jto4#@WVc&Sxvf8&o+9&iaByCSQ zPMm|(IH50U-wL5W<8k6#L_R#7RV|SNEWQA-11x-wMUw zos+!#A0+R0!1;)Lv2TUiM=5##gXH}VI3JNW&&lrWz7>i;^nR*+D|8?Az7>i;bl=&1 zE6AT`a(8Cm3i9Te-JFy39uLWV)O$K`PKxtJT-SR%B=7B@_jTYr73UAz)cZWJ+}lU* zz7^z;eJj+yN~hbmg1mW7IOn9~aZDaJ&Pj3Jko{DB)P2&tll#VbO7~Cq=hMD#g`YVj zVhFBSYA;3J4HZ{1^HGji0tHvNBj{`K}L z;@js^2AvakE@hBB%g@dMkDqT3n@q54;0bmOJi)GkYrFQ^)DYVGBC&ni_0`007M$nX z=MewA;5^@cDDgtUdA|MG%HXxBb0veesgTOa{p=iYj^}8O@9i?amB7akUoZF;;A4qz z5d0qCKfu-6r46Sf%x5m)21d8 zuMqr`$l$f9b0veesi+&LK062evva`J_dVG84BGiaV(0X&nZzF!oW3=S_@jc;w`}5% z?J~4Y$-WhzodeDoF^9T&qE(kS5dT`MF6R<|s#TXa68}bGoGM`1mxYFL>N?_I5u9;q zHu0|t&Ny{F@vz{ZL|60H)_c2acH{e2Jo{un* zG}e|g??L#r=^xA4zkt-Y>7UNKh&=tI;zPRj`WKY;4TSCWPv>FOJpH`ne+Ye>{uN34 zHvMCJj&<$Rl7|tJ_HFvZa`q=8^=RaH8gJ-R?^{9J z&zLIb#1C5dL8{ufVhY!gJzG!vA6_bF#*ewPM_PQ}3h04r<$3Q(Tjky}AM2II``z%A z+;7o$^A$b&TZ8_Rp7*+P{(Gt9J7U9$RJOnN(aTi38^4pQ=-Izo^pEQq=?|lBm$U09 zR*D?cACj}{uYcmQH2#*z{?wsAq^I3qG4d64MtR2c*VLKzgXX`P%KHvq{+`-bLXRVJ zJIH^`2H`*HUg1CUQQ?2XH!e%lxgZh9PkQq2J@pmE_o~u2)fwvJwtkRGzqFs~q3HGa zFxTUB^6T7?TH3eEza#l|j>vB4pSd5WlV9hGP=1xOcN+P1&PWaP&peRR$*=Q9C_m{> zr+=;c)5)!USTD5vgz8w*viAbB5IKt-9b3L!v@cLx?u}xyOL!r{@UvUCj|fTdVe5 z5PH>*4Un&#^0_|Mh`v*w%;VF%Hn=)7gvV||%E&m&F{{IT#(5(k^%2U(YtZZsN}1ZX 
zLi5w}L=^KE+mS}gUnKOkgeXgpQO^qvcm_~;)k1q(9mezWF!!rGYT|25U@we3E`uM&Tlm~IIO}#tM zG~oUn?pqPdti+~&Mw`7WAXlgNu7He;?QQn1fNYFu@jWVB8=+1ZOXKCRH~Mh{?iurz zqYw1!M)F7bDfh0xeLA&w1@DU(mAqd>eD4a51?o?=Z$y0W3XVxTeE2&w;~QMqn)6%X(R-XquHp-MqM9!tn6L!2KKHHnqvb|HxPAw?ePxFK>zk& zlnur``q>jy&U4)x@5tU2sOPhFPt^B(xjB3p;AMazlp`J-j7WVn9p&jL?*-WFWzXIf zZ!D2GDfEY>&&}bU_CO~pZ5s%|>xk%~Iee)-IG+0)fxc9{6xv473FNScI8D;-sOaZ- z_;NcZp8H$_J*nJhY1c-6L)sn;NPS!RVMl5{gWkM3(7O@$Rcw{^3H=EQuLi>8{^%GR zLHuX?BB4#_56jRlp+7pt)?gk`F;?2O=}$=7o^m{N%&o=TsA8_rC&(Wa-tCTuj=j4v zU#i$EviXML zoLpSz!Zxq8r^R!jYoQO)<)JPQbeZ-HCm+}OxX!X?#dDz}ke6|BH0nm9F550}#^QP` z_zkdU$8(`;C?Dv?qi#It2HJB}F0}Yqb2!}|rt&rEgSDK)K`TDi9KOQNb;PG&|8`zH z7rG8Il1_Z|R;p&$*C~vDPcQSJwFREqw!YUfI2}#-D2`ALzWY zGj4n3jX%@U?m_F7-z%?rmW6y?x%HY?ue|D67V>%J_R6VZE9COZ>y=N%0?5K=F}sj_h55 zdgMUt79VL2)8_(GwkIesB0ka_rqA*DQ_Z9A!Zz{jStK-j#dnT}>5CyLtBDBQFTT?p zrZ2L+)(_i9g{D??)EuUdHc8n5(M@xhz8QZ`hJIB17Mjk>?Z)j!=*b(m>oAX~xGVfR z-47kNqnu9=w>#Sp9k&}We=yz}XV?!NuNyHBvwhJS%B|yd9p+!g>(0up<8_qJDQGtf z_B0E&In@4wQ-JFN*w!$+C?2mHpcm53M%`@G4Y!v#b8tNe*SU5{JYF|K52Txmy1C#t z!d~Xg!}UDS<=HFZ@j4258HX33ZUO2>+P6A4;rb?A=i95|@w$QXfo?JC7K7g?dyR_M z;!DTFDz>}Nw2hFFaaeq*In3BDzSA70uj=;~#~>r=#D|*0^i}Eq@i2W;$7|}8aai=x z9Hwtd|C__~MIEoHL(+*pn#1%->HqOC^{L}Eb;vmEmE9|=j@Q&7>AbRgW!3ST@-e=8 zW%tUe<2B_YomY0RtU6vR`9SBDo$=Z$ua4K0k@4CqzgJ!rQy`yLZm*mwWOJQv2-sz(Ih82d?>DBm@ZA;_rzgf*+1bef+&|li-bnpg*egA!T)jl*Qkj z2LwMHl=}F)@FKw*3CTn0L(0xt4)vQ1bi&*)y*G+?FBI>dC*D0yynC5=_b_2jn%m6NF?tY!Tc4yzle7~5$`@B%rVmYfq3`U@b2fp95cN)!m0K| z;Jw=01A+Y8-SYtDdjAEE@ux*r+R~>*R&BH1eIRI4dfx}{{tn*#9K8EB(5Cc$4c9JbCib`SIRdc+%?}cyF#dV~2My2f%c{Mbnx!oK%3Qj zHhA;4$)~+MZSrY1FB|3U8F2B{WUzl=X0%;dRO}Fll{~?(Py9U_o4c|?VWz!by@0t zYdf^wwN2#1wMpc|wMFE^H9`yVzKib(xV}R1dD^|FOUrc*HF%dqnutrowJqewH7n%D zwJ6`jyPVVU`3Ub*PDjJ{8KUPl;zCxUDBP6QC&$K&fVB7*OY zN_~6{Mw8&RgrJvqB2pG#HxUp#LI|4p8i}Cb`-`MLz6OH+ACj`&gl8j%(g%JoQ}09o z)jJWv8v=qK2?`$nO}9w!V}$W{BDhxXL;xX2{P(|z;D@79AOB6SN$^HO(91g!DXTN2 zEdHBlK=8vssgM7zStNKPA$cf$NLgJ-%FbAhgc$RN*eZX+r9Ica;l>!e3)eoh8}ATR zY-op8`w)MtrOopd4 
zF$Q|p`r*4|6=Pglz6)mG`JPwnhwto3!*}v3#<(YG1aBlH52X(&s|!il8Ox!+pYS_|{~FUj+Fs}Qu^)+loN0}*%bY&A z?mN-6#@gi$zmo^1n*MS29Zp}A2d*`(@%BciAFlgPH?3>zO%C53WL{_bC)k^v0Ln9M z)0${IP6n>CuQ#or9ddZDf!GsllD)<0kMe;xn$~2yO7S0zJ;AQEw>ej$eDFfknqqHP zJcnQ(q^b4}^&1zzAN0K1^z(PPo}lY_m1*&Jw@Yz-*-+Es?{56Aoj%g^^LMt(P@X=* zwD>#Q<+$!O%Cz{q8oz6@&-`629p&sZe@D9l*X%QO!FvU;#}{?c3+3!HbwU39vF8_c z(HrILKXpN#nb_xuy3p@pKMk_dmOd?d)i#?6y$`wBw5HiRl}+Wqrl#8uI9VvqDKxDa z_Jhi%hQX$0+Pj==ln+~KTC?m&luhNrrfj>~8G!QK<)&3&?^b;Bz~?&qac3aP^KLV( z+4dgACm(#SxBt-@g!25grZvZoC_baX=LY*pXE4e~uQ#o^_FhFl7W6mTUw5uT`Pe&6 zi{};bT`c{JenB6h&a+`(^bz`+_7U3GK-d?3gubSIg!VNU_C+6|-)SEqpCRBwAEDoA zy^v21_)stOL9G|^83sPo3w==Qh4i_gr(WoTS}(NaPovzl>rbQH+Qu1A$HTsOp4AYH z$M`Rr7SF93kMaqXrl03kU5)Yy6{f}Wg(jeU@_nYC=LzMYJa~_3@jRg*%BMYK`gwj3 ztMq;9n!ZlmvR(XL ztjm|Oa!*G6F7|J!PxPC=wQ9{|n{n>ZVG z_3LxKhUWsnx!-W<;3K1f^BjyZfIRDBEFjOU7zfC6DtxI{8s9tidd}y62he)K=RcS4 zVmUW47H}@2&nkO{-7}`pcNs(IyNnss3w@h5}&}`~USW_V1;?^i#fz{d?(e!h4c2@_n0U&#QM9W8C*7V_M$>@N9SWexn^){T_hl zqpSBMgM<&yCs*%D2DQEi*geSQi+v^?uszX#ws+Uh-tOT%-o)q4`q==T5}@zL)A zI-=3<0Xm|IzbBa~{o+}{>Yc?*_dUtXR{ikYV)cHb9a^oQI#+i**Oz>FPOo}T0zJpy zlel{38M)LY&%#yr1g+K&&$m_YNn9G9O-mY{JFE4>Ggp;7?eNk1>4-+_rz0Bm4ghEV z;vGQ50K|9t_&L!5!S@HHK7KZIk>GWNpqF_K_}Quf!4C(e zK7KxGk>HJlI^v`C(-DosnB0b(wWh*Xqp|bxoU19_IO$nR;GVv(ida?b^!GO)OYO~ z%DUi!E&XHJ)gi0{&WH@C&K`jJE!kDss0*P^_uDjjziRYLF}`YC@TwV0F}h))ag|}^ zn#40w40GH#JEL;I{R3hHs*Pc$kx`ksH8Yl39qNyq{{fkmxUMuJDMseladynuThIFi zq0h{*S1H_pvxkC-c+7Z8(Pd?3hN2;<)ADRFMh`#j{hr7DMvt*8jXLmo7N76c$BggS zQ?epr7^ z>>9K&j0v#`XcLP8--)&<*dJkMtG=_Uv$L{8pUeJwWp*U{K4^Xi>Dg{#Y@%wH{a|KQ z=7b450k>vOoVb(aZaw&QO9{q;iY^!l?(C!V`c!=;)CK9;S372_c1jnsW{ECrdne1?dd06GR-ot# zA_Y6E;<|V_u8Y~R*{Xha)@peYnSQ_m!-4}2k zfL(UW2&A1reK*wacmVkO8EHS+0f_o8KapJQFec9Q>(iX{`Caho>ZBN#yyknci)D0S z?hw8+KIcn!zdNS$dy@5{FP-~YTQB?4`8~<{nJ?X4!;{YatgSbE>D=qt`gdPC*Q{8- z^rgFNc+$CM#d_D5&i$ROKlsu=B>a;v-Ce_z-a`B%U;0VHzxvXf@V&0-hd$$%fbaa+ zXS@UWtCxJne**sIr#|CXfXzSm8NUYn?dv|{H-Nu?%V+!-;2;0PXEXyI|4*OsF5r9r 
zQj1barM~Z>aoSuW6Q67N{=nB9$TQtW03dQzxa$d0N?zs&v+B? z?eF`Hw*Y@}$Y;C_NIm{LAochQK70I5gnnR=w2sYmLWdZeDIN9vh+q@Jlq>X~|^ zo~g%byq9I(eX^duUfrsveP91fPrt)+a(jCE_;;;(is6iuzju2&&Ri8^^IKnSGK>J> zuQ=}lHXB9};qMLO{o_LO=rN&r^lhPe6m3ZJDB7HO|MWC5oN-`~z5_lJAkRk+n@;%R z?T3a5Xt^c`5&)) z^Vr2t{Bh}9(d*JD3|+Byb?MNh#bqlt6t7u6w0z~7ARu!%;EnDth z4_&=7Z`iP*%T})_E?jnJN#U~L`9{vr^(%@?O3T&{ojh%Le&Mw1W=txaws1<}l;OFf z3y05}UYNJAa76Cvjd_K+BlFj699~$q@vg!pWhI4ojLuna6s%lxTWQ(Yz@+P@1PThK z1afn8hUE+k3|vuOzHa^4p+lFiEMKutB20L{@T?;SFO5Z_0{Xj*4|pWv^)ou zficBPm*kBYv8-g-u)LDtqsNR{Rx)OI$&&oM(W7$nN=laIkIch@ZG4f~n3z$^Jnf95}H6vc4fCwWrFatBgMdT)T5E|ru5V_wZVNgI(1O-$?Jl%r>A`;^T zabn1JYsO?W=uWa^GbZfN8cfEBn~95x#@(Ix@BTaPZuTesNmg;7|M#u$R8u{}-Tk*8 zvpi4poww?%I$u@2_0_5BKHo$qOq=NDM(Te| zrLD1HLH>eFd8ep3u?vEWpqzV_!G z)1~^Hb*YtBss86xYS4L=8g^c#Mx9rw$a$3-e_o{~pI51A=T&OPd6k-TUZobCSEdx~jwf?+HZ8(=w=j+xt$xTkPV7$`zmbGi{zH`aC<+t3i zeD(SzD^_k;zHA9snZM;D!e8s<3+q-|Vd<)sx2$gUqZa8baG~!2 zu339~P&cyPQdqoJFTG>=`(wvV957_b#Oo)IT{3b0_$A{93>>y(z??}-2F+h`&A>Z0 z3|ca9@Q~FT1}s^-;l3p|FI~6frnSqK+%s&6H(}QF3DcHLoICxxpZ2|T?aF(Wu3w%u z>V(zK_xrB9<<7NBZeDZu>h-sMukX71?zm~qs`tl#;hXloeOXQ4yH~GVv1aWZx@}8N zG%n|_`rf^6`MM>`?n4V#-n?Yp`lUDD{u-Qr+7dMXFZv$G0b>3gD! 
z?p(WuPXcS!F1cgPvgJ811=fC(()glkJ^Utv|^t30+s6^+u+GAd9fr!f^_PePe)w0b|R?X)oF)2 z9BaGX!qrwX6EQa@$K_B+nj{c*<`a<}tb?YdNVt+y`K z^U2oe(e|6avOnZJyT6-LS=$}?Mb39>mD5n0>-@I1W2@LJ?9Z6<54G*fzOugq^v0sD zPo1Ujm7>Qfm%1YJl*kM6M7C-r^1_Qmx|fIyT`KaT)*>%%%aIcsyE$9*eR6T1ucV4% zQREaoRJ5h2tTI;AZA+{$mbX8Qyqvt)D|);qRo2Fx@7A`BsXstaok|?>dEL=)Y9aE~gv%vPV%FuFD+seG$r(qRi6NQt%;_GbuIc z5tKc?bkd|9F{cjoy@Ag!@p%)UgZR9K&)?$ntJ*24DVjr5Q&UkUl>**@vN=;@Q+ISr zmF$WE6efyyl@yo6zEbktvXVr}!zHnj?bIhny_BcRl{{8dUNm*;4#+Ji66WT*tjYO?7eIU8nVV&n_=-y59v3LFVQ@hk9fCdtL!9UIAX^<;99GxM1_lcF*sc+3p1H8Ag5> zxK*oI@r4&|p4kccolaoRkpj#S)3#K8<->HwF8b9Oo8y!?Qx3W4ub6w0A9M43g|@hZ znGe~+ao8_q&hgs3NQeJmU(oNg3a%BdOPb}Cr2I!fr3{@{p=0hJbKVA5|E@N1`J7#e zj&paVE}yrHi$tp9d@V6A9!bTPS8$G`mJf}Q2r^fSWv+n7m?Tc|^5wCTp+jS3P7UgN z1)rZ_p8ZtMv*jA^L)zcqQ;W}je3JOQiqC8K{H*rc)U}$gqf?_bUx78ZN5@9*7?T>K z^B{FvBo)y*kwirE{c_Y}_&zQ*PUnp)9tXbfs~wdZrTGr2`7St>BiPA1nO_*H{lxjt z?K92Kj^{r=FHJf1wIg4w8IyAM)V3Zt=bn`FQf>ce9lG6B<3Kc;f9-g(06nH)(_>@ z`R-hqXMd5aTD#X?do|1cMW|1dWhq&f(v&qa=Dc5$3c9U)}Q6ZC@?#_R!+;{I67&EiPJIw%GZ{+8pP@A1~gvcq=~T zOHvEUV&L~WE$m2RdKBX~y z4*Q(=Ddx*M{fL79bYDgKn^EJ%j?fIV0U0c0jhcs{Gg5- zf{s%CcjpfCcH=&~c^~+=E6sIV;l}Pz&*+t*ZhEflGYY@@Br+ZkZywXy8Z z3+&$c=CEj+HwXH(edn7!aosIF=e|~!xBpV;wU2#e|0R$M$|}o>wqf0$H)~hX{MoyT zwk;|z+E!dX7;|Pw@n4t4=Dk}MoBv)}?8e`e#Rlyyn}azuI99a1cvf*NRaEpa=R{ud zh}Z?Gk()`{qQ^#;&xOtq>dZc&)2eu4?84NfKxb;XS!;)l+O%oTh@U((GB$Ga=u~YP zs&L?ww=d`O(Re&7!M8VGpHEw6TV|QTg9ew)nm4O#R&3Vxv#~wLd2|1X`!)RrTclP6 zj+U21M7Ay!+0O8g&*Bc!kl;`kQc)MO6?GvmF}j=2LHB3pplj-<9p84G1nq#Vq#ck| zv;%TaOk{OJH;DXD_Xn=gmYM>Q`ywKfr6Lc+ zMAjul9!!d?ZxDG%_lI1ccOAMvzF-}W^Yi`eldyBs!9*LkV-Ci;2-g+3uFB!_G17C~ zjxSV+P6e)=7v#FCaBbM=Ckmek8~v&_v%*F{(PnK}1^U%FYs0xp>aWUqBD^P8>aWh3 z6@F!Y)SblCj)(I6eyvpx^n zRX8hb^b=@{(NCa$qfh(i1^QL6bN;*&b{?NE^)Nkk{6zXZ^e649Mm-3=inVtk^jD8YrG5Sf=&;F-=67{qHsb2>>*`G|WgPrV8lg|D$>Fj@|*P|YzPdkl1 z(~UmU&uZN{n7M|Wu}ln4d)7L2u4|5)vF6C0)ETe+R-!;;Wkh6EsmMJsk<|&2dy^t- z8bpd8$F=ssB6mkbCQ3zC#za;nMD9t7tZon~eq63A3PhfB9olbw!8)`bUlI6mkq06o 
z>qw6S${Qr3jKnv6FT%Gysl|;LchAr3H=(>Z}h97LqEcLXfyo+%kr9j zL8k|PM5hOSM7ImR&*&%67Neg){q!T$PoRGK5$adLPWlC=SHVvD1(Qy{VAAPFm|l%~ zj6Ur&`b;nQb;g|l@3Q@;*<#r~H% z2X^K<%Io@^ft|T7Qok-|mhOM**JCW$pSn)yu>W~|xXlUu`Zg!@52JpgUk@GjKkK2* z>`#{EHTzSi2mP1_X_YXM#+&+~1hspHgueaDgoVLtk znPr9y8d5fU{_L{ZvDw?tZeJt*d(mRG|Aq^n!@fZ9l0}texxpDh%5BB{SZV@zaE$wX zoExOvPr1iTZJ*FG?N{4dra8ajx;OSvZVt~-bKVJG_ypPtjQwV3;LZl;<~Ut|yBK^y zj?)#mtHBrMV9%2CPJzJ}*w7KUqrtGD6L2SB_E)xT;P_=i{S@c1NByxlmq@uy{x~g<`~03ap`WS4mIeiQ^*qlBF z8w{s6kIlXfKiiyr8$v(Zx*uzlKSOims6SK7CV!Te$Nkw_I?*{=y3x5>s^~l|^P=-1 zozMwR4Z;`?GGmVM91J|zV2tMw;2{QMJcj}gH5lVL40xEq7|-Fr!wtrGjsPBEFvfEv z@JL{rQ#P;3DVx{il+9~$%H}mWW%HVxvUyET*}NvF46kiYWy=PeQ`xe?a4M!ba@5De zoAy&YBu=@<{Rg#lqBtzperk)BD!Nt6yy!!aHm62mj7OO<$9P@~e67J4&(Xl64aRtm z0Ul#8#xnvOF&N`H7I>_|7|(IQ;|#`lUI%;~u+1r(*W{GVYjVowH92MTnw+wEO-|Xo zCZ}v(lT(J*Hm9;>gUzXI*# zP5x_I9`}Eyr4v1%r5k-+OBMaOmU+=%K-!#|hB2OI#vJ2$J@EAgV?3t=Pd6Cjc?0kb z24g&D0M9TO<2e&}rokA`S-`Ul#(2&Ko(*hs%H}mWW%HVxvUyET*}NvFY+jR7Hm}Jk zo7d!&;kC`FY}sIQDqA)fPVomv^i#jqvdMp2%j5pvY3W1{Y3WAa(Naa<)iN*o9;D5w zIT+(PX3R03bAjg?jPaZYJkMZ^=X~J#24g%I0532YMZk*;#(3Tcd?T>U zDVx{il+9~$%H}mWW%HVxvUyET*}NvFY+jR7hSxTyvSowKschM>=&NdfwK{R;l)tF5 z%=}KJu&nrzV*PuRlJ{Kv?j#q#KPhtYo2%d~iocWKZ%^E@IT^oCnSlMzG3tI2g07rV zmE+7s>!O}*vs7WWzEn-JQ`W&mNm*|uM z^Bk+`D#4q9d5+a|wcrPVI}rC1Tn@~0tfm2iw*d1TtLYlSTY-6w)ihY}L%=-8Y8v`P z`rIO$HkfmZY}#P>@EFFY2z{fbNMAh;%ss#I1U~`HJ-@nM@RPvY^Q#*K?*!(aU(FQ! z6fpPvYPR5Az})kzxq_bt=AK{8Kf#B`Fg{_VH3gh}9GIMJ3OM-$Fge*2aPmoDag(wt*u(+0zbY})W&b&j>^VmZfB3uU|? 
z0Oo#Q-6+@x=6+v&Rd5uT`+c=Ua15CHeYI3@88G+z>Sn>4fw|vT%LP9O%>BN))r+r|X(@|iX(=lM1(`CRmr#A!JoPH44aC+PL?Ave<>jKa7ve^%p z9Q-dh$GRkF2gaEDT(wfhoSfo*U)?U4oZ^08-65Eq;(lMP5ll{Tzpw5ROippXuht1B zr?}r&cMB$`xZhXzW^$?z(+saM<{7*O&fqn02CsoLcnzGvYv2rC0~=o3oXVCBHm9;> zgW*(6KbxQDIhMsK?sL_BGUntI_xtL8!Q>S8`|1I~p!E20p2CsoLcnzGvYv2rC1849WID^-~hSxTyvSowKschL` zIQ50ju`Et;pQ|30F(;?E-&c%QPI14lz9E>L;(lK}E|{F+eqTK)n4IE% zUp*z5oZ^08J)Oy^3|?c*Gk6W0!E4|QUISMH8 zr_Spf%iUqKB6!-h;dzqZd;5EiPgV(?ryavwTHE;&6firjwoWW~g!)u#U*|NdrRJLp|occoN zSQe+|aExD&F(;?y5`SMXImL4<^^#z6YCiE_3MQv`j-`Gen4ID{mijBf(`OTCqH#xoE z4UgQY9)K+Jp25pvz2EI~<4;sMP+K5J-G5YVg&hA#$J`x|)4nw#_v?^zzP~7! z<-3mkB$wrT%=}X>%lBJ)G?(Rv-qkjb<;Rq@&tv&Xk9W*t`5T@cn8)(-e=sDEkw}8~OX*Kle83d5Pa^l9!u4KdQ&M{Nhe| zJ31Zfb0Tf=yPFp7#rwmp-ua8S@LsXj-I(V2`hWXC^OWVwFaBL0_L-zDes?{}9Ywia zDEG8ZUzFy~xt25IajxYCp?&dwW3H|pv~~Z}{rX+KsTMfYH+p;%Fra5BY6FC4Sq&g4UcvXRFJ$pRKML)A0DN2g7f*+T8`R3uFOg0c1DG zZf~{TT?APK9QKC!CAcoZb!Tsc-xJq8aoxom<@d&QZ|HXQM*Dqm-3RFfUc~Q<>%O=y z^v3!9aor!+-MsPsKwJ-mes^!8KM2=@kY40X_J`nl2(F90ss1or55skdH_ab`>k-iJ z;Z65P;d&I(dwMf`_G3HXc5gMt+e5a8%!kZ}>i<5zkEgZ83cjH|TwNW7CbBxrAAoc$y1ZH%*jq`i%C z_K&ppNSys6?QM*+e>k>ZX8XyvFSGq-4vYp@MuStmyjgw(*AZOz_U8EGa6JyZyULsA zkH__Rr1$X__!DtG5!Y9Hi~PyBo{a0h-eP|$uBSr3pZ7I?8m^}yy}wuLPsjCiTo3SW z@@L?B2CfHs%luimo(27DycPZ&T+c!JAn#Ux9cyCq@9g%_KURhNSys5?QD#*U!;bp-5^rlg_G;^u7jL$zF-}U@6rYD?dSYA$6nGgostf=gzx`&(T$7B z7A{cxpY8pu^{#yI-eT~+OKZGqycqBE@clHyIlgz3gE3x)biV85;9V)_MR~6e-_9Nk zU+ad4d3X8+l6D{>vM%i0@m_eOtA=}P{VJ5_yHI?;&B6OprJ{4NGjzI)an%TKy&sdb z`h>_s3UwDuaKj_Ld;Awro_3jc;*z3sxC_b`T<5A$-Uh!x(mw2pv~H7Jb*;D2XBoT$ z-C4g6j(l*b<|yiIceNX)PcM+PeG!q#b|~BKDp%3R=fKCY&3JFR^IWvKRCEqp0iE{! 
zTt(j>leD^o$b(I&JAben=2}pN@@zBSweCC@ZBB|#eS4J8zs6NuBN`;_PzR)S9O^2r z6)&KijKSK@9FG_APMK}9BV}p|MDDX~R^Tej;yv!pFXH{Pi0C9sMIMNWth3vYi!y8{ z-uLeOBHj~Bh)zaZl9E>6Ao7sihCL|HcH$lJ&Z#}ZjdSVem+;K3#|bv4@P1pV+~chK zSq0_sE_&w_+H7pKZ8Ub<{hW((Y&YIp@63169JDCRZ4%=aQAC2>)XRaF_>uvU53OJ1S$vZn}XE&7T zIonm^ymFsh?|q>gzRuf<-{wny>bZt@{JKN0_k354_qO5p`oTQlJj6SHw71V!-0%c% zyMC594$S$;=ZvEP|2Svn;vK(y)WiA6=Z-6J&3VUp`+2}Ot%vvj$hCq$hQge;9ihkh z#rZias4s!{ALqg@a=qJUAw54kL638e^KiD|FXtU>q)kP~LSfEFJ~!B7a5Uf!-skW9 zB5btnx9$Br;5FX$@BAWcwC%U;ofp)H_x(G+i1!z5`{xIBRB>+Lea2JSf84GE`$gM+ zt_8M@hPSq@wrzYrQqK*vF9mySTQh76I81J&PG_54Uo+eEyhnNZ4DMrBf!ln~wYeW( zk958>s{QyuS)+?Eetm9q)kJTHUm$6f5s_8oa9?nElJ}_g_aG3Lzew=Ma`~~sjqEpGW$IRK7q*WzE?jbjN zg4>+8^y6$ZVm62$7oBRZJ!bwkNZMX=3u3uT6^an%ej;hT4n$?0L> z^i1zL?aO%|mf<_f^yj@%X4pDc&GIV!AweuA-e&~vGu!*Fe-!>--v{&0#Bp-qH?Kmu z5%;=kj#uT|F`M|@!{;Hhe$dzJd!l^NV^hoL2L8Se<_gyku5-o@(BJc3hyjxJ_kA%Z z=m)sonfXqC&pHtwB<=6}V-9e>bB#0en(L9SkK+NGImZT~KF)8hab{eMjfg9veZ6fz z=e4nwzMl0V=7{$7w(YFj*h;@o`w)*r`+eJX&R1hA{eHSW_*UC?+g7dt>H5Gi+ji#J z_R{~ePQ)`w`+sAOu};J~N&A0ee}=tweS!Zs_S^QFx@^0-F4(qmjo5^H>Sz7` zkmq^wa}@YF*L%?~khB94k#&7AMx*X`)jV&H&;39-%IaqnJ{yb9!BHqby37sF_g?m6 zl2)G(dFX1CA02ho092%O{mEs(T*5s^u*ZD#H|;2_H*o`zmaN=4_uAe0|+m(1muq}3%v z9^~3)<}ue2wi_`xx^PdB6rK7ZC_mypnb!@Hc8F`5naf;5*j~iz==>t~0*>gMwcRg( zS29+gbiN$p$hCwRaX$1~QY!VGbvwA0upNjE(uL3bXWI_0A#4ZYh{&`4Iktn(`nvyN zw|>??N4vR}usq_G$g{qbKV$!M4Pko`+oTJh^CK9qiI2PC8@<2Q&)!qvU&;Mx;69)4 z*+#@Z>C9()a(X((mfW22fE#AnS>QO=j5$~{_*}2Y0ygovp8k4XP~QU7$MTC%z90B7 z?jiX0png4fuuk+x`uIn%=Y*X24Hwa9dmVIx@_8u7XMWbN^|40K4$iTTp;r2te^`L^ z`VjEUJo9t@kAsip{1}hAMAEDZwL1Sx?c1-_XEw_)w~^CD_A$;%ILMCdApjogKGxsN9-BBW;AbC^LB9U zNY@X)%>Fg);+kN$#njupP0icDwS#pchK*i3nzzZExixPK*Am_X@ow~5(!6cW+j88# z4{UGVHoGmRUaloN_A<)uz`bOjapX6<5yP-k=puh0#((mYu5z3szqu0e3=1HUS9J~2 zr#>BU9CL%~$WD~!GZ6P9)Pw$>LC~A_Eu4oyPLE?>E#<2zk-v8c(r0`Jds)a?-*Xku zmpH%E`x4Qw(Q^rNb{O>LeBV{2QhwiX;CX+EeJ$jIzj77Nm)H*OOXkA2#-N9HO)D-B z9gXstEQ0liX$VESJ0W=qK67zC>;A&r;M&4E5G$w)pS!3B{YtKzoZnpkIKPupzKRn0 
zd$_)_ujn7xH(X=bCx|no*O&&;uh#d$oTa~D-!L8J_tI~$kJ#tz6RtJv3&bYsqSqQJ zpV1aa%I+?ZycTWYdL!dHu!~u5h!NMQ3(u8U?qqF?NPSh5CuPo8p@t>oVW(*aY9?T1|>xQoX3f45$i$hrY<~pY7kpm zw1w#>YYW#T_66cS={0F2#&6M&T@}hzc@6mfxZ|s#@JEp9^^jhZ*lxsw>cTbYpP2tf zDEz-E-wK8EbNlBt^7_{yy&mbYf?ZsX{*`sS6AJ%3^WP7J|AX@HL*XNozlBc^=8m3& z@admW|DQwQKQRB}Q20-jzYB#wrTl$JuScw3>tj7SN`1XnAYc1-NX!S$0nT^MPx^M+ zujdzh`{#fB;{}INuI&#(;SV62eiRCSNc>7juSvT8U`_f5rvFVS-1?7&x?kD`{nCVf z;d;b+5#Oo{*CX~z+n`_CqF>l2ZP6#}gC_I=*CW=A7+76+HpM<^8}v!rpikPOPuK@d z=mTRn;$(HMcwTj~HkvkZ zO=8`M$JK@BR~-A3wb8VRYf`#y_;rr^$=Ybz#C0lNKe)+pXB(Mrx6!nT>l5oojIS;{ z+iE_Jrj2Y9*Cf`B7+_s^zD0i9<7nDw+Qc;}T{moRK8~i1rcI`9a@rn0u2ZIcrak|i zF}@b#cZ!QIl;15f#@APYziKezYkdv)YX&2}))L?)1|z;!DR8O5h_AI2c&WjNuXPjf zO$H;r*3G~-1E*s;<>Wa0ZRQ*ka|t%^x0!Pdh7J5}<~)O81Am)2-(c9l-)1f_7&h>? znF|eu4g789B7LttBXbLLoMq^nWoEp(=QzuOmm7@u zS}TB87>xK@w*cQ_Fyd?73Vf@4eKyH=$660&DpAq8=?O2a7UT?-6<9Rpm-3DVk?*YEYV2tOz!1o%A z@!SBs!C;K%eZcn_jPcwEywPBc=l#I<1KXUkc}-5)ye6k?UXxQcugNK!*W{GVYjVow zH92K?ZF4GHHrSlXmJNneG0l~YL5#0W{$`DR|3NKdez}&LeTnf^?n{iXt$s#~FK}uT z#(0w%bByN$zz-OV@$`XxgE5{_;HbeE&lqsbV2o!OaGAjv&&|M_4aRsr2>c+h%_*DL z(&8ckJU^pf3Ms5sZd~FJ1eEC6) zub3|}zBc-_&n>`P490kF1>R~f#`7WIhYZGeZUf$C zFvjy?;D-&ycy0&YZZO945#UFFZBE&|CZ}v(lT$XY$tj!HzV|*Q;XJTp)~70ei4JTp*#BbYJ1cxIsX3ucTjo*AfD1vADM z&kWSh1T)4L&kWS-f*IqBX9nsQC(bPx+iDKdnqCkc#`xknR#WhfF=Kr39IGjK$Cxp` zc#hQ+ykpE5Up&WZ3f?hhj4z&JH3jb&GsYLsv6_N+j2YvL=U7d6$Jm@(WYY$7ZjntJ z%sCcgd@V!Ys5fLR7~_k3e)Xnc#`xl%U%e%mF}}FxSHBX>7+>7;t6vLdj4$r_)!zwb zj4$r_)jNV2$rxYcWK+ON#`q#9o4`rKhiuwl_>fH-{;SThM&W(_Rp?vwzKr*^z})Yv z-v}NJ%>BOld%9N2zr^f-?oW2g&aN5KV$fgY@en2*DkeC!-@Eq$B zIhQ1-xX)D|%b1f>-0!P@6HHEVzps8Ln4IE%U;Q7!i2@lDem{x{}oJ5alfzr zD43k$eqa4(CZ{rZjWN&QHE;&6firjwoWX103|<3g@EX|g+U8WYY_K_%EgKA{V!{5f zOv`gT$FexZeXjaU#+;nueqVhqn4IE%UmX)nPI14lQi91T?)O!W{JkA=iu-*P5=>5U zzpruylT+O9s|%iVOe{jq&kSAzXYd+0gV(?ryavwTHE;&6firjwYU~-E4ef1T=N3IP6!-h8jbL($`+aq}U~-E4 zeRYLka*F$X)ghBp8N9}rXYd+0gV(?ryavwTHE;&6firjwYLi$);(lLs7EDfYzpuIqCa1XHSA~MfDem`GcfsTo 
z_xq|?FgeBjzUq<5sSI9Y%rkfmoWX103|<3g@ESOS*T5OP1~$C5Ih8FNY))m%2E(Z@ zbdF_lYA46Imy9_%#d9omm0)s;=UD1$!Q>Rru~a|7vbdqCQi==Geghk7S#Uqs5+QJ$=Q ztcUhh^?FU$Q!4r`+Q;;hwU6aF*44c}>Y74(Vp6_EdzjAlBqYB@d-&T#J?_13>dN0H zQtxE#NlN)sju-3E5cRq4&GrD7+?@ynlMY(A@dpuO4so=jhnz?JQ3&EPY+Lzw*65spTpB zebwOaWRr(?{j>1@zW;2S%iqwm9}?&XzT3-tm*vX6AMVg8m-ls|N9ql)DpyrP3H_vGMfv_9KcjrK7f0_VfMzyFigkLG-?=ix(bF3)AT zZxnXO9|`Z5{^DIlzOQ^o_zna;F{uA^_P-qPyPEtC z>OY9c894C(MOyAzV1Flz?erPmC}uN?o#sy<24siH)gJ-nZ(&#Vz_yIvzix0*T)qOaRA z81@aKeZib!+u5HlVofl08QUDGqk@t)($BKbYp~99-6)X!1a0FrbSfi~R%P2})`=vZ z1z3(c#{N>#ucky=hK*)jsKYaW_Uod1#y-pgH#ko9So>KnA$6az4~_q?!Md;gc3|77 z`q23KB%gcWW9?Xpqwe3BM?XyN)}h^Vk#EOD9CZ7ly!mDf&_4c-zj=OD;0Kbl2Y#aDT27MbKf)LwjwY|6d*}DtoyIu>=tZu?qf&Hr2zXBIlLbTUiG25C;z$TsCXo zENs^u|6gxq+`+!ls(+Pve;D@BuXl7Ve>eVCtD=_;w_z9kSzDw({Z?ZS`Z+x(VGsRi zIvwpa{gATL1HV$w`jOr|-r{H0eH{3mQ^jATUi!Y!YaTQ4Gwc3yk$P)e`_GEa+CB%d00!gTf6mY1S;aB)o_}8Ph}Z?G zk^Bi>tmv`P<;E8#7J`SX7_`&3Gybyprc(? z3!m%CeCQoqhV*5~U+mTR5h+toiM&HAkhcPPU-f?CYoEOEwXUV2bNE)|-HN=gdHZ~( zLI1sn zL4PBzH=@2dD+X66gEt(*DLc=WO~Jn9m>ZtN+iu&Me#V7;^tHC_wyo)BTh0yW*|yuZrk`nP zAM|Y7>5pxD)6cWyJ#=mRZF{wkfqk~!=3Q&sUXC$2XxnYuY5L8!*S61aj(*;}ORd+B zmEesdahZH@#|L*-dT%2(i%zSIh^&esFNVC^yhFZTUsgWXwN&!zrddjK8kOI?VE==V-U_*ELvw>95b$ZsV_$d`>~R z?cmdPaC43K5B?5Z?*O;%^cvFsx(@w9y+@JvDDv*|KJp*K^<%hR>-|&OU)Q4_sP_c& zo4$eAZx{0J_Wsqcz;y+#@A3XU?XT--AM~C<-ZQB0 zUhhA&zm~alB(8mX;1BCzBmJ<;Nq;oPIVYw4N8;>HPf3EYi%F&Y}@ItZF`Nsrj7L1w*9uf^w+e{w%fK-`&8Iv z+iTmWeJA?Mw$rwYep+)4F;wZRO&rx5u6S0TM})Z!Fn)zIfM2yZ{D{-?-ro-Vwh*(hfvK)-A@n@{51!s{6c8G$-4!Uc3WOPL_(!!LLH+Yx`Zb z(fflRleGGTNF6V=^mR9UzxO8{Cy9134vd3%7D>@L{56y>{h6yad7t_XlJ?;eq}}uj zS3Tez^;rh-QuVw04)WpOb-dI$FS%jP(E>@^7ZI79gR+dP!8yvf6>Kx&rG|C9RM9yw z7dni!!MPigw7P^y9b)XzhC#^~UjZjiJ?^O43_37pG} zvnOM)HmuJy&|b#MFg807!DE9NtTK{5EEHvw}CM%*iOVt z4eNNRqLa~k1h&*JsfiW!DPQ*(M>v*Z6bJqRL_!WAbV6%>)B<0V#pBcA;(_G)F(7V+-fi!>s@!W8W$Yx@q4N;$fia#|A@0l;@1iL8qKu2g`N(yEu^x0i zj5~o?4+~Ha=OfpFg}COt{*WTtN+ne+q~A%n50!F zMCy2{cY@mwdu^kPOGLXEH%0q#(WzO1@^}6(S8ey&MH?hd$4gxcZa?C+kFpHnrHUU% 
zKA+RMzHm-*t`RH;&c&ThJ;-!jz-?Mf9Ef}-4|Kh40UVhY$ht21h_2Br|y^h*X>v4j;CSK~TD7XIK zT=l3I7XOYP%bA#|_ki2q@VaQf&HHg~nV6|7QSP4qnFkiTiaIG`GfIgr1Ld?{(&tHvsL0`Z%&&+xHeAbDWscD~oC*}d? zJl8ohx49f;>eI%mel*oc^^+TYvu)889g>F-$|Vy0?;Z`;ngjji;wePoWr$0Bg()Xw9gMYPcw{7J*kgg9rvu$UdZ7vjQzH~rY_rVt_`-WTqj21p89!zg*?;qnPL<8`IOf?S|Di$A|mTn zVT?9??y6m0pJ)zvI38v7vk2CbQqeiM3FZBqQ21%DZ!{)p^$C%O?m&423Q-kaf4!D) zUYk8ZQgjacD8Ko_Q23kPz-WV{eYhIsVxf?F#v7#95ZcVW0At%Z(BX4E$8kM4$2nXe zY5O7~lU(1-{AFA_mPgD~y`Gec&VltPf6wn_F2^LTE+O(D*Echdxt_4yh?%P6Zzn~k z{vMRy_?gV>21z@_bv?&bl33 zPuLE`Ox5wX&$b<0N7xR;OqJ*Tb8H8n_jUinZvDJ}j&^fBVR^(%mFImaf5!gjI>PoM zW@=}~OpRf@wzdg{zvT_l&)?hOU&;L);69)6*+#@n4KqF+IsGWcmfU>om>XuMqEwmxUDT zO~*_{IX?HZeyxvZYTCg$W@4sp2(Vrs0-ovl5%}26@Ufg9<(MDj`PPrIM}XY+JBgX9 z>*jpIvo-75j(I>oy8~+i{l}w0`NvS6YXWUg@OcWhv%V)#AJW*9O*`j+q+R!Fgw5rs}#mM_{|%F4|$YgL?v9FZ!H&g68dN-VUx6tQ#>?^;*%q zUCrCUHG}mdW~yE@nzyTYJGgeF>xW-v|C)AjO|aWy>TTYp=565G!8#E$Rj(b*+hor1 zo418)3GacJsd_DG-nQm#VSKvuePDa@w%Khl^>QsS=ls)fFTK}zOn$Gzn5kn7(n z*FVnhq?E6sM16a>zOk?9AJ{isW7sE%nX1>A2GN&ToS3up7wj9Rqx@d_4fYZHoPENz zhJAsUsS=-C%4f92k+QoBB(FtVxZcRP4(x2!8)C#v)iJtR4)<=+wusbMMR~Heaou6t z5HnT&?t|#JXj`e2Z_zfcKWrOfrs|m8F)5$XHm2d;ryM7)L3*6f79BHH^jowgDRrN6 zoVXUT9>h%5@w*#DzeQV^ezI}mn#8_9%v8N5-H-8mW@zY?F;m%Y#7xz3yw4OfRgV?y zI@_43tXt1P`1G@hnacXLKGvht#Z2XV=ltZezrF|O7kvAfVy5c)gEh&Hz1ck0Cf6g@ zi8%>+IPNnMyH%*LDrrT|_ zW2Ulx#7xyOz0VXgm31R#s*dB`d>osP8`q?C-LU;sZ8U8%bvJJt*D2FJ)1Lp%n5kjB z_Yp??62=!~%+$`noef63(k{SV3`V@tuE1RlM!eDj-~xjYue1=j&|t(X?FQV*3>(@5w>KCzbO7#PFl@*N&Nmn~TnT)o!LR`f z_d6X8h7Fy7I{};60*qIhEgOEdo%`>xhq7sdi8aBPsYU3UA~RmybDUz}VuKN{v;?@s zV8ko!0o=o2#4GIy+|yvhEA0i`%V5MS?G4=9V8koE3ivAEG#_At=A;=f*q}LSFl^AA zG#EB$P8tjwG$##)4Vse%!v@VsgJFZ_BygG!S+pU|hb-D4u_ivn_ULF3GquSdr{9%4 z?q8>+6CJOm8|HNe*Zr{gif2Ak8zV1v!+W3a*I^fB0AbNU!; zusMAUHrSj#1{-WnAA=2s)9H9jS+pS?k12~bNQ|#f{TZ4wM}wHDO+n1m<3Y?+CpstK z)Lbo9RAQ#)MKfZiq8)=U#)Hh5V>|}~4>lO%IRto!!5Gh>z(Wnjcn$*|W-!KcIPh?T zF`grUM;MIp90@!U*yfbYYjVowH92MTnw+wEO-|XoCZ}v(lT$XY$tlBYn^W1c!RA!9 
zY%rXP1)Rc5T8LlS6vRwD?mwup6D`-$jY`Z^6_uE&dC`oRso>NojPWQl<`~awfv+_f z<2f35w80q9F~DOC#&|}6BL-tU#{!Qv7~?q(c$~o)&+CA%1GYJ3^O~Hpc}-5)ye6k? zUXxQcugNK!*W{GVYjVo)+U8WYY_K_%EgKA{@bwwEbTo*W+7!f0Js!kNb)qw&&j}(4aRs*0iI$o z#&at0RD&^|9&bIRs5Ic4*joU(aMPT9OBr)*x6Q#P;3DVx{il;O3_schL`b1GXl z7)~VvPQ9vSQxG%tcn~wyi5}4DZd78Xs;ICF`l!5X9L@uvUyET*}NvFY+jR7Hm}Jk zo7d!&&1-VX<~2EGcx`hkTQ=C7%9ag=Q~UuW{nW1mPQ9(=@gQcZ6Fn4g>K!drRAQ#) zMKfZif>U!a#&gV=V?5^q&ovn1IS+WA!5Gi^!1E2pcrE~5U@*pWA@D+jF`kQn7a5H4 zyb<_DV4G7mugNK!*W{GVYjVowH92MTnw+wEO-|XoCZ`OqZBAv&2Afmavf;}fGc^oO zso`>t#h9r)Gf*Q1GiEB!4AixP88ek<25O98#!Tgzff_5AF;jVFpso|ln5jH7P!j|* zW-8AN)T9&V7K|0z4rxujM29g`d5+a|m0-q98M10Flh>v7*`T<~@(>}1x=_s(x=@_uh=`vuO z)0=^9PCp22IBnuFWzz-|k13lrNQ|#9c#ajs_#&sc&s8gB%*iS4_tou!$tmvl)g6M# zDem{x8o}fg_xtKD!Q>S8`)ZwFa*F$Xb+=%0iu-+aZziWQc#Sd7;5BdtuYogo4V=Mi z;0#^^XYd->@Y?27wrsFDl`R_#r(*gU{b=yFf6nn7%iQ{3;XErQ7@?)TM0nVibtHO4%H*T5OP2F~C$ za0ah|Gk6W0!E0c{YnxNqvccw5wrnt*`aalfyg&g4`EuQBEsyavwTHE;&6firjw zoWX103|<2pUfZ0?mJK$ivSoweR5I{W=XH){acUaJ_?t53RW=zDem{xw*`|^ z-0!QnU~-E4ef6wha*F$X^&P?F6!-h;dBNlq_xtL5nVibtHO4%H*T5OP2F~C$a0ah| zGk6W0!E0c{YnxNqvccw5wrnt*`a;#d9q6l3;R*=UD15 z1(Q=e$5KBKOiu9}OZ}B#a*F3z>PLdfDV}4gA7^qZgVz}I3|<3g@ESOS*T5OP2F~C$ za0ah|4XK$%IR1luOObC=pEdGbjF$63d#6)5h+(siFyw0K>DhFAT;9LO%s=Ha9pygcefupvn#=nRy{m1WiFa4= z3gQfrCn@FYD3O1VcCdYHFWX~ceN>?@Sx1{gW!~&^!L1KYW?_}*s zO8HZcmx&FMK>z9(P@>4k9W+o?)RGir(7K~6+bRw+&uDdz;faL@O#_lrDLX|9~h^B z_a3`N?mgw{j(NP_%;)8P3x7OB>vR5Bp}o}Sy}Hl+B$xGGbN5lrOjETVcF!ZiJY5g%}V#lP%+vf4UH$2-ukH2GQduq@g>XY|Z4i?@QPI@<&eD5_^ z_&(s4PjpOE9q$PCGp-2hAAYygKlUN1f67x*|IF{T&ogmII9}9e{hcPft?RvN`3Krx zjNI}k?H|17+iHC?K8!PRCi`CoKh1bh|4jCq_$1^ZN zGX}|->^Ct>s?q<{Ka>5Y@6TknUNd0xnflxGwdq6pFcT**`9{k%r|u1`IfvyP6mpSq zsc8%2P%|Aqzp_B`C1&aySYsr8&yG&V`a~qJni9Hu8AsC8LqBu)4Xi1mQ^P!7LvJ6i zA3#jfO$)7+2%Q36L zdd795K=Ko`jn~krj7VCQZJSvql32f3jylHvQqixbL|TT8W?iVmGk}iiC%R|s!*on2 zto=HMpVWQEJ~aNn2J61|+ktJT>OZZ@Fn5kU1k#EO5UH@-Z%v7|m zdCXMir(>ql9{9cH<*Tqxa6Ia8|I@}yO%;{l4^KEP;-vcdGndBshAA6#sb=j||JeuWVatP5lpNT$P2 
z9Vn3ex&ovZAiW!8H^c_he)fIDO%=U^MMy6~e%Kr4N2E+WCF(d-g1i#sb@oO`+|>6G zH&t{F_e5S#vqU*_f})PJ!E^x7}DVn z_9Y}gnUC~*q<4hu_?8tn_5GW~Pm2BlX>((|o!8S(O4&L})OAq$p)r1i*E=0|FMJ27ebaIGs?bk5?w*uuVZS3~_Z3KfOZ&|;0s763zgGdj zuYIeO&uC9X>Z+q`-X7y)6Y&2mUn;s8?O__qWwgim*ea|8+Q&+{7VU{i*;9^(@wwGl z8@10B{S5n)Quj&6!}#6`te4vNif)Ge4N_-@{l*6;_#6Nq+XwvU1MU@g5x+04`{KIL z8<&p5SH))o==Dclf8=%Z#`^{c$ADxhm~%jI%#Y96t6TeZFnGZEN~@7xq!lw%xWh{hSN?=xc4; zZClgNx11Z$vu(F+O+VMtKIqxD({J1Mrk`oad+6Hs+xF^bS=eXWZQi%G?bXk+u+O&J zwp067*k#*m+ozvxVGEzd%=^@=Gx;%ivrXbNjRto{gFC&vS$=_}RYpWsMUWRkUT<%X z#7+GeaZ@F4&p70b10S#Q=J_#6t4@gAJ05xCQLc}-K;ou;jJTgbgHF~8snU!4U)E3`ld0?xtV@WM*nF4 zEjnj!ci^|{(NDJDuE9E@{jSt^*6lEUJIVC~e*0|OVf=O-)(`qy=N#K%{B=FnVU{mF zN4t%`uEF|Ce|@%g8-JbTa|+5$1)rvZoBh16`O|Pc4czMQm8Si59r}fO(~&nFc>}zg z{292Of$M?Zvb4XhM?X++7V>7HzH7V{{v2G-f!-kR*0jG)!e08}dB~fGyusdW{sLSt z!1WMsRoY+I(LU%cLf#_OH`H6L{k6=cBXRB9gJ;@$*hoJtbEz>--!5~fG0wSa&Mywb zM(W8NYK(KPO8bw*IX8{JW}ngzOMf)RIX9*Kjd9LJbjB`#(`;WxgpT=La z59x<(+ihEozh)m&&$ivR)%a`LNB?TuZrf`7HSMFGZM$u&@z>fu=-IZ@U)%N?e@z?d zuWkEnd$mu2eYV}Uo!V!>F56z)KK*Qt{<7_~?b6TY#PfAJu1102JrT)I$H^!aTtf-{efn<} z;F@O=F-SZYrsE_e1Xm^{KOOg=L2xxC^!M_ecSq7HC|O5=-~=UbWkhf~4n(QoYD(6@ zagIsadE3FWAO~lu=#L~N+L!)Lp+NA#h~%fgD<~Cwn3DSw-5q5y?-Vg_jDhr(_+vKP2tE?a<%JpidY-$i(Hf z zGQN$8t82$~wPT4gzR}5IhuZNtd9Rbj-(>wKjkk$(J9g=p#a43Z%VMkHtQ~`toH8*- z?bxE^tcfLR#|$N>Osr5l#v|)7aUJa#j;zPTZM0)FvK|wg(T>ANeG`Atjs#6q-VAf7IE9qT$>tU7Y}bg}8q6pM~F*|F!yVH0=J zj zZf!Di%8oVHJhoi(7;?>H#&Lact=MqQW5S&-9@pvOZ~d#&^PqYs6lNTzj_-%Uj8W9_ z??Yk6ChGXxQ20#owN4jL%Z^9aJpNqsICIV8%4yzP@#C7ujXPP4r_;rBI#Ud%GsSFb z9;>N&oF>l2|4HrTJpG^4UdQ@ApTCvTXQi(^*Hxd1zVcevPxbe0A9b~Vm!;2JPoiu8 zwu$xdw@Ivrzb#@t{Eg6joOkh@fWNQM^}G<=GtlMl9I9}ZM4d#S!{4^BKK^Ef_3^hT zFXJrdO!a(*^OQ5`@I2#8I`;1!^clq)Vh6v!{}lG{HwmVHYH(hy&pB`&-CWo755L1_ z9>0@kd-xqY>#4_g=B9u69jC4*(B*fx)a7@zrhk%w4!?tDef5FPVd$9tIbA)be@>@k z`sZ{y`V0Wy*XuJ8NPQ+ExT-+#9{%nC`RU(v@HYp*HI(Ty5nSst5lHOE)4$6|2(C;@ ze)=~U4T7sFp)Y45l9vA6M1kN0C3Moik%$Q1Q!4rC-#~Ew$0V(S@_g*j{ekbx^qB~x 
zJ`)jKS0MOcL~!~$-BQ7aDbr^nxYlPPkgy~D{a-@xfu!W8ztd|FTu%voITMkz8b{L7 z--#9oJ`j=o^!J*jg6k<+hwcwatBFb4IopxpW8N38@;hAex%&OU$KWj7%%KXLA!^@n z5?wQg_+2eI&+lAW55Gg@9O8GNW)AVYH(gJl%kR$gJ)vvn5WhpD4!&U0SVKRmOi4$tJZj|p^mo^JZ*boH42Ih~H_pVR5wl*{wo zlbppmf!~4Den)Usf#5w6!PS(=-&-oUh7wrLVsWkg4y2y%f)feBl}W)>l*r%HAh?Fey1q#JCJDKfdavG zl)wigg6k=P50wf&ObIM!vAEWL2NJgJO9)O<0v|{UuA>A#*dVx`5?K6>q}4c*wy!{N zl9F{q1lLirj#9z(l&nMdhosfSB<-B-F#8kkF~Zln;bGpLei*TE!y{ca+*|8+#&wr5 zt{UO3_qiu8nBay-diVHUkX~?|t44Vn{I0m}Hpx}jdK-P79TZ*fhDUpw`~svGd9E7c z`FjY*&r*%60v{5s&^lZ>!%O>AmN>YP`2i*VzX# zxF>kq^>?h{K0OR-?Ozxdj2)8;`eMF zaNTjJtN6Vd_nK@ozgNpgI@`?e(XPZb+swY;y$TRto_)~~>1;Fmg7tSpym|ITC#19e z>z z2luc6W_r*0y^%g_ovUVfm0EuU^k;kD^{+zuh$)|CV(G=@TD!!~EOT z^y5>(P1ZdPb+fMN_rYgF&UnBLQ+F2Xrk|dJxj_ASf&PL(e-ZTiq5g3XxvD?p_($M_ zAt!#r&HTF{LA|uq{2dT-ZxFc0ImS6`=GY*l54lU`80WB=V?&TW0`+i?aSod~HVo;k zmvfA~9S7dB?(wLbbxm9e-wrwX4yQ4{!X9W7Qpif`GxyrfEzUEwK z-?Ci#EHnuz*%s65b(5b;rRe^ zdfZh*AZL6B&jpaPz8C0Wjtm3l-(VdM$-lEY0+N3(btEMJMrtTm&T2k`_w*^8yYwNPyYw0C7tU?=i`K*Z;T&hbaE`NI*njL7_8OiLkI_4bUboH2X zfYa%ibAZ$7q|ZsFNW1E$1ZNggf^(87C;F#mN^rh$5?#|jH9_AcgMO|H`newcoIWQB z`Z*cs)&;ut&^7&&40P%Oo%%rMFmz1+)SRXs(?6%vG5vEo9eoBcIXDx6#GbygKyVc$ z@Scd^YD(a}rGjfHf#pmD*ZNEZ68rJp3Bd_U;L4=nDoWrz4T7sFf#pm@(kdKD+g%_y zLCHEIf~zQ5N2%ayO4h;oACt5SN~E2$9lAeO1!p3VXy1VX!F80t2P1;(DS;1_3O-B; zEN3FP)@LG+uw!3BaFP=EKvHlWCGf!p!S$5DawZ~aHIAh1D-fKdWE~N~b(E~5RB$~d z>(KonX*DrPJ7+sGe9XeY$1Dtd%)-FOEIcuXDi#L5;Uv0d4pjv6D-q1as$ecwV=kIG zR1wU@M4($0=vG75%%Mb}Qx)h`2Rb#-F>~m2^_V$yIvq2IPN$RhF)ITfvoi28D+3?1 z@vy}X$F55+QGV>w$})F8LMq_*u_>imA5TSZa4%DS$m#Q5 z5q3<7xb~iVf$O*zsHuo5_+Ag4*5^I9v*Yy0yJ%XOYBlxJi*{|g^rF1!ti!!fxw#iy z1Noav%1e+J zL!N23>vVd@>6+tw+qw81Hs$d|t!nXSLU+u0>?lqZYrS!)aXNoo@wgp3Tkoqaa}L11*YWu|`r;R7*B9(Zl(`6HE>2yH zdQ&-*Qj<`xbNu2-lh7A+sP7GYeu>YU_#DLNEqwkKpI_BZNlnqVOifMIwgAI+<>2pJ zpSCZk&$fCgPnXkuF?Fi+h3D;HdXTT{o0giU^`<4J?I=(8#arpVn4X%h^QRY2M_<&T z?eg1p1s>)X_Rn+iu6jP+Rd*C{ZvQ;;Ge1<|KMG#9E-c79iu~5d-~K4@9}Dw-xg8Su zt$r!*V#l#?=HHO#=g)10&qaQYbNRcWH(DvD72h4gp7GUCez14U=RQfj5z6P^Qd4h* 
z^0`k^zYgUGzv0Q}Z+O)Ep?v;^NBw;$pTAjAzYXOFzv0Q}Z&uXDp?v;rG4;Dp{->0` z59J5H;mL0z{w$P#obp&GzXAKYhM$F;-$H)$i;(jX_U za2|Q-I|dh!bKf(#h`e%!!6oFvG=s~?tLGTZAs5dx_yzgJ}*XJ3`BbTo+SU|4) z!QfZq>Yog*A|0H^=o<27k>+Ldo+tkRHLBUUe6VKk3T~?hUuk{2a07K(`)00^o~xoY z-keWaE&u&b?F{XoV72s})A^EK2a@aPoMf(!4%gA)IyziO=QMM5bhwTV*U{lRIv1I% zqr-J{P{%1e->G8^en6f*&fqL^<_iYX$k_se86@fWBa(ESMUsx^kfbB&OgfUzq$BA} zI+D($Bk4>!lFpq(wPcxp+jrYsmny4Nt_?yXJojQV_B{tpXZVAxTaX@o=0nbM}M@AkmuP8hMlLZN8 zj|cqt3?cNMS&<~riA;hDBCab~NvxEhiiqn7-bcjs1IviGZXhG#dVyPs+a=gWd{}~s z-^TEKy!V81nEdL{=idEy{0R@*C+=!!zNjoslq`I=|^2gl%4U=!4 z`nlrd^;0olzV0*o<8NgppVUn&msfUl1VUOzcW0~Cu`jAc1HrHs*z=;M?$hdn=^<4M zHiR-m0WCK)tQmSR$lFX(p#n5xfK|PaD>{KRUB(01xkTtZvel2a~5x#u#M4P#i@$Da@jwW=Qkgz0*W_#Mj{3T~qP}2I_RD_XlS$hi$TulUB4sBBW3rJ= zE4z2LrIr5v1Lr-83~xNkQ#0bs-|g#8iJ}3 z_s5>z)w-vxvpw3Rq9GUx1S0jphQ>&+L2V4hBYI;bVi;D)3|oe2>M=ELJ%!TUCb{iZ zRt2N4dZG$9+Ixsq0#44qDbc!y+wscqHE;J8Q7UJ$wgs{|oXHQTK~7`_<)ofZ0{=^P zI|p(uYwETRvTY67uD6~}8ECfpb6IUbH%-(W?N)3sq4lRSRO79@=GK7vrL7DG<8f>~ zhD@T}%Xx%<)&APc?Yn#0yR?qIJ#BYigWP9=7Tw>|+WitqcGrp816u>Hxjcys=Q~xR zz@r#%!b-8!WFkzAYr#(LNaLE0C8`tSy&7%H#bQ>(i*YUZ1dkWxBA%OZjSd>$>&F+@ z0YM*k6!GHx1?#Y!ZHO1w9lQaZg1u`5x>aC>mrY@p$6WNN)C_nt9s|Z z!Ntvr_|i^V8o!^%`%5aQk9eM=C3X=nn7U2;F&=;94)HlG7j3uEbyt&jgkEl>eoB5q pKhb~RK%3BxySn&<$Kw~?O-Y=$Fdo9p-^D-woagzTaO5nF{|nf$5-R`z literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co deleted file mode 100755 index 290f9cf99b43cc5d8af43be0ce94808d7c2c9dc1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 58040 zcmeHw3w#vS+5YV8>=Mu_M$`xq0!BnGNeCfag#`i%gUB6GYZ6F+NJt-Nq5ZCM_RI@$VEHyN{#kM*7EAL|mP(_sF!k?))@leN^% z?1JR)<6IKtgd4K1n>o)gHawHNB5zq~Zp!lflA`>=();dxJSDeeY3Vyg-|A;lau=7a z&MnN&UA#EIv~*8Ner{g&9VG>2`8bo8SAw+c(v`W3^S`vTuy|2!;o~VqxhwJ?Us+a? 
zed7};#Y>iy=9d|dr<4|~&yRa5Wle73>iq2G1x0yJOZDtUtCuXvFKJuNlcJj9qQZ3@ z)-p@i(*5JC#n!bXeVnz-*R`abaxEu0Fi&t`rq7wID++X2(MCC<^`(n*3v)}dm*|`` zWlqaIsP&81Ws4!!iDm0n=6|tZSh_0Ysa4sf`Kz)E^NX@`bvZewQO+}^WhDi9`Ps6# zXi+Oqqo}8&MU^dWHR|QiX%zM4Xi>{JNup-GyzMj!Yls%MD7S3!GFNGJr%~GDOIPK( zifA~EB6OcG7em62C#O-E?y2S3ZF}n7({NNh)BQAxdU}=W=DdOxZnKh3qpZDa@H~A!1$ME1sWx{Jr#)?xmTzd95AwZim*N-k6Bm*ikXH9sZbV6T>oR>Wt`>+AF5f zVO7tiEG;Qsy%Mu&>5BZKvg{=VYxDE6d9nK6{MGSa;))oT!cm3X!h)qmabG(oompNn zyYEj~k-Ii~NnvhT_8r9~%T?h#XTca=8NWGh;*_D8*;8(~enR$?1zFiynHkwb=U$hc zu^@X``iiv~+3CYGi`EX!UR08oU9xr^Y$;lvZBL#(WAgOuDf4Db`ew??l7cn4W%*H) zT5UF-N-15svLt(P@#>&!x0>&dQQv{;nu4 z$zD;Mm+yI6{Sqldr{|Y_18C{uq9G9c;3vwEAxoF69hyEoP5)$BlV4I=P+YX#qksR% zpFPL?=kKj}FMj>#-;YWo0C^FA@u}Z`#QdYa{`3?n6Hr5Ukyg9rrNE&T@4f0pIzR1I z&Ai6H*YW>mc8nWsIOb3Mnl`#?qxJ9VWAqH;@P&pk-Z=k26?EeC56E`(&ZVTTflz}d z;@J|3!0J@vJ^cT`7-GDK;s3x$G2Tmp3_+$*;(P!y;5FWxN!;CdFNgRdK{a?^2?i!;k?;5bA>ikli6em!m$&0Q6=OL+0>2 zH}y6iZSwd-&j~(WSJfCcsvFIa z#de=BWOpT#luT1HpyW&?bCg`EWJt+vO4cdapyUAqcNu=r)H5#hoTOT)$uO#$!iJH# z+St%o8i^BQP#Woae|L0<*QjcYL%L;bYK*J2s)nL3U}Id@>#Ul%DZU!ZXN9bcj8JI! 
z@X8FMrg6Bjv(bBfPEC$V!<`pVzBl5p2_fS|e^rGQvF_pVWXnR@#>U~1;ixm>u^!B< z8lE|Pt95tgM&E-Ysxn7pZuQ+gV&jOW5nDz?Mr<_#oYTDy8ht#FgAbWo45P~Mr+ba_ z4to0{t&26ytg~vY$ySYNTy@awiaNGgH378m2yunS2nEzJ&V4&1kB=bT_;z(u$n-nm z3pX{DAMkxQZdoY6{^9ZYJWj%W8I%(;{}I$W2GWlx9gD}%shrTU&UY$1*3tcT!TFXG zx}d7UTwpm&i&$3oqmkf^=7pByMITs}i7^^-$n0Xu_TxBCq@jPVz(D!CO|P)xrd`mj zru>3#{uwO8>}r|5F5S!u%P-Q=D@`V_7t-h3gmOboU8h?$S+*MAkt0K)QKKrm-iWjr zDsA-WP-x7UN^=*q46e zXyfR>Wq(0eGOICC9kc&mz`2m|Q+$5bST}86P2COiYa-JY)EF4D$PG6MiH2;6gqGGC zX&xi8v{%T$q(|CDE8?p$;!!5JhdFBamM#tXd-V!c7`ss3BlvuwF=B)pQB7#6z;#G_ z6racN`652M@p&AdFX8h9HTs46ZM`ybr8oyE(vl;|LMOQ{ zc`G?S4dwVEKB3b;(qEkG@9V#HM|@plQX~oK29%@iO^>9DbLqZx*t@rJaAdHs7gE?Oam1tW1?Uqo=R?NZu=kxtj#bF`C5}Je zIA+iO@eyNNWB2sAYa+&djkVu+=V-*(4*zog`4Qv(M$2b-BE}B15gx&)ZKS9(eKmEa zzh;u}!HP-#hbks*x^3fan{KO`WW0%XzSVdkc=Z|6axTQ&@Xqr)5p!%X5(!2^W}3sf zA2O31&U?xOnDdm{M$`8b$Mn7Cczxe?%z2hb|ANozZu-U!Vcsuz2M3v#l6fh0-q7d+ z!O-aM;`2j%UdHDq`1}l?H}H8UDC!>Ffa87m?8oPMeA?te*`W?}gv}l3Xq&Xuev3G8WrhN3A01iwtiLfq0^H(b6a&r`v!)_L-~kh`-J*b_KQ4LfuiU$HZP4>6-F|B-Xo1K%m_6`B0Ekl z&-Jl+E>F0;V$Om&6(?i=RYu*()p3UH@)$2Qwf2GZABNHL_9KQ-mn7xxfRyz)Qtq7x zZcqnbBem3l+(jM8I_f~~-ZQUboyeY!b&Lpf;)Oj8NmA|$NVz{p%IAeWI7Yqo)PdYf z9modiK<*Rvbf6>b=|IOY4xsJdZ=zj^#-f+&E;4(1&>zV-uElY^r@E;J(>-R-C+nq7 zEsm`x<#D~IB++L{{W{O~#JxVLU+?CUj;$Ez?=%oY|@z93PpZ(oyC|M_S9D z|BqkD(3&1O+L})L2gA+=Wq$^a>y`a%OM|kXI`zta9`E;jMc7aM2G484 ze(LW>TWG&16FRh?=`54>i*l6xB3;?9%cuRa-tEGET@LNn>9k*`bFN=?vU9yKcHVaA zA3|1z+c%ubJWoE^d42|MGTWQu$!d;ExjP_beU6lJj^h|}r#4B-UDN@tqYk8;;~nUT zIn{xVnB$phj!U^OAm#oXDdil;G3u>Pl5#I~fE%a-Dd%_xI>Md~boBfl3;yc)orPmP zzsY?)zp10=H;?N*e;4zE`gNZ8D>%Q!+`?Sp{19bAhx3E!dVY&?)cg?XYJTYQIX`5* zuZj7g%i;Xc>6{-roqakTdW`VF4h!hY)SM_XvW zC=)ugpXqvji*l6xB3;?9%cuRa-uNr%pL992U#HW4o!<7le8AxH^7#Yz+&w5Xs4^w; zjQB3c_4MXm{4O8B`TgWE-{mLFGcz_%&xD>8b8nhkaWdB#?(gyw)^UdI!u1Wlo2r{` zxy@>7uy`FL@uXX-Dynepx5={N%?jTZ-$s9x$LN2^{0!HUTy@AYO(SB)@p>%6>%I?p zZi2^{5a97uhdzYfWcQ+rwx;Ve7>e%x?+|*juOFwKa>&Gu0t84tY zy2&yo{4CVXG!hNZL2FdJL0kcR1@Rb(D}j3xUn}tj;6B9TC9VQaBAy`eM&Q20lO(PN 
z?ngXX;=6&bB)%@*n2fdy8;*GKv(*`ZukHiBirAKT zJ8*yE8zjCT_-f)CCEfu%fOwX~4**|7JV)Xh;DN;RB;E-;hw^afzf=}lRxACTsrS7*|Om+`$o zAJ>`vaQ@5HP52&ja2WtD9dTv}+2YI=@~AUMNFz8`NHaK3NGmvBNPlnvq;Y;Lr!qLk z1#-;Eso})SB_^jbiC0KWPK_WgmYAFxNxVv8a%vQDsl?>eXyVlplT%}e?}+7846iZ1 zF}wzj;Wcm!uYqHD4IIO3;22&5YhJrK)mb*UIn`Me`!QD=jY zMzBgqGq_PmD_AY0KX^B!n^R*s#_QymlT+6c-zhOUHIDc$iOH$)#6gM4sQ__>#N^Zj z;tdj$Qxl0dN=!~oBECD8Q!%{8n8)xMIEL52F}wzj;Wcm!uYqHD4XkE=`x$M_yO=H%35;w=)B zQ&WgPCows79r5QSCa10^zE5IuYAW&l5|dNYh#!!coU(~`#&Rl#*BJ8{UIWMQ8aRg6 zz%jfAj^QMR>Hrw#~?9C2O{vc>tfkVl>G2x$brE2J6xo{(1X z`$GDIKY(;|YC6aGK{@8+)D6TBOH5A9Abvz*a_UCnu*BrlOyWl+CZ}c*e^Fv`YBurX z5|dMNh@Xh%R1B{%<}thmj^QcnuuGYv34O18ZKpIn`MWp&QFCjf@hijOA}x8zINu zdBm!Q%-Yh^w-s{w!=rtlgPi-+%|7PuGvV((=D&K@KYYxentQ~@{3BMy`H{+WtS4mtUXx!F^$yB^OZ4@ajsFmRY(F0F=QFrZdK>wC z;gjA*F)#6afxImL=!h8Ss`GmJxAywi;MTNTUoY>Qh4n^p2fu(XgRszDALV)3pWYHY zW&W!3{&z6@Os3uXdKvN^LB1N~+bPm-j&kQz^Xc(8)qELfUpRIAkj9Mark$y&L&_V0 zy(z(?<-m{zo>hd*3}+AKZOBY^=3(3^Bj7kCpTC5?V}ope813k3XJCDoJhqGhFQZQq zdU`XRo5s z%ZabX?dlEL8!`zp39=t#zgN5ON`_1ZPP9il{c+qM$5+^+odGx=faBiwSZ5%P2ST@x zJ6!9mna=ztW!KWZ*ah>B;u>P9}~saeS3M z%^8W~kvQ&ePj^P+cr^5{wr4nFaXc331MHa&`>_Xbk5`++mqK0&nE;sp*%PwotB1p~ zea+z>_5eq=u{nH+JIefXDuG))oF|M+`hr_+> zOx51zF#AWgw>ixIk?m~`vwviJ4~N-5vc1h=_7BJQT(+NlJD2U(b6^~}G7g-&#-8m2 za2&w#Kzpt;5yun3yFvDRCkw|}NFQw9I|ZFU?`jFF9YD!<=7oz8ns7e#!aL9OnFz?Q9Nne#v$= zhdIAwI}e9Bzhpa`!<=8Toy}qPi)?3enEfK#c{t2|k?m{_vtML8o5SoE+0N!L`$e|% zaG3o<&Yz2Rlk4ZA-EE(%f4WAln#;GFSMEpdOh0Cx<0ucAP(!-ZpGRSu5Hn4 zp}3BL>)7&;&NV3p*0C8k%QZQ8V)}C8wPxZ-d!>^k)Aj|V+@ENyIFvZXv_{z_PCfE- z-3Zsl7+Cw3BXyp?0y@3No7QN%%+YH&ER>lv*-RW`uW_D4KH4MJ(ZIHOSSxn~=X(q> z6X`E4_z0fI+PN#{VQmWSLci7bK)F2znHK#h?LyjK+6CEg3C>-bYFhNGKICV+u-@*9 zd03A_yP&hL1$870Hxub^-D^rNMc#yArbXYo7kS0n8LWlbgY_@EZnu4H>vr4qG{#Nz zHEiF5H8Z+yw|!y_%UZb}#ck7U4W2b*O}I}_Ig~KwvLy9bv=IcWAY7V;yAn9NrGOoZCc}P$C)4H z!|W>ztoiASbNz2Lt$-bJ$f*H7GjW1l=`2v?)uE3JtOx1`y#aGfYocA{kS7DXnu(L_ zYUilCJY0WY3B7>}Oe@RYB)*A>u}xg^U$uND2E*8_a^zD0A2Ey-<_A^ 
znEWKaIj%|XdlSj$o=9i=$y@eQzYo2Fuf34YzF@!b`^(K2w+-$(|5NqR51h&R_&cT$ z9OOE(w!Wf_b5i??Lg@#|g~6XO6DQl7og|sIJ0N9!GWu=sEv7Za-YR?r=Oovm311;~ z_L3JV;KghWR?x-tOr2`u)*&X^Ty3n!Q8#4n2N)t-tmm>EMlR*EsHV z{Z}Jz`cl)HZr2KbB001D3TsLT_ZLY0b8GI}!LET^{`jeWUhK^d}tS zf#4wh6n&-kOB}nw;Fg#VNZ0%)Z&JV;@|=EA^PA0osR&nI@2*+|X#&VARB_~Oy?F&e`e-Qd?^c|)(*RFSXJyV6e`g$fu>O9Xe z9*Z%aXYX+aLx1d@rZwN*D|`~?KCfTq;rhjZKIUpY+n$cGA~#td()O}G$Oe7~X?{M5 zF`})!j_Km^O&J(Z@{_)a=dngu_$JzeGWO9gYTjm`uFO@Er}Ra%AM1#F@4OA@bbaKDw#i*L{g3Et*cZXI%Bj{j8)L=#^z}+d_0jK${^EBCu206ou5lG+ z;sW~_X9)UaT+p;`vK#Q7tjgtg1$@jn=uN!GOk8Mx#Yur)V1sGhZ0{4kMfX2_N&tFO z?lTi_vHwe4M@$1}IQG*q_PjQcWBr=AHeuZ}*5Mika^_uTqQ2JWwTbQ%`Vp2n8#dDK z%!S_}7w4;TZ$i1eMv0aSf5LKaM!Bi5bK>2m#d}ozKGXh(*D1Pu`jt%RkA^<|5B&;b zljuLS9~z1C>?3lYyqpMLay+xZF>+^00j_Z%uU~=db;xPp5`EBg_#o=fQ2H~K{%q*e z$8yfl$GG|Dw$p9bnX-p|=WMcvzGy4>Ew6Z|j$<4B6x)axgWmL0 z)MMRl``mWWPl<7Wt@KlYN-}5H^hWJHq;_oP5@g~xb={}<$ zOYo)nn|bb4Z{i=B=b$(7&%|%ipSa3~5Bm$#f9_4B@8b6*`A+{#zSIAbr}SgGeEKo^ zJNAv>3VacL8RrW7hjWGfLyogQ*pKXwkKTX3%NwXG{#kG0TaYbZ^CrGS{G2!O7nI-f zCU$?nub*v=S8Z)kZH-rLjYnJQ$J%oBcy_n7^Gw-D-*&d!NPjmL&);tS=6c_?ka;gX zn0eTfWJVe1!YA|J zbD7M4-&HdIgCk`A#RvZ`>Mec$h(&uhTymMH=l}#0!Z0qY5^Dlk%EsOb!ewZ);(D_dwcd>J zUR-b1qSl)+-izzaTGV6CaeA@m|Y`54G}vaZco9i{d2Xy~xQH#Yx6{k&`WolZ^Kw zCtDOJ8Sh0-wkS?A-iw@UQJiGF7dhFYILUY~as7p0 zxr+C4RJ>Qnk?~%Yj*R!Ja%8+$wd0QW8jH1^h3H%BO*!7z0F~0IwwemBbT(R}ue@#1nx_h<_{bB;Zow-?y$M9gDS{Zcbkd?B?`1U^l171G_mL z0CsbF0Bpt=E!vrVV;6X=mCX*C(%4@m}jyyjQu3_i|LcSICj^UX^&B z#C6P7xaUk+jr+;1>yyc;GLG>d1U@p%%HQ`N+uk(ivioA^SB$*E1m7fDP`-9vn_#N^aw z;!7kZr?wDZ7R#v^USrH-cnuuGYv34O1IO?hIEL52F}w!WymoV{vutp4ss7p0xr+C4RJ>Qnk?~%Y&Ozm;WV~0kJ+lT!~850jXjdWd+q#N^bN!k6~k+c zc?_?CV|Wc5!)xFeUIWMQ8aRg6z?#=?PIZ$FaUT>6`R9@c9 z=QN`p>tpl~&(#`uZpU%Nc}oWoZHm@>Y=S@V;$?!d~QHJg3le)qxrm_ebU~$V?$QOGvGG) zLEHJB$P>LEQj9yw;`?mLecc}SyKN2d=2paV+={r4+aPa){80HW)?t97IY`ez`Xb0h zi22Ba%mcpF-tSOX*F)VUIJX4nK5Ku&S%%|fIKIt(UaVo$c9WC0R4=l 
z-w{51;bVQ0q|WnYI9G;zdG-&TH8@^_<9z!^QJ&W>T+~PSVd&Ej%b1wv@OB4`>^9?dKra3I*Vh)FA>G=p-#hM5BblK0%VXhC;{IA9QApdh@dHduTG>5r9O!hnK z#QHGJ^*YR1!S&od`YmvNE&6>e#%PKCva=q?>oKlN?f-V3QCwwv3~;<0=gOhC%>Id3mn}Ja zILvij{4NmuVV^@+^0+z7bzYLU&0((V`Ve};2cyh+;I!m1#un?m^x7=yM9YGXxAS=I za<}7n_R9HotbJ~K+;;H0@NBRn`aM?wzTiF3kH7~z;DQ4#6xgpJHbA89=DVVCu0Diw zA)LG2{+aL%`wJfH!}mwy+}=u@s|3H6+pjxSIIeFYT5F=MK@o=NiYaG3s%zCg!W zNp3cW>FZ?QG>6G`**DE$a$EMr;V`)@`=U8aF3YwzhuN31?ag8KrEKfrF#C=^#?3dk zoo<^xZae5>&IUWa`Wftbg7HY?H3-+?sdxSB__buD>-F9S-sKKo(!lk3w|>h^Tx%b8 z_8u>?$c%>InT9~JkNK(i}*G5j`_JLm+St>ecFY2AAN8AJe=d&Kh6cN zvt+yYzIYM8CUr&p+D%_G6FDc`F)8zrm+J*NKNyq3Hu3%PB7RNP&2@R&zP5F{?c!QX z(buqD#H6UY-S)ZP!OS(r+9t-Oux`B`+&IO$x!zLrE5=#GueDRRyFSLFi2Bre%iGWo zIo~m@a{B`(8^?=&U|M(CN5ndDJzhHYB<~e7(Xm^c9O&h}gcqpUM@77eYf@LluN8q8ciR^P zb*yVX@)iBkv^Lq@gN${hJ#FLH^01zo+@!DIdG50ye1&>VXk+ck)=gg_`c?aicIwtKZ0>sLGeo`c8SWUi zm6)^l*zrLf!?pzdxAK2XYqQ-W$aUI!4BN)Am4G{2>`R007`COzTk?C;+G;1@{jr)i z+Y{X}Y-Qlg=j@)rWza4AlWE;+Cknr!`%}lTt$|&iw|fT*ptt6)rnSvZ5`IN;aeJbU zVOtA3?z8&^Z-?I6|1+)ac5;w0Y`Q%96#7T)r|47I*SCX*^i}ki+Bb3h3c)YV3HnDp z2gscwaEDx{Z`2%TIV({P`;Wd*_ZxkUn{V#=KkmBecRH(X`k=Akwzyuw-V^XM4g6kM zkFmVp?jPiS7D$Wkd9fUQw*H@{wZk3|WV}cg^6G0E?sWlO5yMuFF@C@v7%YT-`A4Qz zV-FU7iSs`i!>0DSVB1SDR^%q@6W1D2Ht>5$^OJERw6$#vTN%ca{G@;4dG2!|;zVQ_ z`{)}rZ_7~En!ib&(jU=&?u8*@*iO+N#)b$#4SyzL*i?Pwi?+#KH+_)kYuG1Z*iNxN z#)Pmwea)ix7CByh^gE)z_&tIcwsP3zc)W=_?UdjO^a&QQS+#Z=-tDByRo5mC^eVf0 z6CbqGgGJB_dA-&{c82gRy8n4?5`td!Mc%}R?M!hku^F7(|8f3G2T1 zLtMu|Zu?hk44du~`Vp479X9eBW(WKZxmcshtwp)KPKlO_>lBvz5Xvovot0g@B8H9M zY1;qrT1A&nzp@7U>!DBoqwky0e`-Ip7U$VV30|^)tH3dGr}_`L&Vk(Yzqn?H z+zc+!2W^E9qW--~f1A?Z4t@Gq&Kdd`H~-vry6rks_R#N~P4>_iO$Wcl_0UZD++ij9 zuIfCmwFf(Fa6h z*r3m65d5yA-{N-}=ScwRd`^%f&&S5FVT|dUXuCUxjbln*qhr{3o_p8`-z3WuF>L%E zGbPTYGu&4$2jW?(M^VQspbZ zwFc?iuJl@WKyJTU<%6$T3%p~H*W$I29wYj%bx5yC^;+v8YlnHQJ0Tw$;f=3_*n`Y~NT{TTfn`$ljDzKFhzbA|oGxx)S- z$Jrn3NA|~mGKQ@!SC40RTRYE`jr472yN&dB<#@*Y>APb;eRpgbcyKnqA6H&0e6rsG 
ztmk9f_r{*HxkPc}-f#r|~9GSv@=Yq|veawhgo(=khoVe7#s8&HK9l=}>GiRlX@g!L+nF}VHP@f+Sr*-k51fqQHE;~Cfn#_LtaH&IoucWiKT zsJ@%7~OeXYEn zN_@Y>oLkd~ACQ=H%O>6#Z)i@*7`9LMETfN@$-a5CwJ&E8f3dYMXA?i(+Lv>PpNRES z)0y_5)S;icf%svG>8EB8KO!;x)Q!YpiT@oN+0_rdk*VI)%wK5Bo)K<++%%= zJQ+8^`_jA*Ml50`WV(o9y9=>X+&@Xj{M0^&b*qeFle)*mu$6!0ieW>&(Rd%|Gj@Xa zKHYmD>2=PvPs(-90qEhm(O&Mwlq2&rP$Eslu<87Iy>s1@YQ6Ju$~RPbNvbTCtJgZ$ zKB?j!+9_Yh+~{@Ab+AvwJRGNqA_eJv%MI$vOmcfwx~XyjfmI;l-XVk8})g=SL;V(*l07>F}ID8qy8~5 zY$&_K7&er5S}|-Wo6ozBiD7&3SWfSM5u9GzP7E8;#B*eME;fekMa5^S({T(N>Wy+5 zdHDP|CWZ~17cp!&2hPUEuu1*89C_~eF>IKdVhz8m{AdiDDmO`$6%)gTxhdj!j#Ivl z`O=(jfUSbl?bH)c_0U$dF*b$`^$0$9P)~HtKKrD-7`BL2fiF!X8pnomPb!WL<#iOt zmWXxr?)}4}acuNo;B^wtA8$XgXdD~rp>BWZwzt0+*RhG%0G5g8djp}@-hN}$6R`#? z4|*xk)BBH++hTsciE~?}KTgAWy(bydgin^|_NU{V-lI&!vAu~nHmUP`2F|s&XIa#D zzX_kgcw*?&7x#d^dyg`v)A#1c^9>0&@7}Xa$E?)BRvoh<`&sT|7WL(rBO+!+mbb4r z%IS?bxwiY8MSVEtt>F7{>@t*HNyo7(>d!Hk1^-pOZrj{8X#Us1PQm}<)Z0;iMEyJJ z74xBkdi8v$r|*JKABlcxZ!fuM92{Yxn%Tp z+kN7qacn4)acl#iHvoE!=RGEljb%Yc^7+{Psy??(?z*)v zXn=hpj_nld(>|a9^$8!)L4Dc>?5Dqn514|!YH!az?E}~j`haQBYi|!f>IsgpJm}4U zUVD4`MZf=89_IFJ=(V@UU-Y|=`$FaHN*4*)F?zpt$#XoWVaC`Ai$BS<|UOZFV_$98B(=lc47&3RP z7}xLW_^>k>2gG&3XS03e<=JeXxUT_!W>4a?E7GVb^o zmeXD=49jUR_J!rN7wf`!I<6_#aZBzvCHAw9M{>skk()Xe>P*I_kelao#sVyosX zxuauk+_5#}PJ1ylrzJ6njDawy_rErHXQTTPshFYmVmCU959lZ!;6&^HJ6i95qCINdd(yOx zBcXqf?tP?UiFAyJI|k!yU%Sq^^GR(R=hY{*ZTwxvc%famZ$RmerAm@m>@r9(*__G3WlP_|c&um{gTU{3>|%ebF4@%aq!^SoZ-vlQN+ zq(nJ;b0m&F2O=L0r9(-bB&9w6-6 z)+O#=;hZ1$MtIEURMg{hrv-Rdm$(o1q|(E=dc-v{jn90jUyn3>f1A&nM0rU25Wm&i z=H>I2wrSeFh8JbMe8!>etAA0J$8*}gdex2wv_sohuiC-;{o1|;m9`&g+CI0Qw$GiW z?Q^GzF~xHL!7)g|F^TK>i~?A4499|Fkm&ne`ndunu;iH1F_aGTBTpSAb;Ot|oq*EG zQ96{=5%%Cdtl$`=;F!ewdH*=dF&qnyK_XxDo_CJK`vk|7j-hmzA9)%mkw(Kc zj?$r|P7KH1R2;*-Ej@;{xPKxzfpdBcYt>lSVXXBS)~d1QJ!?INbt;Ybll2(7_4F9J z)ASg+)1n;vgW?$O8*2M_Uy%0k{+_mv_wqOgc#lro$NN-559hRfyeGyq-s{r#x%ISt z?lf(mJFTBpcfxxSdQbR7#J=f0;IF{3-s_#$-FlDr-bin6FL7Rjx3_mV_r%rvE%RDM 
z?-kw;>3ZK?maq2)zY^(s&t3X5UIXa;<9pzo-Y5PN9P54Id0nUXf4>yzde8UEaIE(c z=QXz8H#`C9|EWF0&u05L-_K_I^ffKHqxTw5#@Oh+mB|;qhxk=U*Ly3IFM4n4{z%t* zE3=$*#JzKWU+#sf_w!{r8HkVP-n|2muJ`C=IhkFL-AkI~j6^wY_m0;0B9alW&OORG zU-Vw&>}S3AIOmS$Jh`d&73bX1oF^YY?tP|N$GKoDd3vtcsyR#U=)J24VO;fI)#Ofl zds2@@dV6~!k3)KUdmsmpe!M-8S(bZ`W9pu0FJtPr-Mg5))O%sm=jyq_acFN(=?tXn z{j50`^}ffMNZ0!ub1t5#{f*CM`^oon*?v6-(lFjxTUz%_j=2}HPVT|o-d@PuC-y}9 z?PlWqiT2k$(SExBse2Lpo^bEWO#dFe7f~Sh=GJ>IyZ1DvFL3Xze75aZ%j@z_YTG!k zKB;XJzss<%wfLO~DSjtPT+e;BqrVezjNe$bZTHO9dt!^El)&I8K7a8$5$%ZnPE_sS`}4JZd?!Bh z@?H1ZKE7{Wl!r8JpF2(4=S~x2iudXZjzJ2JNnFo&{zo~6W5F>T8GmYEjpl`x<242! zvhXG&L~tH5yO@UA#hPZ~I7g%nK4fk&j2wTr=@nMow8`CS$|ra8&tMs5SIhKu>1I|~ zev$5B8n74A=R1IMLrpm|b85QIwrb*L`D!em6|%;T4TY|~w$iM#YOI;~Utyonh@4_n03Ayzt11?SNUuF7WBuB3x&py$991s+-Iq7GL2pbjXoaZ5#zjr zX2gU2gbeK8W~H0NeLaRbW{e#%su~R}`nK@8KWp?D`v7nNd9BfdeGt>4!X#2=G=E zZ}bcG+uF9TCi<%@{B{0&&}*Bi&wN)#t`zzF+pH?9U%#!88?7s^+{$!y9_4K_k|W7N zFS#yx>rB`lLO)or!x!-hz5bE@;(UK!|E)W^*ELoc_0V||pFL>vQ^&Sh_YZW&L1%nq zJj#i921N#;9OLNtL4(lty~zJGKF{FuWqca&c^03q;PchSA(0`X?vzN1s2dozT1HAJ z<%DggKHHWWNfr4-+f!0x+f!4wGF_b)<)uZ^gkD-*+SaOQ+n;KVaNSrr} zA2h}Ni0ecyJs5rwy%Ja56#mQwab2+IPnTB>qYKu(=AG}xnN9-s(n%OzWcj`0@LuJ( zgW+C=k#M8mNthRhPdCSd^WN{oS=R8l?!MvOy7OAydcm9UYs&9=6MjSaqBr4pl&^Rb znkirPCj61|pf};qly7cHGT{E$3tG@caVR6+iUzD@-M$++$8TiTzYUP+BbOu_6Pb`q-z9rN6@|q_MFLU zapV5ZyvgVrw4wV|GpwWDl%R+yKpVT?_>W-sc^}|&B={lDfApQ^E-hDmaM8c6I&#q) zu(jn`Z^B!YU-KrsL;0LH;TM$OVjCb;AGD}GXiDB|OaMl1BO4kGyLbNtC}ajQ0=Av=*xhf$@2lQVT~Sn)y0Un2_VU8)75ODg z)p6>If{e7Z)FmsHclBSGqb1MaQ%erDGRc)vobTX zht9n&J7Yoiu=EvcGqTf%XBMp;n!TtbFS}&zy6n<|qU9;2#&n4MlF>;MZpccSK0PZb zeOO9bN?Ou@Wo2b6OGl@sE-ffqwt7*@;^Gylvu~WVBDJt^&59u_ONwvLUtE@glS#uy zEgrdOamMgrxx+JtjT(_ZeCW_6dATDNFG_q@^zzK5|L=i2Pxr7G&8u%L(~Mvt>a3FyTY z`9;W#ddhMaK`ukOJd)Bl^+C+GC(oWSd3yGgc{3*co7q#A=NZ{q3ujEYVd^B^Ij1i< z4j2W))J5n6CEqz;pO8||E1k-`oU1yUp1)cmyiojc{<5w(38V9x)AJvZNU_eZ=dYH@ zN?)JX`Bk!k-|(2nt@Z2qu4R%^)cLjjTK2(tuPeXyFIrZr{93RL?!T)6n5N6uywx(` 
zR>pB#oQgE&*Zwa$A8^Bff1w{q8U5eJfZ;g7yleyi^gIi_E%8bzL?7``_n(?xcvpx_ z)A~o-L5@lzPpPN-Pk%41N1EuwZD^f0yK)&qy2~H z3EzI#UYBpJ?_J;7Yn`*c9h)?LvKtB&DaZT^JHK*5!8Juua9wnvNu3&D+Hj{m{_o&? z-pRs!mQCf8{DC|aR3@^0kj`XjGe}AkMI8(jiN^FlmV*k8bFPMt)1#FP2Mvm}%28LR zQ*gf^U9JtPan24df^sh42Pr4zrGK-2TUn<2_W|1rv~jLNIo+P<^8qE>((O@U{%a+F z6ZA`@jD0&~{U_YZh8%ar#LH*Sb(}3vmaZyWwyrdHMR~>Q@|ElEzVqSS(u$?)-gbIy zdos6lapn5bmBpot7niSF_gF=FX<6}270WBjai^@T0%^tT)|M_V|Ju@(YZjHRd^mS? z>8kRF*H%^(U-?Mxnk7rtl~+0s=dN46sl5H;xf@DXt}icMv3zye6DE7{qV-FblvlKF z=26|un$;_BZnKrEq?HbT%~m{VCHt@0$~URQo&@yhbm#ideD$!XN{ zH0qL(T2b6;q<(T5mB~o0C~iGczdDWD9$vGy z(i*8Fr&Gnsb>%HpycBXyqmBbBHk>$BUJ7+MjjEowF&O5u<*RJ9vQML~0~?IRT2+;I zI#u0hs#?81O{=2Qs7k15hAc(RmqIg7qcY)U&{M0nlGCV7$ho04IbRB`J&o$5r)$zD z;Y*?TX;daXUDIk7z7*PZ8nsDJ*PPhX+7DCR={*Y6@pJ8}+2uFN$I45gdeiF1{?wtH z^3s|*)Mit`2jEkISEq+acYQB~e)88JrN?EIW|o$<4AigM)c*8rqFduYrL}hCuhg10 z%j~H$Qcr3xxlWr+J(Ih%V$J%sm`zJpm9MTWUb1{+d08di18?#%22v; z`O?+xzj;DBi@fA^IGDStbYt<7m8F%%H?65y5me553d-=w_>J-7Cl4+xo_xiWamAD8 zPb{8TSWrB8&gI1g^NWY%ui98poIkX1^~S-)i%ZuPFRCajuGn}p^jW>4*qbzK#-!=R zljqKu@SWVX70Wl2R+gvKYSG(yJa^sFwH3vS*Q{S%dBY>Q>uz4PXwAwuCVli)b61od z%w4~F`I0pit90**Te=teM(+A`_1v3EHU%=64#N88WdFGC>KMi)vFl(=66Srm5u zG?Tc4^QRKxbDck}g>*VPe~L35co*a$$Xa6LucyTQLrjPK38ZtL^QT`?qMjp=MUWuP zyN4hISt2l<);`7U>4e$d#Zef$|i(RicdY->z7PT_iIOT)Tkdu_XQ$usWl zfPoA~Y$hM~^=4dE#z zdrE>d$1(MXlUaM>C^$Z=rdlP{?Yy3(6wsy%9w z+T%Kx9Cpt@8@tq=BJ}SladpUv7X{b2_uaU;K8kebyW1M$ZkC@oXM1DSp~&~zFN+s( ze0Y5}ud}hAgL>lbpJS58LHf}kkK#4*Y#!Hkp655Wokzy)^JgnR{`s0}cfRtOmQp&1FjUr=)2mhcX`erHPu1to%$mVE+%uFL zX?Lbut+I4FMy1gO4nsyF+fi@4@r>ze&qOb<@9^RA_=pjk&$tq4GlH~{BjfQ=qc)R! 
z7lM1DZ*oG-4hO!>g-slcYnC(hXS74yJ=5p-D%$N!ybpPw<8UmqoZ}56i!S^FhLS~{ z;lVY>|2f=?J3qzW&l+l{&D~Rb#k@VqY4i6uFj?}7tF=T&?nuU$?sf7)PI772xC3W_ zw5=)`+2eFXo!}nkwi8*pG@jM9YrNXohx#7G-ujRzk2*Vg}=YU-!~ctCI{;Fa+A5bJz%X{ZajDApyVKZ4^pS)CG&KiyxKgi?-i&= z^vzG^>wEc;eCT_iVL)<#))!LiYj83Y;Ll@Bzz1;Lc?uM&9uc^LHT~~db^TURa^Yd?AxBa?p_^X+cTu>dy;4PS- z8W&7huy;XHHEPZ*Sg@Cpe&R02Ev#`={&uB0hMY@Y3q@Sq>it>>d?lyU)<})2R$owC zvugDHvmCgfZH@0S53a`*@%ob2-uoQ-hk38l;fJ1!o9p**-7*$$HkBk?0&v26=PG9&mY=xRm`;XI&yI<&o>z*kt%*A}TD6V!yW=G=qJh+{F9v*HuiQN!dxBv6O?nJ*;Q3@yM6h#?(y!MdnO;MMpg7}JC-I@wUa~N z{9wa#GvW=&JOs-B~I-JYvU8vE9AV;*fIvj&pA%>TMs%; zZMKp7i;S!*G4jA%@Rxb;akrOwAonp3WG(YR?tg4<+j)|YwVlUFB2P!HPkpwLPZt?^ zu*Aq`wLZ8;yLHS1d4PE!>zN1gX{}Eid9*%lr9@#xK@vv>$*@yG@?xY+R#1G0}+$IE;K9p#DSnr*<6eWCukPUNBfOlO_cU)K}puhRqlrGDyf+Pz2XFZED= zNvHmj&bfZc2c7FH)8}oQ{vnP}Ny|5Un0cP_LFf4=sFQoLIi3^DaU=H^8Ch3iq?zNm z#@yMPZR9@Y0j^~pNHfRV$fM^}8+r5`FAV0mkxv&Hd9cJtGskg_cI&c@Jit7_^~?il z=6D-!g*L9&k>iNO^wV{p|bAIc&g}K7{q3c8* z&JU)`{MPjZ^FyZx^F!+A{4njlqUVRy!}%fUoF9_TF`bV3Wq!}0Zo&MfZZf}_N9H%L z4~FtDrhj7o`p_(`Kl2|%U#P#X6M3jV(`A0^dIJ4*dZ545PyJ219WSPTl6t7Wq*H%M zZ~a-`XFyfeync_})<52Vb8hlU{aKFZ{vBQUS>A{9`_U6V%l~4TndxP^bna3;=ju7t zALKd1{w)8CZG6Id;rRxiP1_o;xlT1+c?O?@44!gLO?4z#2PED0eC|q4LcU{2dk@c_ zN%!3t({@ZcG401YPE2!tysc3=<9-$ig`B7pIvhQtqoXmND=!AVSa2ldbOY`txLwHU z4%}Vv86hVdI9u>pA*Tm$55b=cIX!`U3dXZ-FW_Fl)Sqo0gASdL*6F?Tv<}Ch!}-AH z3x*CC0AC;&I$Q{RpZgpgAcRI%rM`h7OvOf}w-vByfrk8Pp-g zhYad)4*bI*_g(A<9QDUx9hP*P{qb5J_a|uS#3pL##wKa0Vw1JZid_!roD=w$_pqx5 z`wkF$zwct}J#=`W;A1Yf;6sOV1t(o>(T5HX5*!LUdBAys!(nGI@L<7_u#*p*FSuRU z83H^6n0upoUO@+&)5oBL&FN#%!RGWa=wNgD7<8~XeGEF-oIVB}Y)&784#H{fy=Gbm z?(Jq;hw$^;8u1zB<0BhfI_h7gWwSp^%j5oREuGjLE#25$EmdrumRYg+kWRRTQw6Yb zf!G}O911*CFzi_fTqqd!90oj0Fzh)Tc(`ELa|G}R!La8@;E{r1&r!gmfNf6Mye6k? 
zUXxQcugNK!*W{GVYjVowH92MTnw%0|+nmaj4mPJUrGs!Pt~qkl#{;GIQ`}5Sy2t%3 zS~{^BE#25wEmdrrmRYgeAZ<>KhK)yy&0){WfG-madyWAfBN+A^3p`dZ>{$d{BpCJ_ z2Ru$N>^UBIykOXK0`LT2n^QKg$tj!HOZGtv;SQ!kNeMS>BPRLr5pRc zmMZoST4u$50BLh-I&3^$Yz})~0eppE*mDN(48gGHmB3dDhCOEj&lC)MUIl!WVAyjO z@GQZw=WO8Fz&59BUXxQcugNK!*W{GVYjVowH92MTnw+wEO->1~ZBAuM2b)uw(m^=I zFG=`3^|F@D{wrD@_kXIT6Z@H#ZtSp@D)y?DS+Un3ZBEUBjpvBXVb8h1a|Oem^ML0G zhCSy4&le1PUJZP;VAyj3@B+cG=R)9xf?>~VfUg0zIc4*joU(aMPT9OBr)*x6Q#P;3 zDVx{il+9~$N_cH^DpNYxoXV6A*Icg}>s8C1(}Zhks^zol1M1SYp`!yHeEl2Pj zvvOT<&+J9<`%r$@?2ZeieFr@W>z`xH`^PZynmwu-?78T?sAqf4iT8iFy``=fe#mR{ zefE!EI5Ds6Tg1Ds6XPJF0wZPc>R$#98)innDz+CUE>kN(obG@&M z4UPkIy{}3Qt_J3MUoAFxGcects@&i$z+CUEr5&A#!9G^i`{6WB-wJGV+6T5d9Rs#G z9S62KT@7q=dNZ)i=`Fy*>FpEE-b*HRcoClq+{enK4rjmkG3;ZV9rOb><~mm`H#R4y zxZYPQ3?`?z-dC#(Ca1XGS8EI=r?}o%HyTV%alNnB8B9)by|30AOippVuWm}`)BvUl zuVM2vUIVA`8aR#Dz-hb&PUAIj8n1zc*EXjzrGw3>Oz9w;itA_d)7;0hIK_3Yy4l#A zoZ@<4-C{5~#r3|r)nIap>wOh7n4IEzUsW4SPI0}jwirxKalNm$8ca@cy{~Rd=TsW6 zVe>Rz1E=vCIE~l9X}ktb<27&^uYrZvHm5SBgUzW-=^&i?Nc&h8r?}2lw;P+2Q(W(> zI}9eLxZYP^FqoX;dSBgTFgeBbzPj6Ba*FGHb+5tX6xaLeK7+|AuJ_gb>6}XAHEf>7 zYv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&6}XAHEf>7 zYv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&#%o~Vwauwa>0omzQ##;xQ2IAiwybjcy&4_0N!<#W<2``G3jJOk zaLu<>4P?QJH`P|i(YG8`+aM?2(Iv7Ia{8A>M!o z%1^!Dl)vg>Q+~m>O!;1O|ESv?vi@j(qatS`W#-Zh&N8 z**5!1-Pu>_&c0H2_LX(9uPlGz;8!9n-+kPx5thI7syB7t2bCJ#huvs&A6sp7zx-~a z`;}k0AdBTM8fwb-xZITQJJ*ySywsE*vEGy)cbh3c^*&Snsz*%uu9IHT?e;H!Q;+qi zt$)^I>^iwhI&NXLs&Dz=Z*npO_YhSJEF%f6- z+8_QhzT8OYyqi)yul)U+nx`yZbJl+j;Fy`TYhSHIxuYnz2j%Y9=?hcb`LN~09v`+` z0s5C1G0eW#lh?;LeQZjc@AdOdKT*yb;G4c4NnGgV`li3A z$IA=)+myJ-%MbdCdSO@7-y?~xUSZJRrUb{w^tUO&@iF~vN^pEke~%g&z&i*Q|p>wexGe>|?ogLnPCdHzIPPel3v?`nTCt|#Mq zptsPUg6k=`&h@VKr{Q`U@(=Q^_ow4}I@0sJ5`PA+XW)9Ux5%G~>zTOD_saZPxSoal zL%b#a99+*qdV#mhpNH#txE|`=;9rgFt8rcEt@Ib-dLi-;^Hyu_oB7g|;QTW4r76Mr zW#-F~1m~BTFHH%~FVoMa1m~COXH$an%k=X|g7eGtvnj#(W%}8a;JBE6HYGSNrk_U= z92e8irUb{u^s_0!aWVaDN^o3EKaV6hF68`YQEzhnv#59LGadhXKC|s}ui7u*vqhck 
z4A+$4440Ani;S!*G18pj!Zm(pwl~|zear(~%RG?g3|AX@$Tz2rJWlA%Z(VckHPs6j zsK!TfzH*W?rDk2#9%m=7#knD#tCDkgJVO&gAIp%=^H2`XP&y0Ec`$r)`eO7lH#*!~ z>t~y^r;Cg{7?79HiP5nhE~hw?nL#B*5=&Rv$6JkMT?Jl)2+YNS``%lRRNIj~IRk2Q3E4Cl3^ZCgKmerlsR3uWsx3uW0h?UBU!O(*F0 z2W@*co?2e_o0Yvo3aU-mC}i^}f9i z7Gvb#ELuoeO``=wP4d4p@{+gw?7RrqjPn&R!!zJmJb^OAwD z;5g?T3rERK`U>918ENAyP%q~CUivBWQghU#?W10}S4*4shRw-Y`U>_7=c_5=dg{h2->#wk?=Pv|ifKXHEbFtc$TSK2hz7&8jS9lNj zeUP_cg{x+G`?ODyv7}F-UWL%>O7F{lU*s!XHP1VseG=zBpI_$U`Nctg%+)&f zJs-9rH`yN24zNAQdVU58KOcpSs4Jgi+F8D-0QMw5>6>^T=hL-sqCTkOY5GOsZ2{UU zywUKKzKHtc412f7@Z9oY^*I2$X&((=#%C94k9-lG?6&ED^jJf`B%W11YzoB0>38P9?~sf0f_kq;y?jPV)eC>ZdKaSJLC|yjZLZ=T6+h3! z|L{3Q>Ze~RME;S;Pya)|!nio}pW=sx<37iT+$S%`gO{}DL~xAUnYKKd~=}QDOYk zDRgrJ-Lg-i8~qgfi1-%W=%<*EZQJ_UdeBd4J3v?ZsiIS~%k;lACiGWqSDu?n+Gv;l zN{?aSuZCberrqx%G9qk@xnBT0{YzMXK+Z^ToPzoTzg39znP0`419H|QK{@!E;lOhq zcNN!FVk7#m5lEl+b*w)iul|OsMnNw8maD=Ml|}z$c+s&NpM!pd?+Lm*7mofKQhh%h z{SEPt!qI=Fd?_5IACoboAL|^+%WC4im&4J2XPLv{=zkEuL4RV^4IlOgrvE$~rSIbB zCHYSOOup0qlBe`zQa}9|{T;_fa|OPLzKnB)S5KvPYdt?H9qHRX z?K;xmjmGzH*M4VHi?yQ3~23tgzo-*#@-2+QBqV@QPM?;kuY!t$lx zH|3YVX3F3ATT}k#|1ssOBc}ZA7n<^SUt-FCahNH;`0yWfyX)RLs;KXl^Dorx+&gqg zgq$uw8h;}&6q0={gk)dYHv3B5`TZevXJ4s1`^vi5SC*gu)GL_x?CYzF<(IzvregUu z|7>)>>A#KcvC#Xv@3)fyJuZx}{DNzoS*A~uNRCsKgGbsf)VGZ1h_;n;{22XmkLIl zpGCln1S8JRV&KKVsn{~1ki*}I&5_tI(1E`Zn=2SP@Hb-f1Vab@Mr^)d=)m8IT`d?o z@Hb)$1Vab@Mr@&A=)m8IT>~s*iVx61b5iUD9W*Be zLkG=C!O%f-QZRJToD>WlG$#c^2hB;r&_Qz&IK_ty>X70?26ZrJv^FBv(54{PP*o6X z$PZ!-#eEZNXtQr(4b}K2*3dRTE!NP@uCo&MT`Bgy#dTHzuM&K#>#PP|Ef~M9Sp&RA zaJB2K1zsz7i|gD7e52s4u2TVAA^0}eSqHohICYL0I@p{(1|4ipAA=4yr;kAgo72aj zgU#t<(81>PG3a1(`WSQ&PN&W>XHbXKIpz%NV9v&G3}Ov!3Stda1+j+wAl6XaH?f8` z{*mDQ) z4#BYJ9l&=0+nlm_O-|XoCZ}v(lT$XY$tj!H8jAbh)Au*~Ce~1mZ(N~+XL169~sJ{=+2?BG^Ks6f7SVP=1P|q36SVP=1P|q98SVP=1P~SJ0 zv4*&3pnlM@Z^0NlbCA~jsL8`vL)^z|4$e9<))4oxnuD{Bj5Wl4tmfdXBV!G5AFDYy z>&RF`+{bDT&N?#I5cjc~gR_o|HN<_aW}J1DeTz)$Ao~`X)Is*K7;C5uW21g#Y{6JV zT=T0J3}&n$uKCqV1~b+W*Zk_o1~b+W*Zk^bgBfdxYku`pgBfdxYkqauV8$BanqR%v 
z!Ux9KAt##yPBPXIIoTX=lCg%!$>xBQj5S0~HV2$!tRZr;Ip8E?4Uv=00Vf%2h@5N= zILTN;hN*xV~xhS;*}U%^@g$cWx!nTtG5gu1I+cl`nkblfw|sS zzcjcAnCpG@&jybJ=6YZKi^1c8x!za5HFyFr*Zb<&c>5dT<5Ca8Jm++T<@#j z8%$1dy|4b$U~-DOz9w;iU;e%YArwHK9MK9Rz`>L10Jo#=DX#a`rRkhX z<27ua#%tg-UIVA`8aR#Dz-hb&PUAJO@Y?27rgX45l_?#BQy*y`%i`2-+PJT=IXT6B zEY;s&a*F#{YM{a76!)>zAcM&%?qjLJ29s0V$5KNKCa1WMrG^?zPH`Vg4NK=#8n0pV zG+qOz@ftXd*T8AK22SHOa2l_Hh1WKxGNpsfsZ8l0XM08d3cm-g9kIE)oOO$4S5-Cf zH^L(x>h5&Xzv-^$@A~)~EMok2`{`_xeh}}>^)GX-eRXt?iHP~Mt?_k~+dE=Wcju~Y zjm(3*&q_QL=4DKj(YI{MVqR%i@!SaV7a>2sH>%@r0~zy&zb#~&S@>-$^X~1tx%=EK zlMlafJ}~i)l>LLltS>vLi}lXOZ&h_)`Y!7J z7=AmHN(g)9a_6$mxSq!RbEc9D^OJhH3f?gSl777$?H#x~T!5O`Zd% zifJR9X1gg)qYOB`moaIC$90%%n#Z^YKJ2q&;FOsBwUoHGzjaI;;dDLbrslL&ztqFN zu-@#TF2>doKG$JxYCfN$eu=pwoUVtin$ss~rzmKLx}uM@Y)AN9k9IVl+h|Ale2`;u zvUg_3RrQnLHu*u_`F`BL^j=(TchtrAc$53mAN!r&4tR4d;)q;}m?GChUJvy^k?;vLfYQuQI!;9*V?e#wPw0JuUc&von8%v?LA$nYwhqGmTIi{{ ze~Nb78V{KZwP;t*g*Muixll*H1q9=n-6ILk4><>DINOxq zJmGKpZGPDOJXOE!ew<2QGv7|s&(_D*gP(<;20c=rbIZXO$HXG=!37^&Snj=oxD+~V zf02=OaomgJ-VNT*v~M`L{Gsk8=H7wLxVIVnTH(Fw*WkJa`Br+br+h=*a^oA=4t~?K z*Z7a71br0SLE1jH16gZ$+mxVx5}!~H9mOXYE+0wIM~P3UgU`@D!PK$eaJnf$UuF8# zl%TH?pHL6KrhUSP)JJ?l9ef>q0s9Y~_uD$zZHq6chko=0AGZJE1M1P9_5p3QCqCdH z{XKlZHrQetY_ZCF)8CHk?YLg;y`Ay_><4|o4&2)T`>pYQ;qS!tPUKtb{YvN4pDnBp z`R>HMJ8|zu@7Mk=T<^klh4&l%nWjIhSRdy0J-Bxd>RaditG^r9yOFQbYijxIV|~cC z2lw{i-g@uf{k^!}i|Y;Ef3$q|u|CZ02XOBJ)OVBj2koN_7n>6FZSow)F=jj;o~1K< zJd&Wlqc4ycKZcu43HmxSHcbg~-Hc6Bg4{M^aU?-5o3Ut0kjtj;O$m;r>3dUxV`=(& zB*C$xkFoh?>uKxs*VcnR=F_0Ztd3u4r2V3N2H_b{XE3)S9EqKjgLH{C;B+*x3V1g3 z+P`yYRAMptd}{*;P` z!ZYV3-*=<8cz@RMP^eGq*hzDqaHHfr&-L;?Vkbp)>?G8SIbJsh_43Rqxlg^2c7S?8 z*3ZQ~o;&4S;2B@`3$c@;I(CxDqhlvs{Y^K@Ibp{`nTN7GtIGMocqr^sU1(NR$4&~` z=9y5@uXWqDUOeZk#~Qlpcql>Jwtn`z=Xt(ebYe^twk@#;oDbPH&-&_dg`IWmq?5F5 zx5rp0y1n46@AVjmlILAj<-O|{<9g8#Ty?8=RL3X~dr91rvX|Vb?=|}+$XE6PUPkF1 z*Rf6vA7({)zPA+jmi*XNanHf8nfkuC46z5QJs0oQZ5bn;>s^F=%YN#r&7O*pCpX~i z`4%rL#xuRC`f#RqG4kE;nyYHO_IOwCw#NCuJX=iPMIR*NPXELB0^~4#8U4@I$V=YR 
ze=Wc@`AL4$t|f>wKt3--I{Q!Fa-0_7EIRpm4bnLl92fc*n=iHwcAFnpd-NlpWP9`# z1Hi$P#ZD^Vya-|dI63Bdg|R7C;vD%_@61@XN!wp!WL+7?ZRK08+UA`dV@v?f$yDql zlSju+S`A*@=6yaUan{OFZuKu+wcYCwW1KbW(>iui8P4*PoAedDkJw4ZSD;?_gT3@q zwlU!liUe8oxHmiS3_ zyYv~lUHA+;e$raZ+1tI2F^QkF1mm~%UtG1r>l9;*0I^}~_(>Ju&K=$bF*|K?)0MCugG{x{G<)g>n^WbY&r66_%B!O z^0KvGFp0OK{Z{z=U)jeKLjPaACKKc~;NAXkiDYWwq;30h#{iXOO z+HWQJ#W_L$D06_^Sq<)x>-3Gnan`dI^>F;?3uWBsYiz#R?fk)ulu*~!@slcHPx6!g ziT4pdN&6@2gF2q3Zxr5EqHi1i+wheBi25Ub(#1M{(udTCaY?kFhCkEslY;ih7tzUX zn?6X7HT2W*lRjj7j7P%ufq{+;H?uUZH-L*a6P)GioPn zPhVne{fd4*VcU1Uhvyi`UB}YnC&`%5kFd^rpd+7QcEj(Gi+h54_o7}tr=;q|a|-Ky z0QIhco}1f+b^IiLriuUIvx?MDzp?@OHz7a$kL;V!e~KU4i2EEPa-VZ@GkD4ItpUf# zoo&Cva}MP8|HLypR=5cA&|xH&V{HD}dfIw@Qu@&Ee46y3 zFPaX1>*t}F=6i>=7`vLY!s75z_fRL5aDa9zO{-rf;I|cKjsTl)gsd zC-FYwCz;r8ram1%iJya zg8ch!{TM$<^q`;8c7U!re$px0ZEH;Euh_0UH*xI#QoHn5dJF@9wGQL4RW9xSoeUkB$qWBG16lWRv9%RN(x)}Ik!H8Yd4Y-?N z#4f^@0EfE^M(m<&;B3K&UDN}(hhW4m>IvLaFk%<=0`3KzikA}#>37_Bl6W@I;e6yd zUodpI0QdsI(BVSh3k5@m&cK}oLx+oiFA@wLx&U_(3>~@xcLkPnMvPt532B+oLCzUv zQU^JA#`sA&7>69OSFezMANVDL5xb~2?)Mgq*hQBDUn&@}i~0cf5scVHeS!N5M(m<~ z!2JXxc2R%e{=g|dKnKl9u@`jEoD>WlG$#c^2hB;r&_Q!jFm%wI6bv0SCj~!N1aX(0K$h-?YPjJ}03w*HP zhnDYLNm zi+>V73H>O5{R+h9u;)xN*W?s_G39%7$%$dV4t~jvVXt5NDHA^_E0z{N34UrKY&=nH4tq`l zo+KFdoD4i!Fzk6b@a2MG&nduD1jC+Ffu{h;_VkUl)8#D2f zRLsOr%8I4MPXec=!^YFa=CJ1#z*h){J!b&V5Da@>34Ens*mEZEOu?|{RlrvXhCOEi z&k_uK&IXiQX|YhRyE?;aL+)EGI%pE_YBl!25$l8o`D)` zFykk2&p?ecnDLXiXP_n+%=k&%Gf^37CUuZ~EXGgD!Puy&W~>-LiEDo48O-=eT=T0d3}*Z!uKCrK1~YyV*Zk@#gBd@G zYkoD`V8&14nqSQ|nDLXi=2!Du_`tY9y zRN<_&&FNc#ZBF~ZHm762HmBpjHm9qBZBB0nwmH28SU4?bvoooKoXyUp4(7c7N8HB> z&ij*7T<5Cg#^&S{*ZXRP!Q>Rz`)ZZJwUGwU~-DMn!H zDX#a`-3F6WT<@!U4JN0!-dFb-OippVukKIhR2r{g^E6%qr|}v%jn}|wyarC=HERz`|3f1$tkY)Rl;C$ zitBy#6@$qsuJ_ef4JN0!-d7JBOippVuO3O~R2r{g^E6%qr|}v%jn}|wyarC=HE_-bBP}}n4IE1mU_Zqa*F#{>gxuRQ{2Z= zPZ>;3aUV;4!(eiX`&jCm29s0V$5P))=TsW6Ve>Rz1E=vCIE~l9X}ktb<27&^uYrZv zHm5SBgUzW-=^$~FMCzCb&*1EGa5grY9mG$1=ArIRnTgrK-$e2^Sj0MBheh 
zvwWcDtp6NPV#?H0BJFAFBjx3+eeI*cS^HDe?*#Q_2X(PtIcLB3(IDo=N$Qt)FLK7d z7W(OU8>eWeC}@YeqM!9_N6y#Rq8)v{{v_?3G=37=((#jWcsB0A2F6cX4?9%a@sm*J z$>Jwf{@%n-O6AAzay17~=gHzHF+YE6DEU+Ilc+P!(6^4y#QZ13PeR>o#!o_hrxia5 zb@R8@C&W+s(TSWs_#<$7<4NKtAx)oQF!$2qC;cekv&qwT{3NuS;xx*@*6Hz+zpr;d=8bNN zBf&cH+wy+6*AI1d_r~}Ga6JI|vb~~~7)q=U`EqeD7x#L2<6B}Vy@7i>jX%!A{XE?7 z=}q*RrhT%xcQ7CK^3g^wZ?cJ@^af%mnLN)H;9ddB<#<#4LR=T(`VwziDo)DYH{dfE z%MSVJi#s7tC&W&=0P+IHP(nI=Z;82I-x=whk=_Ne%ga^_CFt4?bCPwLac)X<^7^FW zrR>EV(eYADeNP)3G$qdW`uS$u(WW;b6)&Y0^H%fy6nZ&BBLu!(pS|-gJK?u1CVImwGc&@l*EF zHz6*{Xxtl(e0{u`IzEZv?2*KUUT#YaC5}1r8Xh+#I(vD(;cZjmA}_xsh7#&z3?;*3 z*cP$mPKcqzx{$~4`ADLxR~YcQDZ%-{voSh`66;FwxhcWxyu%2>1GVb4(1S-yw#Qxp!bZ?u`e(`g`;IiMXDKd;`3z zQ@)|@cd#YqGuy%MdiEOs(UhQ%VmnCN$95oV4R4zg^iQc6O3+b!g5mO!1bvh|-_^lq zXrEx}*l#%9l%TINeQHY3SIKi-J^Y&X2_I4)@db78bvlMp(4MW6-M09Gdg!NPD1FHG z#0S))J?#V9Xit2=LHc|6fXT4MWY}V$x6q%0>nXU-^{!3%0QQ4EU>fdCgZ&11*Zb3P zJstV-ypom}N~{n0X5iio+#BpI@@L|DCa&|nvX&T1tPgX07VgbLeM7t@{v2G-LB0ZS zS<7c1>qEYIxHk{?hI%*nSL6C>To-yPTR!_(A9%VD_ZFhQVcu%(qYM|D67+5K7n;Yc zGl-#N_;@5ie@9;+F_a89n-cVOW^9@g94H^eaxppkC5|~#_QgyZFJ*Vjc4LIlofv_ggq|j@|uQ( zv#r66t3Lsqt!0NmW;eTa$MMIQWbsCx zCzY5>c8n!EmXaMq=~S_Lc-H-7v3X7vi|16acUs5V;h9c}e`Loyvf~=@thvN8`Xpo4 zkguO+{mA1_vwoU~@K1IuA3JsrxhXMv?AScyqr`o&gWE>T8^RuEWdHPw=RX9uTNL&~@4h*?- zvUo35NIzM;5yoOTS$vVd6i8t>c1_ml9{jjw?euoGf0;rztiDx&2wz zpM3u;>o0SlbvzRKSvQDJlNH3bIa!>MwqjYd6}#du#j0p4Hbv)1URIOS2YV*r=)Xg% z!{O+E5Wf+Q{(zF6>??;1pHphdwHxkdpju-N2kF&tJ^S9bJ&a1!GzHuGL z*f#sH#;3I7{$v|`fV?fjHTS(KxyM~%a4jY9e%jv&@=!8Qc94gXc{q+mK_1GJ)uZ*n zcP7xMp5Nox-%auR9N=fQKDg$7X6x@@_+1R@eOl`i@ z`EaieF}Rqze{=9s|;x!;`P7_L!w zUF$vJ5~QOZ!?7TbW6}<=Jj&ElGLP0L$Ws*LDGBmWGEW-E-Uv8`y)dyM_wY3*a8GQ= z{dUbU*jjAJy8nB=w)tnxVP4iB2H6ul&eN~ z75>Gz?l#s{BfU!OhxdIkdy*R+2G6;|jgIlE z{A{G>c&-}j`F;;v_rB6qMPA(Jvq~St)Enn*_Io0|&umwX_iA+g{qPJk!Q1BdLVCaX zuA1m=*ZXFC2Iw@(xJk*VH@9qMmcOK#@?%iF8 z>n_7w#l1Q{W3!*!tLuz(_LF;a7vY-yWdGS#Hsa{9|6P#IezO0pzbBqU*#E9bXP?=B z`q59bew<&QX8q)umfRWeIX61V+u`S6d$9?&6iy8<#O*%{}QC<7Q1SSw@dRS 
z4}6*G-Q)L0dfsAJP4jkZ4(6jC&)ef)iuC-YuA1)c)%6sho-4cu{60u8SmCM}-acJV zA?mr(`?B8`>4j@tHPfrr^$bTnS9uTl{g6I_k<@nKuz(H85Th`L$VL&N6gZs=sBgj89$Uh7DIj1?!x1`S>UAQdUMCuSFxz0W7lCWNPJ~1~srPBhyEFA(M3Bb`^02&KCmL+li9w#C zAP>tk4<+h-I&GaeB3LJmXjzBxJ*Rs8hkLRP<2y?AI%^cReAlDs$M-j~ zEZ@s0`tjX}x;~_des-GZXQyeK;vJBBF9K3?%-}k{pAy(`4A&U@y{+FV$#+YVKiZ~2 z9>=7m-fzizm`8Ig$aAWCv_5zbpXL~(=9s|;`Tjs)!!cY#pSsrX_$xs=xuNw5@;D|f z_5MND!#rA_AkV4lN#odpfMW|iv=qmd2OL}8q95N2r#XRpq95Pora6Xwh<mq*mb~naWHK4*p6rg% zb&g-wz5C9%Q;Tw6!QVsp`zrqShbp!{{9Ys-)I<^9H`sQP3G#hfuXB%a^tywvG2^! zz70wa(&cpDb8}7K2MyZE^x(d(FE5#=^X1j%?W{@l{i#&n^OO1detsk$eLsM{n%@~I zaH(^Tyet>zBs$}qgrk7*JsdgKIh^g^32t@hk)3rE_dDSJjyr+h>5=u1J0Nl2aemO) z{vY`}kqZtdeuPnJzpXLxxzD$6hxb#rd)aZ?;dioSXWO_C`)cRC+Ii?|l@-PlGx8<6 zI!@;+v;59;+vD#{KZN_?=i4haw0(!j&@(&mSzSFB?)+=Y?}t18hVn<@&i_jJQn)j} zGgL2!I|ttxcIJ16>Wy$`erKqD9`5`rJRAS&`LOfPkiY&x*!eZ&Z(ayH{{s21KMp&; zfo%F|*!eBwzrPlC{uS~+-U>Va2Kk3y^2{gq9WFS0Kl(Rm9NuMpEO|x}dMDApB;Jo) z)@0l%hq>?K?`Z5j+<*W1rgqJjynF7kOOBrV zI&^J*D%|-^%5R1{zfJi}xbrV4|DJt-491{27=z|u44N?puS3_@zY%ucfPC{?VdqWA zx4#p1-h%u^W7v5cl4J05NRGiTAUOuVgyb0P(fdHAj}Po=yN{1O@Y(C*@A14UcKom3 zx829&e|`4)X#bY*2)~zfs@{6=SB{fS`5VW1=ZHzW?{$-Q-`kFJ^qmval1EQW8-x1Z zyVvy-=WK7x8^ikrukX`8dbEDOSY+NM8O3=yC%EQ&C8-x4!||8+-@pG)#~=$I5B%5k zc_Ys<0`aTNzepe}&oLKg26#sx-#MOK!^^zjz6#Rk1UP^xF&0-5r#<3C;A1TmdjIpE zb3*tBgg;#1(F^~=q3-TEk#3>>1Wy*tc>eie{a%0m&u6}P-QkLHgI2As9JF@L;^Gx6 zi&vFbEDf#)ty*4?mp5q1s%53cOKvJFUNX4Q$sM$AS!r2$#kxV`Cl4+xo_xiWamAD8 zPb{8TSWrB8&gI1g^NWY%ui98poIkX1^~S-)i%ZuPFRCajuGn~U@w(-!SLCj9rbCoh zjLaT)#l-CC(?Lx{-qhEnQx@Z2h9##cNg#nsw!bRfATp+^}lk z+KM$dlrOH##m(#?BNh)|w76jCkkX+ALq-fMA3AvOlCsiaix=fV4lW%sbl8&Ng$2X& z`_CLVXTp?86GxT}9=3RBX=z^hkn+;}5sS;p3-a=p3?05Ce^~jD5sM1*h7^=9SyGxm ztS<-WRR6Sn-1cw>dO%azh&qPbIS4?N_76`P&bbcQ(7_}Z$=%%5Ybwi~+_IZjue*7b zle={F`rKuu>y|me|4gr}aB?fwl$BPNI=PkQ8!L70(pAeBBe{HK#hT)^rDbKyS1(0+ z`J(kpi&rjR%^FkJ#kvNJ;HvV~D2#S0OBX>dL%O*#(m6dN7JHLs&6qU3c=FsC6F!D2 za#xf&#S<6I7GA3fxWE&dPZxC|?q!k*CZj<0t&xgftnu iAgdPH7nl~X5KH?f?Mh+|E>2edKv4c($r#Ai<^LZb`pOvq diff 
--git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co new file mode 100755 index 0000000000000000000000000000000000000000..dfcc8fc87e76ab5ee4da51c0370306535c5cf2be GIT binary patch literal 56528 zcmeHw3wTw<)%MOlJ0XG>MAQfe2yhZmF@zApRX9Mn?ugtGyd;4Hh(N$3AYk0*BnCuC zEdqwrVoz$JjTUYC6GY) zqM6q(lTV_DRAHLND>+8%I#pCOxlpm1c9lHyK?TFm@m?8kC>nfG@|r?P@yC4BHF!kw z!S|x+8p#^L&MKhtnP5*RpU$t_H~aS`d2HY5)~CA3n1y_-?@a$#m*`I_%)b`2QYogX z?)y#sHNXcDN zvNm^RcJ7iT`NhSL73JsVW#3s;P?C?7yu2dBWf!l>U6TLRWh+-N&RzMnl+{a@7U!23 z`%;PvHs;5FJ!M_)%C-5~D+&tp9+x87i`On)nqSni=to7-s|#0dICaVKA6v;?Kemz+ z&bZ{$?&!pJi>}aPGEjz1?1m2+0P^3E#P^0Ugd@~m>L zIjdYHXO(OHndCZKvtBAIImHFzsgz|!tJki{F3w-JD!;HKduhS?{Jd;lWq!gX!neic z3)iiPK<>(dWrguio)ph4FIOB)S(Up!d+EyDlI%NI7p+ibBk8b(i&tUps{A*`PnbL; zBYW}~$lu z?Mbs|Ov=ihJa5Lt@20FNDp;3Wk{?m3S!?6#DaFgy6lE`2y|%FA_OGQBZ&^~yIg zKXRohEAkGetSu~9y1HnUXj^u3<6^#*vbH$CI6H3xT3E0oySOBG$%^m8_(wEB^S_aD zXYRWErA4dJ%j~Sgr&5}`Xid>-t^`&WWv^PDmml-ET04}W=krUR2DfZU;dLPVVNr42 zb<38nACfjKRWCBuLgeeXjq@FJI|Fm_Nqx?<562a?cwdxde=zRIbhcVl`}3sC2(tx3(sJ z>=MHmXIyyb(52`LtG5ujz#puO3CC;;hrz}h|6Fev?*|7N|11S93HRN3l~`>D8w_J} zL&z{P)*6r07l-=``Qq@von5iOzM}l129NROhJME0`u<4w8DFWljCbl|jo;Mwhz~x+ zehnJGs=uP_qJ}FWHy&lZZ{!}$LC=%snv%v@l6H!dG(KL^3pz_`UMQ*OVo5u9m9$GY zj-b@BpRq~Q6N`E-3j2Zqr1T7$-#?~zgm`D zlYC!tFnKHGi7h*9i+su3eHFgx(Vo;MP4ID@?lc)xQ&vS@ESdj7=2=3I=US3hGW(67QM*SNqiz8{o)ccg_b*@Af^b$E70 zus$5#1%Aj+QN58lwY0R`g_aZSwzte&U^$cvTUOT-;lPdNC6?nc1|9WTChXI7N6k*A zVRo{nn+W$8af6PUNrursu5(tI6+gXO=c>|fo#SS(4D$lZjP2CfEVJT7JTz=D!Fxbs zV;@1e!G?jC1A~_XPjPXk zHRnb+e|V4pkaHwS&Jp0(=x!KE`T4=*;lqPv#zB<#6n;-*etlQWuYAEjgShYESC3x< zeuwaT7QgS~_k;TD!`BO(jt!3$I0Y{-JvKPDa$I}$yEtSPnIPlf4eMU%dMMk z-EwP1aMYndaMTa+djY?f@%t%$zrgQx{N4@-y+_p{d;q_L_&tMPi#*UP*p7^7Z#y!& z9h#cT9#$A}X%A!0n#v}65ca?xKHO=PP5mB(jeA(xHTF@2$2@u#eMSC3rps&2HVZbv zd=~L9jS5UONv}DnFGby##K8W>rQH)3mrA>~N9j<5Iew2}rBTN8K*q9C_r#8{_n@9V 
ztOxV%-qFF)FXKHkwVd=xlX3w8pHaj?b>*?qoW4zSRTwl{$SfMh(ja-RqaMHb>I^^HdoR8b&o`K=)7vw1zUEdmo!8^*fbJ z__6k7j4))n2|el(B|YGm^k9yp&j@`GMqRa(0o_j-&^pS19uWGpBO~-_N5(LI)IgoQ z#^RT2E;W0^pf7v~S0h{-!}SW{W6U0p)=HUbgsn$qxHhK98*54VnwTBl{jpNMHfFZ> z@%^ey$jp9R#hvsF0=AD^S*QRauoo6Dd7p{{qfcftJTa!xF0jt`$`j;H>Epl6-Z zKONy(r9azJr}U>xtA@UHnH;% z=TSQ{LZ5bI^gJ2^oalL!iLjnW#F(B(l+p8u;o6vY1r8}+6Vt61=aIlLFi9MWG9g18 zGG5OkQI5i)h*vn&qR?2`MQ|dLVwC1L|dr8C=)W&pYeJgiE@X+;hkq=tf4CRjI-J+Pf%0Cr`FhlR&(&t0nbCPN(lg5DESOU^Cpc&8 z>0OuQ{8@CX)lg?Wg6o{*6Bg|)tH7FOi)D4|Tb8sfX;X3q?vRd}H}E}X(ovsj8eub@ z>x?khGaoPw_fX^g3@07^0CJPYZ*KSxW#9Ec_I<9Y!WEO0EYi1BoSgo`mgcfvdZD?j zS6(_<)_8?BYy2;QolV>$#~iUn?!fwC{4dJD_aZ-9^5x)rlfPc__kiz1ew^ehz$cO) zFZoU2`;wn1`OV<_k)I^_d%^c7KV^q8NnDel!*LI8#bS;$T`gtGz+X=OTFIA#zk>XA zlD`N1mE;FYz5;v#`5}_u1pX@WLnXf%d=K)&B!4gXp5%w`Ft(ykML!^}qjY#>Ysc%L z@vt4^<3rzA(_~*g06vMlE%_bblgZy8`JLbgkiSv#yTA`5KTGltg1?&l9LZOKzlQug z$?pb#E%^n_c-W4yASRm>CLaJ#Og1S@?f_3rHYrT*1W!yhDNODHPfRu`Og;#nm~2v* ztO8F=HYrT*22V^j0h1aJ9jSxHLr3ax3GBlm^L^ZRA9u!M?H)FpoC$)Sa3%_B1TqCR z1Cs=`0+R)e3rqnuE|Hin?Sdyj=v!-{9PhiqUq}9C$vfav$logY0QkY=vn3w{pGrPg z@@3$MkY6JCa`0*7^Cf=|_@U&N?JzQxeJTCW6NTx!!Mic-fOlg$0N#!1Ab2;X%fP!a zT@K!j>3hIyOmE3l{eU?4y!s?{cp29P+PIGFhYMfc+<@ztgG(;3bljOGXp=Kr&=bxa zL5;v%LCwHCL9M`iLE{1oK#dEVF_q3SE|6nRObsKyLh{5^2KiNzC#HszUoClJY6ST^ zBu`9@Bws9fVrmrmwUQ^MMw7oY8dFiY#`s3z8hjM4!AId5d=#$1N8uWL6t2N*T)Q#V zQ98IW)loWVOa%o-jyqTo3OmIICt>r1bB~}#ph8eHut`uWuvyT!z`dYuOpW0fZ;)e7 zOkGd@F3A&9W69qwd17iD`GDk!DL?r#$rDrK$=@S+Vrl~UO_C?3CX&B58dFiY#+XOp z8hjM4!AId5d=#$1N8uWL6t2N*T)Q#VQ98IW)loWVOyRMkuu~5R+T`pI^n|lhP$RHQ zP&0sgO~_e+Dna7{yFuNU%H$Z|C&!$annZq^B_$XY1kHR(hC|rZrxOQWzqjYd%s-tw!nEI)}$Z_Xqf;Ks?3VOo%xu8bi7lN9B zBZ69i*946V90he_YA(n4Q90(s)I9QEmpm~wpZw#JC#DvV|Ayp=shh}uQ}V>rLh|2| zJTbM1{F9O=rfw$xooGx&;TmHeg=_FpxCS4EYw%IH1|NlM@KLx1uW{|hR7dIH##Be? 
zpr2onmKxW*<{iD!x*ODIKZKX4*St39=BKR+(DW5=TAM(}+;!aA44S#EM{Ff%))z;` zehzf**B8Yyf1mO1#xnnaS^tV<{=vD&W0`;W9o^!Xe_YuWam+vEfgW+pf8&?a;+TKI zH#6dxzwacO|LO%Y|B!r{f8?Dq|M*QZ|FqpQ|Ezs7|HALc{Qc*>E9xD(_INDooA}@r zqMkWVrp57HObOz;U%SX!3(B^x17%xTH`_|x*;eY#wo-Stm1VK5%zx#OS7VvK&-mA3 znSa2nH$~eA=Stm&-ywA$S0;6z@_^L+#xGwH$NX0flll8jk@>HlC-V5uBI&L<t54tDuzT>}>q_gvdr|@?O2LB~cpnfn z)1AjKZ-b`KnTK&F4Flt(e8&;;j0v#)A+)21osNA^GHe+GUq+uK^zdXjJ%*%q<=o$m zJs)iIZpe1Cea?p}+yq(lLqa!CveT_^VpqWK2RTcuRX#^MmQPaUiK(wAi@I??ro28u0i-3$o8?vI)e}%g!n|;@1!7{ zf^c7Zf|H7HD#HEjOeYQDG|2b2Cp+l~rz75HPjxa7&OkWHp6-l5cm%@9c9t^=;Zcwu zV9#*IAUp=~1MQg(`>{Lt?uQ#gSAbptngE&r+5@!5;bS4$zQ#~@d!QrR*ciIpzQ&R5 zL_T|vBini`bful*$o8TfJ5{x}F?5xkrrL{gF|M+`$3i{r4AtJo5c@~Aw=u;2k?m~^ zv43QHkA>Jjvb~KV_7BJQeAb_MJD>H}b6_m6G8UM++MezB5%wc|jXl?yfbaz1?pk}k zlZkL9;s@C`Ig=5djPP~#B4;YXQxQ(FZ*it0JRS0b?c1CzgtHKzYUemJ5T1eX5PPvR z6XBT%r`dVVY=mb+eyF|FnTzmT#HZWKo%smQM|hZhyK@u5HzAy1uXGk6ya@8c?LvWl zIbRw>oL_RjG=@08=)V2#t{2Ow)0qs{UY1h7-GN3b~c9CFS4DDA@+-G=dlp`g_u7d^(NNON4@pC>r?K- z@}1!xkTBkNf5g38V(!6yTdnN}nX2}KNV?ZAX>E?Aaz6;dc=lDDDCr)`fUltpsN4_I zjts}tXh+6~dE+~aZeCQj@FuHaRnITG_M54_E9#wlJoY=?f;|Y_qoMnp`vth~DG%}7 zTVY_|ld(wdEx}vKm%Z1U-Vyd1CsD>7@Jo8oYpgoz9c@}8?INcZ`MLjrdpr#6mCBJa z&-8*!?{TIz$}VyA{sIeSCQdTFqwRIhqsT{n#C{OyHV=ENjw8MMb*7g#$-+EjI`&%i znuk3W)C>Jq+a2Y0zt*&9tEd;^_ERs=y33Jv#bDE-?TSTywhQ~Sdd^&&xB__-hME@b+kWH~dqA+K<1y^x&~>}@YgxBjug5WNqOYO*W7zwl>vroW z_GG}Zg#HI^on|91>*jcbvFGYc>z;#kp6FNR$3C;x>UP(69P*+*jvw}-^+rD=PBg7C z_Ijre!u_V0*7f#AtpA$F%fSAWIP|0M2Gcv%E_D(i=d(>~ob5RCBY2qI%fQ~1zDP^H z(X{+_&>^M<#+u&ocDb`al~;p4GO(|uALItkF|7%9g+rWNbAjodXm55-xXZ&Dw?E{r zSzub3_7-uCxe|Pj;ikoF5!br9-{+xSCr}PC%xe?zp8#3nme;4N5GFo}Z;or?dmbi2<1;I${>*%#~=UL)LiaqHl&^S@Oe?Z78lAMX`b0)yNS)zVg!a87Dl zu~OPWVqwrlrgxIP)k&0bd;OBu`p|EKZZ@sScBQZtoRi#7C2WP1*-u=g02fp2`y;j@ z3Heg8O>3&XUDyihBlcb?Tfu(z{m0`aHfbxEj(v^NR-jzW^=jHF;!&HGposqiTdeK&h zzJ~6?R;aq&`V9|qPvdj-ZhdGcSTDyy*oiZ&mv%z*skRfX)T{SA=ek_Ct_?nyAUC4ev}W6ToiOZ=E{`^a_EFm@+7yoSHNYWl744l>*;l 
zz2e9<3CD62#&VARRVPu#9q>zf@LKfQs5?z-u3hVJO;drqdQFofWuD;}kHHwvvmbK? zL4M3#rZwN*FYFTMKi4qxu!b=pkNH~5wx?mNh*8#uxc#gTw2s#yjnhXlM%0ySnNBYI zl#cNvMrogzj{UB}K2aZ(ae%f_<2D_2W!xcgN_#~8u_w0oV_3VKsXqHLZo*E(o^k!6 z>my#YPVTyCgG66LzcAJ-XIkHEj1}wCYnJxvqwNv>#p?*xC}W`4*fP_*z<$EH4t+8< zU|KiXb+|68a(TUgof!+c3HO=ah4%kADUkEuV_J*s1H!)O{-C=%hVrKFG%em&(FSm>M4RM?JoZ#n^IAu{#cLVo z3FG0qz9&bfi*?O$HP$CE#ePlHQhY#2WXp^j=k)?d7s5{P~P6lJjVmP z4swpswi3g{({bR3_K9u5o_A@V+R)8Vx+S)u8|@U^i2d@tX{RX1y50J@^`M;+;{aW0 zr~GZy%lK3E3GEf@)oZ7mcIu_Q5`C!b)ll@u^xY=BjxolV`|04bzKDAe&>122lPX`? ztqjD^{0iklMk0RxH*gOEdegT|Yc%Mh@0gY+){3M3 zlDO#Bn`@zW@I1c9vmWnnL9ORJ-rtdb(c}FC>0ytTc1-sf?N~x=YFs1Je&+H1nR$+Q zyniMC2JMNfY}l~BG5(hxFKrjEm&7~mGx1LQOPtb<>GEmEXz$oJ0xPgZv}K$t>>th* z_75@6{$M|{KR$f#y-u&Au5RD-c;5tVdeY;4oBUHA@2^O|=ka!ZuWuaN+D)~!Nwu|` zYHK&Nm3FKpR@>6MrJkRZjDWy1{_D-y zNuYdJHW`%f+Oj|^dybB~0d&{3H^(u5_OtI=%)jhs|FW2W^>2<_%zx)Ux{3UOm@7s8 zdoS%7%lx13J2aO0cMlmJ%lx^|$@~RJW&S&UFY|Bsx6EG_EA!uXrOf|8lFa{w;WGb{ zBkziOi{CqLQQvzmzf#n*bJ);WVmcjhd>20qlx@ubWm{P{+e+Q}o{qY+t<;@uWm#-1 z^Dp@3tC;s}>uVPCFZ7W1$Ejnw_le@NW}F&~Py-+Sq0qV1pWdu1&1FMLww&wXCz zFE}Fe-|-9|khxyOQ-TPwj~UpH03& z^7O09A^)u8=~tCY{s)q$U)5sr&q!M#3_jNU?eO>gc;=ZmXwXchQRovIrgneDwkA}Wo z9i@Zzqv=Q;+A}I)30h7`Bx-QzpCZreA^BfQejNDK@7WxKJ(O-tUk~1m>9OG5m>vh-jcGr4H>Ssf zcVl`2csHgeg4dYVdp0{#2fb&rBXyAbfY&R(s*TF8s#N(^Im)jp=t#e+a=cTbzd{9` zJ(F(6^JM%fzbay?gk$_CIp)OFTJnF9JTbM7{QpUwn7Whv-z85>ttbBv$rDo>$p1gd z6H^<>zbAQO>MrvCj>c3Jt}*6OxCS4EYw%IH1|NlM@KLx1ABAi18rN=2b(9WnOm&nF z8dE`mk&Vi)s??EwRgNS5s)A01NH2G!UsZ)8{i-%QZoevGs+42=fgE#U>TdEMN}iZ< z$p1(3#8iNMSn|YFkbKNeyh|Sc3u3B_yhrlHR5|%r$rDreknglJ;{VC{8HH=`QMd*l zg=_FpxCS4EYw%IH1|NlM@EX@{Om&nFZcKHQ4jNO^uWF<7fWS_v@~d)`UsceNepTfT z-X91%CH<;4J8r)!Vyc2;e1RNuVrmolE|MpvHj}?d^2F4=O~svPB46?A?m(#sv`S5@IizpBlS+pmh4s^l17CC8kY`W*S5k|(C_ zC*Mo*#MI}>_mMm?wVixl$rDo#knb;fVrmEZB*_y~JIN1-##9upG3HUY1|NlM@KLx1 zABAi1QMd*lg=_E{*KSO8ln!o8b(9VoQ_`<$qw=dNRen{D@~a9u(yyxAIil>8^sCzJ zxc#b#sa+i7tL2yzQxB5AR`SGD75VEVPfYD5KUng_R5ke_k|(CVKz^v?iK&Ok50gAG 
z^+oc-qcIhQYm9jmuE9s)8hjM4!AId5d=#$1N8uX0#cV2?fXGl`hSeMYhxT`b-fnf-9z3Fd0b~}`FxiCO?C zb&2mQs_(S%UF9&0LpsLc3A9`5KJN+c@030?>>K*gq_2>EG!c21{*SdG&*!~b{sX0R zHFf@=_42jf%m?tB(S5`E>mg4+8iutmO&Gp3THg~WTl>}YFfMy7bU5Be-^i( zfc`Z)%y(eAY`*W(@qBK}__}E)WBTAUV0xf@m+bb-VVro6EYoV~&!O>LJ5AwP%It5; ze@A1Q^+qs_Jiv4{{ckjmYcbaZj*$jD>~Z`1HniKU5B|TFzqT|m&3NO z+(cCt{fIQ4YcV$kp4%v2`xj|U*Fjf-=~n9Tt9qy_+E~MSG@k2FkHB*~^=LdFL_K;w zx$&MK^p!mUY!e^Uo!=O2>)V22%&D*XrYW(n+tc#G3a9WSLA`_fxpE*=ulSIN7?WdjP2rozYHv1W|k4@`NT;7hf z+fi1wUGJ_b8>QXb?AkyePbT>JZCUkcM|76Q*>q&;4P^fgFdZ2!<3cb zV`x5P=fi)>k9c9jWO|+SLum|2ACzOES$a-FSF!H_c3t*!V~Bgi^c<+h9N`?uk>wqb zW6&7l9x>VPs1tj{G~R14Zw21l(2IRQz1U9ZeUe@p|J9hw0{^OBw{C77H2!O#r@((3 z^|sd^dOp;kUNIlqsaMa3TG}q)em(kqJ;rFM{j#$W;f)yAW%f^;ClprM9s@WpMOrE3 zmfJrS`?DoxkA=9mi`N5zANDz9C5{_I-0LNA+Zf{Bt`8t5>@dol2TV&GV{Eb4OYhI3 zOr$KxNIV}4anF~;b7P3}gSI(RRs_$DAzSpgjyKZd@>YyKO!I`#S+kiUMCu~4F^=TV$ zkoF!nU^B*IGsa?-{id@8;VlRk+HXf}0NX(uuoY=rF@CG|m*A|us zx%-iJKho~7f9q^Vcss&H_V2_sOD{L@dj;H-^}kvh9r__N8p=u@L)? zHpY!Nx1MgDK5ji|W6lLVp1cU>YhXOQT!U~gp6lEY?yvMAUhnxf@cnPtk~;3uyXAYP zcfEbg;Wxz*7y0h^E!cl{%Xdv{gZ-}91KyqGa?c*H&hL#w_L*BDb6bOHZM6Rx@lE00 z-<;=6?_Ku4gl`J#P5;|KQ>i2_vUH+TGs8> zi~B7_Uqg4{o1*G=>*xNqF!vd2o#>y!y7iuL;|%NOo=efM7-!+f)=J&(`skM;>Qj3z zZ$m%i{J^wI?f0E*gcm<=T6f#W#a?keUfK^O@3850>?S7%a(OS|V`cUU;d>(SFx$(0 zmbpk<`if}xccSg*aKc>oA~Zxb02Ykl%VYaznKC-Kd3 z&B1Zer)NS z7iurIfp4y3uGi2Erd4St z;5)4vGdsL3ec4KYna|lh0`z5DjyxrQF|GS;udpq;Pqi=GI_UO!yLX@fa_jzXTHEbJ zVOt~~cX+ig+j{8pfZZ=}JLJ~?KhxS_`vUZ3)8)~w&_-%oMZ3cOz8zSky`s(3_K9P- z5*XvWppDe?fcPl{eu#J4M~!oqvj*j`4`~l|-_hQwr=#ko9U23Ci!}?* zQ-H0hM)#umXLy@!zJk%N`iux+W8O^}2@VH$YbSvXx?tAGEIt ztb}~&ho)6!4-&SCHX!25rp|9*+lw$(#3<_%>kUckcrDU6rN0PuZRyKag7G9qX``6V zvm1oJh%Dm(?W4wR3F=z+4~bLSBSP$9h!oKMK=lUcFxy_e)ykE34#Cl>YFvIIqCB~jMMUM5W zVvWMO@BaX68PM(jiS}jFeL_3JGIu~nu48t=_7ICzs@!Un%e6|RT&z`C?n5YdHS{d+ zo^HK9DSc>n&Lw?li?V=ku_l@+ z-#@HD-&J(+Sa*PKzQki0h!^i5WO}qO8^)D3X)bMmT0bR9`TfKl$5s2X6`{N>mwALQ z8*MC?G~?PoF{(7^L;^%Opo?u!x+;xQFpg58^@HkM*Ffco#(O$+a${q 
zzHGb>a*olq62ruk_GM#Rcz%npPi^SNJ`wLS+R%-5if!cCFSJwa6Ugs%>qlQUtq1Lt z7zgMoeA(Klx4k~0y<)w3?Zm!2RlT%Vq7Rk5Dn@^7z0wo$Wh((++0%n>KCU`?e;@Xf zDqq>Hb%@{I-(%egx?_OK2V1iq{H|*~7S~65jA*|$Aiip_$Jz*5J=9~}1^Uo%PsEo^ z;-Z`I#XTKgHr;2Zy3#k=OuzSa8Kw^~bpgLCHg-fo{ufR(|W%9vU$0$Y^2fQ zXN0MK03Lb~52Gdjah_AA@$lIhVK_e!`(9It$6Pfp!4Dz7MDpd})5zyb{vPl{$uDd6 zJ1WJ#RX3(T$@97NKG}}cLGP38NFC(f>(9;zi=2H2Oh(}vd=#$1N8uX0#fpvyN9mw3bx7H%&&~+@?2IsRE;VpGmAHPO z8Q0Uu@02{})^zd@N}hAeCck@!p)n`{v8deL0K#SDO2BHu-(ceL08x z*P`uI7URAkWoV~vApb?l(@xDG|FGm~r*0%4lKe-}!EL8HN{1+4wx_U%RP9lW__96q zNFO6l`cCkv$ZPepe^-x#(Td*Fz&$$Nmh<(nj)Z_N;(fgchQJ3&# z^U3oj=*zYie;+;|rl$N4A`e>Rlcu0I=f zK23i%FZS2F&+Ll$v(bhD+lfd|#NPaVpe@hviukjkKFTITrnNJ@xR*`%2(V1NTRz-A~qiNiaF6vy?Rd6(tg3Nk3he)c79vLpN;kjJ}P4%=RVI(Vpg8T7V&3epF`FM zSs%}MYjHMP#Gegi(w}W0U2qagg*SXKnjC!gs?Ie~4K z2RVsddB&UGlO-|ReAb(smmPcN9IuJzf*!OL!fyNtc)=M=+#fy`3* zY^pxDPVTz3EvSQj!k_I7>(e%%4)qBe&`y2Y1{|clhYgsFzH059JZ%Hm4%&d}kZbMS zJjw}-usq1kfLv?m=tZu7EDv*gHso46S1)qyV|kF954qOP*^6BJSRQb?2y*Uo_aqkO z*?HOyu#dGro5Z6$Pmi`h`?F=DoJ`>E<2+kW&Y_MytxM^Y8!fgx7IydyU;QMB5ZS{ONw~w3Kr_JrV=Jr`@ z%QuSqid*xAYRmVjEnlaWzD?Xer~S>`{$*}IGVbfuzGI)Hj}Z3@pUe6YtLL(Q;&}&b zklT04?JGrWYM&^#FBI{p{cPO+H7rN_(zyL+SdR9Sar?-yoYs6|SWas`F!Z(KUUTi! 
z$!**TK0Aq6kbj#e^;l1WrDq?BPl z`jrf6Yvl-i@NNY9)bV|d`yCYD+kk(D>m|Np;WJB8l(Roa@{#vL#G|2PNGX%3WJtju z5c((?zmmyOGNhC-a4)K#yVN{Y-bXxR#WQ*RoP_Vd#4|0V#o>7g!+d{5IlhltfbV#T zXJd~lIi%IX-;r^A7esl!vQ0HBfhos#<>P>?R;^bnXAua=2}IcC*BY7`dnU?ug?$fk9d71FKrpu z0Q!vi?nu*T$zP7JK0BW4I(uW#UDjU!9D2efBcTNrOK;&;R9FsrtNMmXi*@d7kY% z5b^p9UzU?`!O3S$vz!qqr{&qw`dNeze(XHQobyGWSI&Oc=aX~pXv`Cv`s{Je9gTV7 z@#8*+nsuBHx)P`7i>?~8#Ew4O`dW;uKFgZeY3O9^Iq@U{i-BX>n``>yN!Rv(k+-BPM z$g_w7d3Luxli7XdF>QhS?B#RqJX@~IKdo)!y!y1ZP288^+-q^42rBLqC11;PxFh$8 z2;&~By5*VL`pj%`U#4UXP~2nfVSePPAq6k*6P1i#$>b;*QpyN@aOQxxPXrbBiIP8< zD0%rT0%4S0+wu$peO`gkN68qVC_8eV0?Pp}?-Sc8C+a?Nq`FTW*?b?yvn9lRBGUAI z7|(?e&myjFz7ONs1VRpJ`aX&Nf?i~B^hBXXap+QIMZYyJ4meCFl1@U?#Y7QHABaauojoYv1BC&m=t-4_@G z6&RCzEzbmqU<_e_F;I-p9{nAEQes1lsgg024D$<&DH+aHzmg$stsJ2bzI!e(1}ZQn z`GfpkJ9vpPgayVxQO^Dx$=8vBm(L=UjG<(hA9?CXDI@ezGJYkKqhv@a6NRyb3S$eK z$B^H^XP@(X`+5xdy?TK$jI|y^etTWWAx)1Vzw^#Ge!pFhp<7Omp*v2Gp*t>uu>ys$ zf@b~r9d3aMq-p*5?QDTDv_tF1??MYXq-p*54Qj^m+tOM;x183`9jEnk$BDHA?(6$q zGN9_>3oR#j@!m3Xf#onhY*}4TgabF4mspO+7cf9869`dLq(0 zcMc|X=~CV=oLm*eK9IgO1FDh-AiX@fA{l8xr0I5>M$aQgpBUp|qstL99D}on44nOD zrJ3aW#u(=4(RSam0ow+I2UHoUF-G4q-zHz!R~77rJ?;a1WeArU?;t#Cw0+3fTkl6- z`bJ%AhY>a|KrQ}g{*bY+KFQ|`9tz5Mtw*^rVo1C7yo-89jI_&)gY`k2pAj^kt`8gE zt%u6NcJoq4 zefWChF^sX{vB(n+gRexj#@OK4%I1Dfs!C2u4qlY})w1N87jpg@e`PQ9VG#Z0gC0rYBq28;JVB&SNSaW&tLroM zWyTL7^BjIZLSH<8dVRruM4ryb(yfOX!;#XTtX)0lpW81wJ-4j@V4_ zBs%x|H#c0Ob&^?va0^^N=SR#2pKKUMsXxhLg{BOzeeh~L~0y6EEgPB?$4 z(_zEt#C^9Hy7>$z0q6Q83@fzaJY(^F=lCNbTnrL!jB^s^#pBo6i9x#O#dyma7T+~? 
zSm&<1FR-5VB>a~2IZwjxNMH0M{DJhaC!vw_XP$&VlOFLT{FU?#PXgahTEFxpyn}W9 zJ3sIkzXAR2^B&{3puc;`WBf1ZA71eozXNUjxySfD=%0^zj6Z<>^(~L_N6^3hn!cNS z#&N}w-DuyW@i?pKKj90)&^wIwg>n9k)#T=roq3bcH)uoGq#4!;Pf9@eHK2`MZ~RxF z>%8~zJ0AD|=^wt>*r_S${Y(Fobo|oSp=;AOJqd4;KIut#oAfD9!mmib$2NefK4?;X z(4_jH34QQ7bbb9>9^(zrH^1XC-UNO7yB^~$&|fupjJHAA2fqYmAN&fGeei2g_Cb}{ z|03J?P*wYFeC(m~uZ@4fx&vuw;qNKPbB@FVZ3)t#y$AD zjC=5H!#Mul$#LQ1C&!INc^~XFozNv)8dAqHJ^l4PF_`D>cM|>~bn7(ecW0<@=2V0s zgJl|iq0fHjXdlFJ;PAJ-v!q=lF%lGwRn)5zlo-zTlz;D?2(M=7b(LVL_$Do=cumG3 z5NpKa8w|MA#Ke4f@kK@q{=ngfK%bZi2zz4sn3u%%7C1A$@W{%qk9j3~=*~Z8{_Jp< z+9N50iGb25F$lNK}=?k)lrmb3^o}D%vWp7}S7a|P%FA9iBBj_EynOYl{J~3difoiTmgi5vQ~3uWaOjY=GULuO)DR%T*aT1sk4YU04< zB_(T$M-3jlte|B1+QlhLR<9a7`^Jf@2CrPXZq;>bidNsAzoaAuiHRd~mn=>nI&^8? z($w_4AtOePT$(p>NZ#U%^by0;()03`WDHA7U$QiH@wGF@&z(4RQs$_1M5JX588UKc z+OXjx(}txF&saJ#clgMWOP1tkQjg-6%g~c0I87a#O*QPAbEnaS@|BNpwGE$0G=jE2< z8Yw0D>q~@m?y7<%h|XVGv^slDZeAYx9P#;!*DlLmSy0FlBjIdO0{U-NejzfWo|4?f zpvw_2Ly{V2HU8Q5q}ekjWo1vEH)G;QFCk?`o{^ooaK`u>rm@RD@*l?oqhXl3Bppz+ zGF~T0s^^=IWnK=M&Zg(Jrc;$5=QZon#p(He3F7`04zJev^}N>9r}FD`onJ*8Smt1U zH`lN2fToE`Q0LeBYuX3t9#?*CcQoCi@@pbZgywKxi6G;2`5M2P`kO^Xs3kubvCOaS zVq{)iD8!B6i;_nEw@_f1%F8zJPtUjD5y`KSMD!7Zy8pVW_^~QZ=RcoJwTYYV&D zqI&rMOZ`=G5u)?A1Zm6s3swHqNKP3T%0q?m7Zm@S|LOdi79fe?mifP>^6zQRDndG+ zrbUR=W9Jr~DGoC>j2sn7oKjBrpT_S-#Oc0MqJq?^{J2?)`qTC5$O;5n%RdHdC;mMk KN#uXa{Qn>0y$LV? 
literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co new file mode 100755 index 0000000000000000000000000000000000000000..211252a673293719a433b9aa8afb5de7023b203e GIT binary patch literal 71136 zcmeHw3wTx4nf5;WtOUdxB4UIC1W3X~3?YPY6#{bK5s^Dcn*#@GPsf?*%%sz~FrBfZ7&!m?uC>3Dbs~sT+oOFZ z&vUZ&yRP3_-@CrO*E(l?J36&=ni~q0D98MToj09OaLu43xPJ386FW15aig8i_}|sJ z#K}ZD%SyhE;jy4H2|FV%{}?T8>WGg2E(#(f>}oK}K?KK<_=t&jf-vTq1|@`$`j4@c zHFimmo}U_SG})Y(iFTx%`vJ>IdFkKWf2yuz|2}AY((H{WM>#+2AKTL7NrCy-A>0=T zCZUXdCuEHwEgRva>!(bgv%qog{zk>R%GH}I@^7iyw7zQX<_GS3EWcvYs?BdWy|#ZN zzhXu8mWs7y6)RR$ZQlIErmBj{vfDSUsjfmwW#uNsm2KWwv7+iLtJZEf zHFy1^)tvRAYyQ9koqf)#>qRGnOjc#^aGb#W0fj6 zuTn+lRciEkl^TCurAp4L)RgloHRHTWm7Z6rIpVvsq;1KHKvjGxnVq+ziQKlEgQ==SFKuCwZ6J+<(jQkm1Vrk z{52mD{z~7zaNkO1s93vZ)%wogdQUuyJYRn#e_h4avXyHqs>^QQu<4edZX~^+aPwMU zv99XXsgtJ-FD{#Q{ftRv(-uuBn^IgNV zZz)?*vAJydrpmJ0MwfY0=gyv5S~hLL?CZXpzj4!=+bXK7l7h8M?tCqO^Qw)T%2sUH zvcCG}ujX&QW8Lx%YhRu5ku;otOXZRLE$i2;+^}h#?qgYdCqsXkzh!gP=CaB=(9<<5 z$~IS5thnX-xCtbs!7%(q{_PdFRju5#0i(@9%YHJyeW*5W+Q27*4V%i=ZK$jYeLZ-F zsD=irs=ouYYQ_3%5b%SCi)*e~wQ}q5!jT2?uyI?}rp;?Mtlt}wCl@@Cy!VgKN!`MD zYLVx@Aj@=wm*ENT*o7vdB#38z`Sdh#_ao@WxHp!x{`Z*tVKre~^TUQ2|DL~(Wb~bn zq!#F~gLwLB*r^XU!4w*DOajD}>bS^q}>@vsctorD<3rb?mp+snRA_3Uh`Jb(h z^LA{g^FK8Rs}ubmyh=Z9$BsJAj-zqMDc<5d+Ojz@K`O?w; z&cT)eNY8P;+@hQ}TQZ!#Z|T`N_9VwO=KQLqNA1N&uSC8{sOxQ~;^lITHp2=$vop+8 zChU}H!p@ydctIBvx)+%+jAwY}@6y$T7iw)98T&iibvqeo=i)?8EQ+>s?#bDnQ+qI$ z(|>!cUo7)zHqt_wu_v{zryp#IJ6~z(7E}L&tfe~a8id(FSP+CIK{zJ}%Y$%Z5XOS= z{vd1&!sZ}67KA@{N-0N|4sygqj#q+wtwH#sHhvz^|6Kl zUE6X0OCM|Ke`!1J*O=FkcgbV;w8zytKWvFP&*1Z;mW1=;7OJIR?ZDjw69XHZf{@d% z7UPu2X>ht>%m(Jv;=0zsY{@~Ha+Ik^R6y^E(DcOg-6(syV*2zwG3O_!?^%3)iqCWS zJde){`200KKWmwpn5nrkD=`aY5((fvWW%i3tUdh`c?~gC)UPqOAul&Cc5&WUYV#WN z?#+wk?P7j%(o1-{T;BaT^*OUqv1}7L2WBFyc6mfbTclw54YA!q;NrX5Qm}g@P 
zdjt3Ghxl=$XMK?Ii#WN*BQv>1-f^@$uWLh2tR%Qbz0L)W^Zl5?-H{f*bSLJUk>Tmt zv6e()AMo&>qj_h_jGCHm7b!p1?O?6DNcqf{P^#}v7>{o00*xCpE5IJxt%)I-PrxHh8w%L<3SOmM}Rwbj^$o(LEVkLklw2u%iNQ` zC9)3QOQY*zJe^4)C(oJrOBdr5bG!I4H`7<>i#w9(2nRZPTxy-uEtw@({s+f`cBON0 zt#F;!AuTWA?*@fZ@;r?^(tgZ&5gh%Smd4BHH#GKK(2%%%VFPafiJpsei21NP5v$tI zxsj+E6(a%69LY6v1pIZnJ5Fv@RV;7Rs93FY1ob_M&v!7tzN_a~mBvpY?tA#O;Byq8 zWB5Fc&-d~9LCdv?Yc)?NBqnH{0&7lBh)vitF)>l6A=Ggti4vWsq_ITv{BqPIJfEDH ztkWjvP6p4PY8jsxuX&D8^W5M>D8QL#vInEIeK`L)eXe=g^Ze)KB?;%}E#sazGBM#C zYUx@y|F(ql^_KnLdE<1#`3CMc7hRNa{-Q-?IH82ojImE7=72*Nxyqk|`4L`_=_lOr z(L^Geh`9wm=VHvw_Bjt39>e@&DBN%}p7-61zxKl!Kk?lKW}ba9Lv?kpz4jWG{bG1% zlw}#REJGa#9BSi3A|S?x0C?^;668{b~GbJ-4j>dO*~Yh&Q^;w*J^an|DfixcXo zu54xpU_YzH>xD!J}rbaNm)D0)@6zvk*bNrOC>;rgmT zHpz^4+V81&)lz@c-A>@Zcq=dPtFVC zQRllS+v{KWuCzNhvS?v@J*OA7$0ts=$5Z~Hkh3|EzX;b&f&A=Cb09zSGzIeW`bg;i zY5AGIIrO5IpZSlVFO*-`i9D2_@vM{b>v{tDb$lSd)KB?MyL+|#QV->qc*-yFA86jL zO`U`1EEB`s%9`gNcFwvfbJom5CnuOkCOlYT!lrT)nt6n4@N<8*2@fz2a3k{|H1p^j zc{Fd&kw?#?;$R+`@NkIa( z6kN+ZBFAJNF^|k6UN?n)r+LWyjiGM6Igd1l!AbH^*NHsjA>(Bp>3RYl>iB?%Qa^cU z+I>;;Q0gHMC7wK#c#df)>X&&mpRxt>h_cB%Vjh`Cygm{t>`gns{LP`cT7Kp~g1%6G zT_^HTe#Xl@()9%L>-a!^sh{$jcDwba9gun`zr<61iI+LSwcdhhKe}gnY)tWY*>F_oMdp;aSVfNi9=cR9ri6(Y)Guv3a{b*nQc|pQX#x(Hky!6!$rU z%a$Ij%?O?`6K-cdPb8+o{+{CVBAz=F?%RAuO6;2Y-ngIbdT*TbGuoF)zsv}6?HxKE zxnM8u^}u*u>J8jma7M`K1KdY&r;yVZxUb*~LbwNqj%N$L5PKZJ{RCeea{2@J7mR1N z0l))*DL>mh1sS>{uKPO=Y8g&JhAV)t5DXc50QV3K8LkArQZQu50?rZ)8Lk4pN-$*T z3EWdKWatIl3z*}TZW*Ybbjy(BL%$Go5IUYC`Wk>03~;XCT<#%+j^_y;7;**z4-`B& zglQ8xK1lG@A!jh~V8PdfoFTwN1P=>2R|8)Soa6&!(3})~K?co9!H_|7QZQuDoD>Wh zG$#c^2F*#qkU?`&Fl5l21WxiHjWQ(pkVY9Ug?%{YzK!+#Nq-WavlDKcKUs&T{p)n- zM5pM`jZW2}icZsEW|ZF(IF|4xFGKhTJC5QW<>U4UE}ism)M1-HSBIzlc{+5W z^L6M(7wAw$7wRxGx(J~YZs$}HbX+7lhdxIFj}#1j76TUxhCW9Dj}i=hjs_kr82TIo zJVr3|ITm=VVCZuk@Hk+bQ#P;3DVx{il+9~$%H}mWW%HVxvUyET*}NvFgx5Bw(j|k< zsdUL8oQi3Vob>VVrtK6rITG$^|85;R(Rv-a(d{}^(H%O>jNXIL=G1uTc)aKw`n(qS zTEWog1mFpRq0foH69q$`CBP+uq0dRclLSMblYu7-hCZ(Yz7E*tl+9~$%H}mWW%HVx 
zvUyET*}NvFY+jR7Hm}Jk;kC`Fbje_IDqS)Nr?5q+?bHK0Z1eZ(@U;J+4xQ*e9lBAh z3z1Jn8+4c%eHfw5sVUI$6wx{KITd)SVCZui@HD~D=XBudf}zhDz%v9xpEH4H3Wh#s z0nZW)eR{wiu+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^WnM!RA!DWDrgr z(;PYJKdr+y|NA;T?f*cBPV|R5bfeGcP(^>F!_4T95!#$8g^o)_=g{Z%z}E|gK4$~Z z77Tsf0DOaB=yMM69Kq1%jleewhCb&4&lL=P&I6tYY;(%yH92MTnw+wEO-|XoCZ}v( zlT$XY$tj!H=-&v1)<2XQ0Lz zTnEfO19h#zcLQ_JKut8b9+-OuYLda*fw^a(t}}QCF!v19RD8WANt9`*r=IitR4X7nqPSa?*-{X`Jg7I5+bU~;l8;N)Ilas)n*(K$KA^}f2(U~-DZaOw}*$FexZb*{SC=$xG5dSC4}n4IEzUwy`4a*FGH^;v_-DX#a`0|t{* zT<@y~4JN0!-d7J9OippVuO3e2R0^-5^AuhKr|=p$h1b9-yarC;HE;^AfrZyLr_v>Z z&8c+BAe=fD*s1f{$FexZb*}oH(K$KA^}hPN!Q>Rz`|1k@lT%#ptGL1B6xaLeO9qou zT<@zd8%$1dy{{fKn4IEzUwt)|Qz^WL&Qo{|oWg716kY?T@ESOU*T5;f1{Pl1oJyAr zHmA}hgK+8(+Q+guHJ>_u+~}N~T0s0YgUKoGW2vtjOinE#{tJW2DehybZyHQaaUV^3xyanaT1ue5o*# zdRjF@xp% zPI@VWZ;fDSdZKOdp*Y9W8E_OJLW(4KJxdYf9`$M^Af)w zB`<53;I?0+2n=;;;Ve4x2j&Y z1zkD!AI8}z_W5Dt?dIk9?*`Xx$cu5v>K4xPyYT=K|3cRTIS6xrBi?8~57&9P?(L2B2jO}UuKRf7{UNv>g1mjb3I0%A z4@G>oSK{a6Iv>~lyvcq6t_yJ8-<#qW;<^y|2YA!`B3u_CKF6Ek7vs7Z*SX#-e>AQ~ z<2uhP^~d6REbRI1xAfYmImJ z2KlCst??_oA-?G+%6UV5)7KO6E4_T*^cVGb1wnsX<5ziwL4Q#%^lJKhBHqg@4*J^~ z=lGcZw#GR=roXLmj*sc@i8#l{^tUz6@u6-%$?}tLpJe%E4om=7CV*3ey}5n~u1jz| z#GCI=#`R?I?rLwLKLyuQ5I@vg>`%k>G+bZfE%j&MdIqlZy=DF^T+c%OVcw_xQe2lJ zzQ8N@XXAP{u7`We{W-XvgX==C(w~d#xyV1lTj|fo^?bw^d8_?}xL%0sk>1VzVq7oA zb+NbBUyAFc$Un+kueopLOKY6-%gmS7IOms{FDK%hUuM3v#yP)CKU?FRU#6d}an3K( z&l7RZFVoM~IOmt?XKS3}V*1${=eU@Do``c?Og~%W92e8i);Pz-^s_b2aWVZo5$Cv& z^Pfby$@Nd7+;VQ>ea`^qe8}@h37oa~1J4cWxd;2L&U6-OMsOC%ga=DZ*i>#pa~28L z_$}A|Y!e<}9^gjiL1@k*og)u*>YO8w6MFUAOK)0QyJWFC`n8-dS!W%Cb5p^2kIp!2 zw+!cBcs@wRoM%-+=wl_~d9KI7nLB5xIoE`5C!dd8>qbU<8~tn(ceuoaM?meL35rP-pg3H!{w<&3_!_D33mi1lbng zT;EBgcfZDs&?YI&L#E@LU+)Dt-$S`DZcW`$Z}+QRMO#I=5O;`jA#A<^X+4Iyinc2Q z<=HQs4eY%DXPPJ%@*HkM8(AaW2yL2uKBouDW{q$a?b{)g)#s0Jp6H1~?QPrgb!^*~ z>+8^)9&5<{1kN2v+qQiAJP;hW&`*ag(_EBg+tfz_=lz_Z-ygK?d3es#DfZTs7X?>i5NU|LLx}*4u{Xzjl2&I2)CTam=~ijZE-r{A}dQ z@mw|0^ZkWMKFsay;M`O{q~+bxox 
zPo509z>Qqz?eI_A_2C(J0P+o4I3{Q_*|segZG|3d$gXWg(6%k#s2I=FKGR~$Lp#BCsS9l%B+(ocm|h4?>xuTU<5UJD}}~ zj3@01Wh;hkH+Wz42P0qc23O7T8ns=Ky18~Hhet!68@)&UA;>p+v#aKM2mJ)>kJLw- zLi;FoiZ+FM9s(ZHR?%LHZKD2$f?s-0AYSGGxswm>kn6OK!g1D9fOay7pyeIsj$Upv0S1t4oX}iSv&u5qgc!qJ1AM>?|eJ_Nr$WgY3xI=6Y zVKeVT!qdm0Bg)EWnNF5{DuO=AQQ9Y_wXnXW{@jilQ zl<|;jLaiHFZ-+FGw#bly}Vz*&P+hQ$@jXECEow@^O3LQZdWb!4r}`& z<4@aCf_&2+a3eQ)f2E%#W`Q%*eJOO$=M$sr7xnWA+n#*~o?{Tsxyy~n^FE(XWK3vB zSm#{GNZT_Xwuf9?7}UEM_3}9-SugAf>s^X^he6KC_qd9CLAO_FP$_H>^Un_Q z&k6F+MSj{^&KcSmn}4>Pwp@QId1!Y&PV&$e?E$~d^U#Uc58(Ns0An|0H`XBtr+wB{ zj&tJmtFW%iMu_w#^281JkqN!hChemQ2%epo5BU#~chqaoaA&_)xQkzi`er=nDz2+& z1Nf{&n^c1QI3u*5_d41w-pe>o7!TKw2g*&ley%wg==wBtOxr}+`RpXNi8`gN=@x!F zO54PA_)+M?br$PEeFrZOpN#T8$T>#aN)D4xC&3@uC-w#Y7L0v5i)>CHTlQIGqn%O`=8i#P{w_O#^P?@w}h8NxX@LA|h?8oXY!r)mpr8% zllp1LXzw^Snk%qHv}K$t93RdVjt@D`@!&XeJl=iltxm6?t#028M_xnN_N{Q_4dN%m zkzXmo z&yB<$~c)rvkV_F#PE(2VO21{&ZFV zuK-T^a0!JR{uXP#_*j7q{4Lf3!H|K!#abvBGVr%piv&Xk{uXPoV93DVVl5F28TebQ zrGg;?e~Wb!u$+CNU!io#AZK6FDTADa%IFtzDls;dqOSoVrwX`AF#PGP1YRi^{&ZFW zuM!M@I;(+K3x+?PHNa~G!=KL0z&8tqKb>2EZvjs70WxS#ioPI&=A>ZApgAcRGH6Z; zh76jMf+2(Eq+rOPIVl)2Xifqr`H)5#l6**`4CXAMV(`SdD;?w62*I5hwt`&XX={oCx*9pGMb=Cu~7mQ!AYyjRMxYl(x0&f(2 zx9i*re5>H?uCobvli+(?XEX3-;7NE6(EfBFgU#tvkiq8kDac@R`V?faIeiK;*qlBE z8Ej6Uf($mNPeBIZbn?7#8f8eH7fz!L<_!AQz^Bi)z^6}5;M2zseEP(E&_;$h2=T_jYf}zhlfbS3t zeQpEZCK&p>6ZlSGn^QKg$tj!HZ&8c+B zAe@S6j%*8j`qcP!8vDNS=@awoHLmlGPoH|<`1IM~r}*>%r)r?%8qqoQc^B|qf}u|z z*cS|aMuDS(q0bm_Ofd9W3tTH0`m6)46AXRc4SYAS%_*DL5{?bRJvpkP94)+*%tWp zsR?}g_<>KKnEykaUgsO1KJ~uw>9fO6@#zCj?SYQ>h|ZzU&j5c$F!XsJ@O^@z&(8vX zRxtE=Kk)s6q0a|^9}o9Y)+Qcniw@!(^H zdj{&e!FfDj?ir}>1?TaAxo4n`8cd%)+%r&58%&=*+%r%=Fql4lxM!fAF_=DmxM!e# z+`ex?UpDg**Y>!{L!Um}$7&1C6w;>;_p#c7Gllf&!+or_;7lQX`fwksEjUw1pFZ5j zY75R3(x(sivD$(&h4kseeXKT|DU^MSbjl$67U`5h_Oa;GrxIhMo;A9lPam%N)pG{Z zrw`Zs>IH-8(}!z*^)rL%(}!z*^$UaP(}!z*^*097rw`Zs>bSx5>BBX@dbyns^kqX% zwgsG|Pakr!E#M@5`jC@t0VnCxhn#E+I7y#AwWdN22TX$dSCsW!6m?4 z@2kHzcoHzz`|2MJo(#@-# 
zdJ?eB>B+!0r>_GRPRn`Wbjl#-h0`g6IfMQO?qdaK(8(#TbJcH*&dDjR_tkF=Ca1XG zSO0D>ImPw9`cH$&DX#a`e;G_palNnp+hB5v>wWc>!Q>Rz`|5vFIhDd|=sbnjz$v^2 zPT@6h3a^1vcnzGwYhdBE&8c+BU~?*6G6<()!TPXPhaYku%iXgCc6xaJIVK6zx^}Y(3_iK<-T<@!}!Q>Rz`zpg=a*FGH)#*V;e2;N{rtlg# zh1b9-yarC;HE;^Afm3)5oWg5h;kC`Fbje_IDqS)Nr~aUQEQ?cI=c)^g&dDjR_tk|4 zlT%#ptBVaLr?}o%ml{k?alNlDGnky>dS6{`FgeBbzPiF-a*FGHb!952Qg{uWr|=p$ zh1b9-yarC;HE;^Afm3)5EWEZkl`a`wVSNU~-Dmd{r?`)$t~QvQ z;y#wT#$a-a`&eq2!Q>S8vD9#b$tmt*sSyT~Q{2Z=BMl~}xR0eqrE)5T*U)(iuYpr| z4V=Pj;1pg1r|=p$h1bBsYnxN)lELOwx@3^^y%N5O-vc*}sp~7}-FP0kmA?@l^JrhE zyZ%jgGk@2|-(V5rx7&xaP5cojK|A;tsP7DbI53N8{i}`#ynk&RObbJnPF2>SDc%@LN^gm%+>XK7rp3Gp%F& z@~&6r)qR0{37(fYOFN8bJLOh8P58Yj+i65Q37#Q7Lp%1nWSO6BHK8s2UfJONl>D9i zNa&#mdiVzVEwV582G157|1BIF`fw?_#rSYZ=C|a3;w<_3TSLkJP9XDs%KXlm>ldFb z@4#n^j1AjwL4Nvh;kEd2Nx+Yb$a@-fi%*xH&gCz~U+A1ezb@Ty{qPH|1L@mEuKD|G zshhvGmU#YVknznk-^=MEGr{RW<{X3V6NYj641-B)qAwBQbJNU#&nC~Iv-yq@PP5%4 zr%?u+-cR2z!s8~)HO*tBfe#04Uo+(+9?Uzp{(d*Bij)^H=`ZR=X10pd_ID9WIow^uZrc= zegoVlKPWrjt$Mb1s%o86-r!x%P>Ss2<;_UD8FiI; zE&f_uuSLFc@0iY)tPlCtBW*p>D!lLOGf_-yTmn9iH}-fV(l;V~x%We#aoQ%Ev?H64 zwh3*l@P6cLJHF)6zS$AzvoIMfexkBC#G=Dhe$ZL4q8s|Af!`s$4&lSFdeA*79&IRDK;W2cJbB1y@ka?1I zA&=qni8#+28a}tiIX`HdlXWHe+#2WnFx+j8>-k~VhkS;+C*qtRau(2Vwl&Uq!r%4V z{IL6Zwtm_DIGem?zP(pITOL~u-VZ+xawP9{Yrq%tUiC&FT=2n#HQtNxPod8bALM)0 zzkvB1Ls|@JH+#R-w&BQ{NBiqt9PhqOBI>lW`NzaH21$hX#eC21R)))?EscJRBN z{lN;pR-5RH@GJR@| z(^iR1Xog+WHsM3cBetLkwvM)d{fF!aZJF%0#TGO}KH7p0+kdeE&1g^CfOE7bHsA>D zJ#4@Z=wb(SvCezV--+v;xL)tQk+cEq2W`MEr0s(KHh91C_uzUD@@@3q)cN$ih4mrd zeMq|xX}5a+;NOqy`*FR=`?bEO>3bFH!`$ABw7sZrv-i*bK3wlZzG|%;SOhL0!W zw0E=x;_Jt7vo%gzXU3*APOh7=X^oTHW-Ly`$z?MZt#NYM^u0CCu{3>ejdLtbUr)q2 zcC;}z-)uQ;nf};v(8hcmOGl}{2%kZC4%BIXcXtT)p&Z1EzX7M4@mIj}q07GK zMz(q<{A?3F}Wx(LR(=FXnjDeALTxr{q56Lfj$Bg|K-6 z(s=fibAji4*)RA|ifA87CXe=^wD?4-{i1wirw9Rv& zB45Y0ZMk^XSC2Jh*M3ogwr%RZJzhl;|e`%A4+Fv+is6O zQFMF3dEZZC9Lj&-sv7TYzYN#Qf9$Heyp!5bf#^$opj5u#MtrZ$FGs%0=kVf3@3i)N zV)!sO!ZW@VNL%?cSH(OBzi1l#{A&0gsP$aD!?t~lc(!*r@~!?GSJincN}k+|^XGSa 
znNgnWP1c8Vy(^IK=9gVn?{&s|ad#YD1kCfrv|Y48GVZiL^f5pV)0WZxEJj}PmiB82 zuE|gGn|dvWuL1ITDdO3G@|NSY9Ou!=*P9T}vEaDSzSw-RWw6`)sM@0)`BSz>TQL+I zJd+QlBF>Az4}g=SZAByfQ>?{V^6g%iXts$vSYpDaN{rju*Il*4yC_P(0GyLaA4(>V z_Mx;Mytv1^Br5*as!(qI-??h1*ELFiYm}#>52Z?+=O;I5E0_)+O2$^8Uf6^Ev{U3I zIZC+@cYtys)V>B*Lg(ZxZ3X)UA4^bg>swz z#Z`N}EWGntII}m>(T`F!IP)2=XOw=FR-;VyZ(Vht7tyvw##H<$-3Hk{>-CAQLB8Am z(^dC-+1j=kKJJZ(AEm93=K-&O^k(GS`roeF>*Yl0M@j0VU7?K>TSdDYZo^-WMJ2W2r z*3T@(=DCFThHcR0gI-=V+r%9%G2xM0FlO8S*H!zxK~X-}OhH+Bt|>Qpv>&A!==dRT zNOUdo*Szbh25+dgO|$_?KT60CJE8q3ZGx`IQMQM;Lu?OWGw(&hQ~D~QtR4L*RYRZT zC~Xwe;YUf^D9VF64%0pgZ>!O_+y2Y&ls1X-!;eyL?MLZD%0vGo+E&A+X+KIqd*qAA zWVcN_q{kZaX+KIIvOW4GVSDo2BK*{&{m{#jL3b?$vW_o<694o zkvltngJ&ItJO3Te?Fe^)OSD0IV1trW*Q?as$Z z9@?T(@LN9<%`x9UY{b~rUl>-mBHVFlSUHH--$9u4R6k14D{azz+JNBsDcj^fMBY)a z;zwx{>f3pFSo=|;4M_S?LVkW1!FwI;7Vl-8CnbpI_XXu9J=KpAbWGbs*=;{c)G2L^ z_)%gy{3sb8Z>B!&M~U}A&N13na+rJ)KT7Ni{3sdwbQak-Ci=UKv&cp}#XiE1lJ@t; zF+u)=wtV!XBy!MBX+1zz?MLY>?Vf8)Xs_6=JUemh-mhKSD?NsRz1ocN*mY%Ci65nE z;61&hbUw0tBVOUsg zL%4rLSlx;6kx}8KA0@+!ZrTsr2l1mMWA;J(C?)Ie;75t~OY)uenS7`HB~NL`q<-2l z+B=So<_c^PZ5ihZ$A@!;<3o;fJUEUVkB`!iQb(?yP413z{;6c7ZTq;(NPAa<@0$Pb zJwgBXo}g;*;N$!*B=fBB=l#AXnU5Xc5%j0@qZGk!!6G=*NS`D0qtqL?w_x}%>I2+I zFnk!{i-6;O1;d9?HgL9J_%P}R+)prk81)D4FBm?I1^^ELPWsIWh4ee+yNh2N$Z!Sn zTp<`T^Z@Q57&2T5e5GK>kOiD27&2T1e3f9x&=a_)V93x5xEHXTH=+-t?ubi=407Hm zoifPzGx|}=!8qiIz6ON!`?qri!-r8G((?qvhtWXbfr8<~Xb|uq!SG=;7j7+no~HE@y-kU?`&^aU9-Cj~h>h1w#hSNx_goa}qeo zhcwENDFF0Y|DL+hb$h>>IKycW+dwaOx z4D;^oLcyKPySGOGC(md@2Ak8TAVV%VC!9V78Ej6Uf($mNPeBHo)2ASV&FNE+!RGWS z$RM0fp3zRD49PRvX_Ud7`+qy|qjWOxqtq7oQ92#?QF5a5^>6fDdtY9=yMeCD8bO@XyDO;q0ceEV+2E=V}ZvChCasu zj{~+jH575eYjO%do;)tRCa3a&h1cZNFks;|IaL5Gye6lH0}HRosX}1kH90i`n7p<* zl`a`SpC!O0f}zhz zz>@?+pOb+n3x+FWkyr{D1lR@&~d5g9QwQ-_ zxq_k3dBF34ZBE&|CZ}v(lT$XY$tj!HVK6E@^bPjzk0A3&%`dkRSP%!kl z2zZfT=yNgfV!_bo65u6*q0gnjO9expHv!)SY;(%yH92MTnw+wEO-|XoCZ}v(lT$XY z$tj!HwWd^a%n4Aew} z=|_os25OSQ^rOT*19hFj^rOT*12xrP`cdMZftudFZ$bY--4WL|*yN!fCGKOjU2QP^ 
zC~+UF?HYsWM~VAbZNm(vA0_T%wGB6zew4V6)i%Ol`cdLOR@+E}=|_qCSZ$-){pO@o z2HCerrwp=>ML$Y87#lUyj1~PTam}whgXu?!YkqaT!StiVHNU#SVER$wnqS>$F#RZT z&9CMeOg~Co^Q#30(~lC@{Ay7gCnlWpLn>|>=<2H`_GW%#J}v1)K;dML(LEiwAO z3z+MDb(6t9FxUI)QwB$Yx!zZ02FHN8-d7a{*8+3BuT~gb2h8=psxtU)V6OMos&=0@ zH8?YEbNVh|o6|n9&FLtx&FL7h&FNZTo6~i`HmC0f7Ea3wR^L!Q>Rz`)ZxRDX#Zbt-<6J*Zb;jgUKnb_tkcT$tkY))jg@4O5rthp2BP36kY?T@ESOU*T5;f z22SBMu<+XERJvraIh8IMgj0XeK9!Q>Rz`|2|WlT%#ptIrxt zPI0}j9x#}k;(A{_XfQd&^}c$@U~-DFBwcualNm;Y%n>+^}c${U~-Da0;)1Q+N%W!fW6ZUIVA_ z8d!L3b1Gdj*qln248o~DXdlbs)O_mraieo`Y60=r3?`?zkEOnDFgeA2EcF)#lT+Ns zQr|R~oZ>!~`b&e!DehybZy8KZaUV;4JC#!@yoSzGcnzGwYv2@K1E=sBIEB~1DZB<2 zUfY~Xmkc(i(j|lVPm)mkMR*eDmxJ@MNk2+YKHAr*G=4kyn@Ii!i&*>YF!4v|59coU z@2Ro0 zEFY@B@ZX1)n=;J|5qFsKNO?JL-}rcN-u^80J3)QfL0zm@&f4#PJn*}5hWf?ti=4A> zgnZiP##!1a3EH8o=w~zAku&ymSTCO=Bd&)*tK{-hrz%8YaL9sM*h|9ku>q3(0~Q9^y^ znJ?neo8Q~Qvx>QDMn3hK=c>PqpWgt@8x$eg8q@jE1(ZicLy z(`RU>Bxr}SqK~P5l+cdm^EuiPJ|96lGM{X|YmU!woTM)$)O$`}N|gBn`cjI(w?hPe zANnBdgD~k!i8c(}&PIGT;`<})4<8UY2y=iV-spCJ5@?Tk^N^N@wBFuWe-N$*;ku7E zzTKY$>%{NNhahbT>gwxF@Q31hDDq`{CGEbHSReA`BP}0k{k+NTzLZ`?+8$$*3y@xb z^#0xypK;nQo3tZ^NGn7e1H5U*m(r{7rDXCvRfM!6l*{pE_{F#`#&xbYE9oy~|EsVY z^lOLww8!0%r#pNo^+4EzzLXG8`&(|(o3jv~h4`Kbd%j@#Qi80VFfUn`8Ryn`cW+SA zZ_0km6YV#})OXnEpf!GlH^evNjyAoaNxvzLn7f+uXOYVZz**Wo*LWmtJlfTB;vDVDoM@u`f?XetaUBgE<$0z4SX_^VUI%)!lYUh8 z(>}pJ%6O!WN4`Pc9POvXaP~y}N-w|NmlDSuc@2+S<5^yTZ+P1pzsf6Y_oak7=}XD* z7`laDx%c=|VqM5%_@k{4m^YjqCY=eER;w`jF3X z_e7lYL(T&l&bG!mPk3g}=7-(Sv-QjF$JyjH^X2Z@%%R^c(n6GHHhpV>~1?|}~*=>t0Xoh^+m(qu9 zPi#Ol+S4}R9PNn>I6`|58!!#JmQiSRdy0T%^rKeIvY;{(M}|N4_F& zb^E=K^&#Ivq%B0+NbhEUF|HTmy4YLWe(z&_;OSDNEk%8!y!F~f87{WQY1?QoG>=(l z;7iHy@kE^Vj7KxY3UYIOfS`8*C5PxN*5=V#1cqYvUADyCACX-FRATvbbx}N zCTG{tr)S4n5{Z4dj^RUpWwE0?XDa@fZ2wEPpC#MB(%Jm@@C^K!{Pvv9PtV!>@^tjW z!?T{^8_D*CWcxhg8FcY;^r!S^Lk@qOxK1*PZ!&t3+p+P{}$GBCVwsT3q`*O;(y2Xx5IH3zdE*08*)>8`ur*V ztB{+Y6j{mBPl~Lwbo|KcH-b50SSexW=C1eumupE;qQ5 zA@D)!-wEemBGK zXi)EAEl-e#^&svL%cD#)L*~))1bIq=Jmo8%)+&ftB 
zcPz{AZ92w@e9h0AcKKb2$k+6&sgG$QUsKSJX7oekYYO_o?+rx0<{*xH{34&7Pvo=X zL_RxC>lEK7XpSM&95c9y-*o^Rj^SE!3?at;fP6o}5ZG`m$m0ZgSRQ2>88VO7X^^KR z$WtEVVaPmM9_;06jv>?>Gx!MippzWKHR^8axKCV;c+_J!7UXeE+#!}nnP!H}qvZ+m zlmvOogFFnGCxv6L1{}lwnCOuE`kE6+6CH97UULk(79DawS?5EV=&&(}>oy+@Icdy8zh8EGOP_X8;(_xVIV?se&WNE7+Euf{m;mx+9KK9SFk z6Z!18{)&4XXWxs6TCPEs|h0I{6rrB$y+`~(h%dUuRkOVVx}IXxbA$Iqe=y>UH@Iq!*Qo0mje2hM9`%PH ze)MKn&GimyK9WZ{@VQ9NbH0%0)hn(l!5ze!oA*hF(=R6Y5 zv!0=-hhxEcBxAvP@=*`Rf^$m7g7p-j9*za)l#B(r`ALzLJpH7|Dx9TF91HHu^Sek|(d zT;W`lxiT8@>@UZMZB0g7ta}RTW?j?Pz!oE%u@3hhgtJgLxnBzIGym)$|C}KIT;%7R z<~WnvpJe&T_fN9?G6xEvk14xxKSnt1v##-<8SpMM;3qjCzEBI1Z^nbJ8i8=u!$E%V zvk18K^SDnVoE>*nF~T`t#{C)L+^+`t!1K|-d|$*EgnT!|ScH7f!#IR|Z$LOhWqwq9 z5xh@u?()9Cxk_#(_aXxBaz4=KD)%CIpWxi3E#Mr}`N3V<0FDvoJjaN0oMXVb$T86Q zFb6o690QIe$ADwSG2mEn3_ePqnt!f+~vwStsHeYpnex z2(f-TP;PJ|LtwK`4DvWZp6nnGL+0T)mIQeio~a%!58jWd*NF)AI?>=G*#?`v2wdxR zB0|)YeBY+LTT|~v1bLhw56kOyqQPdJ800Al@~}MfFhsqFQ`U)Nf_37U_H`KFgR0km zNRxFK-(jlPiG$nMVSJCK&WAKvhw*)&jN^MgWgTYclXaLKC+jdfF1b#e8LSg$w#&!& zTCxqkuTtdWyD9ZL5&cN66N7&6U63Ll-|xt>d~c)3$M+`c`Vc4b*>NJD9jA4QcSP#F z2!xtr1~>8jmB5B$xW?G;@A%G1zI&4V(K-$CI3_On{!7-wJep%cp0m}X<-t4pG{+EX zjv0J}?-m3$9K$u_Y3lg?zjDNr8(N+qk7MGJ?-n6pk$kIJTr+hkRcn z$DHqm6dm&2ikf54wdj!V3Do(JCOYIh2^q(C4T=u!e4;};PIPF;B{{Yx;MkgW`S{K_ z%?YH5e0(RKE5xFaYqm*^4a-BK08k2v*Yx$1NKb&Uplbil8cld zyX0W4yGZ$rPbk&(bRv3#d#Uon&d`^0@I--=sV}>oT*vLCX5sYp%L8@X(3jobjx#W` zOKGj@JnND!4K4qY7PO_2PA#r$oi}kkcAR(2IoMKyvCJ8JwU@xP za{*c@NyW#U$69i8a$?6~CSK&Ib4Cw8XFI<`JEO;Vwa$^27=Di*bH3A(aK76Dkz?m9 zOBqqdMVW9SygQLdgvKSt?MB(t;c?^k#GI#4-}mro!RIJG$MAU?pYP-IgO+O(*P@K$ zOh`;XnM4A351Mr*#3t-%ALrbLyxhFl#d%+;&1=lN7bCul`B`paVxlgWcYjWO&V&hj z5N^+zIB^f-gLKq)zf+PZ(fLXmOZN1}7{)MOIgleak*o7fPE6M6lXEBU+1K@{mRjeB z$ny+7Kf+l2_=AlF#}Q?^pv;Ad3sG+(G(9mL^*X07oIV|6@e|bdEIvQQ=Q(_y$L9rn z{u-a3waiS+)UwP<%+j&|Lw4oNip_f8v0#4o)k}D~oF0o=v&>j{-X6vW>AJqsM5)eK z+E}`$J~B@D1 zdODo-4-B6PXZ@Ptv*E0NX81xli{DSGUxc%Q?4z?KOy|rm&49KBmB45!_L1T{Exrmxl`_O 
z^f>-7`ZskF-m85oaX|udC(yqH-qozyY@FvWn2NDMAG+qwR;R=HQGE^qee8O}@1tE8 zyp7Mv=sQS%_u1A?ZMkn>b}IMeWv@Wiwr_^BUSs&JaMl|PpA2XHisARz2ZX^Gv;||( z7K}j~#^4pm`pRF1omUaQ_U*9q8p1ce8+Kkt_^YE~=M97$gTF<{G58fij=|p{CU4C6PRA~$^reuO6xm`67wEQp3`%J>nSs8 zYWinn9_{5kdNkzx>f6H$_(m){PBoC=2Fkdw&}zQQ{(gpcsf4B;F60*3Gj?lOcg z@GwL80Pn)^vXFBj!^=ZX{J(ZW9%IKcoJU(SkD2dEu(UvY{Qo{5<1xs@t%861yO{7o z6TqLA`O65xoFE~{rk}Z(^w#Ob28M4235o!JN-8oi`F|p=GQhA=%o7T|d&$L42!BBM z-~x|z_zQ>nx|e432~8k~dC$!~v3x@L(wm<7-djI=bH}jN8`f0~TT!uT^_GhDtAZWZ?Jw^x>}9A50?58J%DqOxk! z=3$ek4KFU6cKwV=Wz!Z-DVtJUR5pD6^s=HwWg`mLZ7nJ*99g`6>+rHoTkj~_yk`9^ zWh*K+mo48^S$5m#{LM}&VyZTc&7O4qlXflXh$t)`K77oG!jYrK6pkz!RlIUc z#i%i3R;;Kht{h#pqO!7LdC|(M!5p%)|M6MbaXUM)d_B;~j6~hSog6scO8J`(bQvtf zu|S_ClA6DMLv@vtUwOy+&3CMG@>i|jlE1oQ^J*vf&-m(1PX4A1l@-+$PJVUO)@q%* zV%?e*h^|_@X+zn@ipol85AjvYx2!5#yJkIWOkS7i8Zd_Is@9`0+NrKsj&L>N&6NqA z^VVdUH+AmpsikGp7Rs=8NaSUJS`yOr2L6s-ku1`ckR$#2M8Yu z?;R6XE#*6sv}5@tLHPm}vi?TkIe>rlRva&-yo76lxQ^w&9h5(Cj`Ew3F1oX`E)F_g x9z>F-%qQa~{N08)8M`2>4vqxnv1m*A6S*al{X0|nDR} float: @@ -301,20 +311,31 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # --------------------------------------------------------------------------- -@pytest.mark.parametrize("batch", [1, 2]) -@pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) @pytest.mark.parametrize( - "hq,hk,sq,sk", + "head_dim,hq,hk,sq,sk,batch", [ - # Shapes from run.sh aligned tests: kv_head_num=4, gqa=16 - # → q_head_num = 4 * 16 = 64 - (8, 1, 128, 2048), # aligned (test_d64 / test_d128) - (8, 1, 130, 2048), # q unaligned: sq not mult of 128 - (8, 1, 128, 2300), # kv unaligned: sk not mult of 256 + # ----- Small shapes (cheap, GQA-light) --------------------------- + # Catch unaligned-sq / unaligned-sk corner cases without paying + # the cost of materializing the full [b, h, sq, sk] fp32 attn + # matrix in _ref_attn. 
+ (64, 8, 1, 128, 2048, 1), # D64 aligned + (64, 8, 1, 128, 2048, 2), + (64, 8, 1, 130, 2048, 1), # D64 q-unaligned (sq not mult of 128) + (64, 8, 1, 128, 2300, 1), # D64 kv-unaligned (sk not mult of 256) + (128, 8, 1, 128, 2048, 1), # D128 aligned + (128, 8, 1, 128, 2048, 2), + (128, 8, 1, 130, 2048, 1), # D128 q-unaligned + (128, 8, 1, 128, 2300, 1), # D128 kv-unaligned + # ----- Large shapes aligned to run.sh perf_v4_d64 / perf_v4_d128 - + # Same memory pressure as test_fmha_fwd_f16_perf, batch=1 only + # because the reference path's fp32 attn matrix would otherwise + # exceed device memory (D64 batch=2 sq=sk=8192 → 32 GB). + (64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned + (128, 64, 4, 4096, 4096, 1), # D128 perf-sized, aligned ], ) -def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): +def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -348,12 +369,52 @@ def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): sink=sink, via="public", ) + + # Pinpoint NaN/Inf source: check kernel outputs BEFORE running the + # reference, so that a kernel-produced NaN is flagged independently + # of any reference-path issue. Move to fp32 CPU first to avoid the + # gfx1250 bf16 element-wise deadlock noted near the top of this file. 
+ _ok = out_kernel.detach().float().cpu() + _ls = lse_asm.detach().float().cpu() + _shape_msg = ( + f"d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}" + ) + assert not _ok.isnan().any().item(), ( + f"KERNEL out contains NaN [{_shape_msg}] -- kernel-side bug" + ) + assert not _ok.isinf().any().item(), ( + f"KERNEL out contains Inf [{_shape_msg}] -- kernel-side bug" + ) + assert not _ls.isnan().any().item(), ( + f"KERNEL lse contains NaN [{_shape_msg}] -- kernel-side bug" + ) + assert not _ls.isinf().any().item(), ( + f"KERNEL lse contains Inf [{_shape_msg}] -- kernel-side bug" + ) + out_ref, lse_ref = run_ref(q, k, v, is_causal=is_causal, sink=sink) + # Likewise sanity-check the reference before comparing. A NaN here + # indicates a reference-path issue (e.g. softmax underflow at a corner + # case the kernel handles correctly). + _or = out_ref.detach().float().cpu() + _lr = lse_ref.detach().float().cpu() + assert not _or.isnan().any().item(), ( + f"REFERENCE out contains NaN [{_shape_msg}] -- ref-path issue" + ) + assert not _or.isinf().any().item(), ( + f"REFERENCE out contains Inf [{_shape_msg}] -- ref-path issue" + ) + assert not _lr.isnan().any().item(), ( + f"REFERENCE lse contains NaN [{_shape_msg}] -- ref-path issue" + ) + assert not _lr.isinf().any().item(), ( + f"REFERENCE lse contains Inf [{_shape_msg}] -- ref-path issue" + ) + nrms_o = _nrms(out_kernel, out_ref) print( - f"[corr d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}] " - f"nrms(out)={nrms_o:.3e}" + f"[corr {_shape_msg}] nrms(out)={nrms_o:.3e}" ) _cmp( @@ -569,8 +630,14 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): device = "cuda" torch.manual_seed(0) - # perf_d64 / perf_d128 in run.sh: batch=2 kv_head_num=8 gqa=8 -> hq=64 - sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 + # Shapes aligned with run.sh perf_v?_d64 / perf_v?_d128: + # D64 : batch=2 kv_head_num=8 gqa=8 -> hq=64, hk=8, sq=sk=8192 + # D128 : batch=2 kv_head_num=4 gqa=16 -> hq=64, hk=4, sq=sk=4096 + # (D128 
sq/sk is halved because per-head buffer doubles vs D64.) + if head_dim == 64: + sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 + else: # head_dim == 128 + sq, batch, hq, hk, sk = 4096, 2, 64, 4, 4096 q, k, v = make_qkv_bshd( layout=2, sq=sq, From 48c01fdfa95aa91edb2e8d3315295e4ea14701eb Mon Sep 17 00:00:00 2001 From: tingchen Date: Sat, 16 May 2026 14:58:00 +0800 Subject: [PATCH 22/43] reformat --- op_tests/test_fmha_fwd_f16_asm.py | 66 +++++++++++++++---------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index a04daa036f..87bdea0286 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -319,10 +319,10 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # Catch unaligned-sq / unaligned-sk corner cases without paying # the cost of materializing the full [b, h, sq, sk] fp32 attn # matrix in _ref_attn. - (64, 8, 1, 128, 2048, 1), # D64 aligned - (64, 8, 1, 128, 2048, 2), - (64, 8, 1, 130, 2048, 1), # D64 q-unaligned (sq not mult of 128) - (64, 8, 1, 128, 2300, 1), # D64 kv-unaligned (sk not mult of 256) + (64, 8, 1, 128, 2048, 1), # D64 aligned + (64, 8, 1, 128, 2048, 2), + (64, 8, 1, 130, 2048, 1), # D64 q-unaligned (sq not mult of 128) + (64, 8, 1, 128, 2300, 1), # D64 kv-unaligned (sk not mult of 256) (128, 8, 1, 128, 2048, 1), # D128 aligned (128, 8, 1, 128, 2048, 2), (128, 8, 1, 130, 2048, 1), # D128 q-unaligned @@ -331,7 +331,7 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # Same memory pressure as test_fmha_fwd_f16_perf, batch=1 only # because the reference path's fp32 attn matrix would otherwise # exceed device memory (D64 batch=2 sq=sk=8192 → 32 GB). 
- (64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned + (64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned (128, 64, 4, 4096, 4096, 1), # D128 perf-sized, aligned ], ) @@ -376,21 +376,19 @@ def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): # gfx1250 bf16 element-wise deadlock noted near the top of this file. _ok = out_kernel.detach().float().cpu() _ls = lse_asm.detach().float().cpu() - _shape_msg = ( - f"d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}" - ) - assert not _ok.isnan().any().item(), ( - f"KERNEL out contains NaN [{_shape_msg}] -- kernel-side bug" - ) - assert not _ok.isinf().any().item(), ( - f"KERNEL out contains Inf [{_shape_msg}] -- kernel-side bug" - ) - assert not _ls.isnan().any().item(), ( - f"KERNEL lse contains NaN [{_shape_msg}] -- kernel-side bug" - ) - assert not _ls.isinf().any().item(), ( - f"KERNEL lse contains Inf [{_shape_msg}] -- kernel-side bug" - ) + _shape_msg = f"d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}" + assert ( + not _ok.isnan().any().item() + ), f"KERNEL out contains NaN [{_shape_msg}] -- kernel-side bug" + assert ( + not _ok.isinf().any().item() + ), f"KERNEL out contains Inf [{_shape_msg}] -- kernel-side bug" + assert ( + not _ls.isnan().any().item() + ), f"KERNEL lse contains NaN [{_shape_msg}] -- kernel-side bug" + assert ( + not _ls.isinf().any().item() + ), f"KERNEL lse contains Inf [{_shape_msg}] -- kernel-side bug" out_ref, lse_ref = run_ref(q, k, v, is_causal=is_causal, sink=sink) @@ -399,23 +397,21 @@ def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): # case the kernel handles correctly). 
_or = out_ref.detach().float().cpu() _lr = lse_ref.detach().float().cpu() - assert not _or.isnan().any().item(), ( - f"REFERENCE out contains NaN [{_shape_msg}] -- ref-path issue" - ) - assert not _or.isinf().any().item(), ( - f"REFERENCE out contains Inf [{_shape_msg}] -- ref-path issue" - ) - assert not _lr.isnan().any().item(), ( - f"REFERENCE lse contains NaN [{_shape_msg}] -- ref-path issue" - ) - assert not _lr.isinf().any().item(), ( - f"REFERENCE lse contains Inf [{_shape_msg}] -- ref-path issue" - ) + assert ( + not _or.isnan().any().item() + ), f"REFERENCE out contains NaN [{_shape_msg}] -- ref-path issue" + assert ( + not _or.isinf().any().item() + ), f"REFERENCE out contains Inf [{_shape_msg}] -- ref-path issue" + assert ( + not _lr.isnan().any().item() + ), f"REFERENCE lse contains NaN [{_shape_msg}] -- ref-path issue" + assert ( + not _lr.isinf().any().item() + ), f"REFERENCE lse contains Inf [{_shape_msg}] -- ref-path issue" nrms_o = _nrms(out_kernel, out_ref) - print( - f"[corr {_shape_msg}] nrms(out)={nrms_o:.3e}" - ) + print(f"[corr {_shape_msg}] nrms(out)={nrms_o:.3e}") _cmp( out_kernel, From c1dcc8c2ac192698d6c62346a32f9d7fdd9179b0 Mon Sep 17 00:00:00 2001 From: HaonanWang98 Date: Sun, 24 May 2026 10:01:31 +0000 Subject: [PATCH 23/43] fix .cu issue and replace .co --- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 16 ++- ...FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co | Bin 67528 -> 0 bytes ...D128_1TG_4W_32mx4_256nx1_rxy_cas_brd_v8.co | Bin 81872 -> 0 bytes ...D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co | Bin 56528 -> 0 bytes ...1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co | Bin 71136 -> 0 bytes hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv | 6 +- op_tests/test_fmha_fwd_f16_asm.py | 96 ++++++++++++++---- 7 files changed, 94 insertions(+), 24 deletions(-) delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_cas_brd_v8.co delete mode 
100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index f1a8db965a..76ef048ac3 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -298,11 +298,21 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( name, [&]() { return AiterAsmKernel(name, co_name); }); // ---- launch ------------------------------------------------------------ + // gdx = ceil(q_seq_len / sub_Q) is the total number of Q-tiles to compute. + // When s_opt bit1 (double_q) is set, each WG processes 2 Q-tiles internally, + // so launch_gdx must be halved. Mirrors poc_kl fmha_fwd_f16.cpp: + // int tg_div = (double_q != 0) ? 2 : 1; + // global_size_x = (q_tile_count + tg_div - 1) / tg_div * blockSizeX; + // The four shipped _brd v8 kernel binaries all support runtime double_q=1 + // (D64 _rxy_sink_brd / _rxy_sink_cas_brd, D128 _rxy_brd / _rxy_cas_brd). const int wv_tg = 4; const int bdx = (wv_tg == 4) ? 128 : 256; - const int gdx = (q_seq_len + sub_Q - 1) / sub_Q; // Q-tile count - const int gdy = q_head_num; - const int gdz = batch; + const int q_tile_count = (q_seq_len + sub_Q - 1) / sub_Q; + const bool double_q = (args.opt & 0x2) != 0; // bit1 of s_opt + const int tg_div = double_q ? 2 : 1; + const int gdx = (q_tile_count + tg_div - 1) / tg_div; + const int gdy = q_head_num; + const int gdz = batch; // All _rxy kernels use remap_xy=1: swap gdx↔gdy at launch so that // bid.x indexes heads and bid.y indexes Q-tiles. 
diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co deleted file mode 100755 index 442a88cec2f5bc2c7c85d91b9be3c717dd402f8e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 67528 zcmeHw3wTu3wg1VPlZa>)5H-R86HLNG3=l$i3Ip;!5fOO^+9rX7hw?H6MT#dA42a0J ziW;en3EijUiydfVRE*QMTS6;tN_`|Wl1 zY|Zyg)>(V6z0TTeueJ7>Gkfh_F|}ZtnUWH;4EggJO-71)O%Qai8+=kaGnHkdjV}1# z&A7-&Lp|GSx?~u2nQ7PswESbSzNack{&%5UpkX(`*$yHYhA#I@x#7alp9R+tLaINu zqpTsfccFj?rr|mvGeQ@;7j8SVlkIeS-M_iNE-zvKI(t8n#xQO`JIZ;cf4mnz1_kC{ z60C9s(@@cZ>^ywdp)LS8>AES?=gc>Zd%jt`x@5)r;+)$`%hr^xTK~`kPvsPsEnok( z(R=GRbBdRiZzx_>SiE#;>H78i%1Vn%3hyjiSzd~ol9Do%6|P@bytMS|%U7*kQoQOL zIct|KTVGmkJe9M4<)+du|CDn_@v04_g}1F-Q}T?E6fW7YY*}epGV7-m>$PiE-PJMY zF8^zsyZx_mPCsMLomO<?R zQ@pzLjcX=P8=hY{?YbG03a2fcQaELJ?&!kd^QIT(Ei4?7yLw|@VeZKMH5-Q)mTkPN za7kH7;T@w3?WuETPc0~%Hh=cD-_KcBw(^eR^3u3Ktr8pml(T;My0XHhYd5SZzx5kA z>+f2yid!^It?W$Rb2U9&4iKdImer0pM{i+cF*l%k*WT#|`^ z-SI>hy+8_rZaL}orx&iT@Zekivgw}634iY9Lm5Bq-I#5SPo(uv)*53*3C`e8Jq;UG zZi}G%+}a+4<9N26Xj~YLb~lVJR$oPOelS#<5=*&176aPF`13}?_^>>d@$ePuDLZt? 
zFt!~E8%F*HPMpJ#N@yq&N zT|&=Nzairn^_NwC?$G6+n*?4T8pW>{VT1^aO@V2a!1K}scIhJU{H_Ac3kCWv64cF2#ql(f*ZYK*7vtq&Vt zukR7E_>G}J!EP=LxNw*YgD#xo!Xg*0b79DZ54teo!djz%_q{Obz9D_zgD%|>7ryPn z-x~!3yFXPuuzO~8W>HZntGIZ}mEE4IzOq{@-v8pKs{3Euif2T!tFte93ZK@pO5<>S z$an#tAJxZ9}pW*Z45Jwj7sPymRW7|hpq->R^qzSfNnFy_D&?EA7q**i#28?w~_Svvu@X4>{n zmb>+eUqP%u(G^4rAk$Oz7eR)sSeBxh9hW`&R74WOExqX25$w2wI(X%?EKfjUns(&W#1bgqvYw%*f zL%~)Y!OM6)BzRA^K78?Z_?yV`-0V<&EcOWS;f-i&OqsEHbB_xxC)8tarMb{@NEfrL zZYN_E*P9nxj?WnKdZuL}u3i1Qd7f#Q=UKB%TwkfmhP-ZO8OD`qT?;C$F0*=ct=`azoFjs&DAo ziVyUo-VeM>mr&OE=Wn^8H|l%0;`D(4d_eatmcI8s2453;H72DP*~ZMHCiE3DyE-8= z&9TrIb0q5l2N(wRQfZv5PYYiDN9uxcrHgxQ;W|61E<5Jj4^v8@c@8xC{vqQP*y_*g zBbUspj`W&e9lK;fH7AW&uZ0T2FYb?pN_W#oVx^-(Gz9ULEb$fCF`UH6DlH9Vj~W%K zH0r?bS$w_+Kl{G&vr>g0MA;AUsmJFKK2dz0!{>Q?epr7^>>6dS6JisTy#iOZJs~t< z=fv1VRR^fbg0Y~Y2}Xj--Y)?kZSRv~lU3d1tjVzVgZ1NMi{BymP zYNW)BTIfC&n*$rV(6pR6@DJboG$&?`uZYDeVj**wL!S(p0f)Xw7=;fKYTGbVk2q%P zzd63t2FIK)e)f;4RyXsSYp!P7fAkHhU|T}ACDd&-3>k0KryIvG#&5zN-l|u&WgMu7 zXt78}R&^vJyE;AVJC*6#|5};8{qtKtzy0%7p|R15(AdNHG~jatpGJI+;&Tk2xAAGh z=U4bN{_8 zxnD)hFx}Yf!?h1_`K#xRwb}ni5o^DSI>uIxQ5=UKKZmg*{wV8Z?z`VYOfsKC`AcIf zt~Chi@t;68hJAH z&g-zv_QWqVDoNMLP5;DM;Xx+Z}K z=ezfO)G&4jPzJasC@@kaaBoOpO+?`SsKDALfd}@@7kzvZ9;Iz*lLjU`7S-OXMp80$=2@5Xga3ZFSqo?`ZTx<+Vr(}%V^hMK0z+cl-{sk_5 z4dh(7pjFP33xyBMV<%h77eJqsrxtuPJ>}H&EZ6ibr~Ze4KI-Z}57%|B{y7#=SO27` zbM?>bhKz43{gXbL@ru$v=^LOc>R)(%Tj`(IjVa$&`Xqfr$}39$q;JGnP@jq?XsCZ) zAM4&q-`KsC{uuac`bN-D|KvlNsZX}$HT9{=UHz+aSN|-J!e%sm6n)Y3QShh!NgoA& z>Ywxtkdyjkc?0C6K6N?usmrNaJ<(-;$jwH?@r)?9$cVy2zb*6KU znK0+b_dH{Yid&I@z`a3%HAMpVhXmF}1RjVAtZNb|aU9nw1`FI16c{NIxHlxQCL(Zu zRA6nBK#AjWy*nWAjCrWI^$B^XIKIq{;{p!{1vV53JQ5Pv7!i0hD)3m7K#AkHR&iY5 z!Jxosk-)T6m+QKKz%%Bd^zjLK=r}$ecCX|36kIbVGA`5SbsQ&+ zj^n(p$#_r25zCukT) zcwN`MmA3IOkiG_TGA^*Z268ek=yJvdUCubd@>=lG^psQ6vs}}&oHkhidL75-;hJ%f`lPOO z94C#A)9&Jv4pG ztk_S~_XoUF+%byZA7(I)ztHCULx*i1Ol*^%mtT3~!W%1Z4Bfb+bKh$uelPyKb*R>Q z65k61PhY&ZvI^e`w_8?^jLNM0v$kee;SJa8=5<`RlJ$C~iFZn77k*ER@jL5BtiwC7 
zNkLv`z5WsCrcT;+=&z)G&j;EM`JFUYHPxj>{r0N1^$puwdA-!o%Il{uwWT$F%DHsX z(NI?t?>bXnwZ`m%{w5u*1m1`EIKj67?@Rm|!S4aykN8BvtAGcHPZE49@C@SD3cd|^ zf8tXGzZdwG#Ha5vrmAOB$Z*1E^h8<9Afc%Qekt**1m6PuGU8VYeh=`=i4PUL3V1s4 z;eu}keg*Lnf^P%fi}*;v?*-nQ_^4gR4(L?r0cGb(hM(>@_cCaEco5^03EfyTMOP02 z&mwLMz6*FZ@#_SC82A9<*9-m#@PWi{5d2Z#gNWZKcs20B#ODkC81SozFKo4k2Qe13 z$rjfp9|BIBY;kRJ7jW8Si))h)1E)>4xHkC+aN1;xYm<)xr%krFHdzgvHre9Z?ys|;LSus^Xt74jh zX%*7}jf=&mH(!7^_|UDjNXGka;8zpBS#Sq<4)MUo8AK4Yt#1t z*EYR<%DI=}2<8RGxXqBu>)ZdAdbse&wnLc5oH@!yPB=Fx*y7Ap@T7C2f=0zW1=V~DR8 zoHjL<_y)mgQ{#x=nP^i9b`9Mo*fsD3y9SY5 zl0n;4NZH5<2M>fQPVvQa%slDbqo7fN7tktBZB@{!*rs4w#l3)Dn;OqCzDvfOHgyg0 zFA7eZnn3(+!D&+yiB||tn+g)I6r46SiTFK&)21d9-zqq5>RRIWCfZbjU1Q7>>>7B2 zT?0?BYv2iX4LrfFfhX8CaBbIKn>trAcx~!j$)IftUkzY0C!B{AY;krec+z=TL8Iak z1XZqRK{2IL|5A;ykb5N#}6f@u{$2K3rg0mt~uGUl|Y z>xh3vaN5*t;$Ib@vjL^o0?1fDZyz|HxmCwqD>{(HO4%_ zu7M}mHSh$x2A*Koz!U5mc!FI6*LLl-sdFWR*QU;u4BDo+Fo<#LX9~7BuPAuZ`MH8d z#Zd*#idPl1Dqd4Ct>Se+uT9P47(XpzPMexf{GSA;O)Vh)jNr7Xg~b0^aN5*O#J?pt zZE6wme-WHEwV3#K1gA~iO#HivHkDx481n?X2A*Koz!U5mc!FI6Pq1s?33d%!+qKuG z&Xo*an>trA==WEIn~lM*`NwUt?gq@XAIBn?!LJRu`FmCsVBT$ST3Z3ffANI14RFf+ zy;64q7JOxF>OTO^`=`aJY~OFvd#P+c;D!%U*?ws8iBz^9b$gFAwx3veSsL3sr%gco!m73)?1FUo>wilHUP4(cL1`lyf^zw+1Xdh&c0H1_LaQYSGK== z_$#Sw-*3`uscb*shBsB;hZc+Mqiz@3CsvB=(;pJquYcmQG`7EDq_oeNF6{@+m-fS# zOZzb!r2V9OrTxrDrTq=xkoLW&zM}4XRq2~b*WH*#9v4(aX-mx>dQhjDbg>lk{G` ze5cp&VcqEak73Oo`}`Pad)S%I$L@6tXrYJn9=>d+M@FC;Co>Cl}Ybpuf_d=H%fz59OKm3@0Dg`MA!qXE~#B zJsQ{9c7Zb%*JD9Hz@F`l$MtxW547hv)MHQJJzs7PUj}#?U^-wrU@yR4FCPy}|C+-+ z?SYQ;u{nIHJ=l?cqMbd&k-i=eUvB3((qHhghq?W24qsvCy8Q)TjH~qbc(}Kn@AkJj zO#MiIo5R$P^tU-o{YZb0hp8XwZ*!RX;n;qf<)^)Un&sC%FafqQ0X8+rp6dj09mMru zd!92H*OOs)SJ?}kDY%}3@*(z3&NN(4!}ZnnVrK@fXW%-=zQvh^>sg>5YJb5gz;yx2 zhuKBWY+TRA^>BNMGY8jmaGh(HICF757xW|SWzIZY&qH~hy~0_5>jk(TY2WJHgzKAd zoo}yl7UOy`=ttRWl7S5;xEl%`j`02@i6^M{G~Ze|B`+-hv{F^&*m`wOZs^{ zO#hO8Hizk7($D5F^&<&UZkJL!_*6H{?jNo zZT-_IxBl(A!*#6ZVC}2&J;ZOTz4b`j-StQU_XY*l6balP5?C7%cpxgUu1TP*N5VDU 
zjqMHy+!GWSDH6ChB(NqTaDP-_ZIeJ*k0jS;&4V^$d_o?~TS>i_MWF(c)T?=HT1 zapj_$tV2sPp75+ocGrlxYYVzzUFR)WJHfR;+P1jHf$L04P|kHV2G*q*`(zCjetkXS zzsB^Bw%0iUDLWh#*x)x-zwRGrT4U@orv~l0?u2WJ46IEl5}G4@K+|`kX^pkZosg6@ zMg$(Uz&kM2^pCUeaGpkc%B9yIMTO>AU$hTgYg*&&jZTx4z10z*&1FMP zi!nYVWepL5M_Rx;eWdAU%&$Rv_8IG1`^-n5qe9bo8QQ0hFfGmnO;UFBa+LKNWm=pQ zo<=(vgR(vxk9}Cb|ut~PXn%O@4u+}FiG|?h~heHAzynRST8}<`x zYy0fOnyiS>B=jXJWsOY&k9zyCAMM#stjFyW!#W~EXgaOuXYg#T#tAaVu$HJu+IL#d zmfIfde*47GXDzE&MlH8jp~eq)VrAFlf`(DyWqU*>hDe}cW)@vZ|3V0ZzZHjK5qeGK%oKiUku z(X=MpRSs=^@cE|yT6-IQvlM+QU&DH;D?vASp=nLAx8rwA*AM82SYJhXhkVZTPqlZb zXNHr&>5qJ#IN{nKeP%w^Q>B9s{gKZXSKyj{N5B2pwKqkFwN|vX!23Qw{k9kA=wI~D zxh}s5)|$4ncCdE3&pycL zmESAx$F5ytU3H&*kkKo@SKb9KKdi0pvkz;2hz zpc9g^+K9jdwC!QA?V0v26~`%;UdI>}n!0SXAGXxAX4#K8O;UD{ww(*xw(V+%ZLn@# z;yCI#_j7)sPtsQdQnn{3FhV_N!-navjN|Mx*1Ag^7n;4Cd$iAnq^u?)a6fHhAZ(j{ z%Q((HV|~2DaiOW@+@t-sNy-k;HU`7i>Bo%YG6vsPW5RWq#Ic@U#b!g=bh;mm)9eS< z;Hx-YB<(xh55{Ts1MBwt>_g0ocCsJOpqF!gH*yL3 z#1Nx9EBAi1V}Fr%An_f29)K~+U2a+h_U?rEP8-jIjbCR!uHv*BC&;VfJNi5j?ecCj zt=aY-XHv>9_LAIIk&J`d^ngRx%S6YVn}oK-d7jrT*~E1W|(&*?b8c+Yzw-y$CGbKnz<1Dx-) zzcb#GCvq|3@qQS5fd0-oPWv_IBgK#70h#Gzx!_0t<{YQTMazgB4i)RY^3$)itc>;K zhx`r|>%Fp*x0aQ0pYkF1L&bfs?DSVHE8~8gA7ZOlcCW0Q1LFK(V_w-==arZ7pFEK} z5|95{ey_X>K}Vj*D~ZQ{Eq{W%UVd);*YbPi)x5lNb6)Vu$~j^)?x~*j|0vJ%w9oOd z&-wN~Cm>~qg900dV2s9p(X4{)P58osg6@Mg$(c z8to@knAS~pt(r&Z)B5{BRA`P(K>Nw}nf^ugx1A;_dpig1g7=u#V*7xaKPa<$RzW;9 zK*MKxj$9pKW!>(kk-s^J- z#*uRgapY8}xui(=b-Ev%OV|(OVf5uQe`ou_IfVT{&W1eepJ6}vtgrMBxz)4&8OqJM zgzb?VBG3BL{BDDx z+VpIUEp2no-KL*y=fcK0XUxN#!RLB47LbY0^^DgGTz)r!AKNcR`=PLh$@e0*yZn{! 
zV4fI;@+tS@J15|@FPYd)W6%-PZJ&mAeC8*AMUOdxa?rVm z$WDH9!H@GQ=O50Kj1ips=Gsqu#K5EWL?pJa@ICsSPBQ8_Fx?h|V zynWGpllzq12hJVjiF_e7cO>^ozgtW03+EEv1Gz`ho^kxGJ;)dD?d$5~V*F=3Y+8nK{4MN{wKo8Wx|$IvpY@n)(yy;&|i|?1%4}LG1kJuUl9oFFOzQlK;g3n|@%s(u{ zwhwzEQWxPf7yVi33%e}>$ z^NoyaZeKm$5JwJKU%ppjyY~7P6n-^??e&fG4*Q1uGd1rN34NQsvAn&$asFZ7kh9j8 z@0vo=KA~?cL)%l16Xzf`PUs8gpoq|)@i=iVA|K?*_2qk~CXuC0Us&E=UpOaG7s#zs zbJ94B-{S9@mM_&xYr^luy`J;=-vP8<@Tob8{YLIxU(QLtV*QIg|8EFi_W9FOhov?1 z`e!~hAE~i|T%3=7OCGQK{J&@Y8$SOZ3IE;aKTh~d#B|qpln)}NzeoCC`}}`q{kuN@ z2ZVp{`9CE5lTXb@XZLdsS)x%rfvoh=G?oO=`uIaLa7aU`Fz%8`7^ZH_me zk~X!OWBiefIc@50;vWl6n{tT%RdCu=1@V~Rw5bsBl!uYCJn1NHs*<=*aN5)s;;Dku zrtTqr-ox>{Z~A9~T?0?BYv2iX4LrfFfhX8C@C3UCo?zF&wOxB{>Rid-wW)I@gSIKj zr`+V`Q*L(iDIGVTGUP};VD#vC)!kkU1Q7>>>7B2T?0?BYv2iX z4LrfFfhX8CaBbIKn>trAcx~!j$)Ig2s%&MGn@_pf&8KwSe9Dk>*o{+?Pg&(iKIJyY zn@>rb+Q~7#LdKjn^$)~*3r?GQfOsFlX;WV!-cNAa)Puw`1gA|sMEpv@X;Zt1X9-T5 zdYJftM4L*mYm9k0d(%#;0$O!d1u!XaUN1_8ymZ9pEEZ-JJFz+SiSFr(coK9pn8ApL@^Z{g?mj1B>No_crgl_LnCt z-uKQw^+>$|?~IS3?d~y4`pv%q@ApB6xw|e8_00Pm3CMo19PiW*Mx;KPk8<{l_hCP7 zz*`KJiZy-ed5z4{-vA^sO`Dk5|Pm|C$Ql5q1dIi5vv0txQ zEJypJlw|a3Y+w?CD?_?=g zWYCj}59!+LUr^dN5VqGp@}Yb+gPvD>iiEyR|5)B$|Ja^mT|4NVelheXB< zyZge@4sZXp*#BLB>5yzC3D_qGx|=&|lK? 
zUQ1tn&m!OT@1L;P{+>%OSM44iIYQC1f3@fz*QC=Q?)=jSioPP{W95hUcE2Q*?Y@+8 zc`AQPWPj?=AJWtA7kx+UzWB$lS+w_+uZq3j{$KA|-vqrL$NAqB{&&4E{438B{`Xxf z{2v-{c`E7kc#)p`Z+_tw#rL*fzp3KI7gPVDv#>igS@`RxaGC+iwjMqA zKj(=|&@jK;J2$AaCqkK(-^@hbHge8u5Pv9BBV^(p-z(S#x#xP1Z}#=KE12V*Q&IC< z@%v+tk8!=1amj1pm%C(sQ`6%Kd=m07j>gN;PpyZTx7>{@jpUE= z)W^AK?V%Mqgv2c*8C2<1g6 zUjn!U`M#8^X3>-VSP!}*B`7aJ{VjH#6O=ZMgs3~Z40X#;_j&t!j*7{Pp6^#AG{;t; zZUySTU>|f?2Ku*eMcJ*mN1o#DaqnsgFv2 za&uU6laGfPyEqQJ7cG$(DfEY>&&^@3KaWb=214*UB6?^Jb4{_1vDBA}l|tJnI)NNm zTihh=j*5Pchq;zm$5`r#@zRiXZRBS;L z9y;#s#$2i5uFxjP-y}Q}3?&W`Xrr-5wtPTd1d#?8h_S>e5CWr?v*wEj0^b~YrV32WsN`E(lWfzB&C-DZ)c{#?kL9g6iIkn!r@_OadHpe)x z*R!hmVscMxjsx3SX}^N}7*)16D6l4kx)ADawU0V#ep&fszapvIzXf$$ zV2`)iuQ?$ptBnXeP=&fGv|D8#Q*pd;<@5cbQdhSPb=y$4+J4h%lCp!_QMMgrYwWjG z96z@59Tmq7)Z;ge-Qpw7VftJ^%Ju{WM#M*&!}K{Fzw1`QHW5dQgl4b!&hapPF(hR* z5rO-~cbdcWMIFDR&=1>3g{D??)EuUdHc8n5(M@xhzNzDP1N5Wfx6pK6ZZ~dIPu{p) zhj~QBUE$a1e(1Oz<$Qv;-PwNVxZQwxm+{s(!+z*^-H3UZ?TgM(ZXK`dF#j@McUEp4 zucLfULAxEWrya1(we~NZow(i!+gfKg#p87Y^g_A^Q1<}pZnxiY9>n#7xGuAQ6_3}A z&;#jqp>7xWt+#*UJc8>-Kv!-z$K!Pr@-hxrqplit8|>dYyK%i6*LT>zkH_l<$_Ki~ zQTI6b-D&?(#cT1U<6#xs-FVmt85xJgmzu+j?czJlVfw0$*T*0u>BNVc!}L|@|M4(= zQ^#xSlyO+}(Hy34O8=X~^hF)7sYB9#%s#wmD?+) zim8yxE3a2R6+59Xubf`F7^ju3VIK>|YQ2|5Z8x0xitht{&I8OL@SJ7K+=5J$tGpTH z78p454Dme5oPb+?VEQ-O$DM$b#m{oP1?!b>`Mzo0WxuCvvM2ds9WrgQNNA6I9yDJ# zWLlf-KRY2Qi=X+%+@GQsO#c_{4^*xi3U^PVyN^wEHn#m|nLhqlaZppP(04;p7C&Q-`6D-d$Mn;8=`-vz&tjv`M1{6-0opT9f;Lr(2kA+(*=Gjj;kI6-EW$0qGNt!L&AuszSJQ+rmpvU+7~ zlClInGiQMP#-0^^@17MUFPnbHZb`^#F2eYfyoAl8?UN33+Sp&7fv5MZC`SFVpCYft zHY&Vl+|WN(Ag`^`HY=FhrpAWn%^{ay3EHgqxoK^&tqR)ut;kcm$4;wY4gvWqU&A{5 zrJ%d@b55MfEK!4;s!1eZu57+)e}vRxCk|0{!!5(9!4Uhs+Bg zKc45O_pB&E4g>v>Yy7>o&OIv>Pv}zbS)t|k%FA2;#S^+?E`V2lue{6wAU~c5$Q%H# z{9N--9@w+OulKBIPkyfTkMqEKdFK0R`5DuRYZ)~^udH6#xUQeP)OmyMo)zusmd_OA z#j^NZNP0AB@xN2hW;R zdscL^AKbG-=^t{dJuA*oZpLS}=UJAF&u1z(<23t?JuCcb&x%gU%{?pFU!Ikz;yZG^ 
zmSfDy{>!v>+UXVEJ&9=J<*@O8uzRUEt;PxR>OCt~pk4XzOzQ#LFYz6V3u6!qTL<;ZCVf7feP=w6&qpK583@yJm6~<`u$EHE-w|A%te9DO(-W$|-BIlpQDWo|Ls^UO`o zCq+VgxE%Mr<4@wtAt{TWIm-D>`!VMe_8a?Fs6CLPLfd!;+Hd+u{JKfXj&fepzRY=q z{pFdSDwkMjIxRQzh-Iwa^Xoif;yeqK^U0a^gYyad!Sh4az7?J92j>y?gJ+GZ=lwJ6 z2cP$q{vo$|-k0{B)<5SHw&$6q>UqDDa&sPGf3a^xAMRTb!gy`#?(=`e&R5UhI}l%K z`#WL#e8y)Vc|Iz0glW?cU~Fld5B}Bkv+XX}IOmN=FmLeLUX2CMAa(bxsCN1720ylc z9PQV_9=4q4Q~Oqszw#YChEr_vnJKzV5vpX>-Fn&DXwto=q zIVVu|2%o1QJNfMbKhCY3e>hh%MsV(BN9LhDpMliA z6+*wi9CSN-`*7yN>enCW=d*&+zLpUD4%~tA2e0&5cLMGj;8Xin(7)r~O@vv<<60N)Tft{C&OafP;rGWq5vhyt znT!6c^abDG+(I5aPndHH=|I1i^Ctb9^B?^?D(!0sQNN$_8+FC_K;3YTp-!-Gg_>iU zgkJVTfX^~sP&X_``vZ&{)Dd+~op7$9E_i;inrozeLSGDNyC)!ZZTiCbM#eQazE2Wy zo_);uhV9zxTTuAb5VqGh&O7WI&rMd}V}!m<--@Jto4#@WVc&Sxvf8&o+9&iaByCSQ zPMm|(IH50U-wL5W<8k6#L_R#7RV|SNEWQA-11x-wMUw zos+!#A0+R0!1;)Lv2TUiM=5##gXH}VI3JNW&&lrWz7>i;^nR*+D|8?Az7>i;bl=&1 zE6AT`a(8Cm3i9Te-JFy39uLWV)O$K`PKxtJT-SR%B=7B@_jTYr73UAz)cZWJ+}lU* zz7^z;eJj+yN~hbmg1mW7IOn9~aZDaJ&Pj3Jko{DB)P2&tll#VbO7~Cq=hMD#g`YVj zVhFBSYA;3J4HZ{1^HGji0tHvNBj{`K}L z;@js^2AvakE@hBB%g@dMkDqT3n@q54;0bmOJi)GkYrFQ^)DYVGBC&ni_0`007M$nX z=MewA;5^@cDDgtUdA|MG%HXxBb0veesgTOa{p=iYj^}8O@9i?amB7akUoZF;;A4qz z5d0qCKfu-6r46Sf%x5m)21d8 zuMqr`$l$f9b0veesi+&LK062evva`J_dVG84BGiaV(0X&nZzF!oW3=S_@jc;w`}5% z?J~4Y$-WhzodeDoF^9T&qE(kS5dT`MF6R<|s#TXa68}bGoGM`1mxYFL>N?_I5u9;q zHu0|t&Ny{F@vz{ZL|60H)_c2acH{e2Jo{un* zG}e|g??L#r=^xA4zkt-Y>7UNKh&=tI;zPRj`WKY;4TSCWPv>FOJpH`ne+Ye>{uN34 zHvMCJj&<$Rl7|tJ_HFvZa`q=8^=RaH8gJ-R?^{9J z&zLIb#1C5dL8{ufVhY!gJzG!vA6_bF#*ewPM_PQ}3h04r<$3Q(Tjky}AM2II``z%A z+;7o$^A$b&TZ8_Rp7*+P{(Gt9J7U9$RJOnN(aTi38^4pQ=-Izo^pEQq=?|lBm$U09 zR*D?cACj}{uYcmQH2#*z{?wsAq^I3qG4d64MtR2c*VLKzgXX`P%KHvq{+`-bLXRVJ zJIH^`2H`*HUg1CUQQ?2XH!e%lxgZh9PkQq2J@pmE_o~u2)fwvJwtkRGzqFs~q3HGa zFxTUB^6T7?TH3eEza#l|j>vB4pSd5WlV9hGP=1xOcN+P1&PWaP&peRR$*=Q9C_m{> zr+=;c)5)!USTD5vgz8w*viAbB5IKt-9b3L!v@cLx?u}xyOL!r{@UvUCj|fTdVe5 z5PH>*4Un&#^0_|Mh`v*w%;VF%Hn=)7gvV||%E&m&F{{IT#(5(k^%2U(YtZZsN}1ZX 
zLi5w}L=^KE+mS}gUnKOkgeXgpQO^qvcm_~;)k1q(9mezWF!!rGYT|25U@we3E`uM&Tlm~IIO}#tM zG~oUn?pqPdti+~&Mw`7WAXlgNu7He;?QQn1fNYFu@jWVB8=+1ZOXKCRH~Mh{?iurz zqYw1!M)F7bDfh0xeLA&w1@DU(mAqd>eD4a51?o?=Z$y0W3XVxTeE2&w;~QMqn)6%X(R-XquHp-MqM9!tn6L!2KKHHnqvb|HxPAw?ePxFK>zk& zlnur``q>jy&U4)x@5tU2sOPhFPt^B(xjB3p;AMazlp`J-j7WVn9p&jL?*-WFWzXIf zZ!D2GDfEY>&&}bU_CO~pZ5s%|>xk%~Iee)-IG+0)fxc9{6xv473FNScI8D;-sOaZ- z_;NcZp8H$_J*nJhY1c-6L)sn;NPS!RVMl5{gWkM3(7O@$Rcw{^3H=EQuLi>8{^%GR zLHuX?BB4#_56jRlp+7pt)?gk`F;?2O=}$=7o^m{N%&o=TsA8_rC&(Wa-tCTuj=j4v zU#i$EviXML zoLpSz!Zxq8r^R!jYoQO)<)JPQbeZ-HCm+}OxX!X?#dDz}ke6|BH0nm9F550}#^QP` z_zkdU$8(`;C?Dv?qi#It2HJB}F0}Yqb2!}|rt&rEgSDK)K`TDi9KOQNb;PG&|8`zH z7rG8Il1_Z|R;p&$*C~vDPcQSJwFREqw!YUfI2}#-D2`ALzWY zGj4n3jX%@U?m_F7-z%?rmW6y?x%HY?ue|D67V>%J_R6VZE9COZ>y=N%0?5K=F}sj_h55 zdgMUt79VL2)8_(GwkIesB0ka_rqA*DQ_Z9A!Zz{jStK-j#dnT}>5CyLtBDBQFTT?p zrZ2L+)(_i9g{D??)EuUdHc8n5(M@xhz8QZ`hJIB17Mjk>?Z)j!=*b(m>oAX~xGVfR z-47kNqnu9=w>#Sp9k&}We=yz}XV?!NuNyHBvwhJS%B|yd9p+!g>(0up<8_qJDQGtf z_B0E&In@4wQ-JFN*w!$+C?2mHpcm53M%`@G4Y!v#b8tNe*SU5{JYF|K52Txmy1C#t z!d~Xg!}UDS<=HFZ@j4258HX33ZUO2>+P6A4;rb?A=i95|@w$QXfo?JC7K7g?dyR_M z;!DTFDz>}Nw2hFFaaeq*In3BDzSA70uj=;~#~>r=#D|*0^i}Eq@i2W;$7|}8aai=x z9Hwtd|C__~MIEoHL(+*pn#1%->HqOC^{L}Eb;vmEmE9|=j@Q&7>AbRgW!3ST@-e=8 zW%tUe<2B_YomY0RtU6vR`9SBDo$=Z$ua4K0k@4CqzgJ!rQy`yLZm*mwWOJQv2-sz(Ih82d?>DBm@ZA;_rzgf*+1bef+&|li-bnpg*egA!T)jl*Qkj z2LwMHl=}F)@FKw*3CTn0L(0xt4)vQ1bi&*)y*G+?FBI>dC*D0yynC5=_b_2jn%m6NF?tY!Tc4yzle7~5$`@B%rVmYfq3`U@b2fp95cN)!m0K| z;Jw=01A+Y8-SYtDdjAEE@ux*r+R~>*R&BH1eIRI4dfx}{{tn*#9K8EB(5Cc$4c9JbCib`SIRdc+%?}cyF#dV~2My2f%c{Mbnx!oK%3Qj zHhA;4$)~+MZSrY1FB|3U8F2B{WUzl=X0%;dRO}Fll{~?(Py9U_o4c|?VWz!by@0t zYdf^wwN2#1wMpc|wMFE^H9`yVzKib(xV}R1dD^|FOUrc*HF%dqnutrowJqewH7n%D zwJ6`jyPVVU`3Ub*PDjJ{8KUPl;zCxUDBP6QC&$K&fVB7*OY zN_~6{Mw8&RgrJvqB2pG#HxUp#LI|4p8i}Cb`-`MLz6OH+ACj`&gl8j%(g%JoQ}09o z)jJWv8v=qK2?`$nO}9w!V}$W{BDhxXL;xX2{P(|z;D@79AOB6SN$^HO(91g!DXTN2 zEdHBlK=8vssgM7zStNKPA$cf$NLgJ-%FbAhgc$RN*eZX+r9Ica;l>!e3)eoh8}ATR zY-op8`w)MtrOopd4 
zF$Q|p`r*4|6=Pglz6)mG`JPwnhwto3!*}v3#<(YG1aBlH52X(&s|!il8Ox!+pYS_|{~FUj+Fs}Qu^)+loN0}*%bY&A z?mN-6#@gi$zmo^1n*MS29Zp}A2d*`(@%BciAFlgPH?3>zO%C53WL{_bC)k^v0Ln9M z)0${IP6n>CuQ#or9ddZDf!GsllD)<0kMe;xn$~2yO7S0zJ;AQEw>ej$eDFfknqqHP zJcnQ(q^b4}^&1zzAN0K1^z(PPo}lY_m1*&Jw@Yz-*-+Es?{56Aoj%g^^LMt(P@X=* zwD>#Q<+$!O%Cz{q8oz6@&-`629p&sZe@D9l*X%QO!FvU;#}{?c3+3!HbwU39vF8_c z(HrILKXpN#nb_xuy3p@pKMk_dmOd?d)i#?6y$`wBw5HiRl}+Wqrl#8uI9VvqDKxDa z_Jhi%hQX$0+Pj==ln+~KTC?m&luhNrrfj>~8G!QK<)&3&?^b;Bz~?&qac3aP^KLV( z+4dgACm(#SxBt-@g!25grZvZoC_baX=LY*pXE4e~uQ#o^_FhFl7W6mTUw5uT`Pe&6 zi{};bT`c{JenB6h&a+`(^bz`+_7U3GK-d?3gubSIg!VNU_C+6|-)SEqpCRBwAEDoA zy^v21_)stOL9G|^83sPo3w==Qh4i_gr(WoTS}(NaPovzl>rbQH+Qu1A$HTsOp4AYH z$M`Rr7SF93kMaqXrl03kU5)Yy6{f}Wg(jeU@_nYC=LzMYJa~_3@jRg*%BMYK`gwj3 ztMq;9n!ZlmvR(XL ztjm|Oa!*G6F7|J!PxPC=wQ9{|n{n>ZVG z_3LxKhUWsnx!-W<;3K1f^BjyZfIRDBEFjOU7zfC6DtxI{8s9tidd}y62he)K=RcS4 zVmUW47H}@2&nkO{-7}`pcNs(IyNnss3w@h5}&}`~USW_V1;?^i#fz{d?(e!h4c2@_n0U&#QM9W8C*7V_M$>@N9SWexn^){T_hl zqpSBMgM<&yCs*%D2DQEi*geSQi+v^?uszX#ws+Uh-tOT%-o)q4`q==T5}@zL)A zI-=3<0Xm|IzbBa~{o+}{>Yc?*_dUtXR{ikYV)cHb9a^oQI#+i**Oz>FPOo}T0zJpy zlel{38M)LY&%#yr1g+K&&$m_YNn9G9O-mY{JFE4>Ggp;7?eNk1>4-+_rz0Bm4ghEV z;vGQ50K|9t_&L!5!S@HHK7KZIk>GWNpqF_K_}Quf!4C(e zK7KxGk>HJlI^v`C(-DosnB0b(wWh*Xqp|bxoU19_IO$nR;GVv(ida?b^!GO)OYO~ z%DUi!E&XHJ)gi0{&WH@C&K`jJE!kDss0*P^_uDjjziRYLF}`YC@TwV0F}h))ag|}^ zn#40w40GH#JEL;I{R3hHs*Pc$kx`ksH8Yl39qNyq{{fkmxUMuJDMseladynuThIFi zq0h{*S1H_pvxkC-c+7Z8(Pd?3hN2;<)ADRFMh`#j{hr7DMvt*8jXLmo7N76c$BggS zQ?epr7^ z>>9K&j0v#`XcLP8--)&<*dJkMtG=_Uv$L{8pUeJwWp*U{K4^Xi>Dg{#Y@%wH{a|KQ z=7b450k>vOoVb(aZaw&QO9{q;iY^!l?(C!V`c!=;)CK9;S372_c1jnsW{ECrdne1?dd06GR-ot# zA_Y6E;<|V_u8Y~R*{Xha)@peYnSQ_m!-4}2k zfL(UW2&A1reK*wacmVkO8EHS+0f_o8KapJQFec9Q>(iX{`Caho>ZBN#yyknci)D0S z?hw8+KIcn!zdNS$dy@5{FP-~YTQB?4`8~<{nJ?X4!;{YatgSbE>D=qt`gdPC*Q{8- z^rgFNc+$CM#d_D5&i$ROKlsu=B>a;v-Ce_z-a`B%U;0VHzxvXf@V&0-hd$$%fbaa+ zXS@UWtCxJne**sIr#|CXfXzSm8NUYn?dv|{H-Nu?%V+!-;2;0PXEXyI|4*OsF5r9r 
zQj1barM~Z>aoSuW6Q67N{=nB9$TQtW03dQzxa$d0N?zs&v+B? z?eF`Hw*Y@}$Y;C_NIm{LAochQK70I5gnnR=w2sYmLWdZeDIN9vh+q@Jlq>X~|^ zo~g%byq9I(eX^duUfrsveP91fPrt)+a(jCE_;;;(is6iuzju2&&Ri8^^IKnSGK>J> zuQ=}lHXB9};qMLO{o_LO=rN&r^lhPe6m3ZJDB7HO|MWC5oN-`~z5_lJAkRk+n@;%R z?T3a5Xt^c`5&)) z^Vr2t{Bh}9(d*JD3|+Byb?MNh#bqlt6t7u6w0z~7ARu!%;EnDth z4_&=7Z`iP*%T})_E?jnJN#U~L`9{vr^(%@?O3T&{ojh%Le&Mw1W=txaws1<}l;OFf z3y05}UYNJAa76Cvjd_K+BlFj699~$q@vg!pWhI4ojLuna6s%lxTWQ(Yz@+P@1PThK z1afn8hUE+k3|vuOzHa^4p+lFiEMKutB20L{@T?;SFO5Z_0{Xj*4|pWv^)ou zficBPm*kBYv8-g-u)LDtqsNR{Rx)OI$&&oM(W7$nN=laIkIch@ZG4f~n3z$^Jnf95}H6vc4fCwWrFatBgMdT)T5E|ru5V_wZVNgI(1O-$?Jl%r>A`;^T zabn1JYsO?W=uWa^GbZfN8cfEBn~95x#@(Ix@BTaPZuTesNmg;7|M#u$R8u{}-Tk*8 zvpi4poww?%I$u@2_0_5BKHo$qOq=NDM(Te| zrLD1HLH>eFd8ep3u?vEWpqzV_!G z)1~^Hb*YtBss86xYS4L=8g^c#Mx9rw$a$3-e_o{~pI51A=T&OPd6k-TUZobCSEdx~jwf?+HZ8(=w=j+xt$xTkPV7$`zmbGi{zH`aC<+t3i zeD(SzD^_k;zHA9snZM;D!e8s<3+q-|Vd<)sx2$gUqZa8baG~!2 zu339~P&cyPQdqoJFTG>=`(wvV957_b#Oo)IT{3b0_$A{93>>y(z??}-2F+h`&A>Z0 z3|ca9@Q~FT1}s^-;l3p|FI~6frnSqK+%s&6H(}QF3DcHLoICxxpZ2|T?aF(Wu3w%u z>V(zK_xrB9<<7NBZeDZu>h-sMukX71?zm~qs`tl#;hXloeOXQ4yH~GVv1aWZx@}8N zG%n|_`rf^6`MM>`?n4V#-n?Yp`lUDD{u-Qr+7dMXFZv$G0b>3gD! 
z?p(WuPXcS!F1cgPvgJ811=fC(()glkJ^Utv|^t30+s6^+u+GAd9fr!f^_PePe)w0b|R?X)oF)2 z9BaGX!qrwX6EQa@$K_B+nj{c*<`a<}tb?YdNVt+y`K z^U2oe(e|6avOnZJyT6-LS=$}?Mb39>mD5n0>-@I1W2@LJ?9Z6<54G*fzOugq^v0sD zPo1Ujm7>Qfm%1YJl*kM6M7C-r^1_Qmx|fIyT`KaT)*>%%%aIcsyE$9*eR6T1ucV4% zQREaoRJ5h2tTI;AZA+{$mbX8Qyqvt)D|);qRo2Fx@7A`BsXstaok|?>dEL=)Y9aE~gv%vPV%FuFD+seG$r(qRi6NQt%;_GbuIc z5tKc?bkd|9F{cjoy@Ag!@p%)UgZR9K&)?$ntJ*24DVjr5Q&UkUl>**@vN=;@Q+ISr zmF$WE6efyyl@yo6zEbktvXVr}!zHnj?bIhny_BcRl{{8dUNm*;4#+Ji66WT*tjYO?7eIU8nVV&n_=-y59v3LFVQ@hk9fCdtL!9UIAX^<;99GxM1_lcF*sc+3p1H8Ag5> zxK*oI@r4&|p4kccolaoRkpj#S)3#K8<->HwF8b9Oo8y!?Qx3W4ub6w0A9M43g|@hZ znGe~+ao8_q&hgs3NQeJmU(oNg3a%BdOPb}Cr2I!fr3{@{p=0hJbKVA5|E@N1`J7#e zj&paVE}yrHi$tp9d@V6A9!bTPS8$G`mJf}Q2r^fSWv+n7m?Tc|^5wCTp+jS3P7UgN z1)rZ_p8ZtMv*jA^L)zcqQ;W}je3JOQiqC8K{H*rc)U}$gqf?_bUx78ZN5@9*7?T>K z^B{FvBo)y*kwirE{c_Y}_&zQ*PUnp)9tXbfs~wdZrTGr2`7St>BiPA1nO_*H{lxjt z?K92Kj^{r=FHJf1wIg4w8IyAM)V3Zt=bn`FQf>ce9lG6B<3Kc;f9-g(06nH)(_>@ z`R-hqXMd5aTD#X?do|1cMW|1dWhq&f(v&qa=Dc5$3c9U)}Q6ZC@?#_R!+;{I67&EiPJIw%GZ{+8pP@A1~gvcq=~T zOHvEUV&L~WE$m2RdKBX~y z4*Q(=Ddx*M{fL79bYDgKn^EJ%j?fIV0U0c0jhcs{Gg5- zf{s%CcjpfCcH=&~c^~+=E6sIV;l}Pz&*+t*ZhEflGYY@@Br+ZkZywXy8Z z3+&$c=CEj+HwXH(edn7!aosIF=e|~!xBpV;wU2#e|0R$M$|}o>wqf0$H)~hX{MoyT zwk;|z+E!dX7;|Pw@n4t4=Dk}MoBv)}?8e`e#Rlyyn}azuI99a1cvf*NRaEpa=R{ud zh}Z?Gk()`{qQ^#;&xOtq>dZc&)2eu4?84NfKxb;XS!;)l+O%oTh@U((GB$Ga=u~YP zs&L?ww=d`O(Re&7!M8VGpHEw6TV|QTg9ew)nm4O#R&3Vxv#~wLd2|1X`!)RrTclP6 zj+U21M7Ay!+0O8g&*Bc!kl;`kQc)MO6?GvmF}j=2LHB3pplj-<9p84G1nq#Vq#ck| zv;%TaOk{OJH;DXD_Xn=gmYM>Q`ywKfr6Lc+ zMAjul9!!d?ZxDG%_lI1ccOAMvzF-}W^Yi`eldyBs!9*LkV-Ci;2-g+3uFB!_G17C~ zjxSV+P6e)=7v#FCaBbM=Ckmek8~v&_v%*F{(PnK}1^U%FYs0xp>aWUqBD^P8>aWh3 z6@F!Y)SblCj)(I6eyvpx^n zRX8hb^b=@{(NCa$qfh(i1^QL6bN;*&b{?NE^)Nkk{6zXZ^e649Mm-3=inVtk^jD8YrG5Sf=&;F-=67{qHsb2>>*`G|WgPrV8lg|D$>Fj@|*P|YzPdkl1 z(~UmU&uZN{n7M|Wu}ln4d)7L2u4|5)vF6C0)ETe+R-!;;Wkh6EsmMJsk<|&2dy^t- z8bpd8$F=ssB6mkbCQ3zC#za;nMD9t7tZon~eq63A3PhfB9olbw!8)`bUlI6mkq06o 
z>qw6S${Qr3jKnv6FT%Gysl|;LchAr3H=(>Z}h97LqEcLXfyo+%kr9j zL8k|PM5hOSM7ImR&*&%67Neg){q!T$PoRGK5$adLPWlC=SHVvD1(Qy{VAAPFm|l%~ zj6Ur&`b;nQb;g|l@3Q@;*<#r~H% z2X^K<%Io@^ft|T7Qok-|mhOM**JCW$pSn)yu>W~|xXlUu`Zg!@52JpgUk@GjKkK2* z>`#{EHTzSi2mP1_X_YXM#+&+~1hspHgueaDgoVLtk znPr9y8d5fU{_L{ZvDw?tZeJt*d(mRG|Aq^n!@fZ9l0}texxpDh%5BB{SZV@zaE$wX zoExOvPr1iTZJ*FG?N{4dra8ajx;OSvZVt~-bKVJG_ypPtjQwV3;LZl;<~Ut|yBK^y zj?)#mtHBrMV9%2CPJzJ}*w7KUqrtGD6L2SB_E)xT;P_=i{S@c1NByxlmq@uy{x~g<`~03ap`WS4mIeiQ^*qlBF z8w{s6kIlXfKiiyr8$v(Zx*uzlKSOims6SK7CV!Te$Nkw_I?*{=y3x5>s^~l|^P=-1 zozMwR4Z;`?GGmVM91J|zV2tMw;2{QMJcj}gH5lVL40xEq7|-Fr!wtrGjsPBEFvfEv z@JL{rQ#P;3DVx{il+9~$%H}mWW%HVxvUyET*}NvF46kiYWy=PeQ`xe?a4M!ba@5De zoAy&YBu=@<{Rg#lqBtzperk)BD!Nt6yy!!aHm62mj7OO<$9P@~e67J4&(Xl64aRtm z0Ul#8#xnvOF&N`H7I>_|7|(IQ;|#`lUI%;~u+1r(*W{GVYjVowH92MTnw+wEO-|Xo zCZ}v(lT(J*Hm9;>gUzXI*# zP5x_I9`}Eyr4v1%r5k-+OBMaOmU+=%K-!#|hB2OI#vJ2$J@EAgV?3t=Pd6Cjc?0kb z24g&D0M9TO<2e&}rokA`S-`Ul#(2&Ko(*hs%H}mWW%HVxvUyET*}NvFY+jR7Hm}Jk zo7d!&;kC`FY}sIQDqA)fPVomv^i#jqvdMp2%j5pvY3W1{Y3WAa(Naa<)iN*o9;D5w zIT+(PX3R03bAjg?jPaZYJkMZ^=X~J#24g%I0532YMZk*;#(3Tcd?T>U zDVx{il+9~$%H}mWW%HVxvUyET*}NvFY+jR7hSxTyvSowKschM>=&NdfwK{R;l)tF5 z%=}KJu&nrzV*PuRlJ{Kv?j#q#KPhtYo2%d~iocWKZ%^E@IT^oCnSlMzG3tI2g07rV zmE+7s>!O}*vs7WWzEn-JQ`W&mNm*|uM z^Bk+`D#4q9d5+a|wcrPVI}rC1Tn@~0tfm2iw*d1TtLYlSTY-6w)ihY}L%=-8Y8v`P z`rIO$HkfmZY}#P>@EFFY2z{fbNMAh;%ss#I1U~`HJ-@nM@RPvY^Q#*K?*!(aU(FQ! z6fpPvYPR5Az})kzxq_bt=AK{8Kf#B`Fg{_VH3gh}9GIMJ3OM-$Fge*2aPmoDag(wt*u(+0zbY})W&b&j>^VmZfB3uU|? 
z0Oo#Q-6+@x=6+v&Rd5uT`+c=Ua15CHeYI3@88G+z>Sn>4fw|vT%LP9O%>BN))r+r|X(@|iX(=lM1(`CRmr#A!JoPH44aC+PL?Ave<>jKa7ve^%p z9Q-dh$GRkF2gaEDT(wfhoSfo*U)?U4oZ^08-65Eq;(lMP5ll{Tzpw5ROippXuht1B zr?}r&cMB$`xZhXzW^$?z(+saM<{7*O&fqn02CsoLcnzGvYv2rC0~=o3oXVCBHm9;> zgW*(6KbxQDIhMsK?sL_BGUntI_xtL8!Q>S8`|1I~p!E20p2CsoLcnzGvYv2rC1849WID^-~hSxTyvSowKschL` zIQ50ju`Et;pQ|30F(;?E-&c%QPI14lz9E>L;(lK}E|{F+eqTK)n4IE% zUp*z5oZ^08J)Oy^3|?c*Gk6W0!E4|QUISMH8 zr_Spf%iUqKB6!-h;dzqZd;5EiPgV(?ryavwTHE;&6firjwoWW~g!)u#U*|NdrRJLp|occoN zSQe+|aExD&F(;?y5`SMXImL4<^^#z6YCiE_3MQv`j-`Gen4ID{mijBf(`OTCqH#xoE z4UgQY9)K+Jp25pvz2EI~<4;sMP+K5J-G5YVg&hA#$J`x|)4nw#_v?^zzP~7! z<-3mkB$wrT%=}X>%lBJ)G?(Rv-qkjb<;Rq@&tv&Xk9W*t`5T@cn8)(-e=sDEkw}8~OX*Kle83d5Pa^l9!u4KdQ&M{Nhe| zJ31Zfb0Tf=yPFp7#rwmp-ua8S@LsXj-I(V2`hWXC^OWVwFaBL0_L-zDes?{}9Ywia zDEG8ZUzFy~xt25IajxYCp?&dwW3H|pv~~Z}{rX+KsTMfYH+p;%Fra5BY6FC4Sq&g4UcvXRFJ$pRKML)A0DN2g7f*+T8`R3uFOg0c1DG zZf~{TT?APK9QKC!CAcoZb!Tsc-xJq8aoxom<@d&QZ|HXQM*Dqm-3RFfUc~Q<>%O=y z^v3!9aor!+-MsPsKwJ-mes^!8KM2=@kY40X_J`nl2(F90ss1or55skdH_ab`>k-iJ z;Z65P;d&I(dwMf`_G3HXc5gMt+e5a8%!kZ}>i<5zkEgZ83cjH|TwNW7CbBxrAAoc$y1ZH%*jq`i%C z_K&ppNSys6?QM*+e>k>ZX8XyvFSGq-4vYp@MuStmyjgw(*AZOz_U8EGa6JyZyULsA zkH__Rr1$X__!DtG5!Y9Hi~PyBo{a0h-eP|$uBSr3pZ7I?8m^}yy}wuLPsjCiTo3SW z@@L?B2CfHs%luimo(27DycPZ&T+c!JAn#Ux9cyCq@9g%_KURhNSys5?QD#*U!;bp-5^rlg_G;^u7jL$zF-}U@6rYD?dSYA$6nGgostf=gzx`&(T$7B z7A{cxpY8pu^{#yI-eT~+OKZGqycqBE@clHyIlgz3gE3x)biV85;9V)_MR~6e-_9Nk zU+ad4d3X8+l6D{>vM%i0@m_eOtA=}P{VJ5_yHI?;&B6OprJ{4NGjzI)an%TKy&sdb z`h>_s3UwDuaKj_Ld;Awro_3jc;*z3sxC_b`T<5A$-Uh!x(mw2pv~H7Jb*;D2XBoT$ z-C4g6j(l*b<|yiIceNX)PcM+PeG!q#b|~BKDp%3R=fKCY&3JFR^IWvKRCEqp0iE{! 
zTt(j>leD^o$b(I&JAben=2}pN@@zBSweCC@ZBB|#eS4J8zs6NuBN`;_PzR)S9O^2r z6)&KijKSK@9FG_APMK}9BV}p|MDDX~R^Tej;yv!pFXH{Pi0C9sMIMNWth3vYi!y8{ z-uLeOBHj~Bh)zaZl9E>6Ao7sihCL|HcH$lJ&Z#}ZjdSVem+;K3#|bv4@P1pV+~chK zSq0_sE_&w_+H7pKZ8Ub<{hW((Y&YIp@63169JDCRZ4%=aQAC2>)XRaF_>uvU53OJ1S$vZn}XE&7T zIonm^ymFsh?|q>gzRuf<-{wny>bZt@{JKN0_k354_qO5p`oTQlJj6SHw71V!-0%c% zyMC594$S$;=ZvEP|2Svn;vK(y)WiA6=Z-6J&3VUp`+2}Ot%vvj$hCq$hQge;9ihkh z#rZias4s!{ALqg@a=qJUAw54kL638e^KiD|FXtU>q)kP~LSfEFJ~!B7a5Uf!-skW9 zB5btnx9$Br;5FX$@BAWcwC%U;ofp)H_x(G+i1!z5`{xIBRB>+Lea2JSf84GE`$gM+ zt_8M@hPSq@wrzYrQqK*vF9mySTQh76I81J&PG_54Uo+eEyhnNZ4DMrBf!ln~wYeW( zk958>s{QyuS)+?Eetm9q)kJTHUm$6f5s_8oa9?nElJ}_g_aG3Lzew=Ma`~~sjqEpGW$IRK7q*WzE?jbjN zg4>+8^y6$ZVm62$7oBRZJ!bwkNZMX=3u3uT6^an%ej;hT4n$?0L> z^i1zL?aO%|mf<_f^yj@%X4pDc&GIV!AweuA-e&~vGu!*Fe-!>--v{&0#Bp-qH?Kmu z5%;=kj#uT|F`M|@!{;Hhe$dzJd!l^NV^hoL2L8Se<_gyku5-o@(BJc3hyjxJ_kA%Z z=m)sonfXqC&pHtwB<=6}V-9e>bB#0en(L9SkK+NGImZT~KF)8hab{eMjfg9veZ6fz z=e4nwzMl0V=7{$7w(YFj*h;@o`w)*r`+eJX&R1hA{eHSW_*UC?+g7dt>H5Gi+ji#J z_R{~ePQ)`w`+sAOu};J~N&A0ee}=tweS!Zs_S^QFx@^0-F4(qmjo5^H>Sz7` zkmq^wa}@YF*L%?~khB94k#&7AMx*X`)jV&H&;39-%IaqnJ{yb9!BHqby37sF_g?m6 zl2)G(dFX1CA02ho092%O{mEs(T*5s^u*ZD#H|;2_H*o`zmaN=4_uAe0|+m(1muq}3%v z9^~3)<}ue2wi_`xx^PdB6rK7ZC_mypnb!@Hc8F`5naf;5*j~iz==>t~0*>gMwcRg( zS29+gbiN$p$hCwRaX$1~QY!VGbvwA0upNjE(uL3bXWI_0A#4ZYh{&`4Iktn(`nvyN zw|>??N4vR}usq_G$g{qbKV$!M4Pko`+oTJh^CK9qiI2PC8@<2Q&)!qvU&;Mx;69)4 z*+#@Z>C9()a(X((mfW22fE#AnS>QO=j5$~{_*}2Y0ygovp8k4XP~QU7$MTC%z90B7 z?jiX0png4fuuk+x`uIn%=Y*X24Hwa9dmVIx@_8u7XMWbN^|40K4$iTTp;r2te^`L^ z`VjEUJo9t@kAsip{1}hAMAEDZwL1Sx?c1-_XEw_)w~^CD_A$;%ILMCdApjogKGxsN9-BBW;AbC^LB9U zNY@X)%>Fg);+kN$#njupP0icDwS#pchK*i3nzzZExixPK*Am_X@ow~5(!6cW+j88# z4{UGVHoGmRUaloN_A<)uz`bOjapX6<5yP-k=puh0#((mYu5z3szqu0e3=1HUS9J~2 zr#>BU9CL%~$WD~!GZ6P9)Pw$>LC~A_Eu4oyPLE?>E#<2zk-v8c(r0`Jds)a?-*Xku zmpH%E`x4Qw(Q^rNb{O>LeBV{2QhwiX;CX+EeJ$jIzj77Nm)H*OOXkA2#-N9HO)D-B z9gXstEQ0liX$VESJ0W=qK67zC>;A&r;M&4E5G$w)pS!3B{YtKzoZnpkIKPupzKRn0 
zd$_)_ujn7xH(X=bCx|no*O&&;uh#d$oTa~D-!L8J_tI~$kJ#tz6RtJv3&bYsqSqQJ zpV1aa%I+?ZycTWYdL!dHu!~u5h!NMQ3(u8U?qqF?NPSh5CuPo8p@t>oVW(*aY9?T1|>xQoX3f45$i$hrY<~pY7kpm zw1w#>YYW#T_66cS={0F2#&6M&T@}hzc@6mfxZ|s#@JEp9^^jhZ*lxsw>cTbYpP2tf zDEz-E-wK8EbNlBt^7_{yy&mbYf?ZsX{*`sS6AJ%3^WP7J|AX@HL*XNozlBc^=8m3& z@admW|DQwQKQRB}Q20-jzYB#wrTl$JuScw3>tj7SN`1XnAYc1-NX!S$0nT^MPx^M+ zujdzh`{#fB;{}INuI&#(;SV62eiRCSNc>7juSvT8U`_f5rvFVS-1?7&x?kD`{nCVf z;d;b+5#Oo{*CX~z+n`_CqF>l2ZP6#}gC_I=*CW=A7+76+HpM<^8}v!rpikPOPuK@d z=mTRn;$(HMcwTj~HkvkZ zO=8`M$JK@BR~-A3wb8VRYf`#y_;rr^$=Ybz#C0lNKe)+pXB(Mrx6!nT>l5oojIS;{ z+iE_Jrj2Y9*Cf`B7+_s^zD0i9<7nDw+Qc;}T{moRK8~i1rcI`9a@rn0u2ZIcrak|i zF}@b#cZ!QIl;15f#@APYziKezYkdv)YX&2}))L?)1|z;!DR8O5h_AI2c&WjNuXPjf zO$H;r*3G~-1E*s;<>Wa0ZRQ*ka|t%^x0!Pdh7J5}<~)O81Am)2-(c9l-)1f_7&h>? znF|eu4g789B7LttBXbLLoMq^nWoEp(=QzuOmm7@u zS}TB87>xK@w*cQ_Fyd?73Vf@4eKyH=$660&DpAq8=?O2a7UT?-6<9Rpm-3DVk?*YEYV2tOz!1o%A z@!SBs!C;K%eZcn_jPcwEywPBc=l#I<1KXUkc}-5)ye6k?UXxQcugNK!*W{GVYjVow zH92K?ZF4GHHrSlXmJNneG0l~YL5#0W{$`DR|3NKdez}&LeTnf^?n{iXt$s#~FK}uT z#(0w%bByN$zz-OV@$`XxgE5{_;HbeE&lqsbV2o!OaGAjv&&|M_4aRsr2>c+h%_*DL z(&8ckJU^pf3Ms5sZd~FJ1eEC6) zub3|}zBc-_&n>`P490kF1>R~f#`7WIhYZGeZUf$C zFvjy?;D-&ycy0&YZZO945#UFFZBE&|CZ}v(lT$XY$tj!HzV|*Q;XJTp)~70ei4JTp*#BbYJ1cxIsX3ucTjo*AfD1vADM z&kWSh1T)4L&kWS-f*IqBX9nsQC(bPx+iDKdnqCkc#`xknR#WhfF=Kr39IGjK$Cxp` zc#hQ+ykpE5Up&WZ3f?hhj4z&JH3jb&GsYLsv6_N+j2YvL=U7d6$Jm@(WYY$7ZjntJ z%sCcgd@V!Ys5fLR7~_k3e)Xnc#`xl%U%e%mF}}FxSHBX>7+>7;t6vLdj4$r_)!zwb zj4$r_)jNV2$rxYcWK+ON#`q#9o4`rKhiuwl_>fH-{;SThM&W(_Rp?vwzKr*^z})Yv z-v}NJ%>BOld%9N2zr^f-?oW2g&aN5KV$fgY@en2*DkeC!-@Eq$B zIhQ1-xX)D|%b1f>-0!P@6HHEVzps8Ln4IE%U;Q7!i2@lDem{x{}oJ5alfzr zD43k$eqa4(CZ{rZjWN&QHE;&6firjwoWX103|<3g@EX|g+U8WYY_K_%EgKA{V!{5f zOv`gT$FexZeXjaU#+;nueqVhqn4IE%UmX)nPI14lQi91T?)O!W{JkA=iu-*P5=>5U zzpruylT+O9s|%iVOe{jq&kSAzXYd+0gV(?ryavwTHE;&6firjwYU~-E4ef1T=N3IP6!-h8jbL($`+aq}U~-E4 zeRYLka*F$X)ghBp8N9}rXYd+0gV(?ryavwTHE;&6firjwYLi$);(lLs7EDfYzpuIqCa1XHSA~MfDem`GcfsTo 
z_xq|?FgeBjzUq<5sSI9Y%rkfmoWX103|<3g@ESOS*T5OP1~$C5Ih8FNY))m%2E(Z@ zbdF_lYA46Imy9_%#d9omm0)s;=UD1$!Q>Rru~a|7vbdqCQi==Geghk7S#Uqs5+QJ$=Q ztcUhh^?FU$Q!4r`+Q;;hwU6aF*44c}>Y74(Vp6_EdzjAlBqYB@d-&T#J?_13>dN0H zQtxE#NlN)sju-3E5cRq4&GrD7+?@ynlMY(A@dpuO4so=jhnz?JQ3&EPY+Lzw*65spTpB zebwOaWRr(?{j>1@zW;2S%iqwm9}?&XzT3-tm*vX6AMVg8m-ls|N9ql)DpyrP3H_vGMfv_9KcjrK7f0_VfMzyFigkLG-?=ix(bF3)AT zZxnXO9|`Z5{^DIlzOQ^o_zna;F{uA^_P-qPyPEtC z>OY9c894C(MOyAzV1Flz?erPmC}uN?o#sy<24siH)gJ-nZ(&#Vz_yIvzix0*T)qOaRA z81@aKeZib!+u5HlVofl08QUDGqk@t)($BKbYp~99-6)X!1a0FrbSfi~R%P2})`=vZ z1z3(c#{N>#ucky=hK*)jsKYaW_Uod1#y-pgH#ko9So>KnA$6az4~_q?!Md;gc3|77 z`q23KB%gcWW9?Xpqwe3BM?XyN)}h^Vk#EOD9CZ7ly!mDf&_4c-zj=OD;0Kbl2Y#aDT27MbKf)LwjwY|6d*}DtoyIu>=tZu?qf&Hr2zXBIlLbTUiG25C;z$TsCXo zENs^u|6gxq+`+!ls(+Pve;D@BuXl7Ve>eVCtD=_;w_z9kSzDw({Z?ZS`Z+x(VGsRi zIvwpa{gATL1HV$w`jOr|-r{H0eH{3mQ^jATUi!Y!YaTQ4Gwc3yk$P)e`_GEa+CB%d00!gTf6mY1S;aB)o_}8Ph}Z?G zk^Bi>tmv`P<;E8#7J`SX7_`&3Gybyprc(? z3!m%CeCQoqhV*5~U+mTR5h+toiM&HAkhcPPU-f?CYoEOEwXUV2bNE)|-HN=gdHZ~( zLI1sn zL4PBzH=@2dD+X66gEt(*DLc=WO~Jn9m>ZtN+iu&Me#V7;^tHC_wyo)BTh0yW*|yuZrk`nP zAM|Y7>5pxD)6cWyJ#=mRZF{wkfqk~!=3Q&sUXC$2XxnYuY5L8!*S61aj(*;}ORd+B zmEesdahZH@#|L*-dT%2(i%zSIh^&esFNVC^yhFZTUsgWXwN&!zrddjK8kOI?VE==V-U_*ELvw>95b$ZsV_$d`>~R z?cmdPaC43K5B?5Z?*O;%^cvFsx(@w9y+@JvDDv*|KJp*K^<%hR>-|&OU)Q4_sP_c& zo4$eAZx{0J_Wsqcz;y+#@A3XU?XT--AM~C<-ZQB0 zUhhA&zm~alB(8mX;1BCzBmJ<;Nq;oPIVYw4N8;>HPf3EYi%F&Y}@ItZF`Nsrj7L1w*9uf^w+e{w%fK-`&8Iv z+iTmWeJA?Mw$rwYep+)4F;wZRO&rx5u6S0TM})Z!Fn)zIfM2yZ{D{-?-ro-Vwh*(hfvK)-A@n@{51!s{6c8G$-4!Uc3WOPL_(!!LLH+Yx`Zb z(fflRleGGTNF6V=^mR9UzxO8{Cy9134vd3%7D>@L{56y>{h6yad7t_XlJ?;eq}}uj zS3Tez^;rh-QuVw04)WpOb-dI$FS%jP(E>@^7ZI79gR+dP!8yvf6>Kx&rG|C9RM9yw z7dni!!MPigw7P^y9b)XzhC#^~UjZjiJ?^O43_37pG} zvnOM)HmuJy&|b#MFg807!DE9NtTK{5EEHvw}CM%*iOVt z4eNNRqLa~k1h&*JsfiW!DPQ*(M>v*Z6bJqRL_!WAbV6%>)B<0V#pBcA;(_G)F(7V+-fi!>s@!W8W$Yx@q4N;$fia#|A@0l;@1iL8qKu2g`N(yEu^x0i zj5~o?4+~Ha=OfpFg}COt{*WTtN+ne+q~A%n50!F zMCy2{cY@mwdu^kPOGLXEH%0q#(WzO1@^}6(S8ey&MH?hd$4gxcZa?C+kFpHnrHUU% 
zKA+RMzHm-*t`RH;&c&ThJ;-!jz-?Mf9Ef}-4|Kh40UVhY$ht21h_2Br|y^h*X>v4j;CSK~TD7XIK zT=l3I7XOYP%bA#|_ki2q@VaQf&HHg~nV6|7QSP4qnFkiTiaIG`GfIgr1Ld?{(&tHvsL0`Z%&&+xHeAbDWscD~oC*}d? zJl8ohx49f;>eI%mel*oc^^+TYvu)889g>F-$|Vy0?;Z`;ngjji;wePoWr$0Bg()Xw9gMYPcw{7J*kgg9rvu$UdZ7vjQzH~rY_rVt_`-WTqj21p89!zg*?;qnPL<8`IOf?S|Di$A|mTn zVT?9??y6m0pJ)zvI38v7vk2CbQqeiM3FZBqQ21%DZ!{)p^$C%O?m&423Q-kaf4!D) zUYk8ZQgjacD8Ko_Q23kPz-WV{eYhIsVxf?F#v7#95ZcVW0At%Z(BX4E$8kM4$2nXe zY5O7~lU(1-{AFA_mPgD~y`Gec&VltPf6wn_F2^LTE+O(D*Echdxt_4yh?%P6Zzn~k z{vMRy_?gV>21z@_bv?&bl33 zPuLE`Ox5wX&$b<0N7xR;OqJ*Tb8H8n_jUinZvDJ}j&^fBVR^(%mFImaf5!gjI>PoM zW@=}~OpRf@wzdg{zvT_l&)?hOU&;L);69)6*+#@n4KqF+IsGWcmfU>om>XuMqEwmxUDT zO~*_{IX?HZeyxvZYTCg$W@4sp2(Vrs0-ovl5%}26@Ufg9<(MDj`PPrIM}XY+JBgX9 z>*jpIvo-75j(I>oy8~+i{l}w0`NvS6YXWUg@OcWhv%V)#AJW*9O*`j+q+R!Fgw5rs}#mM_{|%F4|$YgL?v9FZ!H&g68dN-VUx6tQ#>?^;*%q zUCrCUHG}mdW~yE@nzyTYJGgeF>xW-v|C)AjO|aWy>TTYp=565G!8#E$Rj(b*+hor1 zo418)3GacJsd_DG-nQm#VSKvuePDa@w%Khl^>QsS=ls)fFTK}zOn$Gzn5kn7(n z*FVnhq?E6sM16a>zOk?9AJ{isW7sE%nX1>A2GN&ToS3up7wj9Rqx@d_4fYZHoPENz zhJAsUsS=-C%4f92k+QoBB(FtVxZcRP4(x2!8)C#v)iJtR4)<=+wusbMMR~Heaou6t z5HnT&?t|#JXj`e2Z_zfcKWrOfrs|m8F)5$XHm2d;ryM7)L3*6f79BHH^jowgDRrN6 zoVXUT9>h%5@w*#DzeQV^ezI}mn#8_9%v8N5-H-8mW@zY?F;m%Y#7xz3yw4OfRgV?y zI@_43tXt1P`1G@hnacXLKGvht#Z2XV=ltZezrF|O7kvAfVy5c)gEh&Hz1ck0Cf6g@ zi8%>+IPNnMyH%*LDrrT|_ zW2Ulx#7xyOz0VXgm31R#s*dB`d>osP8`q?C-LU;sZ8U8%bvJJt*D2FJ)1Lp%n5kjB z_Yp??62=!~%+$`noef63(k{SV3`V@tuE1RlM!eDj-~xjYue1=j&|t(X?FQV*3>(@5w>KCzbO7#PFl@*N&Nmn~TnT)o!LR`f z_d6X8h7Fy7I{};60*qIhEgOEdo%`>xhq7sdi8aBPsYU3UA~RmybDUz}VuKN{v;?@s zV8ko!0o=o2#4GIy+|yvhEA0i`%V5MS?G4=9V8koE3ivAEG#_At=A;=f*q}LSFl^AA zG#EB$P8tjwG$##)4Vse%!v@VsgJFZ_BygG!S+pU|hb-D4u_ivn_ULF3GquSdr{9%4 z?q8>+6CJOm8|HNe*Zr{gif2Ak8zV1v!+W3a*I^fB0AbNU!; zusMAUHrSj#1{-WnAA=2s)9H9jS+pS?k12~bNQ|#f{TZ4wM}wHDO+n1m<3Y?+CpstK z)Lbo9RAQ#)MKfZiq8)=U#)Hh5V>|}~4>lO%IRto!!5Gh>z(Wnjcn$*|W-!KcIPh?T zF`grUM;MIp90@!U*yfbYYjVowH92MTnw+wEO-|XoCZ}v(lT$XY$tlBYn^W1c!RA!9 
zY%rXP1)Rc5T8LlS6vRwD?mwup6D`-$jY`Z^6_uE&dC`oRso>NojPWQl<`~awfv+_f z<2f35w80q9F~DOC#&|}6BL-tU#{!Qv7~?q(c$~o)&+CA%1GYJ3^O~Hpc}-5)ye6k? zUXxQcugNK!*W{GVYjVo)+U8WYY_K_%EgKA{@bwwEbTo*W+7!f0Js!kNb)qw&&j}(4aRs*0iI$o z#&at0RD&^|9&bIRs5Ic4*joU(aMPT9OBr)*x6Q#P;3DVx{il;O3_schL`b1GXl z7)~VvPQ9vSQxG%tcn~wyi5}4DZd78Xs;ICF`l!5X9L@uvUyET*}NvFY+jR7Hm}Jk zo7d!&&1-VX<~2EGcx`hkTQ=C7%9ag=Q~UuW{nW1mPQ9(=@gQcZ6Fn4g>K!drRAQ#) zMKfZif>U!a#&gV=V?5^q&ovn1IS+WA!5Gi^!1E2pcrE~5U@*pWA@D+jF`kQn7a5H4 zyb<_DV4G7mugNK!*W{GVYjVowH92MTnw+wEO-|XoCZ`OqZBAv&2Afmavf;}fGc^oO zso`>t#h9r)Gf*Q1GiEB!4AixP88ek<25O98#!Tgzff_5AF;jVFpso|ln5jH7P!j|* zW-8AN)T9&V7K|0z4rxujM29g`d5+a|m0-q98M10Flh>v7*`T<~@(>}1x=_s(x=@_uh=`vuO z)0=^9PCp22IBnuFWzz-|k13lrNQ|#9c#ajs_#&sc&s8gB%*iS4_tou!$tmvl)g6M# zDem{x8o}fg_xtKD!Q>S8`)ZwFa*F$Xb+=%0iu-+aZziWQc#Sd7;5BdtuYogo4V=Mi z;0#^^XYd->@Y?27wrsFDl`R_#r(*gU{b=yFf6nn7%iQ{3;XErQ7@?)TM0nVibtHO4%H*T5OP2F~C$ za0ah|Gk6W0!E0c{YnxNqvccw5wrnt*`aalfyg&g4`EuQBEsyavwTHE;&6firjw zoWX103|<2pUfZ0?mJK$ivSoweR5I{W=XH){acUaJ_?t53RW=zDem{xw*`|^ z-0!QnU~-E4ef6wha*F$X^&P?F6!-h;dBNlq_xtL5nVibtHO4%H*T5OP2F~C$a0ah| zGk6W0!E0c{YnxNqvccw5wrnt*`a;#d9q6l3;R*=UD15 z1(Q=e$5KBKOiu9}OZ}B#a*F3z>PLdfDV}4gA7^qZgVz}I3|<3g@ESOS*T5OP2F~C$ za0ah|4XK$%IR1luOObC=pEdGbjF$63d#6)5h+(siFyw0K>DhFAT;9LO%s=Ha9pygcefupvn#=nRy{m1WiFa4= z3gQfrCn@FYD3O1VcCdYHFWX~ceN>?@Sx1{gW!~&^!L1KYW?_}*s zO8HZcmx&FMK>z9(P@>4k9W+o?)RGir(7K~6+bRw+&uDdz;faL@O#_lrDLX|9~h^B z_a3`N?mgw{j(NP_%;)8P3x7OB>vR5Bp}o}Sy}Hl+B$xGGbN5lrOjETVcF!ZiJY5g%}V#lP%+vf4UH$2-ukH2GQduq@g>XY|Z4i?@QPI@<&eD5_^ z_&(s4PjpOE9q$PCGp-2hAAYygKlUN1f67x*|IF{T&ogmII9}9e{hcPft?RvN`3Krx zjNI}k?H|17+iHC?K8!PRCi`CoKh1bh|4jCq_$1^ZN zGX}|->^Ct>s?q<{Ka>5Y@6TknUNd0xnflxGwdq6pFcT**`9{k%r|u1`IfvyP6mpSq zsc8%2P%|Aqzp_B`C1&aySYsr8&yG&V`a~qJni9Hu8AsC8LqBu)4Xi1mQ^P!7LvJ6i zA3#jfO$)7+2%Q36L zdd795K=Ko`jn~krj7VCQZJSvql32f3jylHvQqixbL|TT8W?iVmGk}iiC%R|s!*on2 zto=HMpVWQEJ~aNn2J61|+ktJT>OZZ@Fn5kU1k#EO5UH@-Z%v7|m zdCXMir(>ql9{9cH<*Tqxa6Ia8|I@}yO%;{l4^KEP;-vcdGndBshAA6#sb=j||JeuWVatP5lpNT$P2 
z9Vn3ex&ovZAiW!8H^c_he)fIDO%=U^MMy6~e%Kr4N2E+WCF(d-g1i#sb@oO`+|>6G zH&t{F_e5S#vqU*_f})PJ!E^x7}DVn z_9Y}gnUC~*q<4hu_?8tn_5GW~Pm2BlX>((|o!8S(O4&L})OAq$p)r1i*E=0|FMJ27ebaIGs?bk5?w*uuVZS3~_Z3KfOZ&|;0s763zgGdj zuYIeO&uC9X>Z+q`-X7y)6Y&2mUn;s8?O__qWwgim*ea|8+Q&+{7VU{i*;9^(@wwGl z8@10B{S5n)Quj&6!}#6`te4vNif)Ge4N_-@{l*6;_#6Nq+XwvU1MU@g5x+04`{KIL z8<&p5SH))o==Dclf8=%Z#`^{c$ADxhm~%jI%#Y96t6TeZFnGZEN~@7xq!lw%xWh{hSN?=xc4; zZClgNx11Z$vu(F+O+VMtKIqxD({J1Mrk`oad+6Hs+xF^bS=eXWZQi%G?bXk+u+O&J zwp067*k#*m+ozvxVGEzd%=^@=Gx;%ivrXbNjRto{gFC&vS$=_}RYpWsMUWRkUT<%X z#7+GeaZ@F4&p70b10S#Q=J_#6t4@gAJ05xCQLc}-K;ou;jJTgbgHF~8snU!4U)E3`ld0?xtV@WM*nF4 zEjnj!ci^|{(NDJDuE9E@{jSt^*6lEUJIVC~e*0|OVf=O-)(`qy=N#K%{B=FnVU{mF zN4t%`uEF|Ce|@%g8-JbTa|+5$1)rvZoBh16`O|Pc4czMQm8Si59r}fO(~&nFc>}zg z{292Of$M?Zvb4XhM?X++7V>7HzH7V{{v2G-f!-kR*0jG)!e08}dB~fGyusdW{sLSt z!1WMsRoY+I(LU%cLf#_OH`H6L{k6=cBXRB9gJ;@$*hoJtbEz>--!5~fG0wSa&Mywb zM(W8NYK(KPO8bw*IX8{JW}ngzOMf)RIX9*Kjd9LJbjB`#(`;WxgpT=La z59x<(+ihEozh)m&&$ivR)%a`LNB?TuZrf`7HSMFGZM$u&@z>fu=-IZ@U)%N?e@z?d zuWkEnd$mu2eYV}Uo!V!>F56z)KK*Qt{<7_~?b6TY#PfAJu1102JrT)I$H^!aTtf-{efn<} z;F@O=F-SZYrsE_e1Xm^{KOOg=L2xxC^!M_ecSq7HC|O5=-~=UbWkhf~4n(QoYD(6@ zagIsadE3FWAO~lu=#L~N+L!)Lp+NA#h~%fgD<~Cwn3DSw-5q5y?-Vg_jDhr(_+vKP2tE?a<%JpidY-$i(Hf z zGQN$8t82$~wPT4gzR}5IhuZNtd9Rbj-(>wKjkk$(J9g=p#a43Z%VMkHtQ~`toH8*- z?bxE^tcfLR#|$N>Osr5l#v|)7aUJa#j;zPTZM0)FvK|wg(T>ANeG`Atjs#6q-VAf7IE9qT$>tU7Y}bg}8q6pM~F*|F!yVH0=J zj zZf!Di%8oVHJhoi(7;?>H#&Lact=MqQW5S&-9@pvOZ~d#&^PqYs6lNTzj_-%Uj8W9_ z??Yk6ChGXxQ20#owN4jL%Z^9aJpNqsICIV8%4yzP@#C7ujXPP4r_;rBI#Ud%GsSFb z9;>N&oF>l2|4HrTJpG^4UdQ@ApTCvTXQi(^*Hxd1zVcevPxbe0A9b~Vm!;2JPoiu8 zwu$xdw@Ivrzb#@t{Eg6joOkh@fWNQM^}G<=GtlMl9I9}ZM4d#S!{4^BKK^Ef_3^hT zFXJrdO!a(*^OQ5`@I2#8I`;1!^clq)Vh6v!{}lG{HwmVHYH(hy&pB`&-CWo755L1_ z9>0@kd-xqY>#4_g=B9u69jC4*(B*fx)a7@zrhk%w4!?tDef5FPVd$9tIbA)be@>@k z`sZ{y`V0Wy*XuJ8NPQ+ExT-+#9{%nC`RU(v@HYp*HI(Ty5nSst5lHOE)4$6|2(C;@ ze)=~U4T7sFp)Y45l9vA6M1kN0C3Moik%$Q1Q!4rC-#~Ew$0V(S@_g*j{ekbx^qB~x 
zJ`)jKS0MOcL~!~$-BQ7aDbr^nxYlPPkgy~D{a-@xfu!W8ztd|FTu%voITMkz8b{L7 z--#9oJ`j=o^!J*jg6k<+hwcwatBFb4IopxpW8N38@;hAex%&OU$KWj7%%KXLA!^@n z5?wQg_+2eI&+lAW55Gg@9O8GNW)AVYH(gJl%kR$gJ)vvn5WhpD4!&U0SVKRmOi4$tJZj|p^mo^JZ*boH42Ih~H_pVR5wl*{wo zlbppmf!~4Den)Usf#5w6!PS(=-&-oUh7wrLVsWkg4y2y%f)feBl}W)>l*r%HAh?Fey1q#JCJDKfdavG zl)wigg6k=P50wf&ObIM!vAEWL2NJgJO9)O<0v|{UuA>A#*dVx`5?K6>q}4c*wy!{N zl9F{q1lLirj#9z(l&nMdhosfSB<-B-F#8kkF~Zln;bGpLei*TE!y{ca+*|8+#&wr5 zt{UO3_qiu8nBay-diVHUkX~?|t44Vn{I0m}Hpx}jdK-P79TZ*fhDUpw`~svGd9E7c z`FjY*&r*%60v{5s&^lZ>!%O>AmN>YP`2i*VzX# zxF>kq^>?h{K0OR-?Ozxdj2)8;`eMF zaNTjJtN6Vd_nK@ozgNpgI@`?e(XPZb+swY;y$TRto_)~~>1;Fmg7tSpym|ITC#19e z>z z2luc6W_r*0y^%g_ovUVfm0EuU^k;kD^{+zuh$)|CV(G=@TD!!~EOT z^y5>(P1ZdPb+fMN_rYgF&UnBLQ+F2Xrk|dJxj_ASf&PL(e-ZTiq5g3XxvD?p_($M_ zAt!#r&HTF{LA|uq{2dT-ZxFc0ImS6`=GY*l54lU`80WB=V?&TW0`+i?aSod~HVo;k zmvfA~9S7dB?(wLbbxm9e-wrwX4yQ4{!X9W7Qpif`GxyrfEzUEwK z-?Ci#EHnuz*%s65b(5b;rRe^ zdfZh*AZL6B&jpaPz8C0Wjtm3l-(VdM$-lEY0+N3(btEMJMrtTm&T2k`_w*^8yYwNPyYw0C7tU?=i`K*Z;T&hbaE`NI*njL7_8OiLkI_4bUboH2X zfYa%ibAZ$7q|ZsFNW1E$1ZNggf^(87C;F#mN^rh$5?#|jH9_AcgMO|H`newcoIWQB z`Z*cs)&;ut&^7&&40P%Oo%%rMFmz1+)SRXs(?6%vG5vEo9eoBcIXDx6#GbygKyVc$ z@Scd^YD(a}rGjfHf#pmD*ZNEZ68rJp3Bd_U;L4=nDoWrz4T7sFf#pm@(kdKD+g%_y zLCHEIf~zQ5N2%ayO4h;oACt5SN~E2$9lAeO1!p3VXy1VX!F80t2P1;(DS;1_3O-B; zEN3FP)@LG+uw!3BaFP=EKvHlWCGf!p!S$5DawZ~aHIAh1D-fKdWE~N~b(E~5RB$~d z>(KonX*DrPJ7+sGe9XeY$1Dtd%)-FOEIcuXDi#L5;Uv0d4pjv6D-q1as$ecwV=kIG zR1wU@M4($0=vG75%%Mb}Qx)h`2Rb#-F>~m2^_V$yIvq2IPN$RhF)ITfvoi28D+3?1 z@vy}X$F55+QGV>w$})F8LMq_*u_>imA5TSZa4%DS$m#Q5 z5q3<7xb~iVf$O*zsHuo5_+Ag4*5^I9v*Yy0yJ%XOYBlxJi*{|g^rF1!ti!!fxw#iy z1Noav%1e+J zL!N23>vVd@>6+tw+qw81Hs$d|t!nXSLU+u0>?lqZYrS!)aXNoo@wgp3Tkoqaa}L11*YWu|`r;R7*B9(Zl(`6HE>2yH zdQ&-*Qj<`xbNu2-lh7A+sP7GYeu>YU_#DLNEqwkKpI_BZNlnqVOifMIwgAI+<>2pJ zpSCZk&$fCgPnXkuF?Fi+h3D;HdXTT{o0giU^`<4J?I=(8#arpVn4X%h^QRY2M_<&T z?eg1p1s>)X_Rn+iu6jP+Rd*C{ZvQ;;Ge1<|KMG#9E-c79iu~5d-~K4@9}Dw-xg8Su zt$r!*V#l#?=HHO#=g)10&qaQYbNRcWH(DvD72h4gp7GUCez14U=RQfj5z6P^Qd4h* 
z^0`k^zYgUGzv0Q}Z+O)Ep?v;^NBw;$pTAjAzYXOFzv0Q}Z&uXDp?v;rG4;Dp{->0` z59J5H;mL0z{w$P#obp&GzXAKYhM$F;-$H)$i;(jX_U za2|Q-I|dh!bKf(#h`e%!!6oFvG=s~?tLGTZAs5dx_yzgJ}*XJ3`BbTo+SU|4) z!QfZq>Yog*A|0H^=o<27k>+Ldo+tkRHLBUUe6VKk3T~?hUuk{2a07K(`)00^o~xoY z-keWaE&u&b?F{XoV72s})A^EK2a@aPoMf(!4%gA)IyziO=QMM5bhwTV*U{lRIv1I% zqr-J{P{%1e->G8^en6f*&fqL^<_iYX$k_se86@fWBa(ESMUsx^kfbB&OgfUzq$BA} zI+D($Bk4>!lFpq(wPcxp+jrYsmny4Nt_?yXJojQV_B{tpXZVAxTaX@o=0nbM}M@AkmuP8hMlLZN8 zj|cqt3?cNMS&<~riA;hDBCab~NvxEhiiqn7-bcjs1IviGZXhG#dVyPs+a=gWd{}~s z-^TEKy!V81nEdL{=idEy{0R@*C+=!!zNjoslq`I=|^2gl%4U=!4 z`nlrd^;0olzV0*o<8NgppVUn&msfUl1VUOzcW0~Cu`jAc1HrHs*z=;M?$hdn=^<4M zHiR-m0WCK)tQmSR$lFX(p#n5xfK|PaD>{KRUB(01xkTtZvel2a~5x#u#M4P#i@$Da@jwW=Qkgz0*W_#Mj{3T~qP}2I_RD_XlS$hi$TulUB4sBBW3rJ= zE4z2LrIr5v1Lr-83~xNkQ#0bs-|g#8iJ}3 z_s5>z)w-vxvpw3Rq9GUx1S0jphQ>&+L2V4hBYI;bVi;D)3|oe2>M=ELJ%!TUCb{iZ zRt2N4dZG$9+Ixsq0#44qDbc!y+wscqHE;J8Q7UJ$wgs{|oXHQTK~7`_<)ofZ0{=^P zI|p(uYwETRvTY67uD6~}8ECfpb6IUbH%-(W?N)3sq4lRSRO79@=GK7vrL7DG<8f>~ zhD@T}%Xx%<)&APc?Yn#0yR?qIJ#BYigWP9=7Tw>|+WitqcGrp816u>Hxjcys=Q~xR zz@r#%!b-8!WFkzAYr#(LNaLE0C8`tSy&7%H#bQ>(i*YUZ1dkWxBA%OZjSd>$>&F+@ z0YM*k6!GHx1?#Y!ZHO1w9lQaZg1u`5x>aC>mrY@p$6WNN)C_nt9s|Z z!Ntvr_|i^V8o!^%`%5aQk9eM=C3X=nn7U2;F&=;94)HlG7j3uEbyt&jgkEl>eoB5q pKhb~RK%3BxySn&<$Kw~?O-Y=$Fdo9p-^D-woagzTaO5nF{|nf$5-R`z diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co deleted file mode 100755 index dfcc8fc87e76ab5ee4da51c0370306535c5cf2be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 56528 zcmeHw3wTw<)%MOlJ0XG>MAQfe2yhZmF@zApRX9Mn?ugtGyd;4Hh(N$3AYk0*BnCuC zEdqwrVoz$JjTUYC6GY) zqM6q(lTV_DRAHLND>+8%I#pCOxlpm1c9lHyK?TFm@m?8kC>nfG@|r?P@yC4BHF!kw z!S|x+8p#^L&MKhtnP5*RpU$t_H~aS`d2HY5)~CA3n1y_-?@a$#m*`I_%)b`2QYogX z?)y#sHNXcDN zvNm^RcJ7iT`NhSL73JsVW#3s;P?C?7yu2dBWf!l>U6TLRWh+-N&RzMnl+{a@7U!23 
z`%;PvHs;5FJ!M_)%C-5~D+&tp9+x87i`On)nqSni=to7-s|#0dICaVKA6v;?Kemz+ z&bZ{$?&!pJi>}aPGEjz1?1m2+0P^3E#P^0Ugd@~m>L zIjdYHXO(OHndCZKvtBAIImHFzsgz|!tJki{F3w-JD!;HKduhS?{Jd;lWq!gX!neic z3)iiPK<>(dWrguio)ph4FIOB)S(Up!d+EyDlI%NI7p+ibBk8b(i&tUps{A*`PnbL; zBYW}~$lu z?Mbs|Ov=ihJa5Lt@20FNDp;3Wk{?m3S!?6#DaFgy6lE`2y|%FA_OGQBZ&^~yIg zKXRohEAkGetSu~9y1HnUXj^u3<6^#*vbH$CI6H3xT3E0oySOBG$%^m8_(wEB^S_aD zXYRWErA4dJ%j~Sgr&5}`Xid>-t^`&WWv^PDmml-ET04}W=krUR2DfZU;dLPVVNr42 zb<38nACfjKRWCBuLgeeXjq@FJI|Fm_Nqx?<562a?cwdxde=zRIbhcVl`}3sC2(tx3(sJ z>=MHmXIyyb(52`LtG5ujz#puO3CC;;hrz}h|6Fev?*|7N|11S93HRN3l~`>D8w_J} zL&z{P)*6r07l-=``Qq@von5iOzM}l129NROhJME0`u<4w8DFWljCbl|jo;Mwhz~x+ zehnJGs=uP_qJ}FWHy&lZZ{!}$LC=%snv%v@l6H!dG(KL^3pz_`UMQ*OVo5u9m9$GY zj-b@BpRq~Q6N`E-3j2Zqr1T7$-#?~zgm`D zlYC!tFnKHGi7h*9i+su3eHFgx(Vo;MP4ID@?lc)xQ&vS@ESdj7=2=3I=US3hGW(67QM*SNqiz8{o)ccg_b*@Af^b$E70 zus$5#1%Aj+QN58lwY0R`g_aZSwzte&U^$cvTUOT-;lPdNC6?nc1|9WTChXI7N6k*A zVRo{nn+W$8af6PUNrursu5(tI6+gXO=c>|fo#SS(4D$lZjP2CfEVJT7JTz=D!Fxbs zV;@1e!G?jC1A~_XPjPXk zHRnb+e|V4pkaHwS&Jp0(=x!KE`T4=*;lqPv#zB<#6n;-*etlQWuYAEjgShYESC3x< zeuwaT7QgS~_k;TD!`BO(jt!3$I0Y{-JvKPDa$I}$yEtSPnIPlf4eMU%dMMk z-EwP1aMYndaMTa+djY?f@%t%$zrgQx{N4@-y+_p{d;q_L_&tMPi#*UP*p7^7Z#y!& z9h#cT9#$A}X%A!0n#v}65ca?xKHO=PP5mB(jeA(xHTF@2$2@u#eMSC3rps&2HVZbv zd=~L9jS5UONv}DnFGby##K8W>rQH)3mrA>~N9j<5Iew2}rBTN8K*q9C_r#8{_n@9V ztOxV%-qFF)FXKHkwVd=xlX3w8pHaj?b>*?qoW4zSRTwl{$SfMh(ja-RqaMHb>I^^HdoR8b&o`K=)7vw1zUEdmo!8^*fbJ z__6k7j4))n2|el(B|YGm^k9yp&j@`GMqRa(0o_j-&^pS19uWGpBO~-_N5(LI)IgoQ z#^RT2E;W0^pf7v~S0h{-!}SW{W6U0p)=HUbgsn$qxHhK98*54VnwTBl{jpNMHfFZ> z@%^ey$jp9R#hvsF0=AD^S*QRauoo6Dd7p{{qfcftJTa!xF0jt`$`j;H>Epl6-Z zKONy(r9azJr}U>xtA@UHnH;% z=TSQ{LZ5bI^gJ2^oalL!iLjnW#F(B(l+p8u;o6vY1r8}+6Vt61=aIlLFi9MWG9g18 zGG5OkQI5i)h*vn&qR?2`MQ|dLVwC1L|dr8C=)W&pYeJgiE@X+;hkq=tf4CRjI-J+Pf%0Cr`FhlR&(&t0nbCPN(lg5DESOU^Cpc&8 z>0OuQ{8@CX)lg?Wg6o{*6Bg|)tH7FOi)D4|Tb8sfX;X3q?vRd}H}E}X(ovsj8eub@ z>x?khGaoPw_fX^g3@07^0CJPYZ*KSxW#9Ec_I<9Y!WEO0EYi1BoSgo`mgcfvdZD?j 
zS6(_<)_8?BYy2;QolV>$#~iUn?!fwC{4dJD_aZ-9^5x)rlfPc__kiz1ew^ehz$cO) zFZoU2`;wn1`OV<_k)I^_d%^c7KV^q8NnDel!*LI8#bS;$T`gtGz+X=OTFIA#zk>XA zlD`N1mE;FYz5;v#`5}_u1pX@WLnXf%d=K)&B!4gXp5%w`Ft(ykML!^}qjY#>Ysc%L z@vt4^<3rzA(_~*g06vMlE%_bblgZy8`JLbgkiSv#yTA`5KTGltg1?&l9LZOKzlQug z$?pb#E%^n_c-W4yASRm>CLaJ#Og1S@?f_3rHYrT*1W!yhDNODHPfRu`Og;#nm~2v* ztO8F=HYrT*22V^j0h1aJ9jSxHLr3ax3GBlm^L^ZRA9u!M?H)FpoC$)Sa3%_B1TqCR z1Cs=`0+R)e3rqnuE|Hin?Sdyj=v!-{9PhiqUq}9C$vfav$logY0QkY=vn3w{pGrPg z@@3$MkY6JCa`0*7^Cf=|_@U&N?JzQxeJTCW6NTx!!Mic-fOlg$0N#!1Ab2;X%fP!a zT@K!j>3hIyOmE3l{eU?4y!s?{cp29P+PIGFhYMfc+<@ztgG(;3bljOGXp=Kr&=bxa zL5;v%LCwHCL9M`iLE{1oK#dEVF_q3SE|6nRObsKyLh{5^2KiNzC#HszUoClJY6ST^ zBu`9@Bws9fVrmrmwUQ^MMw7oY8dFiY#`s3z8hjM4!AId5d=#$1N8uWL6t2N*T)Q#V zQ98IW)loWVOa%o-jyqTo3OmIICt>r1bB~}#ph8eHut`uWuvyT!z`dYuOpW0fZ;)e7 zOkGd@F3A&9W69qwd17iD`GDk!DL?r#$rDrK$=@S+Vrl~UO_C?3CX&B58dFiY#+XOp z8hjM4!AId5d=#$1N8uWL6t2N*T)Q#VQ98IW)loWVOyRMkuu~5R+T`pI^n|lhP$RHQ zP&0sgO~_e+Dna7{yFuNU%H$Z|C&!$annZq^B_$XY1kHR(hC|rZrxOQWzqjYd%s-tw!nEI)}$Z_Xqf;Ks?3VOo%xu8bi7lN9B zBZ69i*946V90he_YA(n4Q90(s)I9QEmpm~wpZw#JC#DvV|Ayp=shh}uQ}V>rLh|2| zJTbM1{F9O=rfw$xooGx&;TmHeg=_FpxCS4EYw%IH1|NlM@KLx1uW{|hR7dIH##Be? 
zpr2onmKxW*<{iD!x*ODIKZKX4*St39=BKR+(DW5=TAM(}+;!aA44S#EM{Ff%))z;` zehzf**B8Yyf1mO1#xnnaS^tV<{=vD&W0`;W9o^!Xe_YuWam+vEfgW+pf8&?a;+TKI zH#6dxzwacO|LO%Y|B!r{f8?Dq|M*QZ|FqpQ|Ezs7|HALc{Qc*>E9xD(_INDooA}@r zqMkWVrp57HObOz;U%SX!3(B^x17%xTH`_|x*;eY#wo-Stm1VK5%zx#OS7VvK&-mA3 znSa2nH$~eA=Stm&-ywA$S0;6z@_^L+#xGwH$NX0flll8jk@>HlC-V5uBI&L<t54tDuzT>}>q_gvdr|@?O2LB~cpnfn z)1AjKZ-b`KnTK&F4Flt(e8&;;j0v#)A+)21osNA^GHe+GUq+uK^zdXjJ%*%q<=o$m zJs)iIZpe1Cea?p}+yq(lLqa!CveT_^VpqWK2RTcuRX#^MmQPaUiK(wAi@I??ro28u0i-3$o8?vI)e}%g!n|;@1!7{ zf^c7Zf|H7HD#HEjOeYQDG|2b2Cp+l~rz75HPjxa7&OkWHp6-l5cm%@9c9t^=;Zcwu zV9#*IAUp=~1MQg(`>{Lt?uQ#gSAbptngE&r+5@!5;bS4$zQ#~@d!QrR*ciIpzQ&R5 zL_T|vBini`bful*$o8TfJ5{x}F?5xkrrL{gF|M+`$3i{r4AtJo5c@~Aw=u;2k?m~^ zv43QHkA>Jjvb~KV_7BJQeAb_MJD>H}b6_m6G8UM++MezB5%wc|jXl?yfbaz1?pk}k zlZkL9;s@C`Ig=5djPP~#B4;YXQxQ(FZ*it0JRS0b?c1CzgtHKzYUemJ5T1eX5PPvR z6XBT%r`dVVY=mb+eyF|FnTzmT#HZWKo%smQM|hZhyK@u5HzAy1uXGk6ya@8c?LvWl zIbRw>oL_RjG=@08=)V2#t{2Ow)0qs{UY1h7-GN3b~c9CFS4DDA@+-G=dlp`g_u7d^(NNON4@pC>r?K- z@}1!xkTBkNf5g38V(!6yTdnN}nX2}KNV?ZAX>E?Aaz6;dc=lDDDCr)`fUltpsN4_I zjts}tXh+6~dE+~aZeCQj@FuHaRnITG_M54_E9#wlJoY=?f;|Y_qoMnp`vth~DG%}7 zTVY_|ld(wdEx}vKm%Z1U-Vyd1CsD>7@Jo8oYpgoz9c@}8?INcZ`MLjrdpr#6mCBJa z&-8*!?{TIz$}VyA{sIeSCQdTFqwRIhqsT{n#C{OyHV=ENjw8MMb*7g#$-+EjI`&%i znuk3W)C>Jq+a2Y0zt*&9tEd;^_ERs=y33Jv#bDE-?TSTywhQ~Sdd^&&xB__-hME@b+kWH~dqA+K<1y^x&~>}@YgxBjug5WNqOYO*W7zwl>vroW z_GG}Zg#HI^on|91>*jcbvFGYc>z;#kp6FNR$3C;x>UP(69P*+*jvw}-^+rD=PBg7C z_Ijre!u_V0*7f#AtpA$F%fSAWIP|0M2Gcv%E_D(i=d(>~ob5RCBY2qI%fQ~1zDP^H z(X{+_&>^M<#+u&ocDb`al~;p4GO(|uALItkF|7%9g+rWNbAjodXm55-xXZ&Dw?E{r zSzub3_7-uCxe|Pj;ikoF5!br9-{+xSCr}PC%xe?zp8#3nme;4N5GFo}Z;or?dmbi2<1;I${>*%#~=UL)LiaqHl&^S@Oe?Z78lAMX`b0)yNS)zVg!a87Dl zu~OPWVqwrlrgxIP)k&0bd;OBu`p|EKZZ@sScBQZtoRi#7C2WP1*-u=g02fp2`y;j@ z3Heg8O>3&XUDyihBlcb?Tfu(z{m0`aHfbxEj(v^NR-jzW^=jHF;!&HGposqiTdeK&h zzJ~6?R;aq&`V9|qPvdj-ZhdGcSTDyy*oiZ&mv%z*skRfX)T{SA=ek_Ct_?nyAUC4ev}W6ToiOZ=E{`^a_EFm@+7yoSHNYWl744l>*;l 
zz2e9<3CD62#&VARRVPu#9q>zf@LKfQs5?z-u3hVJO;drqdQFofWuD;}kHHwvvmbK? zL4M3#rZwN*FYFTMKi4qxu!b=pkNH~5wx?mNh*8#uxc#gTw2s#yjnhXlM%0ySnNBYI zl#cNvMrogzj{UB}K2aZ(ae%f_<2D_2W!xcgN_#~8u_w0oV_3VKsXqHLZo*E(o^k!6 z>my#YPVTyCgG66LzcAJ-XIkHEj1}wCYnJxvqwNv>#p?*xC}W`4*fP_*z<$EH4t+8< zU|KiXb+|68a(TUgof!+c3HO=ah4%kADUkEuV_J*s1H!)O{-C=%hVrKFG%em&(FSm>M4RM?JoZ#n^IAu{#cLVo z3FG0qz9&bfi*?O$HP$CE#ePlHQhY#2WXp^j=k)?d7s5{P~P6lJjVmP z4swpswi3g{({bR3_K9u5o_A@V+R)8Vx+S)u8|@U^i2d@tX{RX1y50J@^`M;+;{aW0 zr~GZy%lK3E3GEf@)oZ7mcIu_Q5`C!b)ll@u^xY=BjxolV`|04bzKDAe&>122lPX`? ztqjD^{0iklMk0RxH*gOEdegT|Yc%Mh@0gY+){3M3 zlDO#Bn`@zW@I1c9vmWnnL9ORJ-rtdb(c}FC>0ytTc1-sf?N~x=YFs1Je&+H1nR$+Q zyniMC2JMNfY}l~BG5(hxFKrjEm&7~mGx1LQOPtb<>GEmEXz$oJ0xPgZv}K$t>>th* z_75@6{$M|{KR$f#y-u&Au5RD-c;5tVdeY;4oBUHA@2^O|=ka!ZuWuaN+D)~!Nwu|` zYHK&Nm3FKpR@>6MrJkRZjDWy1{_D-y zNuYdJHW`%f+Oj|^dybB~0d&{3H^(u5_OtI=%)jhs|FW2W^>2<_%zx)Ux{3UOm@7s8 zdoS%7%lx13J2aO0cMlmJ%lx^|$@~RJW&S&UFY|Bsx6EG_EA!uXrOf|8lFa{w;WGb{ zBkziOi{CqLQQvzmzf#n*bJ);WVmcjhd>20qlx@ubWm{P{+e+Q}o{qY+t<;@uWm#-1 z^Dp@3tC;s}>uVPCFZ7W1$Ejnw_le@NW}F&~Py-+Sq0qV1pWdu1&1FMLww&wXCz zFE}Fe-|-9|khxyOQ-TPwj~UpH03& z^7O09A^)u8=~tCY{s)q$U)5sr&q!M#3_jNU?eO>gc;=ZmXwXchQRovIrgneDwkA}Wo z9i@Zzqv=Q;+A}I)30h7`Bx-QzpCZreA^BfQejNDK@7WxKJ(O-tUk~1m>9OG5m>vh-jcGr4H>Ssf zcVl`2csHgeg4dYVdp0{#2fb&rBXyAbfY&R(s*TF8s#N(^Im)jp=t#e+a=cTbzd{9` zJ(F(6^JM%fzbay?gk$_CIp)OFTJnF9JTbM7{QpUwn7Whv-z85>ttbBv$rDo>$p1gd z6H^<>zbAQO>MrvCj>c3Jt}*6OxCS4EYw%IH1|NlM@KLx1ABAi18rN=2b(9WnOm&nF z8dE`mk&Vi)s??EwRgNS5s)A01NH2G!UsZ)8{i-%QZoevGs+42=fgE#U>TdEMN}iZ< z$p1(3#8iNMSn|YFkbKNeyh|Sc3u3B_yhrlHR5|%r$rDreknglJ;{VC{8HH=`QMd*l zg=_FpxCS4EYw%IH1|NlM@EX@{Om&nFZcKHQ4jNO^uWF<7fWS_v@~d)`UsceNepTfT z-X91%CH<;4J8r)!Vyc2;e1RNuVrmolE|MpvHj}?d^2F4=O~svPB46?A?m(#sv`S5@IizpBlS+pmh4s^l17CC8kY`W*S5k|(C_ zC*Mo*#MI}>_mMm?wVixl$rDo#knb;fVrmEZB*_y~JIN1-##9upG3HUY1|NlM@KLx1 zABAi1QMd*lg=_E{*KSO8ln!o8b(9VoQ_`<$qw=dNRen{D@~a9u(yyxAIil>8^sCzJ zxc#b#sa+i7tL2yzQxB5AR`SGD75VEVPfYD5KUng_R5ke_k|(CVKz^v?iK&Ok50gAG 
z^+oc-qcIhQYm9jmuE9s)8hjM4!AId5d=#$1N8uX0#cV2?fXGl`hSeMYhxT`b-fnf-9z3Fd0b~}`FxiCO?C zb&2mQs_(S%UF9&0LpsLc3A9`5KJN+c@030?>>K*gq_2>EG!c21{*SdG&*!~b{sX0R zHFf@=_42jf%m?tB(S5`E>mg4+8iutmO&Gp3THg~WTl>}YFfMy7bU5Be-^i( zfc`Z)%y(eAY`*W(@qBK}__}E)WBTAUV0xf@m+bb-VVro6EYoV~&!O>LJ5AwP%It5; ze@A1Q^+qs_Jiv4{{ckjmYcbaZj*$jD>~Z`1HniKU5B|TFzqT|m&3NO z+(cCt{fIQ4YcV$kp4%v2`xj|U*Fjf-=~n9Tt9qy_+E~MSG@k2FkHB*~^=LdFL_K;w zx$&MK^p!mUY!e^Uo!=O2>)V22%&D*XrYW(n+tc#G3a9WSLA`_fxpE*=ulSIN7?WdjP2rozYHv1W|k4@`NT;7hf z+fi1wUGJ_b8>QXb?AkyePbT>JZCUkcM|76Q*>q&;4P^fgFdZ2!<3cb zV`x5P=fi)>k9c9jWO|+SLum|2ACzOES$a-FSF!H_c3t*!V~Bgi^c<+h9N`?uk>wqb zW6&7l9x>VPs1tj{G~R14Zw21l(2IRQz1U9ZeUe@p|J9hw0{^OBw{C77H2!O#r@((3 z^|sd^dOp;kUNIlqsaMa3TG}q)em(kqJ;rFM{j#$W;f)yAW%f^;ClprM9s@WpMOrE3 zmfJrS`?DoxkA=9mi`N5zANDz9C5{_I-0LNA+Zf{Bt`8t5>@dol2TV&GV{Eb4OYhI3 zOr$KxNIV}4anF~;b7P3}gSI(RRs_$DAzSpgjyKZd@>YyKO!I`#S+kiUMCu~4F^=TV$ zkoF!nU^B*IGsa?-{id@8;VlRk+HXf}0NX(uuoY=rF@CG|m*A|us zx%-iJKho~7f9q^Vcss&H_V2_sOD{L@dj;H-^}kvh9r__N8p=u@L)? zHpY!Nx1MgDK5ji|W6lLVp1cU>YhXOQT!U~gp6lEY?yvMAUhnxf@cnPtk~;3uyXAYP zcfEbg;Wxz*7y0h^E!cl{%Xdv{gZ-}91KyqGa?c*H&hL#w_L*BDb6bOHZM6Rx@lE00 z-<;=6?_Ku4gl`J#P5;|KQ>i2_vUH+TGs8> zi~B7_Uqg4{o1*G=>*xNqF!vd2o#>y!y7iuL;|%NOo=efM7-!+f)=J&(`skM;>Qj3z zZ$m%i{J^wI?f0E*gcm<=T6f#W#a?keUfK^O@3850>?S7%a(OS|V`cUU;d>(SFx$(0 zmbpk<`if}xccSg*aKc>oA~Zxb02Ykl%VYaznKC-Kd3 z&B1Zer)NS z7iurIfp4y3uGi2Erd4St z;5)4vGdsL3ec4KYna|lh0`z5DjyxrQF|GS;udpq;Pqi=GI_UO!yLX@fa_jzXTHEbJ zVOt~~cX+ig+j{8pfZZ=}JLJ~?KhxS_`vUZ3)8)~w&_-%oMZ3cOz8zSky`s(3_K9P- z5*XvWppDe?fcPl{eu#J4M~!oqvj*j`4`~l|-_hQwr=#ko9U23Ci!}?* zQ-H0hM)#umXLy@!zJk%N`iux+W8O^}2@VH$YbSvXx?tAGEIt ztb}~&ho)6!4-&SCHX!25rp|9*+lw$(#3<_%>kUckcrDU6rN0PuZRyKag7G9qX``6V zvm1oJh%Dm(?W4wR3F=z+4~bLSBSP$9h!oKMK=lUcFxy_e)ykE34#Cl>YFvIIqCB~jMMUM5W zVvWMO@BaX68PM(jiS}jFeL_3JGIu~nu48t=_7ICzs@!Un%e6|RT&z`C?n5YdHS{d+ zo^HK9DSc>n&Lw?li?V=ku_l@+ z-#@HD-&J(+Sa*PKzQki0h!^i5WO}qO8^)D3X)bMmT0bR9`TfKl$5s2X6`{N>mwALQ z8*MC?G~?PoF{(7^L;^%Opo?u!x+;xQFpg58^@HkM*Ffco#(O$+a${q 
zzHGb>a*olq62ruk_GM#Rcz%npPi^SNJ`wLS+R%-5if!cCFSJwa6Ugs%>qlQUtq1Lt z7zgMoeA(Klx4k~0y<)w3?Zm!2RlT%Vq7Rk5Dn@^7z0wo$Wh((++0%n>KCU`?e;@Xf zDqq>Hb%@{I-(%egx?_OK2V1iq{H|*~7S~65jA*|$Aiip_$Jz*5J=9~}1^Uo%PsEo^ z;-Z`I#XTKgHr;2Zy3#k=OuzSa8Kw^~bpgLCHg-fo{ufR(|W%9vU$0$Y^2fQ zXN0MK03Lb~52Gdjah_AA@$lIhVK_e!`(9It$6Pfp!4Dz7MDpd})5zyb{vPl{$uDd6 zJ1WJ#RX3(T$@97NKG}}cLGP38NFC(f>(9;zi=2H2Oh(}vd=#$1N8uX0#fpvyN9mw3bx7H%&&~+@?2IsRE;VpGmAHPO z8Q0Uu@02{})^zd@N}hAeCck@!p)n`{v8deL0K#SDO2BHu-(ceL08x z*P`uI7URAkWoV~vApb?l(@xDG|FGm~r*0%4lKe-}!EL8HN{1+4wx_U%RP9lW__96q zNFO6l`cCkv$ZPepe^-x#(Td*Fz&$$Nmh<(nj)Z_N;(fgchQJ3&# z^U3oj=*zYie;+;|rl$N4A`e>Rlcu0I=f zK23i%FZS2F&+Ll$v(bhD+lfd|#NPaVpe@hviukjkKFTITrnNJ@xR*`%2(V1NTRz-A~qiNiaF6vy?Rd6(tg3Nk3he)c79vLpN;kjJ}P4%=RVI(Vpg8T7V&3epF`FM zSs%}MYjHMP#Gegi(w}W0U2qagg*SXKnjC!gs?Ie~4K z2RVsddB&UGlO-|ReAb(smmPcN9IuJzf*!OL!fyNtc)=M=+#fy`3* zY^pxDPVTz3EvSQj!k_I7>(e%%4)qBe&`y2Y1{|clhYgsFzH059JZ%Hm4%&d}kZbMS zJjw}-usq1kfLv?m=tZu7EDv*gHso46S1)qyV|kF954qOP*^6BJSRQb?2y*Uo_aqkO z*?HOyu#dGro5Z6$Pmi`h`?F=DoJ`>E<2+kW&Y_MytxM^Y8!fgx7IydyU;QMB5ZS{ONw~w3Kr_JrV=Jr`@ z%QuSqid*xAYRmVjEnlaWzD?Xer~S>`{$*}IGVbfuzGI)Hj}Z3@pUe6YtLL(Q;&}&b zklT04?JGrWYM&^#FBI{p{cPO+H7rN_(zyL+SdR9Sar?-yoYs6|SWas`F!Z(KUUTi! 
z$!**TK0Aq6kbj#e^;l1WrDq?BPl z`jrf6Yvl-i@NNY9)bV|d`yCYD+kk(D>m|Np;WJB8l(Roa@{#vL#G|2PNGX%3WJtju z5c((?zmmyOGNhC-a4)K#yVN{Y-bXxR#WQ*RoP_Vd#4|0V#o>7g!+d{5IlhltfbV#T zXJd~lIi%IX-;r^A7esl!vQ0HBfhos#<>P>?R;^bnXAua=2}IcC*BY7`dnU?ug?$fk9d71FKrpu z0Q!vi?nu*T$zP7JK0BW4I(uW#UDjU!9D2efBcTNrOK;&;R9FsrtNMmXi*@d7kY% z5b^p9UzU?`!O3S$vz!qqr{&qw`dNeze(XHQobyGWSI&Oc=aX~pXv`Cv`s{Je9gTV7 z@#8*+nsuBHx)P`7i>?~8#Ew4O`dW;uKFgZeY3O9^Iq@U{i-BX>n``>yN!Rv(k+-BPM z$g_w7d3Luxli7XdF>QhS?B#RqJX@~IKdo)!y!y1ZP288^+-q^42rBLqC11;PxFh$8 z2;&~By5*VL`pj%`U#4UXP~2nfVSePPAq6k*6P1i#$>b;*QpyN@aOQxxPXrbBiIP8< zD0%rT0%4S0+wu$peO`gkN68qVC_8eV0?Pp}?-Sc8C+a?Nq`FTW*?b?yvn9lRBGUAI z7|(?e&myjFz7ONs1VRpJ`aX&Nf?i~B^hBXXap+QIMZYyJ4meCFl1@U?#Y7QHABaauojoYv1BC&m=t-4_@G z6&RCzEzbmqU<_e_F;I-p9{nAEQes1lsgg024D$<&DH+aHzmg$stsJ2bzI!e(1}ZQn z`GfpkJ9vpPgayVxQO^Dx$=8vBm(L=UjG<(hA9?CXDI@ezGJYkKqhv@a6NRyb3S$eK z$B^H^XP@(X`+5xdy?TK$jI|y^etTWWAx)1Vzw^#Ge!pFhp<7Omp*v2Gp*t>uu>ys$ zf@b~r9d3aMq-p*5?QDTDv_tF1??MYXq-p*54Qj^m+tOM;x183`9jEnk$BDHA?(6$q zGN9_>3oR#j@!m3Xf#onhY*}4TgabF4mspO+7cf9869`dLq(0 zcMc|X=~CV=oLm*eK9IgO1FDh-AiX@fA{l8xr0I5>M$aQgpBUp|qstL99D}on44nOD zrJ3aW#u(=4(RSam0ow+I2UHoUF-G4q-zHz!R~77rJ?;a1WeArU?;t#Cw0+3fTkl6- z`bJ%AhY>a|KrQ}g{*bY+KFQ|`9tz5Mtw*^rVo1C7yo-89jI_&)gY`k2pAj^kt`8gE zt%u6NcJoq4 zefWChF^sX{vB(n+gRexj#@OK4%I1Dfs!C2u4qlY})w1N87jpg@e`PQ9VG#Z0gC0rYBq28;JVB&SNSaW&tLroM zWyTL7^BjIZLSH<8dVRruM4ryb(yfOX!;#XTtX)0lpW81wJ-4j@V4_ zBs%x|H#c0Ob&^?va0^^N=SR#2pKKUMsXxhLg{BOzeeh~L~0y6EEgPB?$4 z(_zEt#C^9Hy7>$z0q6Q83@fzaJY(^F=lCNbTnrL!jB^s^#pBo6i9x#O#dyma7T+~? 
zSm&<1FR-5VB>a~2IZwjxNMH0M{DJhaC!vw_XP$&VlOFLT{FU?#PXgahTEFxpyn}W9 zJ3sIkzXAR2^B&{3puc;`WBf1ZA71eozXNUjxySfD=%0^zj6Z<>^(~L_N6^3hn!cNS z#&N}w-DuyW@i?pKKj90)&^wIwg>n9k)#T=roq3bcH)uoGq#4!;Pf9@eHK2`MZ~RxF z>%8~zJ0AD|=^wt>*r_S${Y(Fobo|oSp=;AOJqd4;KIut#oAfD9!mmib$2NefK4?;X z(4_jH34QQ7bbb9>9^(zrH^1XC-UNO7yB^~$&|fupjJHAA2fqYmAN&fGeei2g_Cb}{ z|03J?P*wYFeC(m~uZ@4fx&vuw;qNKPbB@FVZ3)t#y$AD zjC=5H!#Mul$#LQ1C&!INc^~XFozNv)8dAqHJ^l4PF_`D>cM|>~bn7(ecW0<@=2V0s zgJl|iq0fHjXdlFJ;PAJ-v!q=lF%lGwRn)5zlo-zTlz;D?2(M=7b(LVL_$Do=cumG3 z5NpKa8w|MA#Ke4f@kK@q{=ngfK%bZi2zz4sn3u%%7C1A$@W{%qk9j3~=*~Z8{_Jp< z+9N50iGb25F$lNK}=?k)lrmb3^o}D%vWp7}S7a|P%FA9iBBj_EynOYl{J~3difoiTmgi5vQ~3uWaOjY=GULuO)DR%T*aT1sk4YU04< zB_(T$M-3jlte|B1+QlhLR<9a7`^Jf@2CrPXZq;>bidNsAzoaAuiHRd~mn=>nI&^8? z($w_4AtOePT$(p>NZ#U%^by0;()03`WDHA7U$QiH@wGF@&z(4RQs$_1M5JX588UKc z+OXjx(}txF&saJ#clgMWOP1tkQjg-6%g~c0I87a#O*QPAbEnaS@|BNpwGE$0G=jE2< z8Yw0D>q~@m?y7<%h|XVGv^slDZeAYx9P#;!*DlLmSy0FlBjIdO0{U-NejzfWo|4?f zpvw_2Ly{V2HU8Q5q}ekjWo1vEH)G;QFCk?`o{^ooaK`u>rm@RD@*l?oqhXl3Bppz+ zGF~T0s^^=IWnK=M&Zg(Jrc;$5=QZon#p(He3F7`04zJev^}N>9r}FD`onJ*8Smt1U zH`lN2fToE`Q0LeBYuX3t9#?*CcQoCi@@pbZgywKxi6G;2`5M2P`kO^Xs3kubvCOaS zVq{)iD8!B6i;_nEw@_f1%F8zJPtUjD5y`KSMD!7Zy8pVW_^~QZ=RcoJwTYYV&D zqI&rMOZ`=G5u)?A1Zm6s3swHqNKP3T%0q?m7Zm@S|LOdi79fe?mifP>^6zQRDndG+ zrbUR=W9Jr~DGoC>j2sn7oKjBrpT_S-#Oc0MqJq?^{J2?)`qTC5$O;5n%RdHdC;mMk KN#uXa{Qn>0y$LV? 
diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co deleted file mode 100755 index 211252a673293719a433b9aa8afb5de7023b203e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 71136 zcmeHw3wTx4nf5;WtOUdxB4UIC1W3X~3?YPY6#{bK5s^Dcn*#@GPsf?*%%sz~FrBfZ7&!m?uC>3Dbs~sT+oOFZ z&vUZ&yRP3_-@CrO*E(l?J36&=ni~q0D98MToj09OaLu43xPJ386FW15aig8i_}|sJ z#K}ZD%SyhE;jy4H2|FV%{}?T8>WGg2E(#(f>}oK}K?KK<_=t&jf-vTq1|@`$`j4@c zHFimmo}U_SG})Y(iFTx%`vJ>IdFkKWf2yuz|2}AY((H{WM>#+2AKTL7NrCy-A>0=T zCZUXdCuEHwEgRva>!(bgv%qog{zk>R%GH}I@^7iyw7zQX<_GS3EWcvYs?BdWy|#ZN zzhXu8mWs7y6)RR$ZQlIErmBj{vfDSUsjfmwW#uNsm2KWwv7+iLtJZEf zHFy1^)tvRAYyQ9koqf)#>qRGnOjc#^aGb#W0fj6 zuTn+lRciEkl^TCurAp4L)RgloHRHTWm7Z6rIpVvsq;1KHKvjGxnVq+ziQKlEgQ==SFKuCwZ6J+<(jQkm1Vrk z{52mD{z~7zaNkO1s93vZ)%wogdQUuyJYRn#e_h4avXyHqs>^QQu<4edZX~^+aPwMU zv99XXsgtJ-FD{#Q{ftRv(-uuBn^IgNV zZz)?*vAJydrpmJ0MwfY0=gyv5S~hLL?CZXpzj4!=+bXK7l7h8M?tCqO^Qw)T%2sUH zvcCG}ujX&QW8Lx%YhRu5ku;otOXZRLE$i2;+^}h#?qgYdCqsXkzh!gP=CaB=(9<<5 z$~IS5thnX-xCtbs!7%(q{_PdFRju5#0i(@9%YHJyeW*5W+Q27*4V%i=ZK$jYeLZ-F zsD=irs=ouYYQ_3%5b%SCi)*e~wQ}q5!jT2?uyI?}rp;?Mtlt}wCl@@Cy!VgKN!`MD zYLVx@Aj@=wm*ENT*o7vdB#38z`Sdh#_ao@WxHp!x{`Z*tVKre~^TUQ2|DL~(Wb~bn zq!#F~gLwLB*r^XU!4w*DOajD}>bS^q}>@vsctorD<3rb?mp+snRA_3Uh`Jb(h z^LA{g^FK8Rs}ubmyh=Z9$BsJAj-zqMDc<5d+Ojz@K`O?w; z&cT)eNY8P;+@hQ}TQZ!#Z|T`N_9VwO=KQLqNA1N&uSC8{sOxQ~;^lITHp2=$vop+8 zChU}H!p@ydctIBvx)+%+jAwY}@6y$T7iw)98T&iibvqeo=i)?8EQ+>s?#bDnQ+qI$ z(|>!cUo7)zHqt_wu_v{zryp#IJ6~z(7E}L&tfe~a8id(FSP+CIK{zJ}%Y$%Z5XOS= z{vd1&!sZ}67KA@{N-0N|4sygqj#q+wtwH#sHhvz^|6Kl zUE6X0OCM|Ke`!1J*O=FkcgbV;w8zytKWvFP&*1Z;mW1=;7OJIR?ZDjw69XHZf{@d% z7UPu2X>ht>%m(Jv;=0zsY{@~Ha+Ik^R6y^E(DcOg-6(syV*2zwG3O_!?^%3)iqCWS zJde){`200KKWmwpn5nrkD=`aY5((fvWW%i3tUdh`c?~gC)UPqOAul&Cc5&WUYV#WN z?#+wk?P7j%(o1-{T;BaT^*OUqv1}7L2WBFyc6mfbTclw54YA!q;NrX5Qm}g@P 
zdjt3Ghxl=$XMK?Ii#WN*BQv>1-f^@$uWLh2tR%Qbz0L)W^Zl5?-H{f*bSLJUk>Tmt zv6e()AMo&>qj_h_jGCHm7b!p1?O?6DNcqf{P^#}v7>{o00*xCpE5IJxt%)I-PrxHh8w%L<3SOmM}Rwbj^$o(LEVkLklw2u%iNQ` zC9)3QOQY*zJe^4)C(oJrOBdr5bG!I4H`7<>i#w9(2nRZPTxy-uEtw@({s+f`cBON0 zt#F;!AuTWA?*@fZ@;r?^(tgZ&5gh%Smd4BHH#GKK(2%%%VFPafiJpsei21NP5v$tI zxsj+E6(a%69LY6v1pIZnJ5Fv@RV;7Rs93FY1ob_M&v!7tzN_a~mBvpY?tA#O;Byq8 zWB5Fc&-d~9LCdv?Yc)?NBqnH{0&7lBh)vitF)>l6A=Ggti4vWsq_ITv{BqPIJfEDH ztkWjvP6p4PY8jsxuX&D8^W5M>D8QL#vInEIeK`L)eXe=g^Ze)KB?;%}E#sazGBM#C zYUx@y|F(ql^_KnLdE<1#`3CMc7hRNa{-Q-?IH82ojImE7=72*Nxyqk|`4L`_=_lOr z(L^Geh`9wm=VHvw_Bjt39>e@&DBN%}p7-61zxKl!Kk?lKW}ba9Lv?kpz4jWG{bG1% zlw}#REJGa#9BSi3A|S?x0C?^;668{b~GbJ-4j>dO*~Yh&Q^;w*J^an|DfixcXo zu54xpU_YzH>xD!J}rbaNm)D0)@6zvk*bNrOC>;rgmT zHpz^4+V81&)lz@c-A>@Zcq=dPtFVC zQRllS+v{KWuCzNhvS?v@J*OA7$0ts=$5Z~Hkh3|EzX;b&f&A=Cb09zSGzIeW`bg;i zY5AGIIrO5IpZSlVFO*-`i9D2_@vM{b>v{tDb$lSd)KB?MyL+|#QV->qc*-yFA86jL zO`U`1EEB`s%9`gNcFwvfbJom5CnuOkCOlYT!lrT)nt6n4@N<8*2@fz2a3k{|H1p^j zc{Fd&kw?#?;$R+`@NkIa( z6kN+ZBFAJNF^|k6UN?n)r+LWyjiGM6Igd1l!AbH^*NHsjA>(Bp>3RYl>iB?%Qa^cU z+I>;;Q0gHMC7wK#c#df)>X&&mpRxt>h_cB%Vjh`Cygm{t>`gns{LP`cT7Kp~g1%6G zT_^HTe#Xl@()9%L>-a!^sh{$jcDwba9gun`zr<61iI+LSwcdhhKe}gnY)tWY*>F_oMdp;aSVfNi9=cR9ri6(Y)Guv3a{b*nQc|pQX#x(Hky!6!$rU z%a$Ij%?O?`6K-cdPb8+o{+{CVBAz=F?%RAuO6;2Y-ngIbdT*TbGuoF)zsv}6?HxKE zxnM8u^}u*u>J8jma7M`K1KdY&r;yVZxUb*~LbwNqj%N$L5PKZJ{RCeea{2@J7mR1N z0l))*DL>mh1sS>{uKPO=Y8g&JhAV)t5DXc50QV3K8LkArQZQu50?rZ)8Lk4pN-$*T z3EWdKWatIl3z*}TZW*Ybbjy(BL%$Go5IUYC`Wk>03~;XCT<#%+j^_y;7;**z4-`B& zglQ8xK1lG@A!jh~V8PdfoFTwN1P=>2R|8)Soa6&!(3})~K?co9!H_|7QZQuDoD>Wh zG$#c^2F*#qkU?`&Fl5l21WxiHjWQ(pkVY9Ug?%{YzK!+#Nq-WavlDKcKUs&T{p)n- zM5pM`jZW2}icZsEW|ZF(IF|4xFGKhTJC5QW<>U4UE}ism)M1-HSBIzlc{+5W z^L6M(7wAw$7wRxGx(J~YZs$}HbX+7lhdxIFj}#1j76TUxhCW9Dj}i=hjs_kr82TIo zJVr3|ITm=VVCZuk@Hk+bQ#P;3DVx{il+9~$%H}mWW%HVxvUyET*}NvFgx5Bw(j|k< zsdUL8oQi3Vob>VVrtK6rITG$^|85;R(Rv-a(d{}^(H%O>jNXIL=G1uTc)aKw`n(qS zTEWog1mFpRq0foH69q$`CBP+uq0dRclLSMblYu7-hCZ(Yz7E*tl+9~$%H}mWW%HVx 
zvUyET*}NvFY+jR7Hm}Jk;kC`Fbje_IDqS)Nr?5q+?bHK0Z1eZ(@U;J+4xQ*e9lBAh z3z1Jn8+4c%eHfw5sVUI$6wx{KITd)SVCZui@HD~D=XBudf}zhDz%v9xpEH4H3Wh#s z0nZW)eR{wiu+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^WnM!RA!DWDrgr z(;PYJKdr+y|NA;T?f*cBPV|R5bfeGcP(^>F!_4T95!#$8g^o)_=g{Z%z}E|gK4$~Z z77Tsf0DOaB=yMM69Kq1%jleewhCb&4&lL=P&I6tYY;(%yH92MTnw+wEO-|XoCZ}v( zlT$XY$tj!H=-&v1)<2XQ0Lz zTnEfO19h#zcLQ_JKut8b9+-OuYLda*fw^a(t}}QCF!v19RD8WANt9`*r=IitR4X7nqPSa?*-{X`Jg7I5+bU~;l8;N)Ilas)n*(K$KA^}f2(U~-DZaOw}*$FexZb*{SC=$xG5dSC4}n4IEzUwy`4a*FGH^;v_-DX#a`0|t{* zT<@y~4JN0!-d7J9OippVuO3e2R0^-5^AuhKr|=p$h1b9-yarC;HE;^AfrZyLr_v>Z z&8c+BAe=fD*s1f{$FexZb*}oH(K$KA^}hPN!Q>Rz`|1k@lT%#ptGL1B6xaLeO9qou zT<@zd8%$1dy{{fKn4IEzUwt)|Qz^WL&Qo{|oWg716kY?T@ESOU*T5;f1{Pl1oJyAr zHmA}hgK+8(+Q+guHJ>_u+~}N~T0s0YgUKoGW2vtjOinE#{tJW2DehybZyHQaaUV^3xyanaT1ue5o*# zdRjF@xp% zPI@VWZ;fDSdZKOdp*Y9W8E_OJLW(4KJxdYf9`$M^Af)w zB`<53;I?0+2n=;;;Ve4x2j&Y z1zkD!AI8}z_W5Dt?dIk9?*`Xx$cu5v>K4xPyYT=K|3cRTIS6xrBi?8~57&9P?(L2B2jO}UuKRf7{UNv>g1mjb3I0%A z4@G>oSK{a6Iv>~lyvcq6t_yJ8-<#qW;<^y|2YA!`B3u_CKF6Ek7vs7Z*SX#-e>AQ~ z<2uhP^~d6REbRI1xAfYmImJ z2KlCst??_oA-?G+%6UV5)7KO6E4_T*^cVGb1wnsX<5ziwL4Q#%^lJKhBHqg@4*J^~ z=lGcZw#GR=roXLmj*sc@i8#l{^tUz6@u6-%$?}tLpJe%E4om=7CV*3ey}5n~u1jz| z#GCI=#`R?I?rLwLKLyuQ5I@vg>`%k>G+bZfE%j&MdIqlZy=DF^T+c%OVcw_xQe2lJ zzQ8N@XXAP{u7`We{W-XvgX==C(w~d#xyV1lTj|fo^?bw^d8_?}xL%0sk>1VzVq7oA zb+NbBUyAFc$Un+kueopLOKY6-%gmS7IOms{FDK%hUuM3v#yP)CKU?FRU#6d}an3K( z&l7RZFVoM~IOmt?XKS3}V*1${=eU@Do``c?Og~%W92e8i);Pz-^s_b2aWVZo5$Cv& z^Pfby$@Nd7+;VQ>ea`^qe8}@h37oa~1J4cWxd;2L&U6-OMsOC%ga=DZ*i>#pa~28L z_$}A|Y!e<}9^gjiL1@k*og)u*>YO8w6MFUAOK)0QyJWFC`n8-dS!W%Cb5p^2kIp!2 zw+!cBcs@wRoM%-+=wl_~d9KI7nLB5xIoE`5C!dd8>qbU<8~tn(ceuoaM?meL35rP-pg3H!{w<&3_!_D33mi1lbng zT;EBgcfZDs&?YI&L#E@LU+)Dt-$S`DZcW`$Z}+QRMO#I=5O;`jA#A<^X+4Iyinc2Q z<=HQs4eY%DXPPJ%@*HkM8(AaW2yL2uKBouDW{q$a?b{)g)#s0Jp6H1~?QPrgb!^*~ z>+8^)9&5<{1kN2v+qQiAJP;hW&`*ag(_EBg+tfz_=lz_Z-ygK?d3es#DfZTs7X?>i5NU|LLx}*4u{Xzjl2&I2)CTam=~ijZE-r{A}dQ z@mw|0^ZkWMKFsay;M`O{q~+bxox 
zPo509z>Qqz?eI_A_2C(J0P+o4I3{Q_*|segZG|3d$gXWg(6%k#s2I=FKGR~$Lp#BCsS9l%B+(ocm|h4?>xuTU<5UJD}}~ zj3@01Wh;hkH+Wz42P0qc23O7T8ns=Ky18~Hhet!68@)&UA;>p+v#aKM2mJ)>kJLw- zLi;FoiZ+FM9s(ZHR?%LHZKD2$f?s-0AYSGGxswm>kn6OK!g1D9fOay7pyeIsj$Upv0S1t4oX}iSv&u5qgc!qJ1AM>?|eJ_Nr$WgY3xI=6Y zVKeVT!qdm0Bg)EWnNF5{DuO=AQQ9Y_wXnXW{@jilQ zl<|;jLaiHFZ-+FGw#bly}Vz*&P+hQ$@jXECEow@^O3LQZdWb!4r}`& z<4@aCf_&2+a3eQ)f2E%#W`Q%*eJOO$=M$sr7xnWA+n#*~o?{Tsxyy~n^FE(XWK3vB zSm#{GNZT_Xwuf9?7}UEM_3}9-SugAf>s^X^he6KC_qd9CLAO_FP$_H>^Un_Q z&k6F+MSj{^&KcSmn}4>Pwp@QId1!Y&PV&$e?E$~d^U#Uc58(Ns0An|0H`XBtr+wB{ zj&tJmtFW%iMu_w#^281JkqN!hChemQ2%epo5BU#~chqaoaA&_)xQkzi`er=nDz2+& z1Nf{&n^c1QI3u*5_d41w-pe>o7!TKw2g*&ley%wg==wBtOxr}+`RpXNi8`gN=@x!F zO54PA_)+M?br$PEeFrZOpN#T8$T>#aN)D4xC&3@uC-w#Y7L0v5i)>CHTlQIGqn%O`=8i#P{w_O#^P?@w}h8NxX@LA|h?8oXY!r)mpr8% zllp1LXzw^Snk%qHv}K$t93RdVjt@D`@!&XeJl=iltxm6?t#028M_xnN_N{Q_4dN%m zkzXmo z&yB<$~c)rvkV_F#PE(2VO21{&ZFV zuK-T^a0!JR{uXP#_*j7q{4Lf3!H|K!#abvBGVr%piv&Xk{uXPoV93DVVl5F28TebQ zrGg;?e~Wb!u$+CNU!io#AZK6FDTADa%IFtzDls;dqOSoVrwX`AF#PGP1YRi^{&ZFW zuM!M@I;(+K3x+?PHNa~G!=KL0z&8tqKb>2EZvjs70WxS#ioPI&=A>ZApgAcRGH6Z; zh76jMf+2(Eq+rOPIVl)2Xifqr`H)5#l6**`4CXAMV(`SdD;?w62*I5hwt`&XX={oCx*9pGMb=Cu~7mQ!AYyjRMxYl(x0&f(2 zx9i*re5>H?uCobvli+(?XEX3-;7NE6(EfBFgU#tvkiq8kDac@R`V?faIeiK;*qlBE z8Ej6Uf($mNPeBIZbn?7#8f8eH7fz!L<_!AQz^Bi)z^6}5;M2zseEP(E&_;$h2=T_jYf}zhlfbS3t zeQpEZCK&p>6ZlSGn^QKg$tj!HZ&8c+B zAe@S6j%*8j`qcP!8vDNS=@awoHLmlGPoH|<`1IM~r}*>%r)r?%8qqoQc^B|qf}u|z z*cS|aMuDS(q0bm_Ofd9W3tTH0`m6)46AXRc4SYAS%_*DL5{?bRJvpkP94)+*%tWp zsR?}g_<>KKnEykaUgsO1KJ~uw>9fO6@#zCj?SYQ>h|ZzU&j5c$F!XsJ@O^@z&(8vX zRxtE=Kk)s6q0a|^9}o9Y)+Qcniw@!(^H zdj{&e!FfDj?ir}>1?TaAxo4n`8cd%)+%r&58%&=*+%r%=Fql4lxM!fAF_=DmxM!e# z+`ex?UpDg**Y>!{L!Um}$7&1C6w;>;_p#c7Gllf&!+or_;7lQX`fwksEjUw1pFZ5j zY75R3(x(sivD$(&h4kseeXKT|DU^MSbjl$67U`5h_Oa;GrxIhMo;A9lPam%N)pG{Z zrw`Zs>IH-8(}!z*^)rL%(}!z*^$UaP(}!z*^*097rw`Zs>bSx5>BBX@dbyns^kqX% zwgsG|Pakr!E#M@5`jC@t0VnCxhn#E+I7y#AwWdN22TX$dSCsW!6m?4 z@2kHzcoHzz`|2MJo(#@-# 
zdJ?eB>B+!0r>_GRPRn`Wbjl#-h0`g6IfMQO?qdaK(8(#TbJcH*&dDjR_tkF=Ca1XG zSO0D>ImPw9`cH$&DX#a`e;G_palNnp+hB5v>wWc>!Q>Rz`|5vFIhDd|=sbnjz$v^2 zPT@6h3a^1vcnzGwYhdBE&8c+BU~?*6G6<()!TPXPhaYku%iXgCc6xaJIVK6zx^}Y(3_iK<-T<@!}!Q>Rz`zpg=a*FGH)#*V;e2;N{rtlg# zh1b9-yarC;HE;^Afm3)5oWg5h;kC`Fbje_IDqS)Nr~aUQEQ?cI=c)^g&dDjR_tk|4 zlT%#ptBVaLr?}o%ml{k?alNlDGnky>dS6{`FgeBbzPiF-a*FGHb!952Qg{uWr|=p$ zh1b9-yarC;HE;^Afm3)5EWEZkl`a`wVSNU~-Dmd{r?`)$t~QvQ z;y#wT#$a-a`&eq2!Q>S8vD9#b$tmt*sSyT~Q{2Z=BMl~}xR0eqrE)5T*U)(iuYpr| z4V=Pj;1pg1r|=p$h1bBsYnxN)lELOwx@3^^y%N5O-vc*}sp~7}-FP0kmA?@l^JrhE zyZ%jgGk@2|-(V5rx7&xaP5cojK|A;tsP7DbI53N8{i}`#ynk&RObbJnPF2>SDc%@LN^gm%+>XK7rp3Gp%F& z@~&6r)qR0{37(fYOFN8bJLOh8P58Yj+i65Q37#Q7Lp%1nWSO6BHK8s2UfJONl>D9i zNa&#mdiVzVEwV582G157|1BIF`fw?_#rSYZ=C|a3;w<_3TSLkJP9XDs%KXlm>ldFb z@4#n^j1AjwL4Nvh;kEd2Nx+Yb$a@-fi%*xH&gCz~U+A1ezb@Ty{qPH|1L@mEuKD|G zshhvGmU#YVknznk-^=MEGr{RW<{X3V6NYj641-B)qAwBQbJNU#&nC~Iv-yq@PP5%4 zr%?u+-cR2z!s8~)HO*tBfe#04Uo+(+9?Uzp{(d*Bij)^H=`ZR=X10pd_ID9WIow^uZrc= zegoVlKPWrjt$Mb1s%o86-r!x%P>Ss2<;_UD8FiI; zE&f_uuSLFc@0iY)tPlCtBW*p>D!lLOGf_-yTmn9iH}-fV(l;V~x%We#aoQ%Ev?H64 zwh3*l@P6cLJHF)6zS$AzvoIMfexkBC#G=Dhe$ZL4q8s|Af!`s$4&lSFdeA*79&IRDK;W2cJbB1y@ka?1I zA&=qni8#+28a}tiIX`HdlXWHe+#2WnFx+j8>-k~VhkS;+C*qtRau(2Vwl&Uq!r%4V z{IL6Zwtm_DIGem?zP(pITOL~u-VZ+xawP9{Yrq%tUiC&FT=2n#HQtNxPod8bALM)0 zzkvB1Ls|@JH+#R-w&BQ{NBiqt9PhqOBI>lW`NzaH21$hX#eC21R)))?EscJRBN z{lN;pR-5RH@GJR@| z(^iR1Xog+WHsM3cBetLkwvM)d{fF!aZJF%0#TGO}KH7p0+kdeE&1g^CfOE7bHsA>D zJ#4@Z=wb(SvCezV--+v;xL)tQk+cEq2W`MEr0s(KHh91C_uzUD@@@3q)cN$ih4mrd zeMq|xX}5a+;NOqy`*FR=`?bEO>3bFH!`$ABw7sZrv-i*bK3wlZzG|%;SOhL0!W zw0E=x;_Jt7vo%gzXU3*APOh7=X^oTHW-Ly`$z?MZt#NYM^u0CCu{3>ejdLtbUr)q2 zcC;}z-)uQ;nf};v(8hcmOGl}{2%kZC4%BIXcXtT)p&Z1EzX7M4@mIj}q07GK zMz(q<{A?3F}Wx(LR(=FXnjDeALTxr{q56Lfj$Bg|K-6 z(s=fibAji4*)RA|ifA87CXe=^wD?4-{i1wirw9Rv& zB45Y0ZMk^XSC2Jh*M3ogwr%RZJzhl;|e`%A4+Fv+is6O zQFMF3dEZZC9Lj&-sv7TYzYN#Qf9$Heyp!5bf#^$opj5u#MtrZ$FGs%0=kVf3@3i)N zV)!sO!ZW@VNL%?cSH(OBzi1l#{A&0gsP$aD!?t~lc(!*r@~!?GSJincN}k+|^XGSa 
znNgnWP1c8Vy(^IK=9gVn?{&s|ad#YD1kCfrv|Y48GVZiL^f5pV)0WZxEJj}PmiB82 zuE|gGn|dvWuL1ITDdO3G@|NSY9Ou!=*P9T}vEaDSzSw-RWw6`)sM@0)`BSz>TQL+I zJd+QlBF>Az4}g=SZAByfQ>?{V^6g%iXts$vSYpDaN{rju*Il*4yC_P(0GyLaA4(>V z_Mx;Mytv1^Br5*as!(qI-??h1*ELFiYm}#>52Z?+=O;I5E0_)+O2$^8Uf6^Ev{U3I zIZC+@cYtys)V>B*Lg(ZxZ3X)UA4^bg>swz z#Z`N}EWGntII}m>(T`F!IP)2=XOw=FR-;VyZ(Vht7tyvw##H<$-3Hk{>-CAQLB8Am z(^dC-+1j=kKJJZ(AEm93=K-&O^k(GS`roeF>*Yl0M@j0VU7?K>TSdDYZo^-WMJ2W2r z*3T@(=DCFThHcR0gI-=V+r%9%G2xM0FlO8S*H!zxK~X-}OhH+Bt|>Qpv>&A!==dRT zNOUdo*Szbh25+dgO|$_?KT60CJE8q3ZGx`IQMQM;Lu?OWGw(&hQ~D~QtR4L*RYRZT zC~Xwe;YUf^D9VF64%0pgZ>!O_+y2Y&ls1X-!;eyL?MLZD%0vGo+E&A+X+KIqd*qAA zWVcN_q{kZaX+KIIvOW4GVSDo2BK*{&{m{#jL3b?$vW_o<694o zkvltngJ&ItJO3Te?Fe^)OSD0IV1trW*Q?as$Z z9@?T(@LN9<%`x9UY{b~rUl>-mBHVFlSUHH--$9u4R6k14D{azz+JNBsDcj^fMBY)a z;zwx{>f3pFSo=|;4M_S?LVkW1!FwI;7Vl-8CnbpI_XXu9J=KpAbWGbs*=;{c)G2L^ z_)%gy{3sb8Z>B!&M~U}A&N13na+rJ)KT7Ni{3sdwbQak-Ci=UKv&cp}#XiE1lJ@t; zF+u)=wtV!XBy!MBX+1zz?MLY>?Vf8)Xs_6=JUemh-mhKSD?NsRz1ocN*mY%Ci65nE z;61&hbUw0tBVOUsg zL%4rLSlx;6kx}8KA0@+!ZrTsr2l1mMWA;J(C?)Ie;75t~OY)uenS7`HB~NL`q<-2l z+B=So<_c^PZ5ihZ$A@!;<3o;fJUEUVkB`!iQb(?yP413z{;6c7ZTq;(NPAa<@0$Pb zJwgBXo}g;*;N$!*B=fBB=l#AXnU5Xc5%j0@qZGk!!6G=*NS`D0qtqL?w_x}%>I2+I zFnk!{i-6;O1;d9?HgL9J_%P}R+)prk81)D4FBm?I1^^ELPWsIWh4ee+yNh2N$Z!Sn zTp<`T^Z@Q57&2T5e5GK>kOiD27&2T1e3f9x&=a_)V93x5xEHXTH=+-t?ubi=407Hm zoifPzGx|}=!8qiIz6ON!`?qri!-r8G((?qvhtWXbfr8<~Xb|uq!SG=;7j7+no~HE@y-kU?`&^aU9-Cj~h>h1w#hSNx_goa}qeo zhcwENDFF0Y|DL+hb$h>>IKycW+dwaOx z4D;^oLcyKPySGOGC(md@2Ak8TAVV%VC!9V78Ej6Uf($mNPeBHo)2ASV&FNE+!RGWS z$RM0fp3zRD49PRvX_Ud7`+qy|qjWOxqtq7oQ92#?QF5a5^>6fDdtY9=yMeCD8bO@XyDO;q0ceEV+2E=V}ZvChCasu zj{~+jH575eYjO%do;)tRCa3a&h1cZNFks;|IaL5Gye6lH0}HRosX}1kH90i`n7p<* zl`a`SpC!O0f}zhz zz>@?+pOb+n3x+FWkyr{D1lR@&~d5g9QwQ-_ zxq_k3dBF34ZBE&|CZ}v(lT$XY$tj!HVK6E@^bPjzk0A3&%`dkRSP%!kl z2zZfT=yNgfV!_bo65u6*q0gnjO9expHv!)SY;(%yH92MTnw+wEO-|XoCZ}v(lT$XY z$tj!HwWd^a%n4Aew} z=|_os25OSQ^rOT*19hFj^rOT*12xrP`cdMZftudFZ$bY--4WL|*yN!fCGKOjU2QP^ 
zC~+UF?HYsWM~VAbZNm(vA0_T%wGB6zew4V6)i%Ol`cdLOR@+E}=|_qCSZ$-){pO@o z2HCerrwp=>ML$Y87#lUyj1~PTam}whgXu?!YkqaT!StiVHNU#SVER$wnqS>$F#RZT z&9CMeOg~Co^Q#30(~lC@{Ay7gCnlWpLn>|>=<2H`_GW%#J}v1)K;dML(LEiwAO z3z+MDb(6t9FxUI)QwB$Yx!zZ02FHN8-d7a{*8+3BuT~gb2h8=psxtU)V6OMos&=0@ zH8?YEbNVh|o6|n9&FLtx&FL7h&FNZTo6~i`HmC0f7Ea3wR^L!Q>Rz`)ZxRDX#Zbt-<6J*Zb;jgUKnb_tkcT$tkY))jg@4O5rthp2BP36kY?T@ESOU*T5;f z22SBMu<+XERJvraIh8IMgj0XeK9!Q>Rz`|2|WlT%#ptIrxt zPI0}j9x#}k;(A{_XfQd&^}c$@U~-DFBwcualNm;Y%n>+^}c${U~-Da0;)1Q+N%W!fW6ZUIVA_ z8d!L3b1Gdj*qln248o~DXdlbs)O_mraieo`Y60=r3?`?zkEOnDFgeA2EcF)#lT+Ns zQr|R~oZ>!~`b&e!DehybZy8KZaUV;4JC#!@yoSzGcnzGwYv2@K1E=sBIEB~1DZB<2 zUfY~Xmkc(i(j|lVPm)mkMR*eDmxJ@MNk2+YKHAr*G=4kyn@Ii!i&*>YF!4v|59coU z@2Ro0 zEFY@B@ZX1)n=;J|5qFsKNO?JL-}rcN-u^80J3)QfL0zm@&f4#PJn*}5hWf?ti=4A> zgnZiP##!1a3EH8o=w~zAku&ymSTCO=Bd&)*tK{-hrz%8YaL9sM*h|9ku>q3(0~Q9^y^ znJ?neo8Q~Qvx>QDMn3hK=c>PqpWgt@8x$eg8q@jE1(ZicLy z(`RU>Bxr}SqK~P5l+cdm^EuiPJ|96lGM{X|YmU!woTM)$)O$`}N|gBn`cjI(w?hPe zANnBdgD~k!i8c(}&PIGT;`<})4<8UY2y=iV-spCJ5@?Tk^N^N@wBFuWe-N$*;ku7E zzTKY$>%{NNhahbT>gwxF@Q31hDDq`{CGEbHSReA`BP}0k{k+NTzLZ`?+8$$*3y@xb z^#0xypK;nQo3tZ^NGn7e1H5U*m(r{7rDXCvRfM!6l*{pE_{F#`#&xbYE9oy~|EsVY z^lOLww8!0%r#pNo^+4EzzLXG8`&(|(o3jv~h4`Kbd%j@#Qi80VFfUn`8Ryn`cW+SA zZ_0km6YV#})OXnEpf!GlH^evNjyAoaNxvzLn7f+uXOYVZz**Wo*LWmtJlfTB;vDVDoM@u`f?XetaUBgE<$0z4SX_^VUI%)!lYUh8 z(>}pJ%6O!WN4`Pc9POvXaP~y}N-w|NmlDSuc@2+S<5^yTZ+P1pzsf6Y_oak7=}XD* z7`laDx%c=|VqM5%_@k{4m^YjqCY=eER;w`jF3X z_e7lYL(T&l&bG!mPk3g}=7-(Sv-QjF$JyjH^X2Z@%%R^c(n6GHHhpV>~1?|}~*=>t0Xoh^+m(qu9 zPi#Ol+S4}R9PNn>I6`|58!!#JmQiSRdy0T%^rKeIvY;{(M}|N4_F& zb^E=K^&#Ivq%B0+NbhEUF|HTmy4YLWe(z&_;OSDNEk%8!y!F~f87{WQY1?QoG>=(l z;7iHy@kE^Vj7KxY3UYIOfS`8*C5PxN*5=V#1cqYvUADyCACX-FRATvbbx}N zCTG{tr)S4n5{Z4dj^RUpWwE0?XDa@fZ2wEPpC#MB(%Jm@@C^K!{Pvv9PtV!>@^tjW z!?T{^8_D*CWcxhg8FcY;^r!S^Lk@qOxK1*PZ!&t3+p+P{}$GBCVwsT3q`*O;(y2Xx5IH3zdE*08*)>8`ur*V ztB{+Y6j{mBPl~Lwbo|KcH-b50SSexW=C1eumupE;qQ5 zA@D)!-wEemBGK zXi)EAEl-e#^&svL%cD#)L*~))1bIq=Jmo8%)+&ftB 
zcPz{AZ92w@e9h0AcKKb2$k+6&sgG$QUsKSJX7oekYYO_o?+rx0<{*xH{34&7Pvo=X zL_RxC>lEK7XpSM&95c9y-*o^Rj^SE!3?at;fP6o}5ZG`m$m0ZgSRQ2>88VO7X^^KR z$WtEVVaPmM9_;06jv>?>Gx!MippzWKHR^8axKCV;c+_J!7UXeE+#!}nnP!H}qvZ+m zlmvOogFFnGCxv6L1{}lwnCOuE`kE6+6CH97UULk(79DawS?5EV=&&(}>oy+@Icdy8zh8EGOP_X8;(_xVIV?se&WNE7+Euf{m;mx+9KK9SFk z6Z!18{)&4XXWxs6TCPEs|h0I{6rrB$y+`~(h%dUuRkOVVx}IXxbA$Iqe=y>UH@Iq!*Qo0mje2hM9`%PH ze)MKn&GimyK9WZ{@VQ9NbH0%0)hn(l!5ze!oA*hF(=R6Y5 zv!0=-hhxEcBxAvP@=*`Rf^$m7g7p-j9*za)l#B(r`ALzLJpH7|Dx9TF91HHu^Sek|(d zT;W`lxiT8@>@UZMZB0g7ta}RTW?j?Pz!oE%u@3hhgtJgLxnBzIGym)$|C}KIT;%7R z<~WnvpJe&T_fN9?G6xEvk14xxKSnt1v##-<8SpMM;3qjCzEBI1Z^nbJ8i8=u!$E%V zvk18K^SDnVoE>*nF~T`t#{C)L+^+`t!1K|-d|$*EgnT!|ScH7f!#IR|Z$LOhWqwq9 z5xh@u?()9Cxk_#(_aXxBaz4=KD)%CIpWxi3E#Mr}`N3V<0FDvoJjaN0oMXVb$T86Q zFb6o690QIe$ADwSG2mEn3_ePqnt!f+~vwStsHeYpnex z2(f-TP;PJ|LtwK`4DvWZp6nnGL+0T)mIQeio~a%!58jWd*NF)AI?>=G*#?`v2wdxR zB0|)YeBY+LTT|~v1bLhw56kOyqQPdJ800Al@~}MfFhsqFQ`U)Nf_37U_H`KFgR0km zNRxFK-(jlPiG$nMVSJCK&WAKvhw*)&jN^MgWgTYclXaLKC+jdfF1b#e8LSg$w#&!& zTCxqkuTtdWyD9ZL5&cN66N7&6U63Ll-|xt>d~c)3$M+`c`Vc4b*>NJD9jA4QcSP#F z2!xtr1~>8jmB5B$xW?G;@A%G1zI&4V(K-$CI3_On{!7-wJep%cp0m}X<-t4pG{+EX zjv0J}?-m3$9K$u_Y3lg?zjDNr8(N+qk7MGJ?-n6pk$kIJTr+hkRcn z$DHqm6dm&2ikf54wdj!V3Do(JCOYIh2^q(C4T=u!e4;};PIPF;B{{Yx;MkgW`S{K_ z%?YH5e0(RKE5xFaYqm*^4a-BK08k2v*Yx$1NKb&Uplbil8cld zyX0W4yGZ$rPbk&(bRv3#d#Uon&d`^0@I--=sV}>oT*vLCX5sYp%L8@X(3jobjx#W` zOKGj@JnND!4K4qY7PO_2PA#r$oi}kkcAR(2IoMKyvCJ8JwU@xP za{*c@NyW#U$69i8a$?6~CSK&Ib4Cw8XFI<`JEO;Vwa$^27=Di*bH3A(aK76Dkz?m9 zOBqqdMVW9SygQLdgvKSt?MB(t;c?^k#GI#4-}mro!RIJG$MAU?pYP-IgO+O(*P@K$ zOh`;XnM4A351Mr*#3t-%ALrbLyxhFl#d%+;&1=lN7bCul`B`paVxlgWcYjWO&V&hj z5N^+zIB^f-gLKq)zf+PZ(fLXmOZN1}7{)MOIgleak*o7fPE6M6lXEBU+1K@{mRjeB z$ny+7Kf+l2_=AlF#}Q?^pv;Ad3sG+(G(9mL^*X07oIV|6@e|bdEIvQQ=Q(_y$L9rn z{u-a3waiS+)UwP<%+j&|Lw4oNip_f8v0#4o)k}D~oF0o=v&>j{-X6vW>AJqsM5)eK z+E}`$J~B@D1 zdODo-4-B6PXZ@Ptv*E0NX81xli{DSGUxc%Q?4z?KOy|rm&49KBmB45!_L1T{Exrmxl`_O 
z^f>-7`ZskF-m85oaX|udC(yqH-qozyY@FvWn2NDMAG+qwR;R=HQGE^qee8O}@1tE8 zyp7Mv=sQS%_u1A?ZMkn>b}IMeWv@Wiwr_^BUSs&JaMl|PpA2XHisARz2ZX^Gv;||( z7K}j~#^4pm`pRF1omUaQ_U*9q8p1ce8+Kkt_^YE~=M97$gTF<{G58fij=|p{CU4C6PRA~$^reuO6xm`67wEQp3`%J>nSs8 zYWinn9_{5kdNkzx>f6H$_(m){PBoC=2Fkdw&}zQQ{(gpcsf4B;F60*3Gj?lOcg z@GwL80Pn)^vXFBj!^=ZX{J(ZW9%IKcoJU(SkD2dEu(UvY{Qo{5<1xs@t%861yO{7o z6TqLA`O65xoFE~{rk}Z(^w#Ob28M4235o!JN-8oi`F|p=GQhA=%o7T|d&$L42!BBM z-~x|z_zQ>nx|e432~8k~dC$!~v3x@L(wm<7-djI=bH}jN8`f0~TT!uT^_GhDtAZWZ?Jw^x>}9A50?58J%DqOxk! z=3$ek4KFU6cKwV=Wz!Z-DVtJUR5pD6^s=HwWg`mLZ7nJ*99g`6>+rHoTkj~_yk`9^ zWh*K+mo48^S$5m#{LM}&VyZTc&7O4qlXflXh$t)`K77oG!jYrK6pkz!RlIUc z#i%i3R;;Kht{h#pqO!7LdC|(M!5p%)|M6MbaXUM)d_B;~j6~hSog6scO8J`(bQvtf zu|S_ClA6DMLv@vtUwOy+&3CMG@>i|jlE1oQ^J*vf&-m(1PX4A1l@-+$PJVUO)@q%* zV%?e*h^|_@X+zn@ipol85AjvYx2!5#yJkIWOkS7i8Zd_Is@9`0+NrKsj&L>N&6NqA z^VVdUH+AmpsikGp7Rs=8NaSUJS`yOr2L6s-ku1`ckR$#2M8Yu z?;R6XE#*6sv}5@tLHPm}vi?TkIe>rlRva&-yo76lxQ^w&9h5(Cj`Ew3F1oX`E)F_g x9z>F-%qQa~{N08)8M`2>4vqxnv1m*A6S*al{X0|nDR} float: Definition matches op_tests/test_mha_mxfp8.py: nrms = sqrt(sum((|a-b| / max(|b|, eps))^2)) / (sqrt(numel) * max(|a|.max, |b|.max, eps)) - A small relative metric (~1e-3 for bf16, ~1e-6 for fp32) regardless of - output magnitude — useful complement to the absolute max-diff check. + + eps must be chosen above the dtype's effective resolution; otherwise the + `1 / max(|b|, eps)` term blows up for the (legitimately) near-zero + elements common in softmax outputs, producing huge nrms values that have + nothing to do with the kernel actually being wrong. For bf16 (relative + precision ~3.9e-3) we use eps=1e-3: this lets ~0 outputs contribute at + most a per-element relative error of `|a-b| / 1e-3`, which for valid + bf16-precision kernel output is well under 1 (consistent with the + overall metric being a small ~1e-3 number on PASSing kernels). 
""" a32 = actual.detach().float().cpu() b32 = expected.detach().float().cpu() abs_diff = (a32 - b32).abs() - eps = 1e-7 + eps = 1e-3 max_item = max(a32.abs().max().item(), b32.abs().max().item(), eps) sq_diff = (abs_diff / b32.abs().clamp(min=eps)).pow(2) return (sq_diff.sum().sqrt() / (math.sqrt(b32.numel()) * max_item)).item() @@ -311,7 +318,11 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # --------------------------------------------------------------------------- -@pytest.mark.parametrize("is_causal", [False, True]) +# Only causal kernels are shipped on gfx1250 (CSV registers only `mask=1` +# entries — the nocausal `_brd_v8` binaries were removed). is_causal is kept +# as a parameter so the kernel-call sites still receive the (now always-True) +# flag explicitly; if a nocausal binary is re-added, just add `False` back. +@pytest.mark.parametrize("is_causal", [True]) @pytest.mark.parametrize( "head_dim,hq,hk,sq,sk,batch", [ @@ -430,7 +441,12 @@ def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): def test_fmha_fwd_f16_ops_layer(): - """Direct ops-layer call: bshd qkv (sbhd memory layout), D64 + non-zero sink.""" + """Direct ops-layer call: bshd qkv (sbhd memory layout), D64 + non-zero sink. + + Uses is_causal=True because only causal binaries are registered in the + CSV (mask=1 rows). The test purpose is to exercise the low-level ops + entry point with a D64+sink call; causal vs nocausal is orthogonal here. 
+ """ if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -456,11 +472,11 @@ def test_fmha_fwd_f16_ops_layer(): k, v, scale=scale, - is_causal=False, + is_causal=True, sink=sink, via="ops", ) - out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) + out_ref, lse_ref = run_ref(q, k, v, is_causal=True, sink=sink) _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2) _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) @@ -488,8 +504,10 @@ def test_fmha_fwd_f16_d64_requires_sink(): device=device, ) scale = 1.0 / math.sqrt(64) + # is_causal=True because only causal kernels are registered in the CSV; + # the error path being tested (D64 + sink=None) is orthogonal to causal. with pytest.raises(RuntimeError, match="D64.*sink"): - aiter.fmha_fwd_f16_asm(q, k, v, scale, False, True, sink=None) + aiter.fmha_fwd_f16_asm(q, k, v, scale, True, True, sink=None) # --------------------------------------------------------------------------- @@ -522,16 +540,19 @@ def test_fmha_fwd_f16_layout(layout, head_dim): scale = 1.0 / math.sqrt(head_dim) sink = _d64_sink(hq, device) if head_dim == 64 else None + # is_causal=True: only causal kernels are registered in the CSV. The + # layout test purpose (verify non-contiguous bshd views work) is + # orthogonal to causal masking, so causal=True is a fine choice here. out_kernel, lse_asm = run_kernel( q, k, v, scale=scale, - is_causal=False, + is_causal=True, sink=sink, via="public", ) - out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) + out_ref, lse_ref = run_ref(q, k, v, is_causal=True, sink=sink) _cmp( out_kernel, @@ -557,7 +578,8 @@ def test_fmha_fwd_f16_layout(layout, head_dim): @pytest.mark.parametrize("head_dim", [64, 128]) -@pytest.mark.parametrize("is_causal", [False, True]) +# Only causal kernels are shipped (see test_fmha_fwd_f16_correctness comment). 
+@pytest.mark.parametrize("is_causal", [True]) def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): if get_gfx() not in ["gfx1250"]: return @@ -618,9 +640,45 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): # --------------------------------------------------------------------------- +# Initialization patterns for perf q/k/v buffers. +# "randn" : standard normal (default; exercises real attention math). +# "const0.25" : fill every element with 0.25 — matches poc_kl +# fmha_batch_init `init_pattern=10` used in cpp perf runs. +# Useful when comparing pytest perf numbers to a cpp baseline +# that was produced with constant-fill inputs (rules out any +# perf swings caused by data-dependent kernel behavior, e.g. +# denormal handling or softmax-saturation paths). +_PERF_INITS = ["randn", "const0.25"] + + +def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device): + """Allocate (q, k, v) in `layout` memory with bshd-shaped views, using the + requested perf-init pattern. See `make_qkv_bshd` for layout semantics.""" + if init == "randn": + return make_qkv_bshd( + layout=layout, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=d, + dtype=dtype, device=device, + ) + if init == "const0.25": + # Use randn-allocated bshd-shaped views (so .stride() reflects the + # requested layout's memory), then fill in-place with 0.25. In-place + # `.fill_()` is layout-agnostic so this works for non-contiguous views. + q, k, v = make_qkv_bshd( + layout=layout, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=d, + dtype=dtype, device=device, + ) + q.fill_(0.25) + k.fill_(0.25) + v.fill_(0.25) + return q, k, v + raise ValueError(f"unknown perf init pattern: {init!r}") + + +@pytest.mark.parametrize("init", _PERF_INITS) @pytest.mark.parametrize("head_dim", [64, 128]) -@pytest.mark.parametrize("is_causal", [False, True]) -def test_fmha_fwd_f16_perf(head_dim, is_causal): +# Only causal kernels are shipped (see test_fmha_fwd_f16_correctness comment). 
+@pytest.mark.parametrize("is_causal", [True]) +def test_fmha_fwd_f16_perf(head_dim, is_causal, init): if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -634,7 +692,8 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 else: # head_dim == 128 sq, batch, hq, hk, sk = 4096, 2, 64, 4, 4096 - q, k, v = make_qkv_bshd( + q, k, v = _make_qkv_perf( + init, layout=2, sq=sq, sk=sk, @@ -657,14 +716,17 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): is_causal, False, sink=sink, - num_iters=10, - num_warmup=2, + num_iters=20, + num_warmup=10, ) flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) if is_causal: flops /= 2.0 tflops = flops / (us * 1e-6) / 1e12 - print(f"[perf] d={head_dim} causal={is_causal}: {us:.1f}us, {tflops:.2f} TFLOPS") + print( + f"[perf] d={head_dim} causal={is_causal} init={init}: " + f"{us:.1f}us, {tflops:.2f} TFLOPS" + ) # Sanity: catch silent-PASS when timing infrastructure breaks (e.g. profiler # / ROCTracer drops events → us=0, TFLOPS=inf). Without these asserts the # test would PASS with bogus numbers. From f0d942e27512800e4e6ebe83cb263e72358e00f6 Mon Sep 17 00:00:00 2001 From: HaonanWang98 Date: Sun, 24 May 2026 10:10:44 +0000 Subject: [PATCH 24/43] reformat --- op_tests/test_fmha_fwd_f16_asm.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 451d6da75b..94c320e942 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -656,16 +656,30 @@ def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device requested perf-init pattern. 
See `make_qkv_bshd` for layout semantics.""" if init == "randn": return make_qkv_bshd( - layout=layout, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=d, - dtype=dtype, device=device, + layout=layout, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=d, + dtype=dtype, + device=device, ) if init == "const0.25": # Use randn-allocated bshd-shaped views (so .stride() reflects the # requested layout's memory), then fill in-place with 0.25. In-place # `.fill_()` is layout-agnostic so this works for non-contiguous views. q, k, v = make_qkv_bshd( - layout=layout, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=d, - dtype=dtype, device=device, + layout=layout, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=d, + dtype=dtype, + device=device, ) q.fill_(0.25) k.fill_(0.25) From 5775c00be990a666b4c5b094e1689825ac637486 Mon Sep 17 00:00:00 2001 From: tingchen Date: Mon, 25 May 2026 17:09:23 +0800 Subject: [PATCH 25/43] rename api --- aiter/jit/optCompilerConfig.json | 4 +- aiter/ops/mha.py | 26 +++---- ...a_fwd_f16.cu => asm_fmha_fwd_with_sink.cu} | 78 +++++++++---------- op_tests/test_fmha_fwd_f16_asm.py | 22 +++--- 4 files changed, 65 insertions(+), 65 deletions(-) rename csrc/py_itfs_cu/{asm_fmha_fwd_f16.cu => asm_fmha_fwd_with_sink.cu} (81%) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 80b9729074..4f52adc4bc 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -1106,9 +1106,9 @@ "verbose": "False", "blob_gen_cmd": "''" }, - "module_fmha_fwd_f16_asm": { + "module_fmha_fwd_with_sink_asm": { "srcs": [ - "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_f16.cu'" + "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_with_sink.cu'" ], "flags_extra_cc": [ "'-DENABLE_CK=0'" diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 45993e6620..ad0c1a29c7 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -271,7 +271,7 @@ def fmha_v3_fwd( # --------------------------------------------------------------------------- 
-# fmha_fwd_f16 (BF16 ASM, gfx1250) — single-shot batched FMHA forward. +# fmha_fwd_with_sink_asm (gfx1250) — single-shot batched FMHA forward. # # API contract: q/k/v are **bshd shape** ([batch, seq, head, dim]); strides are # read directly from the tensor so non-contiguous bshd-shaped views (e.g. of @@ -282,15 +282,15 @@ def fmha_v3_fwd( # Memory-allocation policy: all GPU tensors (out, lse, sink) are allocated on # the Python side; the C++ entry point performs only pointer + stride # bookkeeping and kernel launch (no torch dependency). The public wrapper -# `fmha_fwd_f16_asm` below handles allocation and the AITER-post-scale → +# `fmha_fwd_with_sink_asm` below handles allocation and the AITER-post-scale → # kernel-pre-scale conversion for sink (multiply by sqrt(qk_head_dim)). # --------------------------------------------------------------------------- @compile_ops( - "module_fmha_fwd_f16_asm", - fc_name="fmha_fwd_f16_asm", + "module_fmha_fwd_with_sink_asm", + fc_name="fmha_fwd_with_sink_asm", ffi_type="ctypes", ) -def _fmha_fwd_f16_asm( +def _fmha_fwd_with_sink_asm( q: Tensor, k: Tensor, v: Tensor, @@ -303,7 +303,7 @@ def _fmha_fwd_f16_asm( ) -> None: ... -def fmha_fwd_f16_asm( +def fmha_fwd_with_sink_asm( q: Tensor, k: Tensor, v: Tensor, @@ -348,7 +348,7 @@ def fmha_fwd_f16_asm( sink_for_kernel = (sink * (qk_head_dim**0.5)).to(torch.float32).contiguous() elif qk_head_dim == 64: raise RuntimeError( - "fmha_fwd_f16_asm: D64 kernels require an explicit `sink` tensor " + "fmha_fwd_with_sink_asm: D64 kernels require an explicit `sink` tensor " f"of shape [q_head_num]={q_head_num} fp32 (AITER post-scale " "convention). Pass `sink=torch.zeros(q_head_num, dtype=torch.float32)` " "if you want a zero-logit sink." @@ -357,7 +357,7 @@ def fmha_fwd_f16_asm( # D128: kernel never reads sink contents but slot must be non-null. 
sink_for_kernel = torch.zeros(q_head_num, dtype=torch.float32, device=q.device) - _fmha_fwd_f16_asm( + _fmha_fwd_with_sink_asm( q, k, v, @@ -1425,8 +1425,8 @@ def can_impl_fmha_v3_fwd(): ret = ret and ((gqa_ratio & (gqa_ratio - 1)) == 0) return ret - def can_impl_fmha_fwd_f16(): - # gfx1250 ASM bf16 forward (fmha_fwd_f16_asm). Single-shot batched + def can_impl_fmha_fwd_with_sink_asm(): + # gfx1250 ASM bf16 forward (fmha_fwd_with_sink_asm). Single-shot batched # (no varlen / dropout / swa / quant / alibi / bias). Sink logits # (per-Q-head fp32) supported; sink-token (sink_size) not supported. ret = get_gfx() == "gfx1250" @@ -1459,11 +1459,11 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): _validate_cu("cu_seqlens_q", cu_seqlens_q) _validate_cu("cu_seqlens_kv", cu_seqlens_kv) - if can_impl_fmha_fwd_f16(): + if can_impl_fmha_fwd_with_sink_asm(): # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, # no API-side permute. softmax_scale is forwarded as-is (kernel applies # it internally to Q·K^T). sink_ptr is in AITER post-scale convention; - # the public `fmha_fwd_f16_asm` wrapper multiplies it by + # the public `fmha_fwd_with_sink_asm` wrapper multiplies it by # sqrt(qk_head_dim) and auto-fills the D64 zero-sink case internally, # so we just forward the user's sink_ptr here. sink_for_kernel = sink_ptr @@ -1471,7 +1471,7 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): # D64 kernels always read SINK; pass an explicit zero-logit so the # wrapper does not raise on us. 
sink_for_kernel = torch.zeros(nhead_q, dtype=torch.float32, device=q.device) - out_, softmax_lse = fmha_fwd_f16_asm( + out_, softmax_lse = fmha_fwd_with_sink_asm( q, k, v, diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu similarity index 81% rename from csrc/py_itfs_cu/asm_fmha_fwd_f16.cu rename to csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu index 76ef048ac3..a8d725cd8b 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT // Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. // -// ASM FMHA forward (BF16, gfx1250) — ported from poc_kl fmha_fwd_f16. +// ASM FMHA forward (BF16, gfx1250). // // Layout: q/k/v expected in **bshd shape** ([batch, seq, head, dim]). The // kernel reads per-dim strides directly from the input tensor, so callers may @@ -29,12 +29,11 @@ #include #include -// Kernel argument block — packed ABI (132 B = 0x84), matches -// FmhaFwdKernelArgsBase in poc_kl/mi400/fmha_fwd_f16/fmha_fwd_f16.cpp -// and the .args YAML emitted into the v8 .s patched HSA metadata. +// Kernel argument block — packed ABI (132 B = 0x84), matches the .args YAML +// emitted into the v8 .s patched HSA metadata. // -// Field names mirror poc_kl: short names (d_addr / q_seqs / k_hs / ...) -// rather than the older 528-B slot-padded layout we used pre-v8. +// Field naming uses short forms (d_addr / q_seqs / k_hs / ...) rather than +// the older 528-B slot-padded layout we used pre-v8. 
// // d = output O // q/k/v_seqs = stride along seq dim (bytes) @@ -81,7 +80,7 @@ struct KernelArgs }; #pragma pack(pop) static_assert(sizeof(KernelArgs) == 0x84, - "fmha_fwd_f16_asm: KernelArgs must be 132B packed (matches v8 .args)"); + "fmha_fwd_with_sink_asm: KernelArgs must be 132B packed (matches v8 .args)"); // ---- helpers --------------------------------------------------------------- @@ -107,7 +106,7 @@ static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, return el.first; } AITER_CHECK(false, - "fmha_fwd_f16_asm: no kernel for dtype=", dtype, + "fmha_fwd_with_sink_asm: no kernel for dtype=", dtype, " hdim_q=", hdim_q, " hdim_v=", hdim_v, " mask=", mask_flag, " arch=", arch_id); @@ -135,7 +134,7 @@ AITER_CTYPES_ERROR_DEF // slot must still be a valid non-null pointer of the right size, but // contents are ignored — pass a zero buffer. AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( - fmha_fwd_f16_asm, + fmha_fwd_with_sink_asm, (aiter_tensor_t* q, aiter_tensor_t* k, aiter_tensor_t* v, @@ -151,25 +150,25 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( // ---- arch + dtype validation ------------------------------------------ const std::string arch_id = get_gpu_arch(); AITER_CHECK(arch_id == "gfx1250", - "fmha_fwd_f16_asm: only supported on gfx1250, got ", arch_id); + "fmha_fwd_with_sink_asm: only supported on gfx1250, got ", arch_id); AITER_CHECK(q && k && v && out && lse && sink, - "fmha_fwd_f16_asm: q/k/v/out/lse/sink must all be non-null"); + "fmha_fwd_with_sink_asm: q/k/v/out/lse/sink must all be non-null"); AITER_CHECK(q->dtype() == AITER_DTYPE_bf16 && k->dtype() == AITER_DTYPE_bf16 && v->dtype() == AITER_DTYPE_bf16, - "fmha_fwd_f16_asm: q/k/v must be bf16"); + "fmha_fwd_with_sink_asm: q/k/v must be bf16"); AITER_CHECK(out->dtype() == AITER_DTYPE_bf16, - "fmha_fwd_f16_asm: out must be bf16"); + "fmha_fwd_with_sink_asm: out must be bf16"); AITER_CHECK(lse->dtype() == AITER_DTYPE_fp32, - "fmha_fwd_f16_asm: lse must be fp32"); + 
"fmha_fwd_with_sink_asm: lse must be fp32"); AITER_CHECK(sink->dtype() == AITER_DTYPE_fp32, - "fmha_fwd_f16_asm: sink must be fp32"); + "fmha_fwd_with_sink_asm: sink must be fp32"); AITER_CHECK(q->dim() == 4 && k->dim() == 4 && v->dim() == 4, - "fmha_fwd_f16_asm: q/k/v must be 4-D tensors (bshd shape)"); + "fmha_fwd_with_sink_asm: q/k/v must be 4-D tensors (bshd shape)"); AITER_CHECK(q->stride(-1) == 1 && k->stride(-1) == 1 && v->stride(-1) == 1, - "fmha_fwd_f16_asm: q/k/v must have contiguous last dim"); + "fmha_fwd_with_sink_asm: q/k/v must have contiguous last dim"); // ---- dimension extraction (bshd) --------------------------------------- const int batch = (int)q->size(0); @@ -181,39 +180,42 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( const int kv_head_num = (int)k->size(2); const int v_head_dim = (int)v->size(3); - AITER_CHECK((int)k->size(0) == batch, "fmha_fwd_f16_asm: k batch mismatch"); - AITER_CHECK((int)v->size(0) == batch, "fmha_fwd_f16_asm: v batch mismatch"); - AITER_CHECK((int)k->size(3) == qk_head_dim, "fmha_fwd_f16_asm: k head_dim mismatch"); - AITER_CHECK((int)v->size(1) == kv_seq_len, "fmha_fwd_f16_asm: v seq_len mismatch with k"); - AITER_CHECK((int)v->size(2) == kv_head_num, "fmha_fwd_f16_asm: v head_num mismatch with k"); - AITER_CHECK(q_head_num % kv_head_num == 0, "fmha_fwd_f16_asm: q_head_num must be a multiple of kv_head_num"); + AITER_CHECK((int)k->size(0) == batch, "fmha_fwd_with_sink_asm: k batch mismatch"); + AITER_CHECK((int)v->size(0) == batch, "fmha_fwd_with_sink_asm: v batch mismatch"); + AITER_CHECK((int)k->size(3) == qk_head_dim, "fmha_fwd_with_sink_asm: k head_dim mismatch"); + AITER_CHECK((int)v->size(1) == kv_seq_len, "fmha_fwd_with_sink_asm: v seq_len mismatch with k"); + AITER_CHECK((int)v->size(2) == kv_head_num, "fmha_fwd_with_sink_asm: v head_num mismatch with k"); + AITER_CHECK(q_head_num % kv_head_num == 0, "fmha_fwd_with_sink_asm: q_head_num must be a multiple of kv_head_num"); AITER_CHECK(qk_head_dim == 64 || 
qk_head_dim == 128, - "fmha_fwd_f16_asm: only head_dim 64 or 128 supported, got ", qk_head_dim); + "fmha_fwd_with_sink_asm: only head_dim 64 or 128 supported, got ", qk_head_dim); AITER_CHECK(v_head_dim == qk_head_dim, - "fmha_fwd_f16_asm: v_head_dim must equal qk_head_dim"); + "fmha_fwd_with_sink_asm: v_head_dim must equal qk_head_dim"); AITER_CHECK(out->dim() == 4 && (int)out->size(0) == batch && (int)out->size(1) == q_seq_len && (int)out->size(2) == q_head_num && (int)out->size(3) == v_head_dim, - "fmha_fwd_f16_asm: out shape must be [batch, q_seq_len, q_head_num, v_head_dim]"); + "fmha_fwd_with_sink_asm: out shape must be [batch, q_seq_len, q_head_num, v_head_dim]"); AITER_CHECK(out->stride(-1) == 1, - "fmha_fwd_f16_asm: out must have contiguous last dim"); + "fmha_fwd_with_sink_asm: out must have contiguous last dim"); AITER_CHECK(lse->dim() == 3 && (int)lse->size(0) == batch && (int)lse->size(1) == q_head_num && (int)lse->size(2) == q_seq_len, - "fmha_fwd_f16_asm: lse shape must be [batch, q_head_num, q_seq_len]"); + "fmha_fwd_with_sink_asm: lse shape must be [batch, q_head_num, q_seq_len]"); AITER_CHECK(sink->dim() == 1 && (int)sink->size(0) == q_head_num, - "fmha_fwd_f16_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); + "fmha_fwd_with_sink_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); const int gqa = q_head_num / kv_head_num; const int mask_flag = is_causal ? 1 : 0; // ---- stride extraction (in bytes), bshd dim layout -------------------- // bshd: dim0=b, dim1=s, dim2=h, dim3=d - const int elem_size = (int)q->element_size(); // 2 for bf16 + // q/k/v and out element sizes are tracked separately so future f8-input / + // bf16-output configurations can use this same stride-extraction block. 
+ const int elem_size = (int)q->element_size(); // qkv element size (2 for bf16, 1 for f8) + const int elem_size_o = (int)out->element_size(); // out element size (2 for bf16) const int stride_q_batch = (int)q->stride(0) * elem_size; const int stride_q_seq = (int)q->stride(1) * elem_size; @@ -227,17 +229,16 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( const int stride_v_seq = (int)v->stride(1) * elem_size; const int stride_v_head = (int)v->stride(2) * elem_size; - const int stride_o_batch = (int)out->stride(0) * elem_size; - const int stride_o_seq = (int)out->stride(1) * elem_size; - const int stride_o_head = (int)out->stride(2) * elem_size; + const int stride_o_batch = (int)out->stride(0) * elem_size_o; + const int stride_o_seq = (int)out->stride(1) * elem_size_o; + const int stride_o_head = (int)out->stride(2) * elem_size_o; const int sub_Q = 128; // ts_qo: Q-tile size used by all kernels const int stride_q_tg = sub_Q * stride_q_seq; const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout // ---- kernel args ------------------------------------------------------- - // ABI = FmhaFwdKernelArgsBase from poc_kl/mi400/fmha_fwd_f16/fmha_fwd_f16.cpp - // (132 B packed). Field naming follows the poc_kl source-of-truth. + // 132 B packed KernelArgs (see `struct KernelArgs` above for ABI layout). KernelArgs args; memset(&args, 0, sizeof(args)); args.d_addr = out->data_ptr(); @@ -255,8 +256,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( args.k_seqs = stride_k_seq; args.k_hs = stride_k_head; args.k_bas = stride_k_batch; - // s_opt SGPR: packs three host-side switches. Bit layout matches - // poc_kl/.../fmha_fwd_f16.cpp::opt_packed and the S_OPT_BIT_* defines: + // s_opt SGPR: packs three host-side switches. 
Bit layout: // bit0: reverse_kv (compile-time gated by CAS_MASK build; ignored by mask=0 kernels) // bit1: double_q (compile-time gated by DOUBLE_Q build; ignored by non-_dq kernels) // bit2: remap_xy (must be 1 — we swap gdx/gdy at launch below) @@ -290,7 +290,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( dtype, qk_head_dim, v_head_dim, mask_flag, arch_id, cfg_map); auto it = cfg_map->find(kernel_key); AITER_CHECK(it != cfg_map->end(), - "fmha_fwd_f16_asm: kernel not found in CFG: ", kernel_key); + "fmha_fwd_with_sink_asm: kernel not found in CFG: ", kernel_key); const char* name = it->second.knl_name.c_str(); const char* co_name = it->second.co_name.c_str(); @@ -300,7 +300,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( // ---- launch ------------------------------------------------------------ // gdx = ceil(q_seq_len / sub_Q) is the total number of Q-tiles to compute. // When s_opt bit1 (double_q) is set, each WG processes 2 Q-tiles internally, - // so launch_gdx must be halved. Mirrors poc_kl fmha_fwd_f16.cpp: + // so launch_gdx must be halved: // int tg_div = (double_q != 0) ? 2 : 1; // global_size_x = (q_tile_count + tg_div - 1) / tg_div * blockSizeX; // The four shipped _brd v8 kernel binaries all support runtime double_q=1 diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 94c320e942..39b210430a 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -3,7 +3,7 @@ """Correctness + performance tests for fmha_fwd_f16 (BF16 ASM, gfx1250). Public API: aiter.flash_attn_func (preferred) -Ops layer: aiter.fmha_fwd_f16_asm (low-level, ~v3 style) +Ops layer: aiter.fmha_fwd_with_sink_asm (low-level, ~v3 style) Layout convention used in tests @@ -278,11 +278,11 @@ def run_kernel( ): """Call the kernel and return (out, lse). 
- via = "ops" → low-level aiter.fmha_fwd_f16_asm + via = "ops" → low-level aiter.fmha_fwd_with_sink_asm via = "public" → public aiter.flash_attn_func (dispatcher → asm path) """ if via == "ops": - return aiter.fmha_fwd_f16_asm( + return aiter.fmha_fwd_with_sink_asm( q, k, v, @@ -507,7 +507,7 @@ def test_fmha_fwd_f16_d64_requires_sink(): # is_causal=True because only causal kernels are registered in the CSV; # the error path being tested (D64 + sink=None) is orthogonal to causal. with pytest.raises(RuntimeError, match="D64.*sink"): - aiter.fmha_fwd_f16_asm(q, k, v, scale, True, True, sink=None) + aiter.fmha_fwd_with_sink_asm(q, k, v, scale, True, True, sink=None) # --------------------------------------------------------------------------- @@ -572,7 +572,7 @@ def test_fmha_fwd_f16_layout(layout, head_dim): # --------------------------------------------------------------------------- # Integration test: aiter.flash_attn_func -> mha._flash_attn_forward dispatcher -# -> our fmha_fwd_f16_asm branch. Verifies the public-API path on gfx1250 +# -> our fmha_fwd_with_sink_asm branch. Verifies the public-API path on gfx1250 # matches a direct ops-layer call bit-for-bit (same kernel, same args). # --------------------------------------------------------------------------- @@ -626,7 +626,7 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): do = (out_via.float() - out_direct.float()).abs().max().item() dl = (lse_via.float() - lse_direct.float()).abs().max().item() assert do == 0.0, ( - f"flash_attn_func != fmha_fwd_f16_asm " + f"flash_attn_func != fmha_fwd_with_sink_asm " f"(d={head_dim}, causal={is_causal}) max|dO|={do}" ) assert dl == 0.0, ( @@ -642,8 +642,8 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): # Initialization patterns for perf q/k/v buffers. # "randn" : standard normal (default; exercises real attention math). 
-# "const0.25" : fill every element with 0.25 — matches poc_kl -# fmha_batch_init `init_pattern=10` used in cpp perf runs. +# "const0.25" : fill every element with 0.25 — matches the cpp perf-test +# init pattern (`init_pattern=10`) used in cpp perf runs. # Useful when comparing pytest perf numbers to a cpp baseline # that was produced with constant-fill inputs (rules out any # perf swings caused by data-dependent kernel behavior, e.g. @@ -722,7 +722,7 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal, init): sink = _d64_sink(hq, device) if head_dim == 64 else None us = _bench( - aiter.fmha_fwd_f16_asm, + aiter.fmha_fwd_with_sink_asm, q, k, v, @@ -839,7 +839,7 @@ def run_cli( if do_perf: us = _bench( - aiter.fmha_fwd_f16_asm, + aiter.fmha_fwd_with_sink_asm, q, k, v, @@ -868,7 +868,7 @@ def run_cli( parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, - description="Run aiter.fmha_fwd_f16_asm on a single shape and dump kernel args.", + description="Run aiter.fmha_fwd_with_sink_asm on a single shape and dump kernel args.", ) parser.add_argument("-b", "--batch", type=int, default=1, help="batch size (default 1)") parser.add_argument( From 73377ae11a23fbac734e97e8fb9c0143e1b4e966 Mon Sep 17 00:00:00 2001 From: tingchen Date: Tue, 26 May 2026 02:03:25 +0000 Subject: [PATCH 26/43] add .co --- .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin 0 -> 83488 bytes .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin 0 -> 72736 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100755 hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd128_128x256.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd64_128x256.co diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd128_128x256.co new file mode 100755 index 0000000000000000000000000000000000000000..fad880078b6e7885b8d91e35a4646fb5da906d66 GIT binary patch literal 83488 
zcmd?S3w%}8nKr(&vl8(R6cOP70g`YLx!*xJAadJ?+;3t@6i^gF0dI)=oM1piYQ11g zEuC|;v5gd+v_q${!-S*7HXU)AI;d!!nY7=`H|g9y>eo7of%AW!_3m}DP6#1jzd3(9 z{(kY@&sy(V`(0~2>s@=Tv-eBn`q|UmtgNtdpq9i+q+fdd+1bsLaTA(B*$jHuuyC&Hk6TSQ3oH&GdgP2NN9U5)B zHIwTC!;Cb2e*sh%BQFnf@(oj`&t2p=kGxuZ*Q(n$6%V>|^~Uw9*KK<0iI)ZyZ@g{O zM^3lxuMR4{b@RQ&>sA)udh6;ovFge^zt*|7SIKIp><&+|hYwt-92VNk36l_Lx-O^59B6LMCzZgLkj~_L||t zQl$pAxzt*#)R6NkHR8NVjXAGU6V9tt_`FI@J+D$T&a2ey^C~s>yh<%RuTo3TtJI3~ zDphn|rBf7#G zy?*n`HESPOy=o;_nZM;D!e8p;3+q-&p?KZe+t#=H;c4kCa-_6+&|Sq3tX#9Mc=O8p zH*CB!s2f>tD=c2?i|<iWMLboa)!_Z4qmowA|HNauTlHr;mj#+A2jxOe^LJH9(;(}Q>2 zvSHl^!_V~|gYH~aJ?P%`Yu9Ypc$c<(Ws~Vye=+FZO{+JpT=gLAUwiAyO`D5vz4HyY z_SB8hxIZ6sfAM{**KFK?K4e$qy*8++1Mc3qf$RK+jVtfkuxfSIE5Vb&X7uao%|8aZ z?bh|zK=2Qb4A)$9+nNW44jVbdJVM;JdgG?G8`eLaWu8ayoYB&MeE#W}ji(XwJQpYp zgY1AOuxRi+7!J~@Z~nUKv?Nc5IHT@$UaZ-a?4$KICHpOTB5AyS!Z`UZN zz9z@{bxqfH(bw4LQRg3OI+t8}=t}5KMqQse#qSqklqic`k-19b1-T;IwG(;aMIzlx zL}p(m@}l-4FYd@u6dQXx+x2~NaGy()`Oye+@*mCLo?lWC&F{TE+AErSD1^MM+~{k1 z?59`M#GLQcbd0LsLv^;6?E{$?$RU9Y2Xby8ivoFfAfthNGLZ2=RynhI-%ic$8#VVm z80Z}dDMFdzWHI=V%$lB@{y55>DxN-lchspxeLu(F-{9{p{2jsH+xYui z{QaV4W^$(H(5&Pvlu0ImccX09tmv%Wy_1DyQGj0Yg0jMb!sw-i-zh1K7d}=PE!;(Y za@0$Dx?JIt`K9@@X6=UDp6_|PnI7cp`er9*YrWa=*|6!Qn#*8AL9#&W%t_ACd2;f<)PHmf$2M>%w+SI2Llcz3cSMf{VKN-B2=%U4eN!ZdiBV?!Ayc ze$1@4^k2v+csDeYd*&VB2Ch*T+Z_!zVK_bKji}&GL4NG=otTee%U4Ts59K&79(sLF zv?iI{gY=06~!1wc&3%_{H`|bs< z<6fX2(HsCKhDUR1WV<3+i1SciL|a&s=Y$SqO1Ivo}ta)GlU zb8=opz0pJct^g;m0Kal`qXidSux(zam&)dKYQp_O$PWRxYZooJ@WO5Lx*@+?6XsmW z!(1_KOm?YwjE>ty|2mVioI+>jQ5XFdbuaRxZmzG;7I!4`A^SQG`=`V?Rg)XO@;})Z z^gr!_YlZ8=W_g84|8Y<%P3Lv!nEOYacfi@dtBHSQVOhNEqO#;y7MF2>Np@YLCFaND z$>{2G&Xwfq(NPjY=1hUi8SojC#VJ_5I$Ah-bhN~&Mt!g0@5h*Lf2HT!YK;#f?XU4y zgTF)gOW^Ny{Jnv{pVVBNyjJseVsfJ9EwJYJ#OTD`laiBk9;8kSC&OAN91m;We+Bgz z-cLzR(Rot}rhxYcYbGQoXx>9=-V07<33hT@@(-i6zc~Lneztkq_5A1QWl5*5X8gYD zNl9maP5WUB?@Kzb)Rh1Dqf<%eRjf;wT#|JDyhi0XSxKh~eVJITaH=}Nz&pAn%z+9v>+;DS__-@YM`q??PzPm`~*`MX8_U^UUUc<6~ 
zmOU`SvXm@KY04TIbv~%+;vB;me+WMOsz!6mIZy-BlJQ;zW$|8xWnBt>Skk5NFH5@Y zym|Z0J8v%S{pgC)E|*r6tjJ$cvcmbtnk?s|AFbHAVh8?8S0&ZSGsDSO59%f9Zoqe>k$Y^PFKJqzB?%5jlZ_}^_sgpbGTkvmgu%2Y&IvaiXtqaC^Z2z-ZAHIb=t_S2(3d85H&xxO9zO2`eEBH_M zb)>&FE^?hqIp8$j>3d(Eg|#bp*dw{QL%0U)4Q$9mRgZul)N!++qtuYSIb*!NxX)hR z2R`m9bKOz6u{XP4 zsI#C+r(MCc=!MDYfzGT_+R+bduUT`)OxU`0;kX|^IzBpn+r(r|39547n|H0|^U_p2 zJHf{{U#D9wvopQS$Ppt;<}aRKGCw+hS6kYU<-B#MmlHqKR-4YK_XX`xu%oHnjlaUy zu3es^fqjdl zKfXvE$$ekE4kw8^dh2#4@xgYc_b>bE-vzDLgcY}kss;)z%|-ZohR~OSY)C| zi@8EXL!dU0yx*XS)S$u9s zdY0Su{up&TXkS7tpO+Mgr! zS7pr)y|O>36FTY@xvoZ8qhHmbNxvHP8~rNi%!fYhq|FQF2X^B6#DX9lHh+F1NXPa3 z&x3aLnjbRyakRzg$5Fr0r~Qiq{Yu!mWO0+7rY)EQ7Kxz$sZ$;FKd)TTJ#nBU+VmU?tfm_ zW&J?+C-rNy-qHO}{W^>V`%~8m9ri!3k9BC$uj|mHe+=~-{W|Ed|5*=hW`D9Qui2kE zJ?MX(9`rxc6X1-|PoOPEKY{w$|I|;Qe)d20Yhfq*lj*gvll^JZ*`Fqz{m=9|)MNB% zr_pD+(Pui>!M<%>2alw$X=hzKqj(Cn*3ENW16`~=c~}D}&$zxdv#CYBFK9>8`X~E# zXR7w0@jQ_gVUd+ZBKJo{R>eggNQkVi7b$BBuJw8#a&K5QwQ-S05+dvBMar5Y*VTC<&$$lWA78Kzv!+bI95idnR9w?PbB&4BS%$maM z%3dF9pGN(7ua4b~ekJ@5eVWwyvG!@auFCpY`!woTW_9e&HHGz8!N1W*={li9pT_Iz z4o&)19h&s3QNPizf)0He>!Ho`Q7p@A`Y4?q_%xj!_%z)v_+g_TM_Y`39QD(uQ9q9Q z>C>oR2|MYdm|h7x>7z_KeUwS3Ph)x&>M{DX)95qZ=rf(1oDF@mrYyuYeKPx#ePz}Z z>XAneKlSS{7VJ-5 zCv@2Vygt^UNx!Z`lm0Q(Z}jV+!~SPIw3+?Mvb<)0>hz%hb$ZbMOizF_Mn8eJ82tq5 zXa7?_f%@71)USn|>`$iG!cO+5NoRkWboM{f>rju;r=3Qh=|-RFW?kSp=AvmoeRO(s z`nFlgztMXtoDuID%{|o&t|f1twx?>f%#-P5MvWL%vS7)Ak_FKPyV|nnGkb#NE7YMI zFXY}&{Q1h|6(u>r`AO1k$GvLudhp;R&q8pfl5{_fFm2cMr>Fg5*Xe1_FSrIzKAMxo zbLOmfLl-`cwgO`>-W|BR!8ut@58xgKUy$YW1nz0@g<04?Wxbnc@WnWr1ny<f%_YLO_nnNc!0r!vz)7euLe%>0XAq(n(=}Snv({@ z2F*!>VT0zR!LUJd(qP!2IcYF#(3~_FHfT-)r}&UT8&Z78pbeM9KP22waaMi8pNun- zq}%9E(ejjkot91nM@~q0BiC!GBGa_YjZBAhE)V?6XZY0w<2%rd_viQ(2>2RYI49c~20YB*3$mTzz{7!g#-ryIY_K_f5;oYJ zJ_#FaPM?GgHm6U*2Ak6-VS~--ld!?&^hwxYIK6Fh=56@Nw#?g*{gWMsutxcFHAhbP z^R#UA=WBV&U!bKES*WEOS)`?kEY>nNvINq}ZsODkjPVFF<`~bBz#|RDc#Z-dWiZBb zH1KGHF`i?9#~6(991A?wV2tND;Bf|HJjVl%2evt7^O~Hpc}-5)ye6k?UXxQcugNK! 
z*W{GVYjVo)+U8WIY_K_%DH{x@qM9Qod_25qKg9#|qTI1hlHij--Y8+i`W=G0V-@l-SB7|-j0uQwRuISqK4!5Gi! zz|#%Jc+LQxVKByXCh$yyF`l!4XBmv~^ng8Jn^QKg$tj!H}IdZ~(UCT!Q4J}XkKhe^O9M;l}ys4#%{8Y=_$j=~c zPR+&`&o*O@@w@@}27@u4bAaaRc%d0{jOQZYMFwL$7XvRg7~{DFc!|Ln z&!xai4aRsb172n@#&bFFa)U9RHv!)SY;(%yH92MTnw+wEO-|XoCZ}v(lT$XY$tj!H zQI+D%MeCxTou#Me*Y0ep>#bT|%X>h-cqb1K#do?4VY&J>RQ2%0Q1a1O%hxR%rgTuS@3pXo*Ag?1n&Um znSr`q@T0&yGf>l;&Ml%Rvh_JuV}H>p0p>YYOPbF9YUf_DJ(9IJ7p;75UZj@3B&>D0MJCT%e17MZlc@Zm{}Pd@ra&6K`+3YdF- zeYIoTL+@>yVVvN7Of88A877;y4AU~;k% zoHXZHnY6+1A(J-zvCgr!UM%NWYMG4p!@%6{tD6M-z})Yvn*~RJx!+eS1xJCo-&e(g zOMtoGSGNk@2F(4wS}phyVD9(TZBOIXE;;wu`gwL5rymBkIqd`6oQ?q7oQ?w9oGt;j zIlT?o=JX@LhSNKzX5NM)SQmJnm&ty(;#+;nueqTKwB z=d%3tr@H2{{EaUR%Vqf`-yfCB^1ZH?^8J@c`Jt<&{Mh@Y{N(LYe&%yhe%?z`e%X(t ze4j-h>-!GB_e2iwcipp{^?erna9A$CdDx7!PFF8i_d>F*_d&9)yf@oQ+u2sy&bHEa zwv~0Utt@}#(06iJzUSoka#+6aybpET2N#R&qwf*hCzXip)1MOCZ+xM1F3VpvQp)$5 zF6H|#lJY}ulk#KlmGYAxmGU#6mGbkxE9JXg|Bk-z)vG_$eLa5rC%TWlmpbP1n~Xp8 zHuC-6KlL{1d5QNm$;+*upU~r6dU3bh-Q7+OY)V`4-qvLY@NIRwcmM2dd~>aJx1@N! z`QJX&JZ1UPi~nOF`%KbSytf(UPM};F$~~vkm#4UMuI0>loNKueXkTpbq-$zM>^St? z;KA2ytpUy+6gjmO7}CMJt5J7^zYp^^>gM~4Fz%E|aGa9w2gkA}MA-fq+R@b;fp6dC zS~>lWpijDV%^u}<9Xg~v=l*l}?w4(T4!Rw^eE;*{x)HkQhb|qn3;mA0^4fC_wN;mG zZ>z2u)6mqOM?!D6+uH-O2V@>(9%OIG-fy?xn-7@}9P-Bag}5%nb$4%^-w)UQaNWb3 z;19s{0Op{5gPo!)MUb%yK=*#)u- zWLL0B@l`1=mx+yQ{s${#0B~MfyN*sXqlwHn-o?h?yd0`;(8&{M|ijU zi*dae*CV|<{H3^FitAC{I)6E?mqUNFw_bB!=1W72^GoJSLyYrF=F9OI=aNJCDaWzoeZFG0rb(XG4trBJFI5v0tQ}$7AdlX=g)>{UYsbh_PR! 
zoeeSei?s82jQv8+e;Ms2*T0N*o9`xC{jNL%-=gce2mhV^EqL%P+|J-zIFS`$k(EUv z_eVum#YG-Sh^(#`Dc{238t;0Q=ZV}K78x%RSrHXk85g-fA+oAoq}@PhT-elS+S_b%=6-SY~3&%^K2 z4CnaWO%}#@71H^wmxFJmoPF|LAAX%Z61vt6jq&dG^CazXSY&O;x$FJVcvp?}Hu{w) z&u^jleVc>trHVx7NO$P;nB=N)-ex~4X?1atM-}SMyWS0r_wMswMtRz0zKKhS&aoaS zpLdMYA5Pf={q#X>4Omsro zPFK5%K0XURj%~&_+}#(U%|)Vf_zLKB9_%Xm{-~tY#zh`!MBQCRx*@Iwl_<|P zvMjzS@4gS;pM^yyQ6%zkRAjB)h8&b(JMry#_kH+IFfKZ2ZAnO4UA@Sob{qDiJll!y z)w?J63pdWCpI^Z zqB&@LF2*na1~)X(+v?ljrsZLL@;z5g@_c`Pz-4@I-<{vAIcRe)e{?u~~-}v=` z-hd^pn(FPu`})B=;5@`Pf3$btrEchYZ7U3JeE~tm|k0R+x{g%9hICL_@41h_Mfurz<$xTpKF0_qv5S>t8E*uYA4p7$tEpTT|XI&hocxiBre-#DWk%E;6=DrxSVYj(z zwpX6!-^uY2;P?&R^V(1Aae}?tzoX6lP;SJXuA1ZR^+!h;yW-6n@_H0_eWUkh+Gq1# zoLBtz*nxlSk20e+xN5E!_szG+9_T{_}%kUdz`tt!OGiH;k=6eS!gKLP)*?}PbgVq7`!n^&XUxcgnT(5v+AcvgJw;q#DLKj`cAJyAaY$yud~ z0)IacbA@XN*E!<{=s{l0BG z=c}=mem_+oe5-A{Z7bJ+RDIx>Z9DU9d+GmKCt|Cl{J*i^ws#5iSSRAHr2N0JKh0jd zzQF$*`)zwoUAEm^7i?R(Mr_4B^|Ste%JV$=IRX4!t_}CQwKVH*5o*j0OvS=^CazHSY(20o0+=~ zILPvdr=i!9BGEZK0_8{DBXc<_X|-{YN4U0`dCawh?M4ia9^4ZoM5k^P%8z?c=5@WK z9p##4<}%k1wiod_y6?kYz!9A`+x;?lC1dq*_pe|axt0(k&WB!0iln|aw}Wd5+kx01 zJ^0Mu)^>0WVLK2^|Sst+Re3ukHWVH_3OEVbz%t8r#_B7C*-uRyNJ}=|A-ru z&qX;t^Rs@fk2QjJaE^7&Zl|C5#{^if4*}22Ge7756!=)qkExg+(9h1sn!xgNg7R}wo@;`hw|tg@?W}J;>f^e~^@r;ueFWD&&ULO0 ztRFF7^xD9BPXzUy3hdy#>zMs%gzJN@n{x!~gWay??cjbu*NZ;qexP}~nzw^%1?xs! 
z8NF6CZ&&knaLr)-h&`j%jOOiX-VUxEsruoU*}tY;TodfJn0lMHsd*c?cCb#wu+eKr z^ER0?x8`l(TEcrE-i=;MnzyZaTTa>cf$h!PX1B%E%e6$urAFD^xR>lRj{j;e;wg5^ zzQ`Yj@t^UGs~qR}udYID#XLylRSrk`tmgubV{ULA*@N&t24meg)S}&Tp=NoZks4UrCAl{aoMJSM(3;8?G_z6T}(PYfQc9SLyp;&eC77 zZhO%cIC$2$y zoX{4oL2=PP=W*g%#Cj0BsRz%U>cy7R+QRgf+QK!7eSvsSdQBRS@mu~QS7qm@+pHKm6f>V<^}0``MunAsc^~9r}p) zwQRj6>H32;=^vQ>*V&==f9R$ArDM=9jp!GyN30j|t$J`hV!w0@`lTcKg?-WyeZoFy zL?3WHV%>;=)q`hK?30c`pL7iRq$B!-eb9(LFm@wORuAr3*e4x>KIw=);dz#6BiqDv zN!O3Q=el&JHkvkZU84Pnx7CB^RV}sAw25mH>qb1T9z4I|*tgV1(l<1*(R<@tQ#@FdhmRU{If!7+0 z_*!=W-(fJ~YuyQaCvb`nut9Uuj2CRsoHQ6VXigdo8#E^kh7FpN2Ezu;NrPd7=A^-} zL30u~#fJ>qkm5rIZIEvxA3%(+EkTU0twD@0KZx-a_3=|azT4jBON_5lUt)ai@Y7;^ zJ?J{?Fuv=|ct7MicLCpJ@WZaN9(cXMc&lv#@CJiRT<31!yA6KCb?yPa$KdU*vk`cs z!H>GmCg4rLld-nz7+@i#(4U`zQGvJ2ynz;jAs-$ zYB0vL1h~XtjORAsZ3bgJ9|3*@*yfbYYjVowH92MTnw+wEO-|XoCZ}v(lT$XY$tlBY zn^T#x!RAz^Y%rXXZzHz^F}}72F~0mD##hvr7+>3boa<{pB{9Bs_-Qe|z^PJnJ+nmaj4K}ATWrN{VLUUzH5aVlW5aY`aVthsY z!-1cY7+nEcbhTCczzA|YX)OHp8$TsV2tP2fxm7r#`8(wCk@7U zJ_Y=g!5Gh{fuA-QGDT(p5!%vIx1y1e381FG-j`4gJ z_*sK7o@Kyg24g&*1Afk6jAuD;xxpCEZvcP8V2tPUz|R|u@%$$6H-T+V*}NvFY+jR7 zHm}Jko7d!&&1-VX<~2EG^O~G8ytX-&DI07~Wy*#xdyKCY;FNkz&aoKdi)RMvuLLv3 z7tajTUkhf8FP<5wLxLIOi)RMvb-|4B#WMr-6Tyt}#WMr-reMbS;+cW^S<|@%V_Pjm zTI0*2!x&#Y$7&3|F=mV}o?|rz-xxE-7tgU8gKvx(Dtj6FQW5)R6 zIaXuvjWJ_<@f@oW-x!;7i%i;J&Mh)&gE_}yjIUMb8})M;3&!~3o?pEsm@&S%=T~nF zW{fZH`PDB3GsYM9{OXs28RLt4e)V^P8RLt4e)Xf5( z3?DLS!yoG$YXZLKUx&U`AINxL3(Wn#`jy~`z})YvzZX0SnEQS8kAlO%-0!Pj3!V(j z{l5BNf~Np;zpwsT@O8l4@2h`liXSim-}Bp?z82W#^h98r)02R0PKSYQPEQ85IXwl~ z=Ja*IhSMf~KqhT4@dGkxgT$oxg6CM5$hjmr#eJ^&t&BN2#r?keH^Jl-_xtKU1d~(T z@2meKn4IE%U;R!nImP|H`oDt7Dem{x9|V(A-0!RZO6OD>uQBFnyarC=HE#r?jzSTH%o{l2L8e$;(lL!MKC$V{l2T1E{6wk5LHG;`0 zo@1%Og2^eKW2vEn$tj*=so{djDV}4gk%Gx7o@1%e>6}XAHO4%R*T8AK22SHOa2l_H z(|8S>#%o~1YnxM(ZxAipN7&1OV1JWwTzSz^r*YEr$CLk8j zioe>Db1memx1P^oI^N+uoG1CU6Oq2|S3k~~s$&Lym~$NEtH!?3lljm)ay`-+Z)h4M 
z;}wOaTpcCykIqK=?r!6AZ-Cr$^-Z~-qI~t(-}EdJy<@!p%GW+JLch?K+`X*hkDRI%=yH)7o4(|GaJj65?d!j-=f2z>ha9QzwDvKb?aPz= z)7qDd_W@~Fy#I^39_qE!zOa<9rEIBvtcUhh_J2dyQzZJQwU6m7wU6aF)>Zv~(=&LJ7Y3-?(`cE4# zme=Ere&xSa?Z6gtA(C~a=KiA{y5)?Ij^8#hS%Ym%6hR#Zy!Z89!*j^{=Z22XSpxp* z@kW16j85Fe^5jDCo5KCI?|-6JC-LvA2LCQLd3eu13;!SduZ}rvKI1AMNm!9G3ffuPbx-9y{ApjrLHVykGW1;eGMX-c#iJ+INNT z_x#Jp>Q(5Qaa{DO)c@cgr2di%r2faQkouqMdu0yw&3I9t_22a7JG$OG|M^4hFCNPI zFKyT31z*wnW_Kps1e>VH6Z|qM7^|xmK zXW*mZ6ZOy5|HgKXYis>(Y(AU4##UnkeHg!4WckF*)7PBZnOJjX$TukDB4aJm7RFj+ zI(&Xbp5#}u&bpacV~C%z7?!*$O6VSNvK-&k^C|R?&BU4_I@QeMHS`Yh`dVkj zdlQx&K+I4X!|IvA8Uh`xA?0i~Z)e=WL6ORTm3e;*_R+6*b-wam?Co~> zuNrQ{F8Z^MNPq6_hQ9Q3dQQR~`q5N6+G+YBX{QH%rH=I@y?MOF&#e16@H=OUzev5* zeWBMpX5weo{TCtsOtBfMr~LuPqlJ3-{R4d)`LPeSpQ#?|8GD-BV*24sWzGHPN9XTa zh*$t4@$En7XTkh}sQJ!6w_sfKg5-F3Rh4E$J;W~NNUV+$hD-dVytB_xX zWIBA*;XKK&Ekb$`(rY zkA3Zvm%Y)mNOX?fj=bBE_f_wp&oteya8KxS&6%w;6exp|{Gb_3y*= zeYjrj{f&-UK>o#HFa6>J$a?^JYrG@=7F=&ZeYbgktM%vyDrq0|wjysU^lta+^!M5_ z#~NZf4np9gsRA`|QO~yBwl(#P z3;XD6ZQE^IQ_r@X8_=_Dw{1;5)6zcZ*|yUk+xDiOXUTi$+V@&xGVv??B!T#z>9 ze9<}DZTxjL)?fPTw%Tp{b%M_+D7OoI+68WI@czNyjqBau*4GBpCvp8Gt~YxBl=9bg=m+XOjl8E(-zM*W`+IP`2YQ>mhLpcfz+U>{GUSyZ?_Td; z{c>EFyJGD=RUADcpecE@Tzic~gyXdDi*APRMzS_i5z43|{^m#;x z>j2|dI79hUi!(Kdmzs}s9kT^-Eu7vGUy1Q3R{XUadcZsG=SkY(u*ljK_*Q||jqI2YC=zR5%tG0N*_oI?l7Z<7Hr53&Eh92@h(Q%S!7vsP{zlX1r9EMZDC| zKE!Yci%z0QMd7Ayi-2oDY3nXml`thQj3wl<`;;m;yDo=FLg#!ywuwfQ>nyrBaEx0#|CyX zj>Rn~bNk=9YMZAbjIneFVkAA{(F_K_rMrW z>kxNlyLVB9dr`(k;(X*fz*rBu9>$$OtcRtjhx3u^z%pEO-f`YCW|FRxdO`fWBE-Mo zyj>1G&M(eS#-?C>h?g2N@ltO=d<@Rdo1n)z$9c$@6s#}Kc?TP5(<;Qz;Cy6!3406} zFICrxzBKVtjs3R0j666QhOw9NCs-firG^-P!nU9BCRhjJrG`wr)RydL zoQYH&h}B@*PoHl0v8m6t)wYc>C0JJ#+GpaW8e7wB3pk8;sUbd#Fa}bKw%PSHvyHZ} zJbebwQU(II6fboI(sis8`thmqT)qn9x9(T2+TmRi$&<8-ut*&*bv-!zsCQX}zo~G} zq~a2Z&i>UXzy2RxwbN@KiAq{kT%?YddN;WJnAb7FxJ0yzaZ|J(7oF-gD1Z0=a@8)c zQ>0$fbiCA!;P&HQ=LpLnUaI(UkH>3=W3p$?G206@lw}-!tuiiB$4k8(+~&NcA7`5pFID`w=u~m-G4r=x(saDkJHU0$WBPF!gCFQI 
zVT?Os#7ot_*^x4BZU_A|+ktqg+D{ir`8KzMewyt-yws46m)gd5FkY(ef7q?#rJkeR z^v^7hc&XYypR3*U(`+~5rG|98)Hd49c&ThJ;-!jz-@oI~Z5XqS|Kh6MUYCd+51Y?1 zo5As~d0n-i*5d?wO}y0GQEv0Ux#|foB>o*Q%bA#|_kr7A_j+i*&HHg~nV6|-QSQF~ zxQ+ynd19F;n?WWY!S+dwoxow_~QR!+ha7!nMx$0{VR33o%nuK7T#t z1$_b6JTvF%^I0cirlx%U-IxcQ^IYf5+~%63>*JWfX8O5}sE>1;>zo-IV}yZ)xO`ho&Ma|O5dNV5B}A*-L{qMK&n3Q%(k6* zw!K^fSSMnprq%#szilt~m8=snQ&Vezu|LgTyS`uzF!tN_n!0Sexi;9ga-Eofd+O)? zHS$c)XNs-h=d<2`NS>q}4vVZ^hcVjvxvR>&fsrima4O2`XA!I=MWS*`epW zL6NAW)x|{~y$j_LC`6TeL-bn0d2RLt3DG&`qx`lDvqRtThDGWn?W6T57tPLA&wC^E z8bX`77hr5V2ReMN=QwT#=QxM+B<)~WWP}0Gy4w)Es#E6-y*OMZtug&e?dct-fW~z?A-PU$+9br2VGgY4V&#@hR-q-yPyY=(_ zIoi$jgyj)4Ri5{y{8{^->j>M6n5o?vGc}6w+R-69^i6M+e*WGC|4Q!f2KV`l&o&}v zYKZaa$mu6Aw&doMC*2UsJ`Ij@-PnV5gU|MQEMSv~nOYXqSC0Bv{&|$&06uKHAX~>w zW&L{Y;2HmJq?cZtt?q%`ae204y{VX~D97i1*01&POiepD$4t!B2Li0uhk$2#egr;t z8+RBW9{zGn%)nc{{jvr0R!XX8)RYaZRw> zV(M+)rsi$n+QB*zGgYr0&D&(o@te1WYYFdxn5lX#Y2LQxZDD-6)O}!k^S0S-G4*mS zG3WfVaWB2scv9Y1Va(Kx82_DL$yN^1uHw5Yl2^G2>AS8BIF7l&bz~vR^BKrt%v90e zzZrVFyJh3wA-(JUCwgY9u#~T&M12SDL;90_veo^NPxsB%F;h9eQ+pH9mzb%TvkyRT z&(+zgNXj345V&k`w%P(&K0I5;Ol3Q`H_zJvcU%_>g^PB4*=XXNNS5l(B{aoMJSM(3;8?G_z6U0o_YfQc9 zODs;zS^5k14bxHn0R0C0h<(mJ;abDKK+IH$&n@NC+Tuvry?K&%T3fi@$hZ#cZq^%O z#7xyOx>*kQKCNwGsjrfwsHMo+YmEV$Lx+u`Lwn% z4fj6dIB^Zq8n5m+FT3ZrQ_Zi2DYZ2=~%v2q}yI%B9YYWp`8Yiwv>%TmGhnRlh6M8 z9-LqB?PrUbs_PHdBs=zI^H`f)k615ars^2p&0}vikG0A5h;<`os*d5^JoaYun489K z#7x!y&O`I~n>^DpZ8R}cb^Yl3w#H1Q{fL>WV|kw~W-9AO%v2r6+r$@b-bT|Vu1Tr7 z;nz)k(dKP5ZQ?qWsvq1mF-Dngx6zK7%K8yARmb!`Tg+6}jhLxAj(78MY(8#WlTvlV z_A|B7w8_-nylq^kO#4iG{ySr)hVb1-2=Pl8Uyw0Vy90MO81YJb0QWE$@k)CF_cR#s zO7np83`V@tUckK!M!eGAz`YGdywX0veSlLjGPAOrPDtzYnS6_K5;j}`e1*ZVp)+u2 zgJHv!z*ia!8@d2@F&H*n1$>pkumKDAyIl>24c&md0h`zYj8~c|8-B5i`|rs|Giif~ zHNlvv`RJQ`GhThNoC4qigAuQ^5V+7_#4GI!+}B{lEA0o|&tSwW?GN1FV8kmO06f58 z#4EiT_-f!3A7F##q!};RpgCzUY|xxE7&d558VnmWCk=)Tnv({@2F*!>VT0x*aEcEZ zv?0ZZ4B8;ECO*aX=tK}Rwb7rVzm+`YU#F!LnX085k(j9}A~92QBWW>HKXaXd7~g?r 
zygzrHYk;pY_@wI$0v=>=(sc#{4>mX}+Zh5p#Ng~~XDIMcgLAT-VZg%-z98Ef4m=z< z6^{uv*qlBI8*EOWgbg;QPr?S9(bOc}I6 zVtjq-&()kc5yVVw3}U993Sy=@k%a-L7HO#>5;HY7k`^-+?HGYE9%05D<2e#|q`?@^ zQNW`N#(0hf9&Iqja}4kpgE5|CfyWw*@f-&{&R~q^c;NBCHm7V}lT$XY$tj!H#vo?usUT*m6T!I)^4$o|^&wS6Vy5Os(qg89Q&TaL5Tsn@k^3}U993Sy=@k;6LOjY!N?6_J>! zxskM(so>OXjPYzU<`~Z#fNwAu<2eU-j=>nu8-Z^$7~?q?c&@=1&w0S}490lQ2cB;* z#&ZGi0$`g{Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-5)ye6j%uWe3c$_ATLnXN>%UnaVQ*b-iH5Oy!w@n%;D7!C0Z4kk;5=bQm+0=U9zb3ueqz zo?|s$BbYH$d5+aMSTJLz@*Jyis9?rS)J*9s#!Tg&UwMKVGnIROb%S8WOy!BM97F+_%{l2GPY#r?jzOE5Xb{l3~Dn4IE%U)>{^oZ^08Z4yjQalfzb6--WX zzpw63=TsW6G3IH!22SHOa2l_H(|8S>#%tg-UIQCm+nmaj4K}ATWrN{VR6nDi2>$J# zb3DhgIK_RgdQirkoZ^08JtUZ%;(lK}ESQ|)eqTidlT+O9s}jNF6!-h;5y9jX_xozQ zU~-E4ef4NMr_y+hF;C+)a2l_H(|8S>#%tg-UIVA`8rbmK=2WI^usM|}8w{tu&^eaH zDeiOCV>0IC6!-h;alzyi_xtK=g2^fF_tn=0lT+O9tEU8$Q{3;XX9SZ|-0!Pr1(Q?U z@2lt1IhDq1jCmTbfzx;moW^V5G+qOz@ftXd*T9C?Hm5RWgUzW-*jG}onu*? zn$0o(hKxBm#r?kereJc4`+fB-!Q>S8`zj`woZ^08y&#yJ;(lL!TQE7r{l0ohFgeBj zzWQ!Dr_y+hF;C+)a2l_H(|8S>#%tg-UIVA`8rbmK=2WI^usM|}8w{tu&^eaHsf8Tl zmu1Y!sYS%!6HHF=980|-n4ID{milwS6}XAHO4%R*T8AK22SHOa2l_H(|8S>#%o~1YnxM}#4h;Ju)$9VsqlRwVk{rk@Q zB!}rJ_YvqXQj=3h@UHNN>Ux1h#QRv~fsOrtg?{xKYyo)UDU^?FCJQ$b! 
z#C=F-yLcbA!^HN8qhDAL^`L*4_hP;vZ~Bt&!KGp}y^Od9jJYB8oz_04vweAz ze_H!Y%#ZlXFY0=z*HZh!Qofe5rS_Q^AeAq_A+bk9|FrfMN%_;-$MPKOs+T1ONL0$7 z)*hy_J#ooDtvx0dNcqbW3xs+twI?Cv&p2KtHb@-(t7AZk{%P&0m-*6BiPTlBCLPxy;A?=N2UIm&r1FC zzS}w1#3A8$QJ?j9yZ#+r@71e6)c#`p_D{5b@LuYu_09M&&dAy9e-->R<3ati*>B>L zkcW&rayI)-oD$lv<3*jteiN^x68+B@BxkeV#4M>o|5N{L_M5&xo85ZNfX!#?Z`0SN z59z~9oWR7-PhWFte~vZhn0$jmE;24PZDAa0ro-n~xku>$t&m8+X))djHW*)DhcaYZ)BPQwAWme48pM!&3L!g5-q?~PKylnbx+QoGE zn7whyZ;F|UcIh=k@+(R&{*QqP(W#_Fnf<(ge*+DJdkF{{RU#&si4^5e9P*U+g5OIoFEn^`9kSie|~I>!DY z(XXOJTAGb!U8uz~fR5=Wx@YafR7@zW{W^x9)P2@IH2%LD>%R8efo*5%L*wTYeC|QK z20Lcz*3Ye&sa&^_Z^t~{{BKswRJ5;o%v9#5Vy4m__`T-kE3r;+JZf?OR%50n^Gom# zPdKN=O@&=;j++V_>Dy0>n+n_L(^BzK1CFpy=u1=S;2Y<0E$*4L)6oXb>pIqt^fSgy z#eLcoHpnoGr+7vex_B6M}^uw8KF#T~} zaZ^747aQeyJA{~DA;bgg0oenR>F`sB^CZ7E59xVG?+w`-vB9*T{Qz-OMej&H(({oY z^2YdKDN{#@I*t}1uMm0Ny>SvZ^#jCB6`f=Kkk=1+J-i7%)1d#+0Hh7TJ$iZ*C2r~m zzmd49$mg?lC**f}yCK#YvNL29>F@^!xlG2L+lD~Kq~HD9Q~#JrRdg4pTG`ppkFWLj!OR=k6q~vO2yr)L_g`c zds6N+`yDBJFi-MNx8FPypx^BHd*$%^+P6yiwDyFhu3F0G?J+(!4*$>cMWUP59;TsO zT6>I-t;9N@eXNu_tvyjGd&coFKDP>MqxQL?pJsnT>TYp7jPEVSdZ~S{=%(3UFLkEb zZ+vi^&jIkU1Hq4h;9i~=_6Om55UzW9Q&MsGD*0>xy&=dOg1p||RDT$*hk=`YylJU8 zd{yWV>Wx6&27hmUpY_UUI^*urNq^F1}|O#BwS*(vdv zCW1Q?!JYozd_PapD#9Ww!^jIGZ-BQ@;->xTiD|aZ{0x_jJo;jx@wL z=kg?NZ&+kp=14<~bB@oSdL8{OxQS=aBGIXkxpO?mxfqqS%DBk=GIttcoQo`P`iJEc zqEjV()DYtwt(UX|(l-q;&dttbicA9?dp-*9h@zYy08 zp*O<2J>{4+iu%x{I#|ZdbaKK*S5XJU(-hVYukR?UhPw0pKZ5or}i1J%eL3HPd}TZzic~g zyY#a;xz;RhYL@fDpwRD7JkQbrdQO&obS|hjH=%Q&DOGg zAoBt_#F@P`xh$H5!m^#qwl6DL_IPinq9(_Au_oWyUeh}#_t0a`i-=eAPE8#DmO__; zAC`1+j@5M8dGq$0civpu`_UDpT`sLCS&_e@WQFsOHCfI_KU%SK#SZ+Hu1qd1iK5=6 zUDTnaU6z(FO{zoK(>lqeOUo%cM=w2e0qk8`Qc;q>Gk<&j;`wFyI}1uj;vUh(@0CPH z94J|hb3*fPIJEZfH=M+NuE;5R|I@52r1aL85FpOK_5sb>*7hWR{g&bm+Al#PfA_ z9eRCEw1&?W{f{}zkAmu-zd#&!p> zGWI%7oD%!TRNR<6!TZCKpNa!hB)FOq`UmyD$$@L0xkVwd?@7f0i3_esNPa4=N4?-G zO6VV8Ob18O$|+e#p5Qnoa79>fD$Yrf;3`ViK|dCiwDY!uXPpjyD?ooFDAB&uZ)SOd zkAx*Z^;=kx;A52Bf9w9hHTT+4NZ6M89V9OJa6bIdh 
z!H2_=pZcw*NN^n`>(KonY3FT+<_P+P@v%(YX*2_@1FN>|@(wD_n!&y7VE;(gl>e{h$$ypOC*N%xxPMKJ^b_`S2W8#+D zF-lpFiA!q7AZ0x!_NX0al=>#Vs2xX?zQc~Q%RV=8b?x}MRU zX(uE9OCDL1paY_A_;)l{-eHra0ufL3T8;+A-CU&SDD|9Q;O-xWb1}NhgnpmH9 zjL+6$GqSGMVlk4-t;JqETdYOeWXDz{hfQ2jJB}!0>X~?DLa;8^Vo^aVDrx6z z2Yn3B_HbUDI_op%b^N!x%>3f}4&*W4Um(}3J_!5|eg`u1s~qR&+HbU=Yvvc7-B{lfx@Lay_jBs-_i@(8-@d8C-?q*CYONkKzgp8V z^Q$!-(?9&Zm3GwrR_x&KeA@rucTUqk)xQn=Mhm*8fB1VK>*4QuY!82@)BXqjZ2E`4 zqiFvV=<;_E>hd=Z(?9&ZiaPu~iuLigChG9FB-1~w)nodnH67DGt?7_k1#LYyK9YV0 zY(7Fd-DXxDWOvl7FDrx6zhuQaW?-sh& z4UO^c_CtuF9UAYdvED|%JFa_7a@9C*v(K}RyzAZ2c<(;H2h#JdbJYaz0lz1%drx=O zwcZw=XEOOWxS@&ORzDBv`JSsLdA{EZ*M&E_D(ppl-m4$}9gE4{HorI0`z>(Q6t7g* zKLG!Z#dY2ezYo#}EOFITZ>O$vAY%7l@9omRjdS1C=^8i0-|ss?uhZ48;_vlW;JWi* zSMm3Ho?&zu>4y0Gd}pM08SX0nK7S>yyN-4he~;%Lo^9su@m-M4HuLxQt8mRWvoCnB zJp8*9?2E2QXPem9)u>BtT{XjdQgdnuI5pFI+Al=7zEdYOWX8^^8G1^Sl@R0Z1RS z$yM{c3avj5`U|}8_*Wx+-2JXv=v8X|a(;1MaE`Fg3&AhW5zaL;N64>!;1}lz=bD)# zP!IcsbI|k)>luQ2*e{%ereCN(4EpRB&Oy^J z=)Xlo)JP4l+Irm{VMBVwQ zn|^vB<^uH>2l`6`{pHXfjQXcM>Z&1-Qy+&9hMe|wH~rre3hJe;=HCw@_eOwwoMW8B zW{!z<0bS=Y3+@a>Q@?!vkT zISX}jp3KHPq5hmee{P^ZANuqKoU5Gs>}$?-_ASe$&bWhmX)FI7A#=w4r_?6Sn?JR2 zGINS&+`}-gGoEqPaL8HD1)K%9M*z?MCY}!<=fqq!3UcnZ@mv5o|GR-6=ExXe{@cxC zA^Gn!kAvjDvpgP>|F&{=j>_eEZr9hdL%)GkZ)S%wdB)APiN1hq5$CMtGk8y*!nsQy z!nsSI!G7V~X1{1X%pcBi_6z4Y`-T0-eqsNyUt0V3Ra*P^RGR-g9Dl4c?*A$6XH8wb78ryV+Qa)DBHR3JknlqI0{!&l8ztmINKR9FR-*kpj z-an`((5(z~tDtMnP~w43WuQ|P=u|_;oT0Q6t_}LR4*i@uV+#5?5$M(ix^>Vs{gViEY6G3RK<5~A zO#f83Qjh7M)^tq&w5Fra0A>VdB9PeASL6w6(z8oiAY+xBWZi{1ji{^M_6zrCF>{>Tt&$`IRB%PR!)hu zbGAeG$GYH51QP8#oF}-J68K11a2+M^(IUaeD1qfn1lRgZ1QK=}j0;Xs0v}EYuB8M% zQZKlU5?IbeB(2(!w1atq6O^nYEV!1EbrcD%qhuYrKP0U>Drx6zN1M;Mmj(W3S>S(` z1^#DQ)BGx57Wj=8bj|!K59VGxn5UJ&Jgve!HS?=Hn5XeTw=&SJg07ig@j$0C(5VV^ zs-a`%S8Mf{`PG_^nP08xr2Nm?!2hfb{Lk9J|Ez85pX#-N|7k(j^iOrrcZr~%YlD8S zLqD7TsSf%%5$M(ix^>Vs{gViEY6G3RK<5~AO#if2kLjP*bWH!Wrt|;jGj97^0R6m- zck+5)-nZc=3X?rM75iB*+perFS{sr4(o6)RBp}%7r7-WSEpw&4LBPzCua`o 
zjUMXTy>D5+o_MFfC*IF@-Dp8}_O?Df@J@e^CamA#?~Zr)yEmovg)aUL&29az!u$Pw z@TPno^7D{?(M8dMi!a{R^iF->vckT|-&R;!h`cEBOuJpD+q+KBEazL!#qYYwEWYCJ zlsamdON@Wd$sIr5Yw5dysQq0)9Nz_u8}B9YjRoJ~52HWx$6c+l1BVq2r{g5v$){dH zett9&l{{n5HfPMxv)=Cw+;7ZSuf(ZFJ+J+L?Oj`JR7Dt`?VMh0RTg1 zxXCg1-*hyYO^69#9{%e5svdPg4RURdgH8IC57a2EhTC{F|#s@NRl!H)ltI$G+w481TgRoE-z~NswI|)4d8)v70=MHdo z8hGI_XT!it$2mI#{NprdXMvYTI2!?8xyac$;MG4lI}aTDi?a(r18cZl1df}uCZ)ZW z#z)}DIseLX=Yp%qW6NJ{UDbaLJY63cuBMn$+TYUnf~#U~m^wuDtz4z|W@86^4xr%C z_=a0N8iGed@Ms7gjlJCB(GWZuf=5H}Xq@C0kA~pU0FQ$>-@(nC?E~)L!r6Y{&^FEv z01x$ZHUuOd4+4qDLqOv3Fpzj8o{2}|nRq0giAUm@cqE>QN8*`yB%X;!;+c3{XZ=o6 zv$ve5jq7gm^x4KoJdNPHKCR7C&eQdgn>?A=oBUz#PnRf;iM@N^<0Fi@2v5*+7dXt= zBEoZwjb5^CdIxQr-XYtj7d~Xu3!kykd)t`UTbPmkUD|2J`CR*;qG!W|_xiofWFH({ zkM9zta|u|)N6Th1n!6y&G@pjPD?W|+5Ds=|i_d#|%wrYBK4MSRYQky;Y0_kBb;Q%Ov+~h#ef={?KuvZ-i7nr@F+jT93a5cO8HtF>qK1@ zSSipDdI2mch0Ug-8DGbt?^OZw`gdA(4~b?VtRs-zM= ze?t^`sXtWj?q;4Pxk6`mUW@CVVmjLqPw81tJ{OO6c11IKp+l5CnRL+WwcAWCNkq{S zo0HCV>V;{p*17GjIdj@w{(#%-_PSn36^r@qX&z5Ux|muVbH{TT&ujB%WISD6Z)aZ4 z7jkdu@uC|_SD-Q85Q_&x0WA~^G=}w%&zDSS;dsmo^l5=mIN4AiZ1BJIO6%)0X3cD$ zmhgq+Ax-n@0bTPq#uIwb>raLnlK!wBXpGf+13^8R)coNW!P4E5b$u#o7>n8@N?WQu z#j6=MI=LnJQJaWx-xZ<*M5NZ8%@uX#PAtoIFUv4@M|QD0rFEy6_)GF)fw>F0gjUp; zyQnWMTGm=79fw%&D&(ShEs+56kn6F<9nr3ImRh_~j#@2%#EhPWGmcZ#V!#yScF9H- zePdooOBeM*p#FX?{mlRD;Ot#fD7rnwIw5#L}N_h^x1L0}gyQE8InN6N^T zE{SxL<5c1-VMybYjwPFu<97mNS4=Y}{pC27_^j}kdg(93*w2W@>&@fKaV^m$45hz3 ze~D9|=OzB~J|a=xM?^E$XOLyW=aK1i$@XRclo)}oG(}1)D0`5}U*6|#^!vw!VSuzH zsjM1;ihSuaSV@kj7TYi{#PU8V`Ev?+GmvD`KQH|A!vB3?C!y@{Sc}>|f2o%!{mZ*) zx&K1p@0E@MH;dzUmdT~R#Pl8fCr50H_4n{EKrgRdsp(GPzeotFpOT&APxjw;Ad}n) iQwv?&WK_}s literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd64_128x256.co b/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd64_128x256.co new file mode 100755 index 0000000000000000000000000000000000000000..c968a9a059e712ee18684b963ddaa0e6e1872cc1 GIT binary patch literal 72736 
zcmeHw3wTx4nf5;WtOUdxB4UIC1W3X~3?YPY6#{bK5s^EHO#+0AT!pBJai5bI5P@1m zjMUOOsl_&0+DZS?DYiDzwzgWe7hAkf+nMRiq|>=D{cA@taQ^pQYk$c)5#*xn(f*U? zIr;Xx_PTs)t#^HUueJC3c64&_6gLzqQjYlxJHK&4!7)WqaGaT8LZ_!OY^2i>|2sRE zIO#agv=Xnqd^*TXWJkp1AA_Y#9nkLI(-)cmktj_6F&$KJoQpJcoJc!)K~SJbD;;%p zt_aQ#PnH{lY@DE6DbGdt*`$;7Qoq^%T(Ml&?<~s;v~g}kI$fUVbC!~2>GG&B|Jun_ zfnp-1?K>dqT%5~{g36AZCbhcEvLt} zujQ64tGKOfZAsa(W#yYUKe4I2Y?&j57%R7E0_x7^2x0RQyS-pPwS51_XrMInEQNF2t){pC~H>_WK=LhH9 z@z0iX=RaG{8RwkySvPd%8MCgDJ{e%5D!WhWm)=>Tdq@H+?%Y`Z#TCQyTT=}_>r|_) zRC(u>s^Gj*jXbYZW6vv9(RrnsbY7{ZomZ;j^GY@Ayi(0QuT%@qE7g+oN>zGZsg|Et zs#WKeYVCQY+IU{6D$Xm_mUBsUzGl7B6!HNVj3;wfZrX6$#*)qDE7z5;uP9ludQ188 z5?*C~!dryD)0Zz?w^}2VtzErxeaCOUFPuqUtT~dqu53%minV1GC3kGtv?jsFPOEL}0Quw-NTriu+~%GXaMShc*cpk!$N$SwK9 z3nzakcjKnjx0h9vx2n*lqw|&A%_}!9Xt#?VA;-PQk-nO}XbII~Mq5bM*C7UbCmaX|740~(2sM_D=-cfdY`HD>& z(1L7;%qMf(s=sm5243YiY${o|VR?DztHFIi1zNSd;@dzgm#x1BfaL`H%NKUBb8vk^8vd^iaslaOV{Z?#GIPaK@KkcO35_ zK<*Uj<9^w1Th7AA!Z_xSVf=ev;pf=K&N;?MpO}Kr>c765b!vjRvEY97R7sCr>Nw+_ zi=KY^GRNttdYvlDIfq?P6l(}2Lc0$#J$f z#T}>cHs{gC&53?G{^rDh2Rq|Nxm)$cO=0H?O?{n%js0*w%lTrXa(>g8=KMor_l~hA z+5R!-SB>4OE^fLK@g^Xzcbu|UOVLA=Wv-Fw%E%7sMt1CI2{{qBxUvZ`QV~s)MntzT0AbV(CqpI2THfJ*oLI^9vmZ0DbkcB z%FzFb(A31#T}XSfZ0gkAG3Pnt_dI?-#_t9EUc~Pu{C zSurCvV|U*~PHhYs^{LCQ&B@M*U7Yi!s+_u<`*LDAI~kwc^b(#nwcb>Y!SBE+V^cpaVJ6S!v|gpV0H<80b5;$$CEj`Ha#;l)|g1_L-^32K@Dfpw93jZ z7b!p13WHP^$CEMDzytQssx8!LMd17xwU)SKSV-<91LpTpah)VS`_FvmK{j zdZ*$l)p16bPPLU?I;GEK9_|IoP3zFfty1Ya9GW({z+uR=v`3L|tZBd%;NlhFReE|X zyF-WS8@oPMdt=u&JRpMe5#Wv;W7!v6P<>+$obS#LD+`j3mlO#7G1)RuJKa{`!0Ts_%-48G=9(E_dWc+-*|1}TFuvS ziE)~*z?$3RV&is?PmI^+Aaz(#qDaRnsw>ibza04p-zO#}>T?sbCxY)!HI7Y;)qIE4 zd^b1|3UKKq-#VFa zzJ}}1MHeNUzim`$PAK6tpzRZhS>Vt`uJUJLe1zwv`w4e!G?9oVVs4(#F&T3+eU3-U zr!fvGg&S_#i@uxo6F;2xobS#vpib{1-MiPL787`k zaM-+OP)8oWi{sY%jea^B8~p?Pp2P1&{Cq-<%*pF#Kwqodckl!M;a@4Dt#=RlhjH96;wY7O zAZ?^~0OcHDIj}KT$#EysHwVH4qF06cYOd~+bI|899A6daMqPPsUx0b;GE49DK<|Ao zjEr9PLcUMk_q{Lx$9-El&uh*lh}SK4vA$PXSaq-}YX`2o^Jmv)EtpfAwPSHj){g9& 
z;oxjx_TN>-=D$)ETkvXC?4~!XVg-k*=7Q71V_7@1XJ^L}Sy}gylj+%`V;vG>s!5ux zJ>zQTAxM#=la1}KlOLF7T5_7<1;eXm&!1g2J2rdg+0-HAywKFgscSl0olYC?L+YWR zM_avH-o%uuW2U3!MKTU}@9iKAmnnzmAaSWQ_ZZb5aPIpba8A-Q-_JWv9rJ}eSY%{< zsgZ}~1$jK=IQtm~@&MyN)-ev`!6)V!{XQs8;)yfIaT18rMeEU!Y2@J|Baf6C`IOcN z$0)0waUc&d4rBx4KpximoFR_Z=L~Ti=Z8(yDdH@BvFaX)e9O}<-=1Ki^K7sx^JkVeAr~angy;^_Chx$u6^_Otkk^X0E zN7g3WgfrTn7_Oq0-8okq;$rM(!iG?u);_e;>6Gz4q#kYd!`K8TD;U#89xO7lzSKxF zrg4n1vOm+v1B?S)$2gE?OrIf+9z$n{qsMe%Fs6+>Tx8^tQX|cn#xcsR&ouH7;{Z1> z4x|~=XNaTqIYS&7(__I|8Pk(+EMuB{mod#aGNyT4ANqqH6O3OM>e7>AT8}4;4~_|) zC*p8SFkHs8&Ly*B%I`^=#P|)N*;;?bKZ3eYf1M}dP=AKYnAZ6O`s?sOf61Tvn{vDKq)n21sK10$ ze+idy$$9X+DL=e-YHVutjKq)iJptAhc8=gZ!2q*%(Z=nzx$|k$%t}sESWs9sXThAR zIk7oA&*q**u8E76sHPh(;Ju3(PbG^FR;2~^-wC%P@3j(>aVphy<|_|o`TatPA}kIf;)tq-oU*D<9T@| zaHil3u?_^>NASfVr!R0{!MLC82iy;s`m@Xv(4i~By1x6M*5L$nxB~bJ!O)=_a5urw z;Y#2u1w)4n;0(dg;VR&(1Ve}Jz}*EyhaSK^fZ1ND)`9(#Y8_hn&?kgG2p!Fm{_2O> z9dNebY_6|_j^+sNA9DHw_ZK`cgkcjpIzaH%A!i`)K*869oI${Y1P=*0R|8)S+{y>& zpgAf11syae1w#kTNx{%Tb5byL(3}(u9W*BeLkG=C!O%f-61bHQDb%5r4=L2)QrL&5 z-FL9oaonGPd;Wym;!o7_q<@{3PIQu%ZgjGiDmq2W^ypMb=hDEwyo*&g^zUHl@At5Z z2z-s;6RwjBoGUotIzxbm2o8mvJm5UR;jl9lc&OmCu#*p*FStY483sHIm~$3AuAqa> z=@Zbw=JW~ZU~~Eebg(&n0y@~7J^>wUPM?4dHm6TO2jO%z);?0L!}qIGuS59z+naEW z@^N_tmyY{4YT4q?*7Br3M@uIMt_b19wiw4IU0DhVD#r0;4#29r)*x6Q#P;3DVx{il+9~$ z%H}mWW%HVxvUyET39oHVrAh~zQ>oHHI2F?zIqu`;P1`9h043a${ykbc(Hbq?=r%1? zbi0=6(R(3nPK`w$kCi@0e_jiGtzh)$IN))D(Vydi#|uV(76BItMt@EKo*)?gIT3iG zVD#s8z}ErWoU(aMPT9OBr)*x6Q#P;3DVx{il+9~$%H}mWCA_vdl`0)4MRpGk|9ZMt^$19ROuj``kCg)asTI9 zw)ih=dD8!dmQM7STDsArTB_(PTBb)|g|s;}7kxZe`W*c^4|twn^yhrw`GV1(3xF30 zMt?2@UMLv-xd?cXVD#r=;KhQ`pEm*D1Z;E4<~2EG^O~Hpc}-5)ye6k?UXxQcugNK! 
z*W{G&+U8WMbg(&*VS+12xL5V^slj%|MMYxEh#i2I^Xa?*ZnTff{dc4KUXX)C7aK0dvhjU1#ui zV6GXc$p+sG%ryfwwQb!Zc08=tv04V2I90%0$7;FS;A&v5W3^mk@IAm+5)U!B2AJzu zEkh072F!J=mSF~O2j)6f%W#A51?DlUfhLDnr&se|xg5BetyZKI}}wt4`V zbAII+ycd{res#UU4+3+}uWm4SA28?q>PCYf0_L1w%`vzZm~(zL&)|oFIps z{S!f0OTftofXT_0fRlTH$;p<0lMe!ulPv)!_W_fWEdeJV0wyP00#4Qflann0Cm#kT zCtJWtS;tDH4#J02>hMvmV^v;g*0Izg)8BUkbH1-`GS~;^d|%yca1@yHeN|#`44CtM zRc3G%Fz5SfnZeb-obRi0gYN<6d|$2H>r4vPu`1sSCvp02V4KrEu+8Zxu+8Zhu+8Zz zV4Ks`z&5Av0Txd0m}J&oQmMm>xGr!lEtNW4^x{Xbj&)H`5A-qTxoWlPb8?FFeYM75 za*FePwa#F2it~N7!C-QV^L=%z!Q>R@`)aenNbPPDbDxR9m$*;%rN0K`aFr( zz)8FYPU1Ci60d=icnzGyYhdBE&8bxBU~?)}ItZs?`riCJ*Rd>4ah|L0G<{A^alWta zGMJp=d|%ydFgeBfzKR-5PI11ksthKlINw+I7)(xazOS|!OippWukKCeR1&Yz=SjQ< zPU1Ci60d=icnzGyYv3ea0}HQhPNhl*n^URMK{)lH*0C&3ah|L0Gks1@alWs18B9)b zzOO!QFgeBfzWR*8H&kvDbDxRg9ejRobRiL3?`>I-&YSOb1I3~=<_6A11Iqs zIEmN5NxTM5;x%v*uYrZvHm6dhgUzW_=^&hXImn7;pwe@bKeA!WzYn0~~q zUDBC;d{wt}rl0yi_jIPe;S2fcOuyjkh3QP+XR=8@aDhobwA`d0b%#kmVVg-m{b7^- z#>Y(hMc*>%`_21iu!rtg2_8@ld8%8c$KZZ*1(uQIw%eZc5`!xy@x zGyPS=P5M4lP5OcJO!}cKP5M!{ne-FxHR-26WYXXGWs|&W*<|Jv)Q$0gn$B`+)AJFfe==E5H7yL+4%+!nUvmC8kju{W>d(Z6{K zd-inPt*tz-`28E2r%YdS;eQNfo0+gBuT&u2aipt7x`%c6;#TgQYdYzVb4^!(`o)Kg zzoxNZd(*>1hFnwG2prCho~#6hbnq@}%q{Srz_^XMS^hlqJ7oeKr{w#!@$lFv>mNrw zx_brKJ86!UGvGzENk;c@q2GOIUT2Q|hp}&rb$%FeyLegtd%0hGy&!u*WGQs)C%kXgVHZ=|1t;~X6K^hWyw za6ACVy}YshARG@u+}_?ee=v>*BRtb9@^f*Vi{n1tL_ZJ5c{uLtP4e?`oR9eZyeWPG zjtdZ;sefukZ%>rk+UW4fai4kHxR_a(z=@d$UMZV})Q{Q9p9$sNk-{v^m$JDnu&h|0&ZH}{jOnr~V**>Pe&2hF5`}UKpKl%1a z)?dcJIB;bgI5p6l?HA#=2*-oGx&A~PPXzC-_U8MOa6Ad&gT00R6dX^%@ipFJe;SUb z;W*b@;?KbG48$MeeabJ!aWTU4yi$KAj%VU{sJGOgh2vQ`&i9u4vvE8d@rQXU{JA)u zi|_((l|LWH^Km@fyTxCKe(FU_%iiujAkY+E^8RD={oioI7La%>o@lA`X z7A;gwU&;D{weK>}HQ4Xb5&Qm@VE+sE2T7ZAuSy7YT#j(=>v6FE&sl8tHR0*xi;-*H z$VhLapJ~Dl7a4gZ;;ef$GR9S-yiI;R(sS<(_ZK@8v*SHbdB!zLv^VoCPa~}5hP%pGweOKh$^=enq zR#7j69im>44Oie?w;`^g?Mg#>)(iU-d(Ok2ChCPahg(oa#&9=6n`ZCN>4vl!!(2uC zb_i+p{v+%sdg4%9*|vV|%eM9UD*8>gHFSRh`;Me+TR*)Y2u^P3%f!}cHqx?e_D2GH 
z51pXiXIu6h+~?_bWqRzlJXP6tdB+i7m&g9Yp3Giohs^6-HP+kW_r`JGsjj-#+lu?Y zw*GRkH!2OCI<%34Jym@XZ@?T^P4sGf@?_8jZsa;|yMNNo5BIqJ5O2@|S55ME=xfZCz}-i< ziq|6E>q@)NL%mKSA99%2Ch|W6amia=pRU3&`AL4WZ8P5vN66>y2xtAtTeef*cf*>m zJrK^eV7u@dVe`e-!7lToDvx&HFIgVv3TweZ?!{|wD=Ii9#a65}c92{ce6bsu?Ctb3 zP1wOABkQx!Zi8=f)f8{HwiO(c+{>qJg^6>ByvPMFrh50c+KOzX%Pn!$G;fc#71T%X z`wMIZ+d1n*I6`jHR`5Lb_ZnM)d@nxiJ{0QJJTI`(-E`kb7ltzf;dC)n5u z6Xzgpm~hv&739SL@Pa(1tzez7huGMPpln+|wh78?E!)@~E@rM=MQq7S(Tkv4a<;vyg7F!hetx6 z8@)&UL5MeUv#Vx%2mJ)>kK{+2Li;FoiZ+G)JP16bt)jgY+r<7G41Vb`fp8fE{3dfmG9`a!u(iTeF(bm{}v&;W;mrc8qsWqwjFlT(92eJxvYL$~{e~iSrcucr5yOp7(@581cv6<*NDKA#Im9{&^2G z5BD$*;$ytlv+numD{_?OA?y&#gKXe+NO<}<`iQ#nUZ#U(p9;{Q+KlWqydII+@=c>;k^qaQRuxGq~k@CnF(aA2GHb}QM^h@A= zd+|Dgdz7)zYh0BZS>S!mzXokGF6ydTaIqPmWBKP~eKan<}9bulcp(Ab2T-Y9Raek2RLgdT)l-7Je>URN)^g0y#@PI`^|bZ+OX)+q^KsILwrDr_ zZSIGTy?Frl4|!<2NxLu)ft>OgS2@nHH?P9HE)x>x>&X*0;72<8l{RS~Z9s7E#CV8* zh`eLJb`N*-dxSgr`N(hDgRbJdiZ+1vO0-Euh>tx&`+2RS-Qu;3k}$cHy}YeT=bR09^bz%t0V$#@S9m{(;>pMEI;PV%`Bc z`^!N(*qV{RbHCy$&aI@6Xun1weE#2J4g$IG8?G7yx%gYI3a6=b+AqV4F1>g!^c$?l zcYh`v`A10gY&h~;;^)JWf1!LS9HAYPHlrQONXtua=DDATBmc%UN5heSCw`sw#L62s z>^~X)_u&X_7q6G(JMA<1PWwxq(vC^~v}3e)Y#YrL*dp38juo~K#|qnr9A|s59oZi5 zz5RBF*HBiMuZJUVK(>4{9C?fQ$#CRXl-~_UI=|f~optRJ)U_q3YnPy|T~JrrvG!a& zo!;&B{7dOb+xBtSk@jvZo^vhv&epVRA(y}KNZL5awQqhqZIXWe^+wucNIol@0?B7> z#gMyuj7h&9a^KZAr89lWGk;J_zw+mQQcS<$AC4=gzvI8V==9Ohl{)>sm-R?v`p@(k zmd5lC4;_)l^kvVQ^s8Sr>2LjKlm5>CG3l$)O#1t-H0dA6Ht9b*!lYkz^bfk+&2Jx9 z)c2k%uGHl`IDA+dIbDD-K8qg?$+{LovaT$fb*1inPDkBYSL)8XGB4JZ=@)$cWsG~) z^%ceRD}VlmV)_mLV06FZzm4wE(0jV>_g;3nuKQ>DT$#r7i@s^nm;K13Uwzc1zx7{D z`a9n?>8m=J^!Ht1(m&AOq`&Femvy;o{`n2v)_0}-k1l6d_T_2jy8hQ)M{}?6*Ih>$ zk9^01oa8$e#hu|VF4jE3(1Gt_%@+(E_%7B0 z!O(&4Vl5O59r!NRBEiss?_w<$3?29`)=j{&_l16iQl*3JeMzMbvKK0?Psmx0wplLy z)i30f1D6YiKb;l8D+I%z&Pw2wg5ghR74Ry-@TapHc(q{o)42us7QyhRvj%t#a4R36 zgXX047j)2^6bv0SCj~3>`El1w#kTN#IsKq)>-eKBQ0wvlnU$ zeEMt+eEL)dK7IVar%%kEq~GnW_Ki=U8sGTz+3qL#^tsb@)}nvcN`K$wI_rSf3BKEP z)&s8>jJH@e0B;an%{?=JW~Z 
zU~~Eebg(&n0y@~7J^>wUPM?4dHm6TO2b5b!go$oI)MU9`r4NPoJ%U zPoK)br;i`_^ojY#r%yGW|Ik-P4c3V%w`1)Wf8x^z^{7A}S4f|uKW_uRO)&cNcHr9u zqd)HezC$qja|`em!RXIBf$tQI{@e<@RWSPVF5tU>ZBE&|CZ}v(lT$XY$tj!HtyHppR>$&(WXTfVT-oe{KifE*SlJFYvvB z(VshjcL+v*-Uoc2VD#rs;GKfepSyr}0o$Cic}-5)ye6k?UXxQcugNK!*W{GVYjVow zH8~}`wmFq59c)geN(bT8)0!(=1D`&XflnVl@aYrtf1uA-`^KkFjcGfME3JUf{ig(Vq_jKL~7d z%H}mWW%HVxvUyET*}NvFY+jR7Hm}Jko7d!&@Y?27s&ue9l`0*CQ)VyJ*1)GvW#H4t z4}AK>eB;xn+CLiDDdW>;yPxFK2b|i6KHevNj{bZI_#wgQ&syMG!RXJ2fgcu({@f3| zUoiUfv%sGfjQ)HC_z}VA&(8sW4%p_D&1-VX<~2EG^O~Hpc}-5)ye6k?UXxQcugNLl zwauwi>0omzRXTjqeflf`r___demwYC;hKT^POu*jm}>^=yTN`uV6GXcCWGnIhieAv z8H4H5hieAv`v%je57!LTvj)?r57!LTkJ{EP=*wmB>sT$poC=bnSS`VxLi+UKI#x@tr;t8z(5DaQ{OSdR>C=aEe)W>U^y$Mnzxt`c^y$Mnzxuhs^y$Mn zzxsv2^y$MnzdC9#efn_DuU>8A1AW<$lPv)!>C=atYza6?pFZSdOTbC`^dTo(0#4GW z4>{QqaFRZK$jO#~ll19BPPPP`q)#7mvIU$JKBQ6y;X^8Q_^8&g#$x~OTC}Zt-Sqdh zz?|=^Hw_*K%=y0hdxOUVbH1;BZEz7V=lkj(44weY`M&xmgC_!WzOVk-;Ol@n-&g4>~!;d9M1M>2q?5^L_PugUKn*_tk$GOippWul~zma*FeP_1^}QQ=IRs|7$Qg z#reK^+hB5v^L_O{$(%~!HTpb>*T6}<22SENa1yV9lXwlB#A{&Twauwi>0omzRXPZ# zV!`~dO3QOx$FexZd9Hfb^f@`j`M!G3U~-D{eRaZMa*FePl`xo`;(T9)%=b0ODbDv* z*kE#s^L>?OFgeBfzUuIxBfiHtK9hJ2oWyJ3Bwhn2@ftXZ*T6}<22SENu<+XERH}5a zIh86Mgi{}C9n0br=egalWrE zH<+B_d|zE*FgeBfzPd7*Q%Sr=pC|DeIEmN5NxTM5;x%v*uYr?z4J^F2Ih86MY)++0 z2jSGyft@lwea>+m%i`2-_VHDw&&esy_f-#r$tlkFRZoM-DbDv*Z-dDx&i7RxgUKn* z_fFB@Tf<7J6-iV-3@%NkMCd+#Lt?7&_s{wu)6oWbQ2HnI3Jp{Ywh4tlcs?ZVTW0Uq-Q$Tqi*1%y`Mlj=QQ~< zocU!2c`@Gwcvn@|W#H1@PvG5Qo@<}KeEX4cbzPueg8L;-Qx3yfPN`K+J>EBEIdv!} z!9B#MD98T3F5|PTdX%NVwHtholJCh6M?VyxAHIfqi|+Hj#=XVHe+%1&K3odc7#}XJ z@h$zII8A)MYbf#G4Rqd5o!>om{^GOcUHEK~wqf~=h)*9bJQhDL3HWgleNQ59@#)gt zx%`#*OC7W5*QE=NAAYI1KYhE%G2g$Iy!o!Rg!7$2hBr)qKc|mO2d4*^eGIlw7>4OR z4CY)teTfL4>!$~NHgOJ}&UcJ(n&q}~8fn1k{q+4JJg&!B(>%sG@Zo^%Yo^r1ucO4d zgYA9K2&Wq`HZ`ZM{3RdOh52R%dC`ZB@VOpiQ}g*W`HOEG;dBFZ)to*>IYmJ^)D?BC zV>!a-29%@ue1>v_&qq*>j3=A#<*}@)uYud-2X*K7T~GI2SKa5-H~7vqxi9r;|IKR$ z+*tyj9ZTT9<5Q5If_yiyUo6J~PfHPAitwe7OX1sNIplKSCEgLAaiu)QU4e5eaPDUB 
z+x{vXufp-Cyr=XYO3|IXyanfOL0%6 z?GV^?)6UIt?oX64upeWDW1!UJci8knbDaAVO}nE^>`xTF*J0dhzMn=fwgL5GJ)!se z^b-E>$5__<56ZQ5vvm;u*FjIs|I?IvruLBWP=|8$csN73G9Kz_yTJP`X!kAXqZQtZ z{#G1sMZd1}e&T;E;416mfajGsSBZG5yq{^`0EV;2;@nrr>w)GE+Z=HXkDKG%XJ~la z9Ou5mcM(t9VdOavoHjg0-(sJk>v_6*+4VS`zGl3=Uq4$PTMu3jKMs1d zUh7taFXkKXbw0S@gA1#@m*Jm6?;k$MZ@hmV<2i_>B)Hj3pSto2*+hPTae+9$CI4bV|+ zg5mP9IBk^JgnHNvZ4*ph2Mwp2=zh@F z$u3)LK?C%oEjZWuiw$T%dD;e?p**nxM`-V11Gb|twxciBd2jeTaJ&P@>%F&HZ2;>* z8?Y1ScB20_c)#*@<9IjXZS;Plv~`nYL+;lk28!n&afQ zX^UfVa@n*+bDUf@b#IQdElu5<<7`V)*JE+E9c_%wH(O6zr$4tIv@stCJ?3`#LKFKh z!g~HR_b zALz<_xnC4qFEw$Vx*2gk)#R$J-XB~2qHy1N>9cO+F7Hp;FADW(??Y+sSKSCX&%M1o z4&F}?egdoMVA-s_x%*wq4fK%s`TFROK`mON3Oct zJFfi{NPmeBl;tnE5#MX^OA&AR3;6J(cT)R3F?^UE;U3>IoLlizSH(OBZ!`^jaTWXz zRCzAGVcXV5+}pbp@mBr9Rn?w~k|(!d|M@*$dX)QmTl2%d-erh)%d4)c@jBwWxZ9f+ z0CRsaZ5M5jv^(t&eGHJpv}LqE3lW#RrTtojWAc;yX1|ug*8usv7~!lxdCPWMiv8&1 z>rDt}Td-YdUu?eEI@o1?ROQi*{3Xkytr!drp2~+(0mnt)2f)eFwxW*yDb`{y`8Kap zG}D9~EHbiwIofUQo37gKT@U_Mx;Mytvo9Br5*a%8_pUuU)mn>l~%O zHR{vehthKF=O;I5D|jA0l#H!FzOV=TX{X3ba+G=@>;UzG)V>B*pwG!!+6vYSK9r2D zFmVpjh6#6VTR~o|0x!s8+6vYQK9r2D2+Af8M8EcB+j`Mf=(dLL+ExT*lar#KwiUCC zzcpJY+6tB}z5twaESt7Mx2xESQ&YgQ9B@zw$j-)p~=qZK4fm^`nINuoK#k z(kAp3Im+@7c8KLcHt<>`Jf*J^>e}9qQU&^x9HouodH7M%Hj4TnkHfT&!rKbe?e_mR zJf%&d{_vyJQ~OamM}6p@MB8fEH0?(zD35#*o$Rt{hjd#*KkY~99Lu9$5|$_TEmFVJ zl}Fp7+l$u{_))5aUVbPXdDzR1u0xw(W2V~g<-w14kZ*8*;v-)51>wkNz5M8U#EXT) z>JhI%+ZSnn-k-z}Z~JB8$mhI5eLt}ioZ)q9H~OA7#q{;d`W}U4-~TS|Wgz#QNcN*7 zZ9+T3JoiFJ-pA~N?I9OygM9ZRU*4;<=8Jn3=KBcp-2gqSJA}0#C0^IW{_uW9@~2(7 z9r3p!KJAaJozRAg9omBPY$I}?W3n2&Wc${DW8}{E-{D>da>sw*z8!KWxI`PY8#ajX z?+@bd3F7ZXeA-%$8QK_|f3}{sUVkZlXm>tN`p^~?gWvj|XqI{Yun}!nb75HB3c3B# zuyPQtpFx=O$$pg3ue3>XX#;}$r%V(75P8Rb6+cRwkl&8W!`hD$Z9uCZCB)~m2wv-G zw|Fh%I4MFnpBI#x^T~dc(8sh*)ZO-@#6G315kE>i4?jxA$D7Gd`%&U`kYkLtl^iCY z#E%l|0zXQ|KAlE4wuye0aT?udr&ve$QPTe2*d~a7(AJNBltd5ODcujyRr^soO}S@k z6WS}5EB8)pyALXt_DZ*5V6Qf#J$7ChR^msg0(f_iFupgk?$!Hyvz>zc1G{xQ!uRwG 
zt2-d~_7Bp*)@%XZcXe3t{z&?W_UlfB*A5A*t&sbNh1Fe^Wkv>Q0N2w=pPr>kE)C;(mVE8b?gMg#G1;d9?CUB-;_%P}N+($5c z81)72D;PeE`T_R?ZuOfJ3h8gkcNM=j(BTTixk4~>=my+PFm$*Q_)5XhApZg zpgAcRI%rM`h7OvOf}w-vq+sZvIVl)AXifsR@*#yfwDKW^I($^$luvutd{cg~^!Iz_ zoATEPK4HEopDQ?Fz9~OMaL9anJ5O-fe0zJS;575??R>!<%(u6P0k`hah7LBTPe6xk za85XV0y@~7J^>wUPM?4dHm6TO2bm_&z>ks>ovYu`ckv8}{z(+pi7C^u_KQF9ql9`Cp#KV_&(WX5frkr5e-;84 z3Pyj903IP2{W%hNq+s;tDBw|o(VwG%M+-)OjsYG6Y;$Tb!i3l46keV@D!eAAa)E`{ ziW(;793X;77@c{xYyr zM*}-${3xYIll&-wQ*+VBbEVJGpYwp{2}Xa;2c9n&{kZ^mfnfCKLg0mh(VvTe7YRmx zE(TsK82xz@@J+xrr)*x6Q#P;3DVx{il+9~$%H}mWW%HVxvUyET39oHVrAh~zQ>oJ7 zlkP_;0#2z>W*w^ver>pBpvD+n4a_wIb*;ho0CUYijW?Knl(=S~CKyaVN?bEg*BMMd zN?bEglMSXHC9WB$scq{P^dHm}VJ!nq9Qsk>I#$co2Gfrc*RfizF_?aoxQ^8_#9;bS z;yPB#P=o15iR)M`!wjY$C9Y$&3^$m5l(>%7GNR3IPAYYfb&FK$AnRE4qm+fVQPWLZ z(T@`6{K_+!ev~-pSJxX%KT4eQs~ZfaA0^KD)r|(zj}qtnYL3D5qr^GCnrATmC~?lO z7PRq!{)5QLmVlG=qeM=&1e~NFC33PQ;3WMhk&`U}C+SCtoNNg=Nk2;DWJ|zF`cWb$ zTLMnfj}kfA0#3?0Rw{K6KBQ8Ik7^yO5__fxqixk9)8BUkbH1-`GS~;^d|%yca1@yH zeN|#`44CtMRc3G%Fz5SfnZeb-obRi0gYN<6d|$0>^LbN=J<~R)?*_Ix?E~AKjsn}9 zjse@8t^&3>T@7q=`W|56wCvGNr4F)3JC!<^eg7YF9V^)PPfl^3t5%ynC#N{yS8EI= zr#RnN>kKBRINw(r3?`>I-&eO9OippWuQnS@PI11kZZnvi;(TA-k<6(iUZc;GcnzGy zYv3ea11IqsIEmN5NxTLYUfY~Xl@2ziQl*1%Di+KStF&wh{DGhJjX!WFTBF0=sPPA` zqQ)P1dQ|%Zw>ZUluDa9oIXT7ozPig`a*FePb+^If6zBUYYA`v)`M#<$n4IE#U)^Ic zImP+D+Ga30#reLvHI-&YSBOippW zuO2d(oZ@_6J)F#`BwnM>lXwlB#B1OrUIQob8aRp9z)8FY7GB$&N|g>ar&6VZaO&y6 zPMy~}mc^-J_VH&;pOaIZ@2k%lOippWuRd=uImP+DiW^K$alWsR@ z`|2@+$tlkF)t8ewmBefGc@nRIlXwlB#B1OrUIQob8aRp9z`|>rQ>oIy=2WV55Keul zbu5chbJ@p_n?5I}<`I9zU~-D^>u^EDXwFwZx~EYaUDy2 z(_nIn>sabr$(%~!HTpb>*T6}<22SENa1yV9lXwlB#A{&Twauwi>0omzRXT|OB$3)L z!jss)9PE#6^`rFUqrIKw#%~ATiR3$2#M)?~dPaye%kG_Sx@$-0ZV2LOi^)e5mHa{}^0q(lk&a>@f9_ z^s?W+?(ty1{b}-dg8VXryqK@-wcr1E;CJH``HSBd*=Jt|{j|@G)09&bltW!n&jyww zd+h5_j^1N`igIkfGP1|M9%X4iN?F_=_hcjeDBXsBsIdJgA_-Wl*M5|64xCN)qh#XOm6~&> z_oIZdseMRT`M3H}3i8bi@=Effgt4jp$ebpB@jE1(Zh)?u)2Aq>C@6=zqK?Ualu(Z5 
z^BKw!J|96jGM;R{YmQHIoK{~-$oGuCl&JGr`cjI(w?hPeA9_Ldf^7ArL>mTfXCgck z;e8?d!UseaWEOD58`vN?AoALsH> zMn7+g@ul=Sd?}eYPZi)?0n%l8)BHjl7vebEo6+hoW&i828}w_3__W7e5vMDBD0PGE zMqf$@r~NH8=NmE*o`LZ0klkOhd?`WK4j7lr%d~TIysI~$)o;pvj1%oQ#pHL`^g(m{ z3U82a+8t$jgIoQk)M4yu&YwmvC(tYNGJ-E19X4AenSJqAuw?wQ)7 z)yAV-Jtodju8fI#+ArAkk!aVE=%XC3*dLAK(dgIy-pp1%D*I`l;2&iy&W%O90p2X_ zr^ImfSo}&ax6PLl+Z=HXkDKEeUY>7w+Z?~j%Ww0gggog>$?zC`3%_#j^QFYR5XbQO zSiFZ<81T6{&hf$hG1`|B^J?XDbDZPDaJMksooJj2~%agGn!4`?{s9OpRU zo;{l%c0Es5FS{P6)7Ol*_v>ftW9z|d;>SS`+6wI_=Xc=CG~=6K9JnwJTo~xh_A^b` z!6GB;i*T+8=LUImjW4C&!IzRbcW5HcO$5KL_U8MOa6Ad|273!zZA1O<(3cp`EC=uQ z>^JtKIZhkJau9ZaK}WF(hRes|v{7=8R}Y(^ZGy?;py70LoVLo; zsX0ztCHHm>uxr{ToTENs3+iF(v@fNgJX++F1~i~NZ3E6wp4fmR zwD+(9Q_vSv&==Qui~VUho`&OGZ%L~SU_EF9X5icm^xqKgQ+_dyixDr+D{b?o#QYF% zCeF>oxuM=te-@5s;W*!0-sVe*`C)9&#<|(ZZ@5Q;7iHy@mQSpjJ@#Y>0nXHe zjdABoja_1j=Zm%M9LP+kcwu5uEP)e?b}ZVqsA|!!zRtnMH0ROAEN5F|-?a3m`{zlL zznYT7!m1eZU6`So7G^Bmzc8ViFyC+z3m5LE>=wJYsRQ&}Saq-}Ye&|$togHRvvy?H z3`ZHU`L9&P3JzD5V;(gZ8tOA`2A_faL9@O_l~9k zmpxWH;IgdRtkTk0c3D|=u@%K1TK%y|~S zA2ueOA2p&%u|8G(clA&7uXXaUPooODC$egtE|~ZA&#J<46&y=r4)|vTW+q-7)7m}dUNA`NgY3CNf{8`eTRT|7kI0un5r}OK2 zI{t|6l-x%x{>E(oVzwVK+ke>U{8Z7$!>RmIoz4%{>HJQ$_cO)4%;NjY_Vs1^^rDXi z@#FQE^w&fi{c+Zhw)^9(pXMQKneE5Q_FF}6ihnBGFBSPHK96i)N6bh38ri;#n2-1@ zvi%h?pHulKVm_zxM?}A8^!p+HrEGsvY-jNsW&3O*H^pbwU($aNx%o-al|22V=qj8g zcf=o(?f;0}IhCKIN`#-v&klW%oXW4x2l>%qUbf$yGx^71{PzBE$V>6BWcyQMKb*=> z(Z|Ur47vSD)}MU;B8d0T{IteNkpKAAPyztWCn34 z8Hepy6vUxCRX$oDJS&7g4GyIJOg7Wtr?fse<{EwxB=R{_YH;iGQms!AhxsrL(<4m- zCGcUbPY|alh*KKGp=2BfYcrg8VJ_b~50kmnq-m9vvPN0=WOZ--Z3jHRlJAu4e{yMW zz5a`H=~!FjF`xD7Z{XnB?E-vnL9Zh}9>l}BdibSd7@wOmKA(rmGa^2-)%hXpU95Gr zPs?YP?ZZUBhUZPWd`>9()jx0Y<2liLL2o2le1HGtsXh2;=iD(a(-2`q^Qk zpB=`w%|6>X?b_t|sq_P%>z{HS9K$-ohndfEY~VSc9vhJMb5uTq1U6#>$9il)f-?u? 
zxgjO685=5mlm>Aq8Ap!|JY&&g15%F-gOBjp5U?2=I7Z&} z?VsP2A{_Y~)?*`xWmqt%YBF-lqAA zbHXn^8`Jy;4~1WR&ZFbuobanI2;;LN;g=mx_+^I)zw9v4uis&R~+v4}eao?$~y4Kt3b8eG$ zy&D&g1FtW5ajWc|A%ob_b=nSWp0-?9EZ z5Y9TY{v3lJXZ<+-KhFBeT!Gvfe6bsu?Ctck(7uCja@7=Xx8_SO_%hYI-_J&PZi%a= zd3!Wp^1zqr-d;Zk;d#qkHN)GdIhc=pJg?U8kMR7Jt}6ES>wF53&-LCT{s4p*tZ~&$ z?|{yy5c%BT{f$2m;e{JqHOs5h`HVz9H+qlygAhJ)v#Vx%2Q?qbqb&GpCFeO_$a8X@ z?VJPdknt!q31cR3#DtC?#& zyiRcJ(iU)x>Gql5N1YVjHln*ajb^FW$e_ zx^Z0nwbo6~8=YVt3#sR^2G?gAZ04~z#vE{e5hUh!2TBdDqXahd*dUG*#K{ceP%;kN zu_%Z`d8&N0KKQP#p2tG!d91-lG7UEKSRCtlEF|)2{Z6lZlUL7UgE&qQhw1e^)?hP_ z4dN69ahRTQD3R~svtHvF70hu*wap>UcOOb4Y$; zm|^^Ou*@Otcru5y!(23P)Te&QQ-@q09_`O>_$3;C_=eR*V_K&&kp<8ukqlUxO$BTQjZOT>-oK2U^6yw49@It|BYRK zYnQy&TnyqkCam>)zs!em^wAkOLXNm}Ds6!3ph8^8E{TJnM4!WDk;Te6z};Gyt~-$m8& za8CHeZ?H0q-&PfV+3|#5c9`(X4r}HA>VW^N+w|i%GBsauPW0nmH(>S!FAjYY6d({yxL!f4Sv<_={Wt|D59^*ZYckGJL zmRX~(_7XUDEo7aRA1@2)XE-%T5z7_Zamch|;^Gw-gAAHSR7!FlAj$03BtTMZ0^V4f+0B7W9z!*Cr+=>hlw`C+^7L# zkMF|seveOm#P9Jb$9|8mrnN0j^DbZK=fHzllLP)K{4)CFrMuXtmVte0jsnK>U~)4f zoax_Rw7scwpUm{*INurPcis>DcAxYg?}Ws8zI)p7$9!L@+tD~U>~!4T6u3+t%j`(%*LpUFPzN1pZJ9bVR z-l;R^HtLyh#y?U%8_xJG<@4c;f1!LSoWW<;>gVB%;MsKspIxih!x?;bt^Pio@f*w~ ze)IjX^AC{!_@l7%kC4B8A?*AU*!fq;|NJ%g z+;g3#+tG(nzsVD}H$8kJaX|ulCs4lxzEiJSY@FlIn~b(W9Xe;vR42o^QN0HYb?kh@ zpQ4@Ty@TKJ=({-o-t)~JTC(4{>_qnQ%U*-7Eng33yg~WRaK>AdPlhvoMfqLU0WxTV zmY@w= z!XA3fggx|@;~anc{b7mY?++V?{N8=g_2ZZBXv!PM^98RRz`NV_b04S3e8ZQ|GUb@h zmwE5p1J6f8&ab{TH1GZIXQ%T$ZG0!810}wX(2)|~MYwqiSn|L zb0OvB=KXH5V|-t|ve7(`!t@Aw#{c*C|6v>8C3O9_s*{ly1~?;-X@UG>a6S;u@*w=} zlZg#HYzR)MApDmSF~B0m;~)*p9RaCAq4zGi*a_he2tOS34&~uE9O~^}n$}B?8|Rs) zp4;<3&rUDz_q{dueecBjPPs!iuPR$!zG?H273)@&l`LH`w6J7j`KF2uYs%M8Bv`e) zu%KjU{>Ux)!wYjaJ45c+uxZWajb+Qqhg7UyzjE2C^7TVDZdg{bW^Kv3@=Ys)<00!- z7v$xcELYsIyaX9J#jDq^Dc>|YbHeqLGK-5RW#$jd&CAWp9I&dQV&mq~Lx!weU9sx6 zrMb&CtQ#`>hU?Z1S-bZ3b=Pd%wBeTWWfi$NnK^9KvXM)d6$~F%HoRcis1fDEhYnq_ zyllj>rFoD;%Z3dfv0`Ll!N~lpXHA%U-L%P*MlT;aV%hMrvb^$PN#A%T`yEZyGlIEV_R3xyH|3v)n0}v}opp>!)8Q)&H=P?TyxkZ4UnAAdsujMfRVJ 
z9|>bxc8MgDaViqqIP@RKDa(@VWc*%=usBWwSTppwsk<3=idvStclE3g%WRWBcWP3aW;Y=@fu9f!}1qpEd zO!{jFhX-j{2mL4G=?W8ZV-PHMQrfR`5Izo)VUm7hkbYy3eos3)*{*o_l`;AU*|iEu z-=3)L(=Q6r)2iBk-w4XT#SX-=q!+n52y36dXEzfAYgb8sl1}6%oRz+_BR(CZFAW08 tQ^u3_6aH>Rn6zCGRg26kO$nITCjCjd5?F(SQ>8x?q+cfy1KB?P{{zDxHBA5j literal 0 HcmV?d00001 From 389e9f17e622dfce8ef1445d074f1812163613af Mon Sep 17 00:00:00 2001 From: tingchen Date: Wed, 27 May 2026 18:25:05 +0800 Subject: [PATCH 27/43] rename f16 to bf16 --- aiter/jit/optCompilerConfig.json | 2 +- csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu | 8 +++---- .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin .../fmha_fwd_bf16.csv} | 0 ..._asm.py => test_fmha_fwd_with_sink_asm.py} | 22 +++++++++--------- 6 files changed, 16 insertions(+), 16 deletions(-) rename hsa/gfx1250/{fmha_fwd_f16 => fmha_fwd_bf16}/fmha_bf16_pertokenBf16_hd128_128x256.co (100%) rename hsa/gfx1250/{fmha_fwd_f16 => fmha_fwd_bf16}/fmha_bf16_pertokenBf16_hd64_128x256.co (100%) rename hsa/gfx1250/{fmha_fwd_f16/fmha_fwd_f16.csv => fmha_fwd_bf16/fmha_fwd_bf16.csv} (100%) rename op_tests/{test_fmha_fwd_f16_asm.py => test_fmha_fwd_with_sink_asm.py} (97%) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 410ca8d061..81849a45ed 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -1120,7 +1120,7 @@ "extra_include": [], "verbose": "False", "blob_gen_cmd": [ - "f'{AITER_META_DIR}/hsa/codegen.py -m fmha_fwd_f16 --output_dir {{}}'" + "f'{AITER_META_DIR}/hsa/codegen.py -m fmha_fwd_bf16 --output_dir {{}}'" ] }, "module_fmha_v3_fwd": { diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu index a8d725cd8b..c057c8a48d 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu @@ -24,7 +24,7 @@ // the kernel never reads its contents. Pass a zero buffer. 
#include "aiter_tensor.h" #include "aiter_ctypes_error.h" -#include "asm_fmha_fwd_f16_configs.hpp" +#include "asm_fmha_fwd_bf16_configs.hpp" #include #include #include @@ -88,7 +88,7 @@ static_assert(sizeof(KernelArgs) == 0x84, // _brd (border) kernel variants which are a strict superset (handle aligned // + unaligned q_seq_len/kv_seq_len uniformly). The csv schema therefore has // no `border` column. -static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, +static std::string get_heuristic_kernel_fmha_fwd_bf16(const std::string& dtype, int hdim_q, int hdim_v, int mask_flag, @@ -283,10 +283,10 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( // and unaligned q_seq_len/kv_seq_len uniformly (border path is a no-op // when sequences are aligned), so there's no runtime branch on alignment. const std::string dtype = "bf16"; - CFG* cfg_map = &cfg_fmha_fwd_f16; + CFG* cfg_map = &cfg_fmha_fwd_bf16; static SynchronizedCache impl_ptr_map; - const std::string kernel_key = get_heuristic_kernel_fmha_fwd_f16( + const std::string kernel_key = get_heuristic_kernel_fmha_fwd_bf16( dtype, qk_head_dim, v_head_dim, mask_flag, arch_id, cfg_map); auto it = cfg_map->find(kernel_key); AITER_CHECK(it != cfg_map->end(), diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co similarity index 100% rename from hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd128_128x256.co rename to hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd64_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co similarity index 100% rename from hsa/gfx1250/fmha_fwd_f16/fmha_bf16_pertokenBf16_hd64_128x256.co rename to hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv b/hsa/gfx1250/fmha_fwd_bf16/fmha_fwd_bf16.csv similarity index 100% rename from 
hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv rename to hsa/gfx1250/fmha_fwd_bf16/fmha_fwd_bf16.csv diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py similarity index 97% rename from op_tests/test_fmha_fwd_f16_asm.py rename to op_tests/test_fmha_fwd_with_sink_asm.py index 39b210430a..8e42b129b9 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. -"""Correctness + performance tests for fmha_fwd_f16 (BF16 ASM, gfx1250). +"""Correctness + performance tests for fmha_fwd_with_sink_asm (BF16 ASM, gfx1250). Public API: aiter.flash_attn_func (preferred) Ops layer: aiter.fmha_fwd_with_sink_asm (low-level, ~v3 style) @@ -138,7 +138,7 @@ def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = " On gfx1250 + ROCm 7.13 some bf16 element-wise GPU ops (isnan / isclose / contiguous) deadlock when invoked right after a custom ASM kernel. The - deadlock is unrelated to fmha_fwd_f16 itself (it has been reproduced with + deadlock is unrelated to fmha_fwd_with_sink_asm itself (it has been reproduced with pure-PyTorch programs). As a workaround we cast both tensors to fp32 on CPU before comparing -- this avoids triggering the buggy GPU bf16 path. """ @@ -339,14 +339,14 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): (128, 8, 1, 130, 2048, 1), # D128 q-unaligned (128, 8, 1, 128, 2300, 1), # D128 kv-unaligned # ----- Large shapes aligned to run.sh perf_v4_d64 / perf_v4_d128 - - # Same memory pressure as test_fmha_fwd_f16_perf, batch=1 only + # Same memory pressure as test_fmha_fwd_with_sink_asm_perf, batch=1 only # because the reference path's fp32 attn matrix would otherwise # exceed device memory (D64 batch=2 sq=sk=8192 → 32 GB). 
(64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned (128, 64, 4, 4096, 4096, 1), # D128 perf-sized, aligned ], ) -def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): +def test_fmha_fwd_with_sink_asm_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -440,7 +440,7 @@ def test_fmha_fwd_f16_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): ) -def test_fmha_fwd_f16_ops_layer(): +def test_fmha_fwd_with_sink_asm_ops_layer(): """Direct ops-layer call: bshd qkv (sbhd memory layout), D64 + non-zero sink. Uses is_causal=True because only causal binaries are registered in the @@ -482,7 +482,7 @@ def test_fmha_fwd_f16_ops_layer(): _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) -def test_fmha_fwd_f16_d64_requires_sink(): +def test_fmha_fwd_with_sink_asm_d64_requires_sink(): """Direct ops-layer call without sink on D64 must raise the C++ check. Note: when going through aiter.flash_attn_func, the dispatcher auto-fills @@ -519,7 +519,7 @@ def test_fmha_fwd_f16_d64_requires_sink(): @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("layout", [0, 1, 2]) -def test_fmha_fwd_f16_layout(layout, head_dim): +def test_fmha_fwd_with_sink_asm_layout(layout, head_dim): if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -578,9 +578,9 @@ def test_fmha_fwd_f16_layout(layout, head_dim): @pytest.mark.parametrize("head_dim", [64, 128]) -# Only causal kernels are shipped (see test_fmha_fwd_f16_correctness comment). +# Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). 
@pytest.mark.parametrize("is_causal", [True]) -def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): +def test_fmha_fwd_with_sink_asm_via_flash_attn_func(head_dim, is_causal): if get_gfx() not in ["gfx1250"]: return device = "cuda" @@ -690,9 +690,9 @@ def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device @pytest.mark.parametrize("init", _PERF_INITS) @pytest.mark.parametrize("head_dim", [64, 128]) -# Only causal kernels are shipped (see test_fmha_fwd_f16_correctness comment). +# Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). @pytest.mark.parametrize("is_causal", [True]) -def test_fmha_fwd_f16_perf(head_dim, is_causal, init): +def test_fmha_fwd_with_sink_asm_perf(head_dim, is_causal, init): if get_gfx() not in ["gfx1250"]: return device = "cuda" From dcfbe7b0c8bf90427f8863f5b60734232961b880 Mon Sep 17 00:00:00 2001 From: Satya Nikhil Date: Sat, 30 May 2026 16:13:30 +0000 Subject: [PATCH 28/43] resolve issue pointed by copilot --- aiter/jit/optCompilerConfig.json | 2 +- aiter/jit/utils/cpp_extension.py | 69 +++++++---- aiter/ops/mha.py | 39 ++++-- csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu | 25 +++- op_tests/test_fmha_fwd_with_sink_asm.py | 140 +++++++++++++++++++--- 5 files changed, 217 insertions(+), 58 deletions(-) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 81849a45ed..de2c3aa640 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -1116,7 +1116,7 @@ "'-DENABLE_CK=0'" ], "flags_extra_hip": [], - "extra_ldflags": "['-L/opt/rocm/lib', '-Wl,-rpath,/opt/rocm/lib']", + "extra_ldflags": "None", "extra_include": [], "verbose": "False", "blob_gen_cmd": [ diff --git a/aiter/jit/utils/cpp_extension.py b/aiter/jit/utils/cpp_extension.py index cb403da2e6..738e2da5b2 100644 --- a/aiter/jit/utils/cpp_extension.py +++ b/aiter/jit/utils/cpp_extension.py @@ -92,32 +92,49 @@ def get_hip_version(): return output except Exception: 
pass - # Fallback: try /opt/rocm/bin/hipconfig directly - rocm_hipconfig = "/opt/rocm/bin/hipconfig" - if os.path.isfile(rocm_hipconfig): - try: - output = subprocess.check_output([rocm_hipconfig, "--version"], text=True) - return output - except Exception: - pass - # Fallback: read HIP version from header file - for ver_path in [ - "/opt/rocm/include/hip/hip_version.h", - "/opt/rocm/.info/version", - ]: - if os.path.isfile(ver_path): - with open(ver_path) as f: - content = f.read() - if "HIP_VERSION_MAJOR" in content: - import re - - major = re.search(r"HIP_VERSION_MAJOR\s+(\d+)", content) - minor = re.search(r"HIP_VERSION_MINOR\s+(\d+)", content) - patch = re.search(r"HIP_VERSION_PATCH\s+(\d+)", content) - if major and minor and patch: - return f"{major.group(1)}.{minor.group(1)}.{patch.group(1)}" - else: - return content.strip() + # The fallbacks below previously hard-coded /opt/rocm, so they never + # helped users whose ROCm lives elsewhere. Resolve the ROCm root the + # same way the rest of this module does (ROCM_HOME / ROCM_PATH env, then + # `which hipcc`, then /opt/rocm). NOTE: the module-level ROCM_HOME global + # is assigned *after* this function is first called (see bottom of file), + # so we must call _find_rocm_home() directly here rather than referencing + # the global. /opt/rocm is kept as a last-resort candidate so behavior on + # default installs is unchanged. + rocm_roots = [] + discovered = _find_rocm_home() + if discovered: + rocm_roots.append(discovered) + if "/opt/rocm" not in rocm_roots: + rocm_roots.append("/opt/rocm") + + # Fallback: try /bin/hipconfig for each candidate root. + for root in rocm_roots: + rocm_hipconfig = os.path.join(root, "bin", "hipconfig") + if os.path.isfile(rocm_hipconfig): + try: + output = subprocess.check_output( + [rocm_hipconfig, "--version"], text=True + ) + return output + except Exception: + pass + # Fallback: read HIP version from a header / info file under each root. 
+ for root in rocm_roots: + for ver_rel in ["include/hip/hip_version.h", ".info/version"]: + ver_path = os.path.join(root, ver_rel) + if os.path.isfile(ver_path): + with open(ver_path) as f: + content = f.read() + if "HIP_VERSION_MAJOR" in content: + import re + + major = re.search(r"HIP_VERSION_MAJOR\s+(\d+)", content) + minor = re.search(r"HIP_VERSION_MINOR\s+(\d+)", content) + patch = re.search(r"HIP_VERSION_PATCH\s+(\d+)", content) + if major and minor and patch: + return f"{major.group(1)}.{minor.group(1)}.{patch.group(1)}" + else: + return content.strip() raise RuntimeError("ROCm version file not found") diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index ad0c1a29c7..74bd26cf52 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -1440,9 +1440,25 @@ def can_impl_fmha_fwd_with_sink_asm(): ret = ret and (dropout_p == 0.0) ret = ret and (cu_seqlens_q is None and cu_seqlens_kv is None) ret = ret and (q_descale is None and k_descale is None and v_descale is None) - # D128 kernel ignores sink; if user passed sink_ptr, fall back to CK - # (which honors it) so semantics are preserved. - ret = ret and (sink_ptr is None or hdim_q == 64) + # Per-hdim sink eligibility: + # + # D128 kernels (`_rxy`) compile ENABLE_SINK=0 -- the kernel ignores + # any sink buffer. Routing a caller's sink_ptr to it would silently + # drop the sink term, so we fall back to CK whenever sink_ptr is set. + # + # D64 kernels (`_rxy_sink`) compile ENABLE_SINK=1 -- the kernel + # ALWAYS reads SINK and adds `exp((sink_raw - max) * scale)` to the + # softmax denominator. There is no "skip sink" mode on this kernel, + # so calling it without an explicit sink_ptr would either (a) crash + # in the wrapper (it raises when sink is None for D64) or (b) if we + # silently fill in zeros, change the no-sink result by an extra + # exp(-max * scale) term in every q-tile. 
Either way the documented + # `sink_ptr is None` semantics of flash_attn_func are violated, so + # we require an explicit sink and fall back to CK otherwise. + if hdim_q == 128: + ret = ret and (sink_ptr is None) + elif hdim_q == 64: + ret = ret and (sink_ptr is not None) return ret q, k, v = [maybe_contiguous(x) for x in (q, k, v)] @@ -1463,14 +1479,13 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, # no API-side permute. softmax_scale is forwarded as-is (kernel applies # it internally to Q·K^T). sink_ptr is in AITER post-scale convention; - # the public `fmha_fwd_with_sink_asm` wrapper multiplies it by - # sqrt(qk_head_dim) and auto-fills the D64 zero-sink case internally, - # so we just forward the user's sink_ptr here. - sink_for_kernel = sink_ptr - if hdim_q == 64 and sink_for_kernel is None: - # D64 kernels always read SINK; pass an explicit zero-logit so the - # wrapper does not raise on us. - sink_for_kernel = torch.zeros(nhead_q, dtype=torch.float32, device=q.device) + # `fmha_fwd_with_sink_asm` multiplies it by sqrt(qk_head_dim) before + # launch. + # + # `can_impl_fmha_fwd_with_sink_asm` already enforces the + # (hdim, sink_ptr) compatibility matrix (D128 requires sink_ptr is + # None; D64 requires sink_ptr is not None), so we can forward the + # caller's sink_ptr unmodified here -- no zero-fill, no None-coercion. out_, softmax_lse = fmha_fwd_with_sink_asm( q, k, @@ -1478,7 +1493,7 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): float(softmax_scale), bool(causal), True, - sink_for_kernel, + sink_ptr, out, ) S_dmask = torch.empty((0,), dtype=torch.float32, device=q.device) diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu index c057c8a48d..f387774290 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu @@ -24,6 +24,7 @@ // the kernel never reads its contents. 
Pass a zero buffer. #include "aiter_tensor.h" #include "aiter_ctypes_error.h" +#include "aiter_hip_common.h" // HipDeviceGuard, AiterAsmKernel, ... #include "asm_fmha_fwd_bf16_configs.hpp" #include #include @@ -147,13 +148,33 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( hipStream_t stream), (q, k, v, out, lse, sink, softmax_scale, is_causal, return_lse, stream)) { + // ---- null + multi-GPU safety ----------------------------------------- + // Validate pointers BEFORE touching anything on the device, so the + // device_guard below can safely read q->device_id. + AITER_CHECK(q && k && v && out && lse && sink, + "fmha_fwd_with_sink_asm: q/k/v/out/lse/sink must all be non-null"); + + // Pin current HIP device to q.device() for the duration of this call. + // + // Even though the ctypes layer (aiter/jit/core.py) already picks the + // stream via `torch.cuda.current_stream(tensor_device).cuda_stream`, the + // launch path inside AiterAsmKernelFast::launch_kernel does + // `hipGetFuncBySymbol(...)` which resolves the kernel handle against the + // *current* HIP device of the calling thread. If the caller's + // current_device differs from q.device() (common in multi-GPU code that + // sets a default device once and then operates on tensors in several + // devices), we would either resolve to the wrong device's module table + // (returning a stale / null hipFunction_t) or submit a launch that + // mismatches the stream's device. This guard mirrors what the other ASM + // MHA paths achieve with at::hip::OptionalHIPGuardMasqueradingAsCUDA; + // we use the torch-free HipDeviceGuard so this TU stays no-torch-dep. 
+ HipDeviceGuard device_guard{q->device_id}; + // ---- arch + dtype validation ------------------------------------------ const std::string arch_id = get_gpu_arch(); AITER_CHECK(arch_id == "gfx1250", "fmha_fwd_with_sink_asm: only supported on gfx1250, got ", arch_id); - AITER_CHECK(q && k && v && out && lse && sink, - "fmha_fwd_with_sink_asm: q/k/v/out/lse/sink must all be non-null"); AITER_CHECK(q->dtype() == AITER_DTYPE_bf16 && k->dtype() == AITER_DTYPE_bf16 && v->dtype() == AITER_DTYPE_bf16, diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index 8e42b129b9..0bd5c884c1 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -51,9 +51,35 @@ # # ) # noqa: F401 (kept for easy swap-back; see doc-block below) +def _is_gfx1250_host() -> bool: + """True only on a gfx1250 GPU host. + + The fmha_fwd_with_sink_asm ASM kernels are the only ones shipped in + hsa/gfx1250/fmha_fwd_bf16/*.co — there are no gfx942 / gfx950 / etc. + binaries. On any other arch the ops-layer call raises + 'no kernel for arch=...' at launch, so without this guard the tests + would FAIL (not skip) on non-gfx1250 CI runners. Computed once here so + every test (current and future) is covered at the module level, instead + of relying on each test remembering a per-test guard. + + Robust against the no-GPU case: get_gfx() queries the runtime and can + raise when no device is present, so we short-circuit on + torch.cuda.is_available() first and swallow any probe error. 
+ """ + if not torch.cuda.is_available(): + return False + try: + return get_gfx() == "gfx1250" + except Exception: + return False + + pytestmark = pytest.mark.skipif( - not torch.cuda.is_available(), - reason="ROCm/HIP GPU not available", + not _is_gfx1250_host(), + reason=( + "fmha_fwd_with_sink_asm ASM kernels are only shipped for gfx1250 " + "(hsa/gfx1250/fmha_fwd_bf16/*.co); no GPU or a different arch — skip" + ), ) # --------------------------------------------------------------------------- @@ -347,8 +373,6 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): ], ) def test_fmha_fwd_with_sink_asm_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): - if get_gfx() not in ["gfx1250"]: - return device = "cuda" torch.manual_seed(0) @@ -447,8 +471,6 @@ def test_fmha_fwd_with_sink_asm_ops_layer(): CSV (mask=1 rows). The test purpose is to exercise the low-level ops entry point with a D64+sink call; causal vs nocausal is orthogonal here. """ - if get_gfx() not in ["gfx1250"]: - return device = "cuda" torch.manual_seed(0) @@ -485,12 +507,14 @@ def test_fmha_fwd_with_sink_asm_ops_layer(): def test_fmha_fwd_with_sink_asm_d64_requires_sink(): """Direct ops-layer call without sink on D64 must raise the C++ check. - Note: when going through aiter.flash_attn_func, the dispatcher auto-fills - a zero sink for D64, so this error path is unreachable from the public - API — we exercise it via the lower-level ops stub. + Note: through aiter.flash_attn_func, can_impl_fmha_fwd_with_sink_asm() + routes D64 + sink_ptr=None to the CK fallback (the D64 _rxy_sink kernel + compiles ENABLE_SINK=1 and has no "skip sink" mode; auto-filling a + zero sink would change "no sink" semantics by adding an extra + exp(-max*scale) term to the softmax denominator). So this error path + is unreachable from the public API — we exercise it via the + lower-level ops stub here. 
""" - if get_gfx() not in ["gfx1250"]: - return device = "cuda" q, k, v = make_qkv_bshd( layout=0, @@ -520,8 +544,6 @@ def test_fmha_fwd_with_sink_asm_d64_requires_sink(): @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("layout", [0, 1, 2]) def test_fmha_fwd_with_sink_asm_layout(layout, head_dim): - if get_gfx() not in ["gfx1250"]: - return device = "cuda" torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 @@ -581,8 +603,6 @@ def test_fmha_fwd_with_sink_asm_layout(layout, head_dim): # Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). @pytest.mark.parametrize("is_causal", [True]) def test_fmha_fwd_with_sink_asm_via_flash_attn_func(head_dim, is_causal): - if get_gfx() not in ["gfx1250"]: - return device = "cuda" torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 @@ -635,6 +655,94 @@ def test_fmha_fwd_with_sink_asm_via_flash_attn_func(head_dim, is_causal): ) +# --------------------------------------------------------------------------- +# Multi-GPU dispatch test. +# +# Regression for: `flash_attn_func` must launch on q.device(), not on the +# Python thread's current_device. +# +# Two correctness layers are exercised: +# (1) Python ctypes layer (aiter/jit/core.py) picks the stream via +# torch.cuda.current_stream(tensor_device).cuda_stream — should be +# q.device()'s stream regardless of current_device. +# (2) C++ launch path (asm_fmha_fwd_with_sink.cu) installs a HipDeviceGuard +# pinned to q->device_id, so AiterAsmKernelFast::launch_kernel -> +# hipGetFuncBySymbol(...) resolves the kernel handle against the +# correct device's module table. +# +# Without either fix, calling with current_device != q.device() would +# either crash in hipGetFuncBySymbol (nullptr handle) or submit on the +# wrong device. 
+# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("head_dim", [64, 128]) +def test_fmha_fwd_with_sink_asm_multi_gpu_dispatch(head_dim): + """flash_attn_func on a non-current device must dispatch correctly.""" + if torch.cuda.device_count() < 2: + pytest.skip("multi-GPU dispatch test needs >=2 ROCm GPUs") + + torch.manual_seed(0) + batch, hq, hk, sq, sk = 1, 4, 1, 128, 1024 + scale = 1.0 / math.sqrt(head_dim) + dev_q = "cuda:1" # tensors live here + dev_other = 0 # caller's current_device when we invoke the API + + # Allocate everything on dev_q with current_device set to dev_q so the + # baseline run goes through the "current == q" path. + with torch.cuda.device(dev_q): + q1, k1, v1 = make_qkv_bshd( + layout=0, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=dev_q, + ) + sink1 = _d64_sink(hq, dev_q) if head_dim == 64 else None + out_baseline, lse_baseline = run_kernel( + q1, k1, v1, scale=scale, is_causal=True, sink=sink1, via="public", + ) + # Clone so the next run can't alias-overwrite us (defensive; the kernel + # writes to a fresh `out` tensor each call, but we want to be 100% sure + # the second comparison is against a stable snapshot). + out_baseline = out_baseline.clone() + lse_baseline = lse_baseline.clone() + + # Now switch current_device to dev_other and re-run with the SAME dev_q + # tensors. Pre-fix this is the crash / wrong-device path. + with torch.cuda.device(dev_other): + assert torch.cuda.current_device() == dev_other, ( + "test setup error: failed to switch current_device" + ) + out_xdev, lse_xdev = run_kernel( + q1, k1, v1, scale=scale, is_causal=True, sink=sink1, via="public", + ) + + # Outputs must land on q.device(), not on the caller's current_device. 
+ assert out_xdev.device == q1.device, ( + f"out landed on {out_xdev.device}, expected {q1.device}" + ) + assert lse_xdev.device == q1.device, ( + f"lse landed on {lse_xdev.device}, expected {q1.device}" + ) + + # Same inputs + same (deterministic) kernel -> bit-exact match across + # current_device contexts. If the guard or stream picker regresses, + # we'll either be here with a numerical mismatch (silent wrong-device + # launch) or we'll have already crashed before reaching this point. + _cmp( + out_xdev, + out_baseline, + rtol=0.0, + atol=0.0, + msg=f"out differs across current_device (d={head_dim})", + ) + _cmp( + lse_xdev, + lse_baseline, + rtol=0.0, + atol=0.0, + msg=f"lse differs across current_device (d={head_dim})", + ) + + # --------------------------------------------------------------------------- # Performance tests # --------------------------------------------------------------------------- @@ -693,8 +801,6 @@ def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device # Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). @pytest.mark.parametrize("is_causal", [True]) def test_fmha_fwd_with_sink_asm_perf(head_dim, is_causal, init): - if get_gfx() not in ["gfx1250"]: - return device = "cuda" torch.manual_seed(0) From 8924874302369abf7f152c9026c8fbb9b636343c Mon Sep 17 00:00:00 2001 From: Satya Nikhil Date: Sat, 30 May 2026 16:32:09 +0000 Subject: [PATCH 29/43] reformat --- op_tests/test_fmha_fwd_with_sink_asm.py | 50 +++++++++++++++++-------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index 0bd5c884c1..f2453c3060 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -51,6 +51,7 @@ # # ) # noqa: F401 (kept for easy swap-back; see doc-block below) + def _is_gfx1250_host() -> bool: """True only on a gfx1250 GPU host. 
@@ -685,19 +686,32 @@ def test_fmha_fwd_with_sink_asm_multi_gpu_dispatch(head_dim): torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 4, 1, 128, 1024 scale = 1.0 / math.sqrt(head_dim) - dev_q = "cuda:1" # tensors live here - dev_other = 0 # caller's current_device when we invoke the API + dev_q = "cuda:1" # tensors live here + dev_other = 0 # caller's current_device when we invoke the API # Allocate everything on dev_q with current_device set to dev_q so the # baseline run goes through the "current == q" path. with torch.cuda.device(dev_q): q1, k1, v1 = make_qkv_bshd( - layout=0, sq=sq, sk=sk, batch=batch, hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=dev_q, + layout=0, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=dev_q, ) sink1 = _d64_sink(hq, dev_q) if head_dim == 64 else None out_baseline, lse_baseline = run_kernel( - q1, k1, v1, scale=scale, is_causal=True, sink=sink1, via="public", + q1, + k1, + v1, + scale=scale, + is_causal=True, + sink=sink1, + via="public", ) # Clone so the next run can't alias-overwrite us (defensive; the kernel # writes to a fresh `out` tensor each call, but we want to be 100% sure @@ -708,20 +722,26 @@ def test_fmha_fwd_with_sink_asm_multi_gpu_dispatch(head_dim): # Now switch current_device to dev_other and re-run with the SAME dev_q # tensors. Pre-fix this is the crash / wrong-device path. with torch.cuda.device(dev_other): - assert torch.cuda.current_device() == dev_other, ( - "test setup error: failed to switch current_device" - ) + assert ( + torch.cuda.current_device() == dev_other + ), "test setup error: failed to switch current_device" out_xdev, lse_xdev = run_kernel( - q1, k1, v1, scale=scale, is_causal=True, sink=sink1, via="public", + q1, + k1, + v1, + scale=scale, + is_causal=True, + sink=sink1, + via="public", ) # Outputs must land on q.device(), not on the caller's current_device. 
- assert out_xdev.device == q1.device, ( - f"out landed on {out_xdev.device}, expected {q1.device}" - ) - assert lse_xdev.device == q1.device, ( - f"lse landed on {lse_xdev.device}, expected {q1.device}" - ) + assert ( + out_xdev.device == q1.device + ), f"out landed on {out_xdev.device}, expected {q1.device}" + assert ( + lse_xdev.device == q1.device + ), f"lse landed on {lse_xdev.device}, expected {q1.device}" # Same inputs + same (deterministic) kernel -> bit-exact match across # current_device contexts. If the guard or stream picker regresses, From 292c9ef3dc5a22b6cd532874dd0852f41625e90a Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 4 Jun 2026 08:21:54 +0000 Subject: [PATCH 30/43] revert kernel to non kargs preload --- .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin 83488 -> 83744 bytes .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin 72736 -> 72736 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co index fad880078b6e7885b8d91e35a4646fb5da906d66..9b7c3454837c173a084309057315a20e71809e22 100755 GIT binary patch delta 3063 zcmcJRZD> zBrRGMr>oxKEn{eOy2XANo%xW|Np2@1Ep$UfY0E^^vEc`CA`ZGz&7SAnbGvLGoG=zB z|DNak&;R`HIX5RK`iVJu)ZFu@D7{xQ_1H>ql&e``g5^X%H4>6wSqX~uS@%M=?<=xoo^BJ64E6l1UKW@l0+b|+ge zY)WFMwV7Cx*_SD|HPTC5%8mRzjtIMrhW#mLTb@?OI<$!ND6F*RE(zw~h*7q2C~FEZ z(rlZqIR(J-(sCq>3GxAXzua~b|PaOFa$&RgQ5sdct^GH(my z5Mg65X!A9{s^k>TS>-hkhU|#R5U(JDc$e51*FmATggy{DE;JW(pGRmpj45*dL8T{O zR#w^Sr+UplhJ5D!kheJ)B7~DmgEp_yR9G0Ejnl1fvuJ7dRf^$Wq$4F=TxZE$%!CW; zI(tZBIX77yu(T_Ah~YC-vxXbehnOQ}hfJ(|NLs{jNXbp<4aR0sUjy}7LK5|rGIlEe zpf;;5bl6f`^bl8o;#oDet+hqU&$el%YW6G@H`)8a+J|d)GOIkS>?*vUOQ3uo+vLN_ zo~f&qYg0X@05zUW?NkmrWs`Hgd^nzTwq}`X(jA)8tG;X% zUWf<1iy#L<%W+H}fwuZ|&`K;M4bV=X4odWWI%u_D2c>~Y19Y7~kc*sD>knK&&ToA> zC#CfTuIrrZ{5mJ4_1ljVCr?989pi$|i9X`=&hP@hA6GS{vDcv-^t<>%*5lxbg^W@n zE+kDO7II}k=L?e0;M)@5d{m1C9wXoPew~lfqJe@Ld>yw0bUsRp*{>5HPeVQ$8MQyw 
z`Orsvj{`o)LcF+b=Es%c>Ec`Ue}CzDejgiSFp^mN&f2z?H74!!?9=h0HAlM3Cchwr zIxKX-!n>_^z@>Amc0W@>5@6x-<49mQI*_a4?dH1!U;rZi!77^_{q zn|$3(>Rxg-;oEx$Yti0ztfBJ@?!>AwL%K=NY(i#Zo-{crQ?jJhXAT%r|EPdcQ{O{FK%fjj{>JRsoKp=SavIl$pUMHz6E%i*HjDc~p{;i|b) zz|kxmjxq_(>x7|vd{r14_;ZdRqSa-r;0Pj?QHwv>2qM1j%0Ntx2*fSunLz9s;fP0F z8HiEL<%m7pX{;aSBjQT#G}aGi;fRz;h@ojPL~nCqwHq!03?}Rz+U#GvwAsTm4Ad$_ z9i;BcGfX2A7fE?>ue+-Ynz<|CY!JB_Wz zSvVYJ5}Y*WA6wmmh!3z+|FX0oVs3@8el3VtUXg*AY!Qf!==p1_TR7s5iVVak=5oZ( zxYJla%tyqz+-a;I&cYEXlMpw}6I;DxK5liG?g(@g$aDl00V^S$2CNu=u+4annuS*; z8y+nG^<*=CABzu^?kk;9dZNsC4*%rDrwid+mP=*@H(YoSnHbzO!7*k=HKQgf7lRDE z+lJlaSRTjnQ`D!_y=DoNJ*PWT_1x}QH6-yoW49BpYHP=UYhXN!CeU|RjD2-CL34kTD7_!7JEdqRosR<^J%R3sHf|pP>t5ZG^kZC p)BFv9|6Fth%rbo!DL#`dG9_Lghb{2aewFB z^E>x^y|3tpQuG6<>uFfBf7)jqbAV}=fp%S`T{DSw+`j%{Y}aXw@omQag#YMg_$#Zx z;rrJZv7PXdwUIG7&DidA4`cJ*WJgn5vs3nEvy5%cmMeBlpm1kS+QdFd`&n;l3fcqg z)0D_=q$GATRbr8ol$*@1rwSuE=_2GU#|$@b8j+vN`QS8<^#!){p0VQ$6fhDqj}wBDFz7J4Y|aDvm+c zp%Yf|jb@JR9A%DO9HSghvjz%W@JL|U2z;LN2032i_$$Xzj<$Tz6L4{?fQFbiw++;w z1Qw;(E{i>RooS`p9#GcVwkzlD^&MUeYIG-C+`ETiZv{5tN46N)R2I4-gwQHj!?Iuw zi>zWuc!unVWsK2b5%zb=94z_`yF;$b!etTVsgKyEX2bh%%40dP9~@tTvMy(KjyY@- z#PBLnw42tL6*aH2Mvw->f^;!#k}iaWHDZ|3JR*s9p|-X%h!a0zs_2k|I%Z@YjZ&0@ z6GxPcvr1X(vx{{gyCL=`3gzeUZRESL4XqTX0?*6vICD`>@HiAR!{ZPq!Q+s8!FNWBL;Q^IvKEK<7aRw)IQp4q zD9&Dw8zgE^nc5;jxpKHPPxZQ>S=m;4SaG^N%5t~XpSRq8r4L2yjUi%RbkS#`W+uE| zWyGtI3w?fNxlbb-B9e8{XEd+|6EYfDkFwD~O4?{3af}9%+~>Qa6-NB1FTa!;Nc=v> zWvwv%%sW)rG`|O8E2iw2+u?rX@%$g}g(rS3Xz!a0YhW-OTeS9Y|6JP>B%zK9;I-{x zN7FL9oQnFMI;7T^pu6ZVYS_nS^+*=QiT$7ISp4E7> zUq!h)+3LB07)|c?MUP+9R&MSB&fvv|K4h7mHJOndd~->!e`y zKNX`w7ByH27vj+Y*+CI=8>Lx@1s z)7JKuS2w74g7C}!E^)$Z_MY!{Nrh!FUvLN2h0CB0g6ca2i`8=o7W~nmYqRG44{Sky A761SM diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co index c968a9a059e712ee18684b963ddaa0e6e1872cc1..ce41004140280db541ebe484bd6e527964d58adb 100755 GIT binary patch delta 1957 
zcmai!ZA?>F7{{OIa$5=%xK^kYYg@|YB?|+xs1wpDW@Zc97g)r^eK6oKB{~RpOuT!}?WqzI;|KER z{?B=S&(r@s_rxaU*rc3Lov-zp6+@;8&}-H(1r%`i7aAHYrcME@+Xo{l8E&O)@JlKP zqbUi7jSBKqIGrkUXh{hGTuwQxx%8fVtYhio%_SP#q`d#<@0S!PifOeN_c7vUDy%gCHc1oeJN;PQLl1SU(_0iPj zL-HK`w_vFvG$6D=XtU5(p&df^2-Soh6&e$o5PFWZf;ajB7WgknL+3Tsjli|i!$--? zI-V$e4-5Fjt)Yjd{FDX^{GQ_req6iF7?Xg z{GqG#C`*^BvU|HaG+yIQNYX}=y-^aBA{$u`i4n2HG`N&xIRI0J7j|5XEtAUF4Nss5 zshdhhl#F&{k37Mmc>ADH!uOz5HWs(6Gh>5RS(GgE47ViNWG7c18UM3vEfx^8mI@La z8t`VIYsiRapr3d%P~InzfpUvOd09LIC5uD1mmN{~lwb}TAzHO2O$g{_GQ^~1t20pE zM@-5s4&`NHQnEO7@9NKUi204rF%`%ahv!*movpbY2rN?f4(Pa+MubkTIS8@x%H#bo zLSOIY6zjMh9anxoh&dn1!)$l@TXz4SPwo6=nS7C+$@A1~SOXWDGQF+&gW z zwmvw(Gi4Va#Prr&Jb@puUFoF$lZ->zAgcd;Du~8@zuslTK)#hk^LDznYnM{(#)cFg9{Lb!XmC<192@#~5Mmn70+!PYr=0|84=4Z+?fkHdbp z<5Qt*Bc|pM%=k89#+EXp+brhPQf7>5hU!Y!=!ov_t3~Sjc|n!9&#_Rj;l-UgQ6Y{?v&- z@^L!Hqk`*-hYqw1cvZ$Pz(#5#)xRsk+t#W=s}IS2NbVx(B46FJ43FyT72NWiE#76% z*^+i3+?)v7?1mn7;g-jj8&5#*cA;uR`n(1v7F{@j$4l{8J-;b)&@UC+e7J!>?EdY> zqAz*+GyC`Z8@3j?^Xj{I@7h`aPUqh4eVsk;>VK7DLw;R-Z+Bx?bJu X#>t86Zmf~9N5@t8GM4L&tMJbMB2m|9 delta 1817 zcmaJ?T}T{P6h3!$X54IM*ERmc-?+O@)fSVAsEs9o4Ft*tDX}QTmtxYS&;(i=Z8b&K zF;uGviOmJahoD<4vXMZy-3PTlJDbo31xZ6`iN1ui*y=+F4eDm|kZ#YNxf`tyefrMa z@4M%G=ggfup*b-$Cq`7I89*}t28;=S`j22T){!b3PNe`kQkB#VZpgdrG;HvN;e)AI z8S*N8857}QOoHEIg?4RT0L%anVtIW|V?XMSVJ`Z&a+p>*^Lx|Deb8?TBosJPs)hup zpQ-(7-;}2M`Zcd68y@6{tj!bT;Ek!+1Wd>FY2qe6HJa$==;2t+(a-TD$2N}b@Se3% z;|n7kLmVR<;~einBQv7xGNPr8EOXsD$0SEPX^bj=b`t7#cZ(C5g9aURtMshWO>er7 z_p-jyb4cY+4FOI}fW5!LF#_cf_*DoXD>#ChAO}ULx-Zl*9T8x zY&(xdjLUa%<*DE>zPp{8sFC#pp=X^nPC2LuV(_FWx@|2o{VMa|5kXoC3ew%6O-w=k|*m~MrBn!Qd><$DA_(D8hqp!rRRYYg0JRT^n` zPP6FkQ-|qNPO<&P>Z6uDS8CX{Dx@QPTP?<6 zzKx}b@NG;Jwzj=ed?k9gu#V76MWxKobdl+Y0s29(+%6QGZWF~O%K%w~7jBPW%Cy_# z7^mGb3u(7Z!`*&Tay9Baka4HVvFlX+-{(PVS3SxQOd1;e5({}>8l|19WBJdq={9J) zpCklMGqe$ycrs|<+3hCe67BzMP5>?X^V-Z_lA^D#UA3|C4{ms9WJ6`M|6{}K=9>*4 zy}4QG3(a3HT}P+k`N`5Ioc%I$J%6V3A?pM=xrx?qcD;;A*h*y4QqL`JtHXH}lIzhVWhsk-OMZbGywq5?rY+LV?h)2IJ5mrUND-l*kA4jxWZ{YbGcI0p1 
z`H_>Gbe~MNdk^s51KaX&fcK6Z$}eV+9nI@M%2@u9Y?VKdVU>R@6Keu`G6QLk-J;*J zTeJ`Hzf)-CE?c+DjpkR+Q8|*+f-*l57;=p(XHOR*&x^%~|Yprp+*OJ+pyqGE4OKY|=QIORjmV z@vgIxQXx}1VE#7X*7pk|Pk#!WE4zJR==SsX9r}}8Qtf!P>w}9II@(Xab56g7&$>5{ a%zi#1&MxPXN|Bt`NfCLCl;{mbWbI#&wt_$a From 22ab61caf95743d9aa8a4154d265c721925c6ea6 Mon Sep 17 00:00:00 2001 From: tingchen Date: Sun, 7 Jun 2026 17:10:00 +0000 Subject: [PATCH 31/43] passthrough sink; add varlen kernel --- aiter/jit/optCompilerConfig.json | 15 + aiter/ops/mha.py | 151 +++++++--- csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu | 30 +- .../asm_fmha_fwd_with_sink_varlen.cu | 264 ++++++++++++++++++ .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin 83744 -> 83872 bytes .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin 72736 -> 72992 bytes ..._bf16_pertokenBf16_hd128_128x256_varlen.co | Bin 0 -> 84024 bytes ...a_bf16_pertokenBf16_hd64_128x256_varlen.co | Bin 0 -> 73136 bytes .../fmha_fwd_bf16_varlen.csv | 3 + op_tests/test_fmha_fwd_with_sink_asm.py | 92 +++--- .../test_fmha_fwd_with_sink_varlen_asm.py | 242 ++++++++++++++++ 11 files changed, 686 insertions(+), 111 deletions(-) create mode 100644 csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu create mode 100755 hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_varlen.co create mode 100755 hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_varlen.co create mode 100644 hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv create mode 100644 op_tests/test_fmha_fwd_with_sink_varlen_asm.py diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index b04ff28b44..9bba721a5e 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -1113,6 +1113,21 @@ "f'{AITER_META_DIR}/hsa/codegen.py -m fmha_fwd_bf16 --output_dir {{}}'" ] }, + "module_fmha_fwd_with_sink_varlen_asm": { + "srcs": [ + "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu'" + ], + "flags_extra_cc": [ + "'-DENABLE_CK=0'" + ], + 
"flags_extra_hip": [], + "extra_ldflags": "None", + "extra_include": [], + "verbose": "False", + "blob_gen_cmd": [ + "f'{AITER_META_DIR}/hsa/codegen.py -m fmha_fwd_bf16_varlen --output_dir {{}}'" + ] + }, "module_fmha_v3_fwd": { "srcs": [ "f'{AITER_CSRC_DIR}/kernels/mha_common.cu'", diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index c5df82bc8c..30a35776f0 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -296,7 +296,7 @@ def _fmha_fwd_with_sink_asm( v: Tensor, out: Tensor, lse: Tensor, - sink: Tensor, + sink: Optional[Tensor], softmax_scale: float, is_causal: bool, return_lse: bool, @@ -313,21 +313,18 @@ def fmha_fwd_with_sink_asm( sink: Optional[Tensor] = None, out: Optional[Tensor] = None, ) -> Tuple[Tensor, Tensor]: - """Public wrapper: allocates `out`/`lse`/`sink` buffers as needed and - forwards to the ctypes-backed kernel entry point. + """Public wrapper: allocates `out`/`lse` buffers as needed and forwards to + the ctypes-backed kernel entry point. Contract details: - * `sink` (caller) is in AITER post-scale convention. This wrapper - converts it to the kernel's pre-scale raw-logit domain by multiplying - by sqrt(qk_head_dim) before launch. + * `sink` is passed through verbatim — it is the value the kernel + consumes directly (no host-side scaling). It is optional: pass `None` + for no sink. Whether the kernel reads it is decided inside the `.co` + (ENABLE_SINK). When provided it must be a 1-D fp32 tensor of shape + [q_head_num]. * The kernel always accesses `ptr_LSE`, so an LSE buffer is always allocated even when `return_lse=False`; in that case the contents are undefined and callers should ignore the returned `lse`. - * D64 kernels (`_rxy_sink`) compile ENABLE_SINK=1 and read `sink`; - callers MUST pass an explicit sink for D64. - * D128 kernels (`_rxy`) compile ENABLE_SINK=0 and ignore `sink`; the - kernarg slot must still be a valid non-null pointer, so we always - allocate a zero buffer when none is supplied. 
""" batch, q_seq_len, q_head_num, qk_head_dim = q.shape v_head_dim = v.size(3) @@ -343,27 +340,102 @@ def fmha_fwd_with_sink_asm( (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device ) - if sink is not None: - # AITER post-scale → kernel pre-scale. - sink_for_kernel = (sink * (qk_head_dim**0.5)).to(torch.float32).contiguous() - elif qk_head_dim == 64: - raise RuntimeError( - "fmha_fwd_with_sink_asm: D64 kernels require an explicit `sink` tensor " - f"of shape [q_head_num]={q_head_num} fp32 (AITER post-scale " - "convention). Pass `sink=torch.zeros(q_head_num, dtype=torch.float32)` " - "if you want a zero-logit sink." + _fmha_fwd_with_sink_asm( + q, + k, + v, + out, + lse, + sink, + float(softmax_scale), + bool(is_causal), + bool(return_lse), + ) + return out, lse + + +@compile_ops( + "module_fmha_fwd_with_sink_varlen_asm", + fc_name="fmha_fwd_with_sink_varlen_asm", + ffi_type="ctypes", +) +def _fmha_fwd_with_sink_varlen_asm( + q: Tensor, + k: Tensor, + v: Tensor, + out: Tensor, + lse: Tensor, + sink: Optional[Tensor], + cu_seqlens_q: Tensor, + cu_seqlens_k: Tensor, + max_seqlen_q: int, + softmax_scale: float, + is_causal: bool, + return_lse: bool, +) -> None: ... + + +def fmha_fwd_with_sink_varlen_asm( + q: Tensor, + k: Tensor, + v: Tensor, + cu_seqlens_q: Tensor, + cu_seqlens_k: Tensor, + max_seqlen_q: int, + softmax_scale: float, + is_causal: bool, + return_lse: bool, + sink: Optional[Tensor] = None, + out: Optional[Tensor] = None, +) -> Tuple[Tensor, Tensor]: + """Public wrapper: varlen / packed BF16 ASM forward (gfx1250). 
+ + Layout is packed [token, head, dim] (THD), batch folded into the token + axis; per-batch boundaries come from cumulative-length arrays: + * q : (total_q, nheads, hdim_q) + * k : (total_k, nheads_k, hdim_q) + * v : (total_k, nheads_k, hdim_v) + * out : (total_q, nheads, hdim_v) + * lse : (total_q, nheads, 1) fp32 (kernel writes packed [total_q, nheads]) + * cu_seqlens_q/k : int32 [batch+1] cumulative (cu[batch] == total) + + Contract details: + * The varlen kernel carries NO strides; q/k/v/out MUST be densely packed, + so this wrapper calls `.contiguous()` defensively. + * `max_seqlen_q` is the maximum per-batch Q sequence length (caller- + supplied, e.g. flash_attn_varlen convention) -- it sets the launch tile + count; the kernel early-exits tiles beyond each batch's actual length. + * `sink` is passed through verbatim (the value the kernel consumes + directly, no host-side scaling); optional. Allocation is caller-side. + * The kernel always accesses `ptr_LSE`, so an LSE buffer is always + allocated even when `return_lse=False`; in that case ignore the result. + """ + q, k, v = (x.contiguous() for x in (q, k, v)) + cu_seqlens_q = cu_seqlens_q.to(torch.int32).contiguous() + cu_seqlens_k = cu_seqlens_k.to(torch.int32).contiguous() + + total_q, q_head_num, qk_head_dim = q.shape + v_head_dim = v.size(2) + + if out is None: + out = torch.empty( + (total_q, q_head_num, v_head_dim), dtype=q.dtype, device=q.device ) - else: - # D128: kernel never reads sink contents but slot must be non-null. 
- sink_for_kernel = torch.zeros(q_head_num, dtype=torch.float32, device=q.device) - _fmha_fwd_with_sink_asm( + lse = torch.empty( + (total_q, q_head_num, 1), dtype=torch.float32, device=q.device + ) + + _fmha_fwd_with_sink_varlen_asm( q, k, v, out, lse, - sink_for_kernel, + sink, + cu_seqlens_q, + cu_seqlens_k, + int(max_seqlen_q), float(softmax_scale), bool(is_causal), bool(return_lse), @@ -1449,14 +1521,13 @@ def can_impl_fmha_fwd_with_sink_asm(): # drop the sink term, so we fall back to CK whenever sink_ptr is set. # # D64 kernels (`_rxy_sink`) compile ENABLE_SINK=1 -- the kernel - # ALWAYS reads SINK and adds `exp((sink_raw - max) * scale)` to the - # softmax denominator. There is no "skip sink" mode on this kernel, - # so calling it without an explicit sink_ptr would either (a) crash - # in the wrapper (it raises when sink is None for D64) or (b) if we - # silently fill in zeros, change the no-sink result by an extra - # exp(-max * scale) term in every q-tile. Either way the documented - # `sink_ptr is None` semantics of flash_attn_func are violated, so - # we require an explicit sink and fall back to CK otherwise. + # ALWAYS reads SINK and adds `exp((sink - max) * scale)` to the + # softmax denominator. There is no "skip sink" mode on this binary, + # so calling it with sink_ptr=None now forwards a null pointer to the + # kernel (the wrapper no longer raises / zero-fills), which the D64 + # binary would dereference. To preserve flash_attn_func's documented + # `sink_ptr is None` semantics we keep requiring an explicit sink for + # D64 here and fall back to CK otherwise. if hdim_q == 128: ret = ret and (sink_ptr is None) elif hdim_q == 64: @@ -1480,14 +1551,14 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): if can_impl_fmha_fwd_with_sink_asm(): # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, # no API-side permute. softmax_scale is forwarded as-is (kernel applies - # it internally to Q·K^T). 
sink_ptr is in AITER post-scale convention; - # `fmha_fwd_with_sink_asm` multiplies it by sqrt(qk_head_dim) before - # launch. + # it internally to Q·K^T). sink_ptr is passed through verbatim -- it is + # the value the kernel consumes directly (no host-side scaling); whether + # the kernel reads it is decided inside the .co. # - # `can_impl_fmha_fwd_with_sink_asm` already enforces the - # (hdim, sink_ptr) compatibility matrix (D128 requires sink_ptr is - # None; D64 requires sink_ptr is not None), so we can forward the - # caller's sink_ptr unmodified here -- no zero-fill, no None-coercion. + # `can_impl_fmha_fwd_with_sink_asm` still enforces the current-binary + # (hdim, sink_ptr) matrix (D128 requires sink_ptr is None; D64 requires + # sink_ptr is not None) so we never feed a null sink to a D64 binary that + # unconditionally reads it -- forward the caller's sink_ptr unmodified. out_, softmax_lse = fmha_fwd_with_sink_asm( q, k, diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu index f387774290..f118566295 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu @@ -130,10 +130,10 @@ AITER_CTYPES_ERROR_DEF // lse : [batch, q_head_num, q_seq_len] fp32. Always required by kernel ABI // (kernel may touch ptr_LSE even when return_lse=0); pass a buffer of // the right size regardless of whether you read it. -// sink : [q_head_num] fp32 in the kernel's pre-scale raw-logit domain. -// Required for D64 (ENABLE_SINK=1). For D128 (ENABLE_SINK=0) the -// slot must still be a valid non-null pointer of the right size, but -// contents are ignored — pass a zero buffer. +// sink : [q_head_num] fp32, passed through verbatim to the kernel (the value +// the kernel consumes directly — no host-side scaling). Optional: +// may be null; whether the kernel reads it is decided inside the .co +// (ENABLE_SINK). When non-null it must be 1-D fp32 of size q_head_num. 
AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( fmha_fwd_with_sink_asm, (aiter_tensor_t* q, @@ -151,8 +151,8 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( // ---- null + multi-GPU safety ----------------------------------------- // Validate pointers BEFORE touching anything on the device, so the // device_guard below can safely read q->device_id. - AITER_CHECK(q && k && v && out && lse && sink, - "fmha_fwd_with_sink_asm: q/k/v/out/lse/sink must all be non-null"); + AITER_CHECK(q && k && v && out && lse, + "fmha_fwd_with_sink_asm: q/k/v/out/lse must all be non-null"); // Pin current HIP device to q.device() for the duration of this call. // @@ -183,8 +183,13 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( "fmha_fwd_with_sink_asm: out must be bf16"); AITER_CHECK(lse->dtype() == AITER_DTYPE_fp32, "fmha_fwd_with_sink_asm: lse must be fp32"); - AITER_CHECK(sink->dtype() == AITER_DTYPE_fp32, - "fmha_fwd_with_sink_asm: sink must be fp32"); + // sink is optional: the kernel (.co) decides whether it consumes it. + // Validate dtype only when a sink buffer is actually provided. + if (sink) + { + AITER_CHECK(sink->dtype() == AITER_DTYPE_fp32, + "fmha_fwd_with_sink_asm: sink must be fp32"); + } AITER_CHECK(q->dim() == 4 && k->dim() == 4 && v->dim() == 4, "fmha_fwd_with_sink_asm: q/k/v must be 4-D tensors (bshd shape)"); @@ -225,8 +230,11 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( (int)lse->size(2) == q_seq_len, "fmha_fwd_with_sink_asm: lse shape must be [batch, q_head_num, q_seq_len]"); - AITER_CHECK(sink->dim() == 1 && (int)sink->size(0) == q_head_num, - "fmha_fwd_with_sink_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); + if (sink) + { + AITER_CHECK(sink->dim() == 1 && (int)sink->size(0) == q_head_num, + "fmha_fwd_with_sink_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); + } const int gqa = q_head_num / kv_head_num; const int mask_flag = is_causal ? 
1 : 0; @@ -295,7 +303,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( args.d_hs = stride_o_head; args.d_bas = stride_o_batch; args.lse_hs = stride_lse_head; - args.sink_addr = sink->data_ptr(); + args.sink_addr = sink ? sink->data_ptr() : nullptr; size_t arg_size = sizeof(args); diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu new file mode 100644 index 0000000000..006c3e8b73 --- /dev/null +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +// +// ASM FMHA forward, VARLEN / packed (BF16, gfx1250). +// +// Layout: q/k/v/out are **packed [token, head, dim]** (batch folded into the +// token axis). Per-batch sequence boundaries are described by cumulative +// length arrays cu_seqlens_q / cu_seqlens_k (int32, length batch+1, no +// padding). Unlike the fixed-batch path, the kernel computes all addresses +// internally from (q_head_num, gqa, head_dim, cu_seqlens) -- so the kernarg +// block carries NO strides and the tensors MUST be densely packed / contiguous. +// +// q : (total_q, nheads, hdim_q) +// k : (total_k, nheads_k, hdim_q) +// v : (total_k, nheads_k, hdim_v) +// out : (total_q, nheads, hdim_v) +// lse : packed [total_q, nheads] (token-major; caller may shape (total_q, nheads, 1)) +// cu_seqlens_q/k : int32 [batch+1] cumulative, cu[batch] == total +// +// Memory-allocation policy: all tensors are allocated by the Python caller. +// This C++ entry point performs only pointer bookkeeping + kernel launch -- +// no GPU allocation, no temporaries, no torch dependency. +// +// sink: passed through verbatim (the value the kernel consumes directly, no +// host-side scaling). Optional -- may be null; whether the kernel reads it is +// decided inside the .co (ENABLE_SINK). 
+#include "aiter_tensor.h" +#include "aiter_ctypes_error.h" +#include "aiter_hip_common.h" // HipDeviceGuard, AiterAsmKernel, ... +#include "asm_fmha_fwd_bf16_varlen_configs.hpp" +#include +#include +#include + +// Kernel argument block -- packed varlen ABI (0x58 = 88 B), matches the +// FmhaFwdVarlenKernelArgs layout in the poc host code (s_load order of the +// BF16_FMHA_FWD_VARLEN_*.s kernels). No strides: packed [token, head, dim]. +#pragma pack(push, 1) +struct FmhaFwdVarlenKernelArgs +{ + void* d_addr; // off 0x00 O output (total_q, nheads, dv) + const void* q_addr; // off 0x08 Q (total_q, nheads, dq) + const void* k_addr; // off 0x10 K (total_k, nheads_k, dq) + const void* v_addr; // off 0x18 V (total_k, nheads_k, dv) + void* lse_addr; // off 0x20 LSE packed [total_q, nheads] + const void* qseq_addr; // off 0x28 cu_seqlens_q int32[batch+1] + const void* kseq_addr; // off 0x30 cu_seqlens_k int32[batch+1] + float scalar; // off 0x38 softmax_scale + int gqa; // off 0x3C nheads / nheads_k + int q_head_num; // off 0x40 nheads + int opt; // off 0x44 bit0 reverse_kv | bit1 double_q | bit2 remap_xy + int lse; // off 0x48 1 = write LSE + int max_q_len; // off 0x4C max over batches of q seqlen (dispatch basis) + void* sink_addr; // off 0x50 per-Q-head f32 sink (verbatim; may be null) +}; +#pragma pack(pop) +static_assert(sizeof(FmhaFwdVarlenKernelArgs) == 0x58, + "fmha_fwd_with_sink_varlen_asm: FmhaFwdVarlenKernelArgs must be 88B packed"); + +// ---- helpers --------------------------------------------------------------- + +// Kernel selection: only (dtype, hdim_q, hdim_v, mask). Only the _brd (border) +// causal kernels are shipped, so mask is always 1. 
+static std::string get_heuristic_kernel_fmha_fwd_bf16_varlen(const std::string& dtype, + int hdim_q, + int hdim_v, + int mask_flag, + const std::string& arch_id, + CFG* cfgs) +{ + for (const auto& el : *cfgs) + { + if (el.first.find(arch_id) != 0) continue; + const auto& cfg = el.second; + if (cfg.dtype != dtype) continue; + if (cfg.hdim_q != hdim_q) continue; + if (cfg.hdim_v != hdim_v) continue; + if (cfg.mask != mask_flag) continue; + return el.first; + } + AITER_CHECK(false, + "fmha_fwd_with_sink_varlen_asm: no kernel for dtype=", dtype, + " hdim_q=", hdim_q, " hdim_v=", hdim_v, + " mask=", mask_flag, + " arch=", arch_id); + return ""; +} + +// ---- main entry ------------------------------------------------------------ + +AITER_CTYPES_ERROR_DEF + +// C ABI: every tensor is caller-allocated. No GPU memory is allocated here; +// no torch dependency. +// +// q/k/v/out are packed [token, head, dim] (densely contiguous). cu_seqlens_q/k +// are int32 [batch+1] cumulative arrays. max_seqlen_q is the maximum per-batch +// Q sequence length (host-supplied; used for the launch tile count). sink is +// optional and forwarded verbatim. 
+AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( + fmha_fwd_with_sink_varlen_asm, + (aiter_tensor_t* q, + aiter_tensor_t* k, + aiter_tensor_t* v, + aiter_tensor_t* out, + aiter_tensor_t* lse, + aiter_tensor_t* sink, + aiter_tensor_t* cu_seqlens_q, + aiter_tensor_t* cu_seqlens_k, + int max_seqlen_q, + float softmax_scale, + int is_causal, + int return_lse, + hipStream_t stream), + (q, k, v, out, lse, sink, cu_seqlens_q, cu_seqlens_k, + max_seqlen_q, softmax_scale, is_causal, return_lse, stream)) +{ + // ---- null safety (sink is optional) ----------------------------------- + AITER_CHECK(q && k && v && out && lse && cu_seqlens_q && cu_seqlens_k, + "fmha_fwd_with_sink_varlen_asm: q/k/v/out/lse/cu_seqlens_q/cu_seqlens_k must all be non-null"); + + // Pin current HIP device to q.device() (torch-free) so kernel symbol + // resolution + launch target the tensors' device. + HipDeviceGuard device_guard{q->device_id}; + + // ---- arch + dtype validation ------------------------------------------ + const std::string arch_id = get_gpu_arch(); + AITER_CHECK(arch_id == "gfx1250", + "fmha_fwd_with_sink_varlen_asm: only supported on gfx1250, got ", arch_id); + + AITER_CHECK(q->dtype() == AITER_DTYPE_bf16 && + k->dtype() == AITER_DTYPE_bf16 && + v->dtype() == AITER_DTYPE_bf16, + "fmha_fwd_with_sink_varlen_asm: q/k/v must be bf16"); + AITER_CHECK(out->dtype() == AITER_DTYPE_bf16, + "fmha_fwd_with_sink_varlen_asm: out must be bf16"); + AITER_CHECK(lse->dtype() == AITER_DTYPE_fp32, + "fmha_fwd_with_sink_varlen_asm: lse must be fp32"); + AITER_CHECK(cu_seqlens_q->dtype() == AITER_DTYPE_i32 && + cu_seqlens_k->dtype() == AITER_DTYPE_i32, + "fmha_fwd_with_sink_varlen_asm: cu_seqlens_q/k must be int32"); + if (sink) + { + AITER_CHECK(sink->dtype() == AITER_DTYPE_fp32, + "fmha_fwd_with_sink_varlen_asm: sink must be fp32"); + } + + // ---- shape extraction (packed thd) ------------------------------------ + AITER_CHECK(q->dim() == 3 && k->dim() == 3 && v->dim() == 3, + "fmha_fwd_with_sink_varlen_asm: 
q/k/v must be 3-D packed tensors (total, head, dim)"); + AITER_CHECK(q->stride(-1) == 1 && k->stride(-1) == 1 && v->stride(-1) == 1, + "fmha_fwd_with_sink_varlen_asm: q/k/v must have contiguous last dim"); + + const int total_q = (int)q->size(0); + const int q_head_num = (int)q->size(1); + const int qk_head_dim = (int)q->size(2); + + const int total_k = (int)k->size(0); + const int kv_head_num = (int)k->size(1); + const int v_head_dim = (int)v->size(2); + + AITER_CHECK((int)k->size(2) == qk_head_dim, "fmha_fwd_with_sink_varlen_asm: k head_dim mismatch"); + AITER_CHECK((int)v->size(0) == total_k, "fmha_fwd_with_sink_varlen_asm: v total_k mismatch with k"); + AITER_CHECK((int)v->size(1) == kv_head_num, "fmha_fwd_with_sink_varlen_asm: v head_num mismatch with k"); + AITER_CHECK(q_head_num % kv_head_num == 0, "fmha_fwd_with_sink_varlen_asm: q_head_num must be a multiple of kv_head_num"); + AITER_CHECK(qk_head_dim == 64 || qk_head_dim == 128, + "fmha_fwd_with_sink_varlen_asm: only head_dim 64 or 128 supported, got ", qk_head_dim); + AITER_CHECK(v_head_dim == qk_head_dim, + "fmha_fwd_with_sink_varlen_asm: v_head_dim must equal qk_head_dim"); + + AITER_CHECK(out->dim() == 3 && + (int)out->size(0) == total_q && (int)out->size(1) == q_head_num && + (int)out->size(2) == v_head_dim, + "fmha_fwd_with_sink_varlen_asm: out shape must be [total_q, q_head_num, v_head_dim]"); + AITER_CHECK(out->stride(-1) == 1, + "fmha_fwd_with_sink_varlen_asm: out must have contiguous last dim"); + + // lse packed [total_q, nheads]; caller may pass shape (total_q, nheads, 1). 
+ AITER_CHECK(lse->dim() >= 2 && + (int)lse->size(0) == total_q && (int)lse->size(1) == q_head_num, + "fmha_fwd_with_sink_varlen_asm: lse leading dims must be [total_q, q_head_num]"); + + AITER_CHECK(cu_seqlens_q->dim() == 1 && cu_seqlens_k->dim() == 1, + "fmha_fwd_with_sink_varlen_asm: cu_seqlens_q/k must be 1-D"); + const int batch = (int)cu_seqlens_q->size(0) - 1; + AITER_CHECK(batch >= 1, + "fmha_fwd_with_sink_varlen_asm: cu_seqlens_q must have length batch+1 (>=2)"); + AITER_CHECK((int)cu_seqlens_k->size(0) == batch + 1, + "fmha_fwd_with_sink_varlen_asm: cu_seqlens_k length must match cu_seqlens_q"); + AITER_CHECK(max_seqlen_q > 0, + "fmha_fwd_with_sink_varlen_asm: max_seqlen_q must be > 0"); + + if (sink) + { + AITER_CHECK(sink->dim() == 1 && (int)sink->size(0) == q_head_num, + "fmha_fwd_with_sink_varlen_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); + } + + const int gqa = q_head_num / kv_head_num; + const int mask_flag = is_causal ? 1 : 0; + + // ---- kernel args (88 B packed; no strides) ---------------------------- + FmhaFwdVarlenKernelArgs args; + memset(&args, 0, sizeof(args)); + args.d_addr = out->data_ptr(); + args.q_addr = q->data_ptr(); + args.k_addr = k->data_ptr(); + args.v_addr = v->data_ptr(); + args.lse_addr = lse->data_ptr(); + args.qseq_addr = cu_seqlens_q->data_ptr(); + args.kseq_addr = cu_seqlens_k->data_ptr(); + args.scalar = softmax_scale; + args.gqa = gqa; + args.q_head_num = q_head_num; + // s_opt: bit0 reverse_kv | bit1 double_q | bit2 remap_xy. + // The shipped VARLEN _dq kernels use reverse_kv=1, double_q=1, remap_xy=1. + args.opt = 7; + args.lse = return_lse ? 1 : 0; + args.max_q_len = max_seqlen_q; + args.sink_addr = sink ? 
sink->data_ptr() : nullptr; + + size_t arg_size = sizeof(args); + + // ---- kernel selection -------------------------------------------------- + const std::string dtype = "bf16"; + CFG* cfg_map = &cfg_fmha_fwd_bf16_varlen; + static SynchronizedCache impl_ptr_map; + + const std::string kernel_key = get_heuristic_kernel_fmha_fwd_bf16_varlen( + dtype, qk_head_dim, v_head_dim, mask_flag, arch_id, cfg_map); + auto it = cfg_map->find(kernel_key); + AITER_CHECK(it != cfg_map->end(), + "fmha_fwd_with_sink_varlen_asm: kernel not found in CFG: ", kernel_key); + + const char* name = it->second.knl_name.c_str(); + const char* co_name = it->second.co_name.c_str(); + AiterAsmKernel* impl_ptr = &impl_ptr_map.get_or_create( + name, [&]() { return AiterAsmKernel(name, co_name); }); + + // ---- launch ------------------------------------------------------------ + // Dispatch along max_q_len (DOUBLE_Q halves via tg_div); z = batch index. + // The kernel early-exits q-tiles that fall beyond a batch's actual seqlen + // using cu_seqlens_q. Shipped VARLEN kernels: ts_qo=128, double_q=1 + // (tg_div=2), wv_tg=4 (block=128), remap_xy=1. + const int sub_Q = 128; // ts_qo + const int wv_tg = 4; + const int bdx = (wv_tg == 4) ? 128 : 256; + const int tg_div = 2; // double_q = 1 + const int q_tile_count = (max_seqlen_q + sub_Q - 1) / sub_Q; + const int gdx = (q_tile_count + tg_div - 1) / tg_div; + const int gdy = q_head_num; + const int gdz = batch; + + // remap_xy=1: swap gdx<->gdy at launch so bid.x indexes heads, bid.y Q-tiles. 
+ impl_ptr->launch_kernel({&args, + &arg_size, + gdy, // launch_gdx = head count (swapped) + gdx, // launch_gdy = Q-tile count (swapped) + gdz, + bdx, + 1, + 1, + stream}); +} diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co index 9b7c3454837c173a084309057315a20e71809e22..027171274bd7cb70fa0b674fa04823aee02e9c1a 100755 GIT binary patch delta 602 zcma)(KS%;`6vy8?|DETkLqmgsl|xZvQw^R$(9}Pn-+EhZ z4GxDxWCS&|xVE)5=5U2g^I2+9XYgP$4MBq)4_|f8bjdkhtTz1 z0(YIgVr zE^&aEXHP7!?*OC7p197QNLOw*h&#lp*DKor9&Tz8ulBAtCF~o=C;csfcX+PgS#W@^ zX~TfzbM%-G|M>B`7Q5>29eKHJULH8_#kGP3SF}`IF05>qL@bO;x194Hnv1Pi%Iuz$_#G^%u!bp@f;>4B`tPy SI7wxlN_S++3~h=^1_NJRB#o^A delta 409 zcmZ3`&$^(Gb%F+?!bHvG^$aGA3}67G1t46IC>x0B0F@6wr3)a+7#N`HVKftw`k?wJ z`!fnE5w>G<6JrM#(CEomcoa7O;92mEnStT>_UX)ws@9A*w(qrLJj^)#fgPjA^acmU zN!o@$H!w5GGx4*^K^*faCOEZj5$}3ezp!fr{2RK}^h_&gjZ0 F4**HWMb-cS diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256.co index ce41004140280db541ebe484bd6e527964d58adb..3a8a9066dcb0ed257bfb9f6a4a37ae74bc30fa88 100755 GIT binary patch delta 631 zcma))ze@sf6u{rJ%g*bFOuY1r)FQYdup#;(g$@x#4Gj$q4F(zw1(Hn-Y7P-i{eZq( z7!9p~gK6F8u)&H#k-A@P7XEOkNjr3UIw7pf!*hz^gn%#{^JF7hyR>WD&U@y`@(z(EE-UQFjz7%Po%*$ z=mE>eIWfd}6Icc@P8{TY;t;Vy9A+8U~wC5e^+AuEvvUBC6wPmz}2V*En!-o4v%D sjvi6e(T|!ib&Sb&)w;j{jyi&!A{xLLc8YC=PnZPRSN~@=#Eu_>-{6ahBme*a delta 407 zcmZ3mi)FzMmI<1S5)-wS*K;y5fB}pafN(*gY#^p6RNfSoE`TUwVBm%_p!%4ATw?UW z^iK|86jUT=$L1!+4lbb4ldte-Y~nq0otc5*_~z4361A8(PHng2XMDuM*t%U&gmF9L zG%?0`qIMuRN;3*iUm(pW y$tXH~mo%e22UJzwbVeCQb0$Tx=~gm~_KafFt3Z_a^j$!TLmU#Oxzih^807&x$UCe6 diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_varlen.co b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_varlen.co new file mode 100755 index 0000000000000000000000000000000000000000..5f743893f66b5d88b92c8df25f48ca8ebfbdbb07 
GIT binary patch literal 84024 zcmd?S3!GHdl`ej|`gHq+lJme)5h`bIWAa7B+6;KpG0bhuxszP;DpXYIYez4qC=&RN8+ojxg)mlt&%`7hsTa`J*}ifC{>Vxpu@EnwOR z=Un{%Lgy<^A@W&P^;*kRP?^eeq09f6toJktt^Sw#k|e0a&CGu+2NN9U0s|c<+)54$ zZm7~KM_ZloL4HNHygR7I`D$nDi4>*B+CaTOM3uMo1IaJ$U!B zKV3egEK_P=hfA%nN)0}%QbW(G)QGbxHRh~JMbE0##Iq_j<*Z6gKdVwR&#Kg%vnsXV ztV%6Dt5W4>Rch&3mAd_`O07JrQg@$KsdZ;n>VY#Ub+&GOgWRN@1>?0rx2;)y@7+t* zF1zinWvkXLS-#?dWlNWEmH8_^BK!}td|}MUA?XtB?mOhBq zthjZ_+I1DT-uY8F`po^&)IS?^f5m;vmakcj9%aWwUK@0xJMLbynydcmHB0VVy>waL z_k!nyb?D_~>wW@s+pViEhu|L`ATGcBw&f38SvG92K2Y4ZY|Yvgt5-dhr%xw%>d5|& z&q1B@@l>MEbwTz>$WC|?iwDnz(IB1r`qy33q9G@GF4KCxvgh#V86R!R$2I@-rp(Lw z?=1V;Im>bwBkT6&Y1XLAc6% z>2zK^;`|PEed1I)zr!QpCr*j;yD5(%__V?S^y{hn1 zQ*P*%Kz?oGm-ok=WAzEg8Gf(xV!i7$)fYIwsqgLVst-9Y?(c)NV&|p$?&ro|%|J+)%+e=Z7lYLX)1yG~j&5f`XS&Z+w5}ba)5J z(>dMC$g{Ddvt>u;Gqz)lv7=|Y$lSkYx~Gwj^NamozM#7Dt6%wMphf#aoIueXa7#``M4hmBn%VZR%Zo-u^sb$EiiX{~h{yQ$2OYJDc~%Lq}qtI2G@e zqoI@)A(4fy$a4xso{Jg7{PWHi8M+|P`4Z}6TK-og?feTxeyMYwW7|_)xhr1WdsCeK z?%j32>0^9tjynK?RnSN+>tTypZLkrxIs63D@Uj0SRMAj<=JcOc_|d?Jv^ zK-M}(*+*<+@x{`%lCHe7w4;0Y#6&usex9iE5KBX`3=u>(kJ)ADx zfyO(%k>1-%k0eWXl%kGOtBy+NKz-bK1AjlSPdmRr+nso%vhQPk(|vb1gF{XP<5`?8 z-r@AYSog)~ri&{b^m{SNl%q^Vy28xW$?3_Dq3rRB$&?Ej9FbL`Y>!LNtYO%8R;1&Z$`>sGpLag7k66AyGD(Cpb;Xx(dmmnPbLW*lX@sKOGtqOQ&P$cxbR6HJpz4@hG_+ z@l$b;I%Yi!4*H!q7r>p)ad2m!bJWO#f&4I#{}RZSKwgk1df`A0%sZ-ehMgYoIKA?) zBz@_fa5|5}0e?NJtO&TMH_}IsnAVXo3_B(7gr}mlXhRoXqpmm_%eE-SBIo=RZ6fXx zuli}(taB>zo z8Kn2<3B8`c=bjrcIq$r-dO60d;Ly|Yl4>rt>C)nA#|4kO?RqqXzT(+Sx?A0&oI|6WsfR-Mpk<-+eHT7H zbT!xKl6OL$>2*JtGe z+xf2pYhT8lw=p;VraoCTXGgN<+#TtndB*pqd(Jl!^YF2Bd|5T;WO~`KI0s$kW{J#A z%o|Kfr)1f(cMF_f!1+tp4is z)n_8BGrt6<^8`DEC-XzYj9)r?UvfHs(DUs5!B?T>aA-j{cmINJ3#u2S-TmZgdclHf%1E+!$2XkN@G8eG+v>Vl0ZZP? 
zD+nR0^u0WAm3%4QR8j?x{q^EarBx;$T;^Q)lsdQ`kNNT`%6)nrS}{IIhyQpzF4v#p zTHB|R^@alfQ{@zveGBs_6!@TgT<4?Ax6T>m@jlOD{(K90^dX#^84M2d9>mWwAK1m5 z4ZV)EH%G;;2~iG+wYle4^Wf(S%N{8#9L(pgox!~$sNxay1MLdsL&tRo?<^SM?L_@M zSwH4VH@)sGeXujXU#wexALF;5(LC7E3D?~Mn`k5Ro(VAXItAB-fxXYXIU?5S%`%@h zKl5fkT=y})$wB+Sc60G#7eH@8<*v%&tyrVx&E8Qw|ArmKTNhOoZ!M`B0zM2a*;^T( z_ikl;{(F`28-HCHFZ)5|9PnjGym(v5?2>r8xY+oo!je((bJC+XazKio7+W* z!wH>pOD4t7OHU4Trd81g_Je;`zdT~hh7EH@{bbAN_~?ye)Ag08%E7$ewv21~MEFJc zht|Hl-7;IV%M2+SQaO9x?8@2k+1onOhCJuZ{k@&!{*KyoO1;l%hYLGSw7cck*y^1d zag4kmD)Pc|kzJINd`@$ah6IPYkS=v0&!sNp1**I840L~X2D(~5?f91CBxwiaF4_TE zLpvb%#6{L7Mea?BtZNdvZ*FkUXC0?Hf;7mTQIW}Vk-Op|Ymy@Oq(s&>iQKz;uJp&} zsUyAn^VcyhjpwgXsaSl}0sJ-K?;!pf@plM+hw=9z{+jUj5BO`w-@oAR2>yPDzfbVj z5{r#v9ksaLi@!Sj?Ze-h)ww_R`RhpU|NM2F;KyVYum<{FSa)8i``j9gA=1Hvx`@bqQIV-~kq2G@ zS7=K$^~qi1sIYKNa2`*xz3LHPEO1 zHJxUM(;cxN9Fx9W6Ha%)emBss&07;LaK*Noyr;r@3go`Ed9%ac+aq<>K*#-_T-S9v zpk5ACHrwWvq+X{YKlUG)M&FQ6a1i5v);^r(Azp*>qC9&uPbr^-0bG9Oviz2cnNtoTC%Xd`?D0?uv@6 zDHpki^LZ$A>vTR#dTmnV-jv9?CXxF%pE-Y0I-ikl{JO}UQIW}Vk-IpbX-l2XXGyO~ zirkYDS=%IXFXuD$8+!jh=Ch<%M?|)%pYxgar+U9F^I7yxSwH79?Qg)GG4uH}^}hnX zN?-m!=JRRl=X|DqW8M$Ue5P#;d2gHf%={R_m z=x{!>9*!61C(H7h^V6gU^Vy^a^O@->_&e24p>I_`h59+4sh>jqoX^y6fSsJ5OmBdl zoS&M``Kjrg&rEMbJ*rPTRiEjq&vbIrL76cqW7eDH!FnU|KvZNyxk$6#OoT4|msxKl zy)h~BP)g+CCXr^nVV`H#8>E}{M&!Pz$W*yVv)<5_%z7i~4M~v)Qz9FiM4I)6`Zc{j z7Jf>4T|{J?`pHk)pX}Ya$gDS_cgp(7PugDt4j6u(rhf8M`toDp=V|IEKdE1v_p#w8 zZL7)aT*UQ;_pJq2$v4wR=#ZbhuIqF{zqZo}{W{pG`nAv@KUojQi+p2QUXyPoJ>aKF z5BSOSB<8Q`C(*a6pG5uSC-sx4pZuhL4eTV}m|g=r$u~_W-!z^4WO^;?QGME}`b<}S zrYk?EqYV8g{o@>5bB@*Ed4c^!-}efh1*k*6=3t%Ybwls6BGpgzo?WE+4d^e`PjxCQ zas&Owys{#$`K-SoZ?+i&*58QnV4s>gp~ErY^3mjGb6xzZuFzKkDV?ce{&oN+n1MFmE~M}$0u)`mi|k#C&fANwqe|pPT^YrMw>lpyJeopE;F=j zXypy_Z>YQ>e#5qo?3wj>dC_8b|Mll_?=0)>l0~~J3xczibm&~}@zd9WH^-3nDb8Ec zp-*B=+jec+v|nv&o96tAK6U(|f;^t%=Dia>?Q)Jr$pm=kx;Z zrTDx&>_79~i75V3p3@t+x8g77;pjf^oj!_jhH@$JrNFeG_dEt0x*)B~r%xLjj=_eD zfG<)E8!iUESTSt41o#rgu%Rn(SH-ZQ8*n$pumO(!o$iWZLl58{!0fMF+raV3wGA0Q 
z^yY6XdG8eKcwLH+w&&*=-?S8@M5ryp=X#h2wd{ek-{zC6zv06akP z6?x8Oz?T7M_y8LWCw07FgW;rN*kCxR7&aJADuxY)lZs)3;iO{NU^uB5HW*FVS$NMw5y{;@D+;l@}0rJgB9oJJ68fBi<6Ss0rS>Exf_ z)KHA^P#trO=P=-5iZPzUfrl%`cwPm3m12zN2;dQlF`grVM=Hj6jshN~7~?q_cr>uh zDVx{il+9~$%H}mWW%HVxvUyET*}NvFY+jR7%4?fbxw66LRIY4LPQ?vJj{10bGk%Ic zK%_&*{YQ*+VpT?lVw;R~W1EdEjBSCmIW-1jJVwVH<9Rjk)rv8mV}Zvi#(0hc9;X=N z83m3i#(0hg9)2K z#(1KRImYu^;A<6QJSPE9QjGDO3_Mve#&Zhr6vY_NslZbeV?3t;Pg9KX^ng8Jn^QKg z$tj!HjvV!0H?qb5sgcM1 zpBd@I4j37Vyi{z8Nf3XV?3`1zFslLb0+Xi z#Td_7z_S!%JZA&XR*dny0r&=Bn^QKg$tj!H&(Sf* zc+Lf$s~F=s4|twpjOTpd`HC@~3xF3W#&|9SUZ@!3xd?cXVvOgFz&8ThoU(aMPT9OB zr)*x6Q#P;3DVx{il+9~$%H}mWrM$K|l`9);PUXsmMK`(oYuyuPPTdyms?_&;B9$eN zm6-QJ+#-H7-<)H$^cS5OTqbxU zFwe1CE*Ja=Fwe1Ct`J-W%yX=kD+O-?<~df&5W$;)d5+aGOz;+9o@2FK^;G8EB9}Jk zxkWB*P(D0?@hL{%xKpLCo&@He-}MAP1K4GM_1e|;ln4D}0IQbMXIoT3$@@ZglvL)c;Gr;6z zOTfuzfyv31fRj6b$;p<0lg|N@lP%z+o@3?G2IWI8ZTQbR$J+2EImdDr%6LBv%>BN5 zqhKGH`+fH&!7*U&_uVCe0;bPU+$bR5{`bS1FO>5afPryl`UPH&x#mS7C#SgIckdKTPI15Q-X)lv;(p&W=U5h}xX*PTlrbl#xZigl5=>5UzwbUQn4IE%-;D_-r?}sDD+QBN-0!=O2qvev z-*-0&Ca1XHceiA7DvQ?`^DJHiXYm?1i`T$eyavwVHE3 z3MQwx-*=zO=2RB1G3Hsk2F~I&a2BtDvv>`h#cSX!UIQzyZBFIN2Afm4vOzhO3jEYr zonu*?;y%~?ri?i`#r?kff?#rr`+fIYg2^fF_uYhGa*F$X_eH_v6!-h?p9&_YxZihQ z7EDfYzwdr0n^Rf5#+YaE8aRvBz*)Qo&f+z27O#P`cnz$)wmFq68*EPH$_C}s=Q_u- zI5mf3{ECb@IW?E~yMoCno@2S+6HHF=9LxPP!Q>RrvE1(qCZ~9g<^E7GImL4<_eX-s zDV}4wKhEY<7OyepS-b|$;x%v8Fa^0Qu+@(X_~wS=HEAP#=(ss6$wzI9Yoo!`ZY%9xOa^>3vEZ=MVy9F%Ych(1{ z?N?NY?N{9+wvVe6+b2IMwqO6^#f2>2ZJ3nrJz2{4pDX3ByiLlFyjRMP-y-FwJ}c#C zeMidoxb|&x-^-SLVETIWrazcI_FnE>$Zvf9*xSf&kp9@)Xyzr}*Ca1De0J1~bJdr6 z6mIWvY~YEs#qVxdxEJ3wpZm_AyoK+bjqdsk&)5C)2ZpCCU-hMb9mqbDw8ihPL%E|U zw*%##GwF*m+&R;7Iv!_QZYbK9xMJMp^+PxBf9{GaF5gfOoIfabd;>6~gLhZsp`kv% zCyIxP{ka%-$}~7m$@kY2`D0>ie**33?hQq347qlleh1MfUAyNG_q$&?_(IP8=kVP* z+x#4KJA1|cXTfy~bkPr8JLi}BoqIwdWI>5cIR;CcXbdwFC1fw&%s^oSSr2jO}U zu6uhE{K2>$jO#w$M86EzWzfIWo8%A0^-!c2dsF=3xE_w{5^tJ60@ovOUFuEuN8x%D z^!s`<{4uy5gYp|XPe;Tf*LH`QxW`8=arz3r^SMJZi^$c8J 
z>D}Vb#Pv*Emw8M5*|?q!{UP3Re-5taAbqHJyFU-t^Kd=PyTf0A>jk(T?yd9};d&AD zukuzI?#p~>PH=w7d}&T_e#v|}lHmN3`O=)={E~JyCpf>Poy`f(FKOqI1m~BuvpK=} zCGBiZuwSH|%?b94wDU-U{UYsbPOx93oy`gMi?p*j!G4i;9!aoY$oVg#-Q@Zg(Qf^2 zqTTQEbMP&{nS1cx+27I!-zshmzE2dnD=M<4T;!g($l9dHy(y7(O(Ny{bX?mX;G&shiK*z`b52*v~YuZ|^U>QCWOEbsjv zFS>D2<-!H-{uc+lXvI3vZ%IX7i0}3m;~O6H&7Kv@ClBMg6zTlN%fa_i&Tjdh55LGB z3|}1zkMQpHBa(I?DzYK$-1S~~bjTg)t?~IiDZhW>_j(S#sVWzpgGJEkIWFXm^49rr zNo!1sJmjM8$hD#HXzxD%6_lr4{CeMJKB*DM@Q=5_!mO!yc4pJMj&BQF@PX<4pSbdw9+^;{==2_@=E~ z?$P0X=C_V4k8kCR(rB~VYTKxG+x=XCa%?xgt1sgBXAat4i191FE)*W?ZSd{y(IOb1 zVlU*5^L&3#z-4@cU&Qa!9JIMN@=LD|xlu3foA1^R%QtA8AN>kK;ql%^|9b(4@x6PI zgLd{onSM8f+zDQlPp%I*FBHDU+l=?|r9aJF!*_j`LT|wQkUP=aig)y-ttJoO?a|(W zUk-(@^|qPkhvUGUk9@W`8t~72^NR2Kx}qM=M?PzG!!_p}=j~?!-;5r<`6JgNzt0bI z-gbu`=NISa?4Ulx3@DllyU6uEpXQtS*#mlq>Cj z#0F^P$Jd#;68Q0hvMv{6{080_awmD){fMOPii)fuhX;YflfB1{ALpFmx3R{Li_V@B zlpnMtWPxYQMew=pc_p~X|sVhbK!MBFoY2Gt_lcenHwJ*~oX7OzG6p{|W5Vx&i4ilv_-04S zbhsV#(`*Og3>ZIMF6BGi4*F@f1F;E;cEe|-I@pfyp`7V|*iHX@rgqamvpixS82@~x zcGFL@-H4Y^l!lM)sNH)|j_pNk1@Z4_b6<>E*=-?rx>ud$-^uZz;P`dk^Ttn`ae}?Z zzoX6lP;Tg*A$Nwi(;pURJdro*$?M_Z_4VGL7@y61@mp+uZ|uN7_D7lFt3&QgFX`*| z$K><~aC(;alJVud56keoWCy--0LqM58**oRyL^r9#rurHeQxl+?H`5zH}}E()3{&` z{N`mSH|qY7JIAZ>?HFKu?&0&0t{?RE=AI~D{KT}Xxq-hQh`GWwgzKF80s4F13o#`! 
z{(cbV1pNTlJDuqP8~jQ>~rv+T9&3;e&@Z`-SN*>-bXux;fUu>toq z&-(u+&-3Kx81Qqhx7&|M+JUIZhJhHPF%N~@dEOqM`++KyHP0b@HWrMPsIgL9T5&kGYnx-H7wilY4@c=rj&T`B4wbyl#@TLtN8zE^`fGdlB!WXgBr(j_7pQ z?pMGo8LN+rzJ_t+T0)HY9%d~mm-;%~4z49^2V#Qs z^grx2&-!O*H`fxDN1PIQ)|c|9?SHNzY%gM(^yG7X6yr7N$x!%4?|+zQ?`iO_qo2?vo^5aqd|Sg13NhHI_G~9 zQJx^1o7 za@@WTY;WB*yDeHT*Af%&8D+QQUb4?P^6Q<5LD?h!e7_9iKjrC=>o`Y#-3{?5Baq0e z8G`g_&jlRE+~7L$49fEvi04YwgZ`eO(3}1O&O;z)B(SfR^0k!6-#Z-XGyfEOS;*Pn z3AsF9;{49+OGLlU%q7g(5zw3S-H=-@<@b#Qp7&?i*FrA%VaVnA65GLjiHS!DJ$zqU z{k8m~h$SuSPaJ6oW4kjcc}YHVaXy>=!rb86!a5Kis3)Jhs0aOBTsJwtx&Co}r=)xh zCGz)hePdtIKd^7O#;{KiTga?2O`>0G?t?i?f5EW#ie{!+n9#3ryM7)L1vuL7Op`_(LdvH;#$Od5U;5x&z+jY zmNso+`pMeDHHm$J7*A$R8jbN=^y845U*Hxt;Wy*%ujhw<3+cX*Z`LHX8!@1Ia!vXN z=KnH3{J$vQ$`5xf7+l!Q>%Y!7>ya5N*v0kepIFB``Qd+I{`>jif1~`{{O}RV-@vB_ zbH~g<`1Fsd|Bw0M-!cEA{O}(r|204S3FW`%oArqG8-1)tN2zbt3gjE#4vG1|Il%eO z`AOeS`_24gZd{$Ne|JEs3de)z)Q_cr~~ zIp~)b^b6M`)|=w<7;I<1bPoEZGx~*n(iwfiK4?K7a6Mw(hfI+dv(+~l~kjZC-OsBPl<#QG7} zt0&L4T92c)k!|9d#JUmpt0&L5$ZvZbwT;>)u1T4?VSDRw)HZ6Hv~F_R9zU*A+CFX1 z|I4^ui}7ya;`8KPMaK2I3HT<(h^cim@Xd-5Q)>zE62*wARSsOP7%{affGZRurq(UM zw3KIR;avjiLXKIUA-uz~Mm&QlB<_&(-*#jt_zV=hn(8~8rv zLdCFw?_(}f3>)}9=8eGm{TpM8<;n*A{w$nYVDHpuso4OI)wbepXyBv|}B{c%6z+UF`hBtm|~1)95}8R<5>w@sTkwA5qP6wjOQc3j{w`8vUyET*}NvF zY+jR7Hm}Jko7d!&&1-VX<~2E`ytX-&D;sQ1<;n)-lzb1lK8WkJA&Bec2XVdPzQpy~ z=;K`9_$i6&wb{>#>jh3#VT`ME%rTyufHx_|cy0#XtQh0D1$c{MjOSM1t%@<8j{-lc z7~{DOc$;F3=VQQ+0o$Cic}-5)ye6k?UXxQcugNK!*W{GVYjVowH94icwmFq68*EPH z$_C|B%5Y_U5Z7x%5ZB8O;(EpX1A(8CxL#Gh#P!sg_qE`gT*O)8nSuM~!8f_UJTq|rBKRg3m}ds=e!-0E#WMrwV8-?0p5OhIV8-?0p5Ogz!Hnz0J-_=mf*IF~dw%yF z!Hnz0J-_?j2|h5M6*<`waFTJo$jO#~lZ@*{PPPP`WLz(DvL)ao<9d;kEdeJP*NdEN z2{_5PUgTs;z)8mSA}3qGN##Q>ZBRbs(uV)6bF4A=hJPjc)_q^b`)XkB_uXF$9t+I< zzWcX=#{qM{@BY2uC@}Z??r#K-2j+g?{hxv-0CT_Z{-fY)fVtmy|M!WQ0b}qDzs>2Z zfo)EY1-3ao4%p^&6xinUcwn2;6M$__UjwY1)|dggv_WGA5Uzwcg>&8aM2W6ZO74V=Ym;4EGPXYm?1i`T$eyarZY+nmak4K}B8WrK1m75FKM z>ve|bSQe+YbBw#mn3Gf7@4Gz&lT+O9yG4S@Dem{(UV_Of?)Tl^g2^fF_uWeclT+O9 
zyCs6jDem{(zS*40;x)!Ri`T$eyavwVHEl_dxc3nPUHzM*F7JE)zjrQ}g?AbbqipraTYAl%wRwLW zdiX7>ZYc5@e{2{e+rf0a&$%xt`KjSZXS;YGwqq9F3DkaJJ=BB#0p6c6jCfzRCne<@ zD3O1VcCdYHFWbX#4z@;6lfAg`+^4gbt z53Z1PuzmeE_S#*zdA}p|wP_#I*}jP6w`pG?-Up;z$^I{ydZ>4@_C=+91Leuu$9iaA zP5+;oddfw=P5YRBvi7k&$GW!vZ+oTDp1726(;lX?JxR%L(;mK?XvV$&Exq_|BK1zz zo|Ke7<#@3kGw$eL^NyqFw`ot4)Zb>jSl)~``j!8FxC2|rg;?GthWlGO^(Yt}AH8vG zx*pq@xI^B5qxX;k^8UFiuPT@i{+jVde~yih-Ny3dLd6@x{T1*1!CjWdzo{DhTjk{8 zJ^v{DfABv#7w{c=_Cpf=z;Anb@5%zX_oJ8eDB%5`ERp+t^Qx6x zYHHgxr(r7AoGJ1R3c1MminN9C6`2m7zbhj7HLSC7D%KcD-!uJ%4FjW+S4#=qy-uFv zYdxPp|L|0-DWX%yJYGX@AFr==7QZ`Y;aquH9_lA+Z?H*nvyos&$7?!u+DSch)8~tw(%M|yP}d-W80?dL<-LWEJq!+zg+Zd zDUp_Cqpk}Lcm^|hlSt<$Egu(Kg%Vh?$h?6`u{qt`^IkvwwBh(pSTRL-6^8{2iq|EJp- zYj9Ak=AUHVABKJO>)oBNy_-cNkvlNbK9SGnjMz8!hDBkyK!pU*Vte|QJd?!Y~k zc=bN#W!=Kx_To1|apd#4TH@n1CnPprbAsvc1^becpOQFv%?XK-cO*gI#c`-!c#HT* z(LW$dc{QGcyAFh{qe`#VCkbgi^rSzIBsf>4{mlvXC-sbv zAjhC*+iu&MdDew})U$23ZOuI6!an+1+jiU5%(E@$2J~#(ZCf+Xw6qU;w(a!Cw!N9> zS@Isbw*9uf#>c=u+iv~V+P0TtOb*(1+jeTd+4kD@Dd*_t^;>GQeyjj*9Er>1gF8OB zv%-5Du~|&ouBgbGIP&7iyTd!=oAqVIOTEe^Z_h^LZ3G|h^xpO3l2)4(xwi^=RVcU8 zJ8b-T&*Fociy&72qcPXu0U@lDTsv!MPZhw3?*IJu-Ki6P%0czfw%%>bFx|PvEyZ z+79*G4On;SZ=Ex2hx+SAtivo{euj3dzplgjOMl%_yVYN(_?&`r+rX!7;O1)Y@BHn! 
z-VScv?KNfmbp!f^dXFRTapc|O{nmd1*H7SjjrR{3f8B_Fpx#r+dkXcf_5PRt46dJn z-a4;2+*E(fKBXU){%B5cZc6)` z6P%0cui1yxlm2KaW>{^uxC8wyo-~*@x7#ZMSVze@*-7Uv1lMTh(9F zKI+-F+qSB|Hugc!ww?akwpaZ%ZKS`p?YHfvzovb*-L{>^r@}7VUfVw7JJDaZowi-{ z(}rt^p-Nw^aa6Cr2r*o-9|&_DVEl?!arTOlZeq3|u7%S_;wv#8#p1sRg&**a_z_7v z5Ea?57~je-{_~Lgp!c!iWEa+pZ{W$va?v?>6LfCgA9B}wzw_gg)|eD&;-!|q5eh%# z{lUaZqFsywW8$TX&f%L;zWiq)cZ2td-y~@tEp zI}tB6Y~rPgPKW!M@hi+Y!DbUfNy>M)pBcA;~}j`3OgX2^|s$9=|AVtWxUHLUScE0DkZS0Oj7Pp|x?SB(;H+pW2F_!K?jHE}r!WiRMuzoYw5Ch^?=-u&N$gT3u zjWKo->o9qU_rMrWD-m~QlXrfMdr`(k;(X*fz*rBa9>$$OtcL}thx3u^z(QPe-f`YC zW|FCsdO`fWa>T#jyj=u6&M(eS#-?C>h?g4Hc&WD_J_hIKjnLzq<2+dEY9oQX^wh}B@*PoHl0vDRnXYTL$`60EBh?bCRvYHOBl0f!MUHOyxb z#y~o$ZFYUFY@;nKPoF_QJ`nh1@luB(-NZVfAD<}CMD zT?Gzr@xBt{Zz`NKnYcuvvu7E~uloCtyVbie7MHZzq(~Dl^=@$cQLl51afxUbc)kIvsFNi*?M z?*P|1kLkx{41Qq7gfZ@j5iiyFW=G0&xE=JKml`(lQafljGb+5be(`KAtuf|Kg9p%>jbI5(%3yXip%W@hs^*(U>8(vT2 zw|PI#EsdGF0_E=eKOy%CFA}q3rak~(Kk4-`{+#z>8I75G2g*F~e?#t5UUAHhnYteL zdD<(DF=nc{59W}br7~vfohY~dKSJ&^UcXrDn5ldw(lvzs-rN)A?U<=6F<-cjaII5c zK%dWhA!cgE=dZ%NpfBK>r*ob@pLHT;YR2c^jd{R1&vj1cHrFImAIAhX)6cCzeVpT5 z=X7k;M#M}t{@%8q{$6dRzh`}jnQHvKZ9D5$Tj~30A7Z8&-*4Maf3CLD_h;&Zf3di%VK#Qskk#P#%Fo+-h&ISxY#tbx)8Iox?uLZ#*wQ{7tVc)+A{k zu0pwZe!ly>H`J^lw3&MW#UA<2rDTb2uVt`=TOKT;Fv5GOiuVBW9{uPs&B- zz&ez_@84xE$0e;HDe@rKH=V~^PuOn6Of~VhQ=-#&AIh))ROWS)q#feArgNF=2-}O8 zsbLexPINkKH)GhzSbZGU7^c5oeGI}kHfp7+nN z9emz5{SUj%^Zpsy&Gm%k5i?bu_oe)4`=9Fw+l!c~MU0sm$9Qe-lplV<8*ZMzx52-X z``f{NKI5~Eh?yE@d^&Raaf~gw`NXkMm}Q>=$GL7igLQ+?_GT<#lg3Ql5!6?W`dI#X zlwS=#Y&<95#7t%VX71n_|8AsLeJS6)2XgaQ^Ig`PiJ6LWeC}ucMjy}Aw1aa@W2Qb3 zV6#31JTvnn@Ua`=V>v&nFh9ui%^zWp0J-&FC1$FroAU|J)~stA<^lcecB~2XACCv+ zpFnx83A8=Q=PB6E`kq34TwA&RaIK_|;M&Kz&b5K{BW9{u8%*5CpuXdQ9h`R>Gu711 zIRe}5cF_*I9o!R`deP_H6SQtu>vnLhVBLtBYSxO@?P}c)t{JQ!F;mT&(Yjr&+rhOX zQ$PGN`&Zk=HNkF+*4w&Gt=qu0gLNWis#!Z)w@J_OTepR43GacJsb(!{-L}?kVSKvG zePDa*w%KjbdbyV9IsbIr%j`9tkoQ#>Gj$Edf9u!sT?c91_^yiN)vQJOwo3wzV{ULA znS=6t267lPRrL3)gWmQY`S`a-?|Sd?UiofR%GXk&zPYHz3rgDB~ 
z_9mh)F;g*TAAsI7m*uQ5HnTcb4&TGwm4FDXGHSa zw1w-9jB8nut~bPpnQCHmvmEZ-rfpHFuZHquZR5JbwjpM!{JRgL-==NlQoc>wxc;zh zh?#0)cE_cBR@<0{d!KTgxCWVVLR(DCRMBtKmXy?e%5maa#Ci}j)x_^^68$!9Vfx9& ziE9%30x?s~n)DFH@A<3pPZ=|n?MBR06UX~>F;mT0!LE+ROl93>4#KB*BxWk>H~LtQ z+KZXW`Of*tXMb}K&M)}()5T0R^#^N`9ecBNtWB;*tQRp;O$_hWu{T@C+T?n~x)C$g z#PDt%d$V=SO|=^_Q_a8g&^rDm&$P6S8Z*_@kG}6{%v9Qsn5ia~_vvD$vTnpoHF3N( zzG&++4D(gnfR1?R$ z^*FX3H?B#Ux?%gN+Nf>Px?8u6>y)-n+w=c2W@;GUeS{Iegz*I#Gqnh~NHOA-_5|*! z81YJb0rygjc%>2Gh+@Pm?G4;pG2)f>0q&z1@k%cRz7#kUBQr10>4LN_pUSr=$6&)n zz!xco4HpAntQa<20(^;L*w7WYt76#D4Y-?P*noxmo$iWZLl58{z#3bC@k(=L!>_h+ z|2_UtE^W|Q6O5T!jJ_$>@wzn6DFH4~jCiG`z@>^2ue2|4U&V-5+7Gy&V#F)$58Pid z;*|~n9-tWUN-qPx3^>CF*kCxR;{_WGCl$j6!%4-k!EjPBY%rWu3>yq56~hL@NyV_i za1uDfhaB3F;X@8>kXRF+V0&~lh?&~rPcYv~9`~;?(uqwpG8B`TscuYSrWVGsVy1o? zat3022kLl#7IH2JzFhIKkTVE)km7X6xdQkK#d-P8VBo=u^Yfi6fv;3tknfZMmnl9c z-x&ft1UM6q2{zcAJ_Z|XP9K8}Hm8rl2Ak8zV1v!+W3a*I^fB0AbNU!;P)=v!G3C&P zOgyF>+8{B$KJjN7&KwP5rnUqzQ;!ERQ=QnHfKzjgbYl`TwJ??yGZpO^iZLFlV~+70 z20TnL#&bCEaK#wUtAMXkjPV=+JVG(Xb0qLc#Td^~z@rpnJVyhM2DUk6^O~Hpc}-5) zye6k?UXxQcugNK!*W{GVYjR3?ZF4GDHrSlXl?}?Nc)%%qq=opEEkVrGc%8yYGEuZW-2%}24g%%#~kB%HSpDnF`i?A$128njsqU27~>fQjw;4@jt3sE z7~?qsc!FY#=QY6B0Nb3hc}-5)ye6k?UXxQcugNK!*W{GVYjVowH94icwmFq68*EPH z$_C|>#F{u7#7u1oVx}GsVx~GVoV$Q)p%~8fA>Ek7Of8IM#Y_dKCSr^y>X>6ZuLZtV zF~)Ne@Fc|;&&j}(6=OW708dej@tg`gRWZhM8t^p57*7w_1GYJ3^O~Hpc}-5)ye6k? 
zUXxQcugNK!*W{GVYjR3?ZF4GDHrSlXl?}?Nl;O(JAZBVy5Ht075Hr<@9SArjF;m@` z#7r%WWyMSdr>0|!r|XzwJg)=3PBF%F2Jj5U7|-j0uUCxmoC!QrF~)Ni@GQj`&)LAU z6=OVa0KNg(=9JBAa?0j4Ic4*joU(aMPT9OBr)*x6Q#P;3Ddn}zsa)A$b1GLhD5oUW z#L*yTYD*9^^>`37)rlPnIQ5Q^ZcJjP7RIt-rh-#*FvfFq%rTyGf#)j5c+LZ!rx@co zA9%iEjOPO21&T4A3xO9Z#&|9QUZfb~c_Z+Rz&59BUXxQcugNK!*W{GVYjVowH92MT znw+wEO-?DVZBFIN2Afm4vf+y!Gc^oOxg+Hqs}eCRd1l~_7Q7LdX9n)of*%3qnSnb_ zFk`0j%)lKlm@!j%X5d~Um@!j%X5d~cm@!j%X5dafac;p_p>YGiEB!v08=+X3SKcW3^m$B1UE| zZP0UzT-u=LSd5ukjJ|QFN?$Q%D);=ZCzvr)x#xGU6U>;Y-1EEF3ueqz?)lwWf*CWF zdw%x@!Hk*8J-<6wFk_~2&+pDZ!3V|)B_~?~PBLaHIoT3$k}*@s$(DeVjG0PKwgjAH z%v5r+CEz4urjnB_0Vf$Vm7Ht|ILVl)P&apNiKGHz+t-Dah z`(a@2_uU%>`@r1qyEh4r0dv3aE)g6D=6>I;5L^k&{l0sv;EllC@4L$cKLX7CzI)q= z_?8ZDsG<9 zj|Ttt&l#R$S)AfN*L_gNoSfo*-+f3hImP|H`>-`! zoZ^1p-6WWt;(p)VlFg|sUSrI&cnzGzYv3$i184CXIE&Z7S-b{TUfZ0?l?^tha%F>Z z>T{iAS)AfN*L_sRoSfo*-+fFlImP|H`*p$O6!-h?Hw2SY-0!;M-oZ^1peJ-0*S-i%WXYm?1i`T$eyavwVHE1j^zbRu*PI15Qz95*K;(p)#mSA#<`+YYdn4IE%-+fUqImP|H`=^4* zDem{(mj#nk-0!>J$>vlRuQBFXyavwVHE`h#cSX!UISP8$#>ESy4Mij zPV^4*{=LS3T)_MHo%M$TrlZ`4yzdniM+!W55Gm#twVm7 z%NF5XWN8P}@jmCiq~xdWLps~V`>-7v+arm7VLjA?{sG>f?c;sfo|KetphW&b+QIg* zy=;%h`lvx)vYsZSAHt7M?}9(`^u$! 
zoA$9h$GY|vi2)Lq@@?9~bhalc`EA;xu|TR{kys$qJ6U^DQvQ_VrLjSh=wA~9O7z>b zr%CE>(;k*La|ivZF;mHfag`SrlKYdN>|SWc#Ut;pf3d8Pyr2L5;U-p}8E^Eb#!Mv_ zdS5Ht??3+!1tw-HUM^zXJo50$Wy1fF_jfML#7sp$Fir#SJ${qid+KxD3wgg;FU$QF z{&=|2=lrihd#TTRT{`#U0@gd^-lK-c*F1Z%DR;w<%8WkSSBv&BCIaWfRrmbC=#Q%u zJ0?Hbxsdn0{>6(6`5rslQ-}6YpS=uLkx{=s{>v(eY_VVsfE z+5al|spCQY)7h`_NytOS9XXx-8mENzn|M*Dv0vkr)S&+vgXDDfYs`{b^gs1aXTSFS z>FhRZ25dfEe`{ZBAJT{Mn?(}?<(F;OoQ7Xw%{eUJppc7m%I(5wBHT3rJ`eDQ*-LTM#nfgm`kZTBZ zu!dB#t&EpVpG~`%4j;2KDfuU2rlMVD&5-blT?X8;q^PjpY)hnbjASo=*3KdJk)eW?Dw4(q=0 z+ktJT>O=MODL(fgUV|Mob;D;?%v7%1$hTvjuKQ;zW-8j(I%X>KGci+X5By&1@-@L6 zXu$p3jhUJ*uEak);k1dH3cETSHx)M0x3`I#3ft(@GVxIZj<8SYOEc-<8|QHY?wPjJ z(FV@zM%It?Q^rljeL563mG^~@Y8|UI6E~G(f&5d&EX~AC<(QnL9)1Hs|3;2E)4!>Mf0{y3|+sqce}E%Lk_M$E4;;(_&q>& z5jR!z4i+Q582MpugddeMjg+Y4P$}|Ckyqr6lDMhwBW|kb9PWp_e#q5-r-WBQHA-lh2#Z7(x z7V(p!e?Z#Yoao~9^HWl`ff98clzwPVT;vVN#NA7xzl^^W-A3sX*x?QIo21+!>7OHs zOT0muxO+9|Clhy1%C)iIk+So7Ensp`n29<9ax*OT1|&mKps(686$BjzHcBGmM^jHl}^h8-u(t(Cg>TG;#Q3jx{H`dV@`@FwVhRuHn#?Io6!$=9T#}r@()2 zXeJI{9c-kY%&jAd9^P{I$;>5t|F=d859Il(!adH&@y2lS*rjwCo& zrTxtb_NT_-V;|Dz+qT=bW}bIpAN6e8ZCf+Xxv-DE*0$ZYHS>JSxdA=fcH7p>b1m(I zo^3n*wry|bnU=hVu5G_F>?PLv1DtUV*Aa4Trc$qiP zk4su@Qsmx=$eW0A1HA-f+Ft3K<^<jv}-^=2S%2J)`-Zt-X0dM2*Ryrmg`-H3jm-fZN} zMtwuP<^CL8&w<`h@AizpPQhOK;d#iLhrD6l9sUAbFTnM1Z)L_`H_$%lEkfQR)OVG) z%J^%UOGgsMw+GL(jj)k^Smsi5g1%kmPIH2DRnIRD!$#`K9BNK*u1folBse$KU$al? 
zhowK76P%mU{^kVdqWWw0A@!s`niHIp(*7d}_NV%5_96YSZM$u&`fK(f^=#X1Th(9F zKKfVNcH36<*R+p%w(Yj9>aUG`(6eo)zqajFe@z?duWkEndyP+leYV}UoyKRtF56z) zKJ#pj{<7_~?K02i(e`)_?Gxjzyw+4nN-=HS)qhMglq5 zIl3UdFrJ3O!mSH8Ev#JlSZ`-neS!00eX+BtzIQ?4{$to1l{@9{eUg{wR8Z#UIZ6CG z3Y(pm5Vz*-`fepZtnB6-uJ5+>rcF0(y{W4Amc>=wy6&o6T)en)vGe!!dCrGFUc7bj zX8ctxNiV33<6aB8x%(G%TTs0q?e53E)=4i|P)+%@_?P#e1G^Vg?y4-_TD+-v-s~O4 zTT7~j;2!aL?^edkeo(m#XNCIjIJEa~ICKxcyrX-#ct>%0dAy{eVq@C*c0KRWr}X6= zeM(QHhts7yXj5;b_qNg_$L@%I+~{tAD;s-K#kiZYHfEj(9-t^>j*z|IJ7&ero zON`Ep^bC_Xqh!YRXF503Pf1Tf`ncneC>zZaoTg-5h5CEU!pigazf}MpI@|WYJ|kYw zX9{wv6x^|Yqv5%MeU@zgjTQD?a5k)EJ5&Z|3_MTJGWzeZ*!$S_c%!yEh?B9?agvnS zJ7!|WL#GWS;10*SUS4#3TF+G|D*HS`%FXK5l zl2%R0IwFFTl)$^9f-^Br$_3X_vJU#NxTKx69i~s<2N(-VV=UV-mF?Kcj32A9lI@tt zb}VGZw$j+Pc05}{LeTN-WmVK@VwPV1N zvl{!=j{nGdP8RQx^_(=mBhpV6&yo5kiMEPVU3|_$IN8>IE{^I$HKH@UfQuP z?O1o@tsS?ob&R{#G3}^t#~VCd>^jb`qo;@`$hq;y)+WxIKejgMoU-EvwvH3nIv!x_ z_=mPEVX}f`WVw!;Cl8{crQbj5F2!H~Hby#Y$^0 zHkut?HuG7WeY8`j0b<8c!#s5z2fc&+vQ} zM;d=Kq{Kce^FCKn@Bzl({KWi5`&*-*68+5kT1|o*DN+8Ao<;M0KXWE5`E`!uXWoB{ z2tF_iV`|R((LU-&ML+X?Te;vyN>e}dwVi3`$0h%i_Rk7_6R$;ksh886Sq_)d|rJ<#PlK&Ee@TZ8jc>LddlzR$(_ z_?{MZ`2N+aIKMnyJ)hzX@pL*o!#kZ0xx_Pz0-RasnS!1}Oq{Y|L)JS$cvr)m89$km&c!J32|hyOd{qABg%X z(a*e}(*{5|D@Gw;q&U)yPT zCi&HI(QjkF@{H{@JQMv>+OIt0`#-!7-{)a}@%@}n*l&2>MtR2fBg|QApv(6@Oy5FR zd6o)v`2Gs(YYcP_Lq~bmUOmdQ_H>kI?dfFstoPxw@H?~dS&+tO32ul87N7M#xFqSB zcOuFKAEwOstoH*hAjOOjs~m;5&NE0=1~Uc)8PKc)T3CBE;# zvFCdWhd>wLgBIA20w!IVlU*5^L)QIu1l{Exlu3f^IrY%Z$OOq zHu`;#-tUHxJHe|m^$)(|W_UsGp-@Yie zl1pC{Ta~lL==Xs)hTKWscEhPb;M8RAalZuVgO-HcDc%!?Q-i^&soqn5Dbfeu8gi$3 z&lpaXfm5Eh!|#jqvfD!Lbg$afGZgh)=RNQDL;BD=L+%W3r>SQ+>bc(g6Td&whp!H~ zGrgp#X9Vh*<-Oz&K>CQaA$PX7%jl1S{te!@{mYO(>i&?+f1jSuzMNm27n~#P^HT7O zbA)qE=Lq@L5B%aB;at->LVgVZzc@!Y?{to^o`I-`bArr( zh39*(`j;bpY%JvR-;*DU^a+oK!uLyd^i-Q?rhXeKRpL?f%@|T{RM&kBIsX%`X_7&xq~4mJ_a8QIq4gr?0<_V zsF$|tzr9254F&f&$2f;|jtxcn@OxyAaSrPo8;ar+QfPD$2Lwnr+8*whH;(pbjTe7IqkWCv*7kn;OQ^m`2cc8BIFK-ocX7C zE`Xf6O 
zx!}W;z;b3jGB|%inmK=h#GYzjQgDhA_{90s$lxqf@*7f;f3Qh#BPH}toIi~W&YvW| z&XN3m5y2@+)~{!oY@a!QlKh6KEa2M2gYQG<4x^C!u#i%WhR`!nZHXsj*)CM|r(9yG% z_Uh5ImG*S>Y^6P&4xgD%1<$4gX9keq%dUvv8cJX}GoK2cNqQ|M()X4NuA>B&GxMnd z&yZ%$5Fjy*JClNwl)xv>%%=uCll+>LCbA}h_HUzqj&{dwL0-c6Hr!mku3?1cJT|4zC&)U;bp0%fw<+B!o zODhAP1qn_ZhzM?=1QwsQ5L}YFvW3?|SI7JD3!e=cE zxFq=vDak+BB)E|h`X_wW!hlPXU*|~vzKGxyCF@r%CDA^^CCP7yO8&ue!Htxre)U;s zr{R+1*Tp5jjs41{TC~@2N%T)?zjCP>+$NWj;4HaR11_2~bId8_QZ?oaxs(iaYXaR` z=qi_zflf`JQyb{iK}WgNUOmdC_H>j>?djNDO0AH#HLM8EuUAOh4zIAz%vS{SNA2lo zAGN3R|K~Gve)GU@6^i+d!X^B-tRTDb--G(L`JSN^-!oi@ZwpTMJ%q`R2H!`_3}ksA z?{h1YYF9Ck@6MNI_#1j4?)01VmJ>f`S(i%E0*j zv**B4tfKKrJ;~{J&VT;@%*;2Jo!y@AoR!zsLT6A5J%PtloF>GjIs<8U^#t&3DD!PZ z8`kn1zA%ozab-gzM(s(U0s}3Q!{+yaKlIg%0-mbet77*?NqDL zYO2}paWs}2&a57+B7^0c?l{XLCBv0S6D78-@N_PGwUA4Lq{kKy?0+ir2|gojEo zc5&T-K7jqE)z=p{7fWqwe}<_g^rzc<8{2DXo%=Ywj(*-ieiM3%4HN1qF4-|a$VOeT(u4Uz49pioI_ed{O1?e~;amDtY}u%{&R02xb}!nWdUd?vP5=~TsI zS@7G!7GcQW7!3YCt#1_uuXCw?C=C8St$!>GIVR8GF?sqI!r(D^`j^7sF=+bN!jNO~ z3?74~9~K6$`>1~}j8lw13PX;`Gi5%e|ioA(5L1Kjt4pl^W( zJ`%JKc<2*B`+?tmF6aR8=q^DAfj@jD=n(Muw}K7>e>y1WJK!%z1RVjMJSONU@YK(O zz6Xx|D(DBGjXCCy0na%+XJ%=xkLSUicfrZWy!W0$9(#;qq%ser?IGE%~*#3zuJ8 z@@q?eZON~_U%33*l3yGA?!a;ESTE=c;Lg_s?F8=LAZQnG�i?K<4*LAoIHi$ozf< zWPX`{=9l?rewlyfm-%OYnSbV&`DcEaf999@XMR_@|D&qfe!2XuTX})M7uWsI-x0jW z@?0~Q%iozJ7x;6q2K#^g`a6zyA)-fit=>!IW8BA|>%c=q4=^4jI&r+@vtmcdXT|Q4 z&kDq_&kDqho%nMf2Wu!3`~N{d9oV1K&w7@wnexPFu!H@ZcC5m?ibi{00~I}5=UMfj!us+GV}db);~z|`mzhH ze4DRp{!HJzc{6?Ch(G8L`ff@W3Il^}fk1CxAwAUNPvkOzyBEyJ1p50IWo{kF=N_~Y z1wR^nk=8^@PogOrF{4e9)@Caj3Z;@}bD}2*44IK=bE;)}Q%m@kJG<_gF=y7ywq&R| z5jD-A6|u~4Ya(ei1;eRmODf!KMOu5N2O~{ZDrJV7Z)S=nPVRZ~R1ujjvF9DRHdRa_ zEYfmOYDMj8s-&Vk-!awd&*lmi`IAesgG(~x@68VR)8=5B;z+uM9A{g5#m_5KW?4?o(DSrPQ zQM0dL<(ndZ;WWZ#W-c&%g8c!Ijc6%B^K- zTs4X&j^$%;le)jUN{$24te&5WKd-cR0NJME2W0$!jDLEvbd)e|^O#q=wEq`-5vlm` z7wY)<`(%7@TnFpr_9x95$@1l!tb)jpi!d)iK?Gm}o|!F0xsV&MM2-@VUD_6;C{wpaZR zpHEolyU%{Cz1MfId-lF(uN#{%ZK4|r6)VU53p>AcLcujfad2JV*`!X%VA@cpJ^t_D 
zeA>yxeU_DcZRL@mGLh|(m;ab7ZR&(J{~fu=B#1;~`X9?d1;@EqL&u4v%J_w*L6LSj z>KeZyxL=ei*9FzYF9mRu9vAnX1cm7B`je>Ly+vQ-<(OKx4cX6e^VmXgIAmMtrJ_<}I&SyJ20)`ts$g%hyzvEL*v; zd}#?kWq!s-g#V{MzVNx#lA&zX%H?a?fBTel7I~riaNg>&jU~%gl~tD9v9{vYpl;lH zQQ^aDP1)-5*NRFOP9I!`&$^1jf@P~$l$9)AHn6BzA*qC|R<0!(aw{8`i8`wzgum?nOy!H$vaa+pxZTeaX^0(U+A=O4e7FExGjvaPuwAqoaSD zcSqUn<;yD8Vmvu4*-z%R4#v8QwfxLqTT!xl?b7nl*Mg^nN{n%N<#&OWFIjU91pn~( zaLqN#mu(zaFgRZxCvGpVSif@Znmr+THo-GT+kbox>KMjTi9FW@8HPe$iYKvn@LX6N zq%*(#bzxcuJj%F^V%iIlpWe~eoAbvouKCA*Y?}8{{+-`oJLgvnHcDw+Kg~MTL0UX` z?o7!NfMb_9&KT$7BS$WEoc5~w>9U-2+6Be&`cN{oE167&;@Qsspn|uZD(8RjNcgss zLuO<=ihARX9g~roINI-+?4jlP=+V*AqvJX2F;eT% zJ=smS-#yt~OUL<1uhU`yT@X0J7uqwg8h_bu92CTDa7qEjcniE$P2KJ zV!C^A$hio0@;dx!lXhVTBQNS0a%_E~RR`kHo?9`LapM=SQgH8T|dYA?f@CW8uWJt9tM1o$OuXnV!Rwl9P6!?8&l8lXl0QXHnmC`1>jTp2yz{_S?u=y^OfITdA+ zN#NaR)|ncgy1QqfdvRzZJS+FhRk^jf_vXfPcQQZcznAoMx!n7s)zPU_cSCNC zdfslP2lsV-(~{G4zG=17pwnXw5$KSU%+YzKC#UOs({rZp-rMonhRMmvNFQ?&5@nr` z!AVNuOmb?*$dMg-%o*(`-I1|mGM0?H`F^qHc(xxeCfBq5BXJ|eX5BMh@Gs!p0GB$( z!KGK7<61Ta^6fyj4;_~>5vS`>r$-1MMHd~7Btsl-`03#V-GICGMEZ!KQ{Rt`B2LcH z$Q168Y-{Yq>neC5=&7$!uY)~@A3$H?PPixS-&~d1nBhFy`1JI6Lo&G+b86r(aMKw- zdGqG3-5#sy+6`RG%#7!>YuCcT8^E<2$h9E7Yj@=94&1(dJm-Q7PR$pjb6mS;{LJr> z@vPr{&PeTlzc^}Shwh_BT|>RU815gV&XoNyj?|63=Lc#sp0iKO+~__>fs>sN-0NbD zoUtJ%+4=Cj{JD^AL&|+%`~K6wGL~`YC2;=l8fv@EuBq)lrzY8LZVmNH zcAuvu=GU%dynG+$TC#j_oW?eDF2~F{%m=5F$nB*9J4^pQUCyRBS z;@V<8Z@Qr#nK$E-K5=P3N3%RdvoiRMmOMOk4?36Yy3BkW*7TlAWna}5|{XyHc z(DELP1NCyl$fH#L{*0mCe$>C8^<%Ddmg@`680-)CiFFS5(*Ag_+=CvM;<|I76LsXh zy#eOEOM~moK;OO34~<>=e1T7$_deeT*S)lFa?t%*EH>%jMqz6^>-cjnB@i6^5`?VmDp zhR53_M{J=%qW6!ko`XDv%rmPsPy3vS@e7iZf;>~J=>z+~KZ{=;I&$;o*~7oPeMEf3 zmeI+EDpciQUhgdDXLXdH)lW9G_2utbW^!toK?Q@VX3m{iH8VbQ=lkhH$a%i8r&HVb zeswxyz7ME}f*!5?ZhiwxyY|_RmKO^L`Ml*I4KCKd@qoS=?}9A_W%K<;NA$Xe!sJn+OEqu+bwNj`D*JY$l0E*X9#79aj2 z{5^}m7x4EB{QU}luj22`SZoaQ)Z_XP{tn~sDg33DX^fpcPqOjsd0NNl$BhwZ@e8$= zx?P-(dYtOBjXYFrX;w$AoD}kGe6{^I*eT>)*W)Y zJW+S{{EuTCsmng(Pdq0XlUpr{>)FRK!ubFh} 
zKWk>t7Os!a3ewT;yT@B~nES5iKQl6KZfiX!=e4FMPqwDhFZ6!DeqmMW96zfs;Y!c) zBj@T9T+GF6_yo!`&i6Jtoi^VG)T7lOm^tn^KW^*~8`hJ5(O|wBd7#+Hx>6$#l7EG? zx9|_?;Mu-xBlj~ua4qvg9w7h7*LvaKS@M&A)TK`Ncb5F*AM+m$eN*#~y48nX()?q) zhrvbiPxlXb$Uml2fAUYa74T1|2mBKq$Ume19?d_ghy0Ut@=ww^mJW0pi8l0nE(+$e zk%x+nJX~s|p3mcvw`D#f9phG)ZRA1b2d-y+NIjp~$Cmkgmi!z~>e4cw&yt_x$^3Pp zKk4zLZndF~-8i4M-7&TtPu)M{;dnA#=Cf`q7*CxZjHl?p@ih9sq{mb0;dn|q$5YZ} zJg1=z@{|0VP5p3P2meR?$gRiW+n9&EQSe*5J{&6OMxVs|^`V)%56piU{b3(;oyfyJ zFkLvI>k0ax(}O-p{p^Ejw_`W@B&mmekaYGz(#5ZE?Ko%RkGD^XPuemy`BVLjg7yB+ z!F)#PLtpgFDbFbHS!PCRnZkm?s#)`9Rn3ae+WCH-q2%*t;Ud*|{RMoMGX9`s;eo1* z;8{EAw&ycnastNv1k&EY^LNsHo6oPwofA$?`}xjO)104ku8jRvMu_YA(9y^Rd(c;4 zJkNFm?j|@RgnB@C6|}pM;KP3%)4i^aSoH_){UL7jQ4Zc&5Gz_$pxP z&o)m$hfYZA^v;7?hZE4@a^TAaLx(GXuMi9!t^~ePFm%WQ&JqkAIs~@vcM%L7 zx&n6v=6I!B2ihmyI<)YiX9zY39gT{;uEI(VI7e^}R(!y@f_sOY-oU*D_YGm%gpT$R ze09j_3*1-mH6f=Ta6iEVLeAB|R|B{30Xk?-ioKwN=A>ZgpgAcRI%rM`h7OvOf}w-v zq+sZvIVl)AXifsR@F9&lwD2K~I$Q$(aKwEZdy2>Xu~>s7-DZEBmM8sdwRB?RwRB?> zv{bQ)T4u&3K{}TN{^cF)V8Ooq#oq7YTMFQ71fOu7Jm5URN!J+wJV03IbmY)+qm4mPJxKnI)CC!mAP=@Zbw=JW~ZU~~Ee zbg(&n0y+q%w_u+m-8%emOZs&P|8QF)KBIhmJc3Kd{TsAw_GfB&(x0WJ6PvB28=Ir0 zip|wBGd2&>3Ab{p5H>Cpo5P-ifd>nQJ&S;g1jC*~fQJZ%J%<7h6%2b010E(A_8bm8 zTrli80(bH?~zv72Bp|W^6m8&8d;F@kp^b>^TZ}lwjC%H1KG_ zu;&=yF@j;wV&G!Iu;*Cdv4UaGalqpQ!=BdyUkhw=%H}mWW%HVxvUyET*}NvFY+jR7 zHm}Jko7d!&@Y?27x^%EPl`b8GQ`o}Qe(C`&oBcgnp7bBo(uwWW(v4yLhkPnlqh)67 zVMv=(<6-0RVsqGY0`LUEu;)bJiGpFzNx+i?!=96YCkuu>rvOh840}!mo+=pj^ng8J zn^QKg$tj!H=`Xp?8jPW#(n~6b7~rFJWXs4dtL{8onY8=I`DMCu;=x_*9(R{ zX8_L-413-He1l-vb0+Xi!La8n;90;nr)*x6Q#P;3DVx{il+9~$%H}mWW%HVxvUyET z39oHVrAr5!Q|Zz{IQ0w7k>ma^wQTlZ((ZFzh)Oc&=dBa~|+K!La9i;Q4}K&jr8>1jC*SffovfJ#PfQ5!mLG&1-VX z<~2EG^O~Hpc}-5)ye6k?UXxQcugNLlwauw?>0omzT{W{!AFH{q$x{W)eXQoI z4c-FGeXQnd488}L`&i8b46X*|K34NUgSP^6AFFwg!P|hjkJUWb;O)TN$7&w3r)A$F zojSPCZoV6OMoO$Nt+x!zYL2FHQ9-dAM?R{?Xqua+3R1(@r7Rc`P-z+CUE<$Ij*!9LdJ zcf%>1z8l!)v=3}^ItFZWIu2}ex(e9l^cG;7)As-ir+17udoStK;RSpya4#*LI$Zq1 
zhp>-zanKLgnCo1%(%77w;(A}*YA`v)^}bqdFgeBbzFKQAImPw9y3Js6itBx~-e7Wy z>wUGsU~-DMn!HDX#a`-3F6WT<@!x!Q>Rz`>M)da*FGHb&tX1 z6xaJ|tHIwWcEgUKnb_tgUilT%#ps|O7xr?}o% z4;f5OalNk|PUTbzuVM2PUIVA_8aRd5z$v^2PT@6h3a^2M*EXlprGw3>bm<_RIuiJ) z^V-LRz`|3*ulT%#ptG_XroZ@<4 zJ!UXD#r3}WN-C#PcnzDU@ESOU*T5;f22SBMa0;)1Q+N$5ytX-&E*)%6rAr6l)Cbzf zvN$!HHh$dLoSd3N{8fX=DehybuNh2EaUVrQ|Z#d=2W_LkawnRx!LLW za%99NbvIDTC#E-|*)QmLE`dJcH$j+}1IZ<;PTAk;(Ft9_W(E^4EWh<;QL{<)=Js%HQyqDZk)5rudwAWclMRKv#+d+eP#J82fmcS@;%1BoWb(FZ+Knz zeL$JfeaLM__c2vQ_el>J-LL=B6`3sGd9W$pbCN0FcaAAPaJeZzY=bF3cDpG*?oi2fx8jJc+va9iWU0|s2PxdAww7dyEb7}CMJsByQ@e**J1 z?neDNusdZE9H->_%8BsE82g_(#p55@IRT<3bz{NcDBj{Lp7 z>HbJuk3@POZ-&ot>;&BD#iqm+kXJxvL1saAf$Z|)v4rVgQ=*gC$2Wa!N?h*s^G!cd z&g<`+z8*_l>E-#Rzo^H{5Bl4b=rcLYob{JEFdAGL4Nmp-X8OgrF2;2~Z?-=U*W$$id?A_we$Mt+%7kR7vg}7db{6oAo zn)_zHG$lB{%zSA|aDJKjaxB65W#&s$g7eGtvnj#(W%}8a;QTWEJeJ`6GW~2yaDJJ7 zHYGSNrk_m-j*IE%u>{A(^s_0!aWVaDN^o3EKbsO97t_yU362Xn|8dltT>m)gE$1fQ z^PFb%9M5nian|Ako~hJx5B@v#Y-MnkYI1OX%E$x7M%I-YY0f+18t-!L%QkX9^8nW} z52QKoc$Pf0sdJV*PUyAoEWB}H)q?q|@vG4-eM5c;?j z={(!x@T}oNbCwC;PQDNs-=n!cBt6M!x3loE0Gbd8s=5_btuoXH$1E4;CxQ0 z$@5e<ccZj3cAg~d7k6A-{~4RLZ75C4|yMF z4!h04`5x+pajWZudOKb1D*7twg|vgz3$p%l+`D3ctLVEjP@et5ImK>saIT4ZA-K0roXPBtamc>b zRU^HPeh*ytn&hfc-X=W%wc5+U*{Dp6WAr*VGTPhhXCq(KbJZBn_vg0oVP-c6=can% zUheg-D)!=tJ<<1tJ{fLgthdFV7u1LIqTL*vsp^G%eP+38oLB9WC;cvPBiDM{{F8Ql zc*ea7`TEUs)p&1*{*1X2xXTb%@w14}x-#x_(65uIhaBc-6ZxNoyyPuEpE~23{3O3Q zw%KomBjj@zq_h9zEytcw2&M?XbgYL1$; z{nQKhYH9Oa*qofDuVBA$rquWflji_^m~hwj734)9@Pa(1uV9~W2G#hAplw?}jtSaq zY1`I|zCw>Rbl1KjXxr9raGYn7pJ}l5p`T#8w1xH)=h!a&gdS7z6Q^lc&Klb7(qHIy zVMCrlq|e}d&BuJ5;_dO}Okys^E`N!urh0p|?-2Vj-h=p$0&vFjYJ7FNNUCb>1U>^!};UeNd+GR##2;_G`Z)<4M0l-HM>w_1+i# zzQ|X!)>SjSTJ2Y)Zmyll;i1sy2Jcb7AMy=d@2Z*J0Y3@YMVZ2 zGx)8aSA6qKLR$`pEoXUO_Ongep<*KsUyU&veut}Odv!jaX{u3Ho@q)=o~LNzk+AU` z?+L#@@{hdBRdcUkLao$?{C+dSb4$(IXZwt{@(QSsO^heYmXR^CLfoGR<)#o7Wru{Vh z8J}OIJ@Q3#vfHK)(qj$%l6YP@*Y;+@R%}n6SQIPJmf3B$5jixL)yQ{_|vx(Bj3aa+{lgI 
z|I^PBQ^6V9ej04g=M!V=m-O=q+n#d7sZGGA8sRtaB!Gr0s^R?2SCqp+g-)IAbzHa|KYQW)K9-sg#5#ipZ121XM@!%M_GjS!Jb08RwqAcNedu>SO8U?j?FPTi^U$$3_T%{>A7eLu7uF$=6F=)J$2s;!XRPb8A#uNs zJaGelWWuiWNqgx7f@de@L;i#09qrmB+}`gRzR)i~eUl$_71vet0en`XPbx-!oDtf` z&pP@oewJ~bF#S?ITa=pn`nl$KVC$2xF?|zt=d+XeCfby~repZ+7=07(BaT9MuCrJV z>N{{*_&DOwnK?$^N)D4x$H5=^C-w#L7L0#7gKkcsTlN`rqn~0Q5v!p){S@=DZCgKE z5Be!>2k1&aReXkanf_j5LVv|}<=Lt9EbY=?=`jra)gX+=)Q4R}B!!JJ_X~lieI9EN z$mt1=Q&4~4w~CNH<8QF;fSmc2pd5V7P~h2LbrsiEVk7#mVMw3*w^)Nf&i|IHMnEq7 zj;q2MDwF=p@SdIP%YwFNP!ZV=`v+V_6ybnN7U+ z%W&jhS>|Xs@^8ei(Vtj#!-xF`)Bipkq3`17CHYSOOup0qlBe`zQa}9|{T;_fa|OPL zzKnB)dwAWclMQav9ByY?;9^+-m|YSE0$mW%hwglul)z3`yKyfbdQDJ)qUT7>1Dd_pY3^N z2Fow_wkcos6H|WWQB(f5e=+6ne9M%tYG=ydd$}q9KyOq2#%EsA?cVxNuj{eCE8~B4 zJG*i&%P^nopX@%GXN6C8A7wuB9Sd@j?^sOx{O10D&6qxm@LtlQ3(R{;jOlX|@J)gd zPv>Ufn*}4DP6=>{V8qiY1uhkgcsgakWr7h;XEE?%!HB1`1b7K>OAMD#$l<$Kvn9p~ zbl|&Ka|A;NzKbT93Wg4vlY*gx=A>ZgpgAcR zI%rM;x9}m2I<)X1jXIdKP#Y1`XHyW6h*msrK`!3g64ZK?L-LA6+c#UAZ#j+N7t>7xxSqHpM@I9_`8}My{x4KRRaE0LQ zuCpF^J@8mO2k3Y@(81>P3Fu&R`UG^aIeh{;*qlBA9c)gYfDSgNPe2Eo(9n2Z@jX_MGO+ieb%|T2bKZxlQ_f1TnE%^S2u`;T$PfWQDd%yT6F@4aF zO4ztkYz}*F0Nx-N_PibVcEPac9l&=8hCMd|ZxjrB-U)oCVAyjL@Fu~q=Uu>e0o$Ci zc}-5)ye6k?UXxQcugNK!*W{GVYjVowH8~}`wmFq99c)geO9$apTytbo5YuO~zeQu; zH!*$UeznG1d=t~B+BY$Mw)rVBeZZ;Bu<>TGIqZ2i@ZEx8PaoJ94130aV}fDNIB;Ar z>{$g|B^dVH0=z{q?0FCHJ-{}nY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3UuWe4H zO9z`%>C!1~ZBC_22b)vr(m^rOIqdlv;Liw#J?{g)PcZEHS>VqKhCS~GzF#ox`2g?( zf?>}+z)2yAo8<~2EG^O~Hpc}-5)ye6k?UXxQcugNK!*W{G&+U8Wcbg(&< zE**qZ<}B2vAg0geAf}HW#Po^#CZ^98|7hT+OiZ6`eo9OqaB44XyjN@vdp-pGkYL!e z2DnBr?D;V8!-8SYeZcz!!=9f5{+wXg^AX@j1jC-62mU;;%_*DLBD`j=HN^r zWBPC(s~KksW#1y5I>^37I(3kJEXMR%im_488Cx)>57+$ad4n0#hiiWIqQQ*m!!^J9 zxxtL-!!^J9rNNBp!!^J9JA)b1hiiUy)L_Q+;hJB)(#i+MvLPp%15Prg4>{Q!aFQ{7 z$jRn_lZ@#@PBsUeWK17&vN_-+WBQPj%>gGF(}$dF4min}KICLGI4OKcrw+o0bn5V7 z?PHC^`Q23*TlJc;_b6bl_thH)j|S#?U;VwoV}QBdSHCg17?|sQ^$!M*1?GBR{iDI- zfVtjR|77sBz+CUEe`!50JQC-3ZBCB@wmCf-*yi*YV4Ks$z&5AH0^6J(2W)ftT43R{ 
zoEJ`~4su>NojRB^=pS$&D>#ErPH~;9{$OlQPI0}j{%9~c#r3}WcZ10(uJ_e{8ca@c zy|4buU~-D41>ujuJ={D2OWt$#`&4TYv2@K1E=sBIEB~1DZBJu0r?}2l7Z{t9Q(W(>iwq{GxZYQvGMJp=dS6{) zFgeBbzPi+4a*FGHb(z8B6xaLea)Ze!uJ_fIshmpTHEf>3Yv2@K1E=sBIEB~1DZBwf>wVSLU~-DwR^V!Q>Rz`zps^a*FGH)jO3_DZGZwQ+N%W!fW6ZUIVA_8aRd5 zz$v^27GB$&N|z2cr_!Z^aOwl?V_BTqOB?q!HYcaJkEO0Qn4IE1mb%7Za*F#{YJkDy z6!)>zK!eFC?qjJz29s0V$5Mk0Ca1WMrG}((Duvguc?z$AQ+N%W!fW6ZUIVA_8aRd5 zz`|>rQ|Z#d=2W_Lkn_DFe~b5lYlm&=VV_5C;yc2_9_`_D((iQF^SwU4gGG#Yw-05T z^utcb@kcp}ULLvN5MnEBYkU>u_6=Ly!?|HwBl95dQxbQCc^P|Tg?(4gy$DB-)5AQf19KUN-|58(?o)T$?*oKs6IrgKr z@1s4QKso0O^)sFIWe0V!-g$UeRrjUu;vP@n-C^EqTfh8vCG+aOK))ogw{d>R6&$jB&mj3PE;P*87p8R0gp%8ZXI{GcT&-psf7Mu7j z92>@PDZJIhaB0bJ>HowT^7CCo$^TBE^FHeQ>uym@V%hW{Zps+iyUA#&F@a#BoU? zj*IAf5_L;VmoCm_FDG7XA7xyZj<|mK#irhj?IPEF|61ziyVjD)Ed9p=>33SBE1ypiy;>yw#QP)rNE24!#?v$ zd(68G_m<(_P2P9?6}Vo3>zlo&^chOgoxHpS_ijO5C0>KS3fHTUuhcuD^R?86d~0xT z4ephBKhS5Qc&~N=_&n11<8`>d4)+&(Kk}KTeX_ZCxB~Yo(8dz)$G-OC3m)x}ZSp)- ziF=hOx72&qza7`N!(7(<58AbLvvm;u*FsOt|1-3Ew(*epP>Xi;d^k(HG9T*byTJR6 z8262^(K7D^e-o}Z!LG}_pZQ-8xXS)G;Q40U+l+iGykF?p0EV;25Dr(IBj?g+v1#|oDF22mb#F~@cCGR=M4>?n-ZKK^vx}G zweYzq!TDjh+mz7r!>$kc40n$uI6veppy6y&g7bv$_1pZg`+25*+5I?^zGl9is-LZo ztp`63KMH!ZeAcZ5U(9dL*ZSas4=$|qUP62deSY`=e{=o|n9p(Ci{suc-mkQ8IK1-F z9{e48+&j1h_qKpvw|X!8)wr%kzE$3^gT_=Gz64DAz4T?Y)On-cU@rcX@?`YQ1W_3&%jC!C``;tT5F z>*x#Ef9QU|*2!*Ld_g_*qc1qu{)-Q&M|;`_oTWYS0f*`D;RCk87TaKp)!ys=4qWfR z^&0QZ79YTV&CYX0f3Hm$w0*UowxY?ATuQOxQ zlpxp5*fb@`Z8H|f66CTOi>3s*Z2I1m;8>czHzhchrmx2m96S0Ln{T$BwoZR(J?LXT z3VO`$_$B?D?-4$O@EmBHxF}JiOS}Q6qls6*^P!8r??yIy$NX%QcBt6M!;5hCanbi& zb*J|y9e*aY-aJ|&zdFm$Qxw+9*o4o&NiHpK>=cUiMk-NM<>$oV?r)>$I)>6|+P2$cOcdQ-aNhT3j6>-UUA5VJ+b_ZO z;-9$cZtu8`Qy}(|7${3$bR)jk?3W_n(&zERkKRcg_r&mFW`t*a%W!Ym&s`Pw9K6xg z_k|URKTzel_zm0EG2+?Y#mKkf?_9OTQ!(=77Mwr7$IFcITyIN#IM=%b`EGf|Rn=a5 z{4Vac#(BUzUrgUcA0*>W|HBvq01{m*>lC2#4!7T}uvB)@6bQp6e{pBEyX{U>iZ zPK$9KoqW9!=^P7=3;m1D7h4Cr%@3g8J}Vm z&XRBSE{tWHv;)OP)-A=jt$M>%+q{coj0?ay*%Cv^k_r1&cO725h1aFPCsK2!V?ZMX^? 
zclt-+IqO-6dN_vkhcb5bH#X<&_W#mt)AyvSZTg{+;J1EeDKgI`{A}0+TR!OJ#-C9+V6*WkE6+8hCXbGzv>7&j$m+rJz0XMRc;;rXSK{4gGW+rE_eLaY@*o zJhw>y&eR@#j~*|6mLQJOX6WUI!jXr)yx3}t2@YneeO^A|cn9?c&nG_eZMh&E`J7h} zTZ4S@a9BO!6>9$?t~`F=KF_r7`y6= z!s<52ZI^_VgLM5Jgt?y@M+tVNPnu015IjF+oBRjKJK9y^C{>`o9hZf593}dImN-ht z&+j7mSx3Lc&oa)FVx;r?f>LuoHI5Q&Oy5M^?Kn!bDSeH^QR02XQ8F>!Ono|z5vHUjUxI;{A7BsQY|x)bR&1Hx((Ns%kCytVg+53s3)KYhwI7Zc#0zvI|wuD}=3mvOFed^lG) zKIAyZgX75Y_%P!rwdLxW^lq!?UrR^&wvW1w^mm)_UGx9_PSF4TPEaLy@KOFPB=fBB z$^N}3nU8IMBj~RgM=64L!6G=*$e1IHqtp$!n_$E+>JHppFk%?ti-4m&1S5t~HgL9J z#4zd!+*2@O81(|~B^WV`t^&RaxFv2*D5QT=zLUhYfex1=&*g%l!xg|+2!;+<0$(W@ zI%EN735E`xfjbL^4qbq|2!;+_fx80Bc_YR!>V&j(=pg5f(y4=-KVuxFD8?Zw_PQ#h ze}6kiFk%?x;(o4R#4zd&+*>eW81(_}BN#D^`U3YAj2K4!fcptX45O=ouLf@619Z@w z6njAj%}K$~L32_tbkLj>3>`El1w#kTNx{%Tb5byL(3}Kr;X@jAXyHQ|b@;G;Q$FJz z^PBSh#oq6l-;}>b@Coyq@_B-j<~QXB2o9Ow-p&^sHov_+P;iF%?d<}=?aXg)4+3sE zqYWKwPM?4dIpCad`UG^aIeh{;*qlBA9c)gYfDSgNPe2Eo(3Whz$0gn?5dtM8C zEwIfgo7d!&&1-VX<~2EG^O~Hpc}-5)ye6k?UXxS8YnxN)(!u6bx^xgune%7IgE&gf zK^&!%K^!F~hW$GDB{zn>e(k4B9Hq=yN*pElsqwJ!c(FO`IRSWrVAyja@I=9|=Oo}s zf??0ez>@{To>PFQ2!=hU0#6kTdwRegu+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v( zlT*TLn^Wo1!RA!DbP!G*3H+2ff7TqtQ92pKQF3A?j*=TQag4Qi z)5PYm=XJo>35GqV15XzWdtMKGy9Ko>XT;REaVb6KM^8~}5^MU6JhCLSm zFAxlSE(BgE81}po_(ouxQ#P;3DVx{il+9~$%H}mWW%HVxvUyET*}NvFgx5Bw(xrpV zsdVY^agU=E0jJb3vyWAUxHjA~P$LZ90?a)FHOk<7fVpR&#u&^vO58J0V-02;CGHuh zYYk=`CGHuh2?jHc688+$q}F{4#t-U*wC27h5926tAFKIlgBeGO`&iA_7|b|I+{bDj zU@+q-aUZLBpuvoz#C@#hK?XC968Ev12OG>dO5De49?}{&C!IRTzC}89kbNx1QHo-0 z)D$yTjHARgzw!)b93`&#)pZ6ljuO}W>Ux72M~Q2Gb%Vi-qr^48nq@HKC~?iN<`~R4 zN?h}+d98e4{2+3&Ip8GYD3O!R0Vf$piJWW>ILSCly#?6j^gY1BX*r{vP95Zob~<%1=l(z7K2~tC!jRa!O&@xV{|CLXvGtJdjm z%)|p%F%u6wGp6H#Tb$xLSKVoBPEK*XukJFKoZ@<4-EA;A#r3|58B9)by|1baCa1XG zSN9l9PI0}jwi--MalNm$r*bNV*RXjCuYpr|4V=Pj;1pg1r|=p$h1bBsYnxN)(!u6b zx^xgueV~0Ti&I?Zs(X#i$tkY))h>g{DX#a`XACB%xZYQvHJF^@dS5+YFgeBbzIxDL za*FGH^^n2j6xaLe;Z#ng@ESHx;WcmyuYpr|4V=Pj;1pg1r|=qBcx`hkT{_sDN|z48 
zsUv}(IucUG+h1alo3a^1vcnzGwYv2@K1E=sBIEB~1!fTsT>C(aGRJwE! zPJN(#EQ?dKY2(L@&B>`b#9uX-oZ>!~`kKMy6!)>z-x^F#aUV;4!(eiX`&jB*29s0V z$5P)mn4IE1mikUAr&4$go2T#^IEB~1DZBKOl-H6|_*^Z<1B+gAU zj*`iHN*txl?^}%1EK4X8HcI>z^a>l+6ZRt2lQJ#-`vVn1wHoy**b{r+tdAc}Cm47sG zlv?uRyq_7zVnKsgu3~z^(k?b zo;#J(ho1wdH=ZVr64LY;26Hbpj?!}hpG}^#$5BGNEu2Of*g7?i5;(8pDB&JBn;J*S z&T{Wjq(@t^F4s}Hz zQ{yP19nI&nv?F{zjCN!`*?iX=pX@j-v6N8nSz{?t=l2;)DT3Gz5yXAy4%r>DC6*F> z7`UB{^lYT}g6xGD5K+h|aKszh8czc4F>fyJ<>Fp9Z@AwF*L`r^-5c2&Pl9#g{qlae z*AI2|@J9Rnaor#Jvc2NgSW2u9`SNft5BGX{<62`Wy@q?cjZe(#Z5-(-nYw#P4rG)(S$DNR; z6JjV`0eJ;uDIuNyx76IP&q8_@(z`%*dC`ic1YO%;QTg`Rp2 zoT1&bjYo@*N4t7XoTXiv6Ls`o@asb{u0vs?TyL5`9M{8P*WTXrmN+W==${ZDWhCy6 zM7}=W3>~M$aQ0Z@N-wW9mJ-Jtc@2-75?NlpZ+P33=ms^X*jqY<+A!_?h@o(1X50$I1Bv_%hkVCKwGaj0P9_ zdNciOlXjrk$hu5fNqu378_Ol(xTEp9>1pQM>EG6hDKEZJLSb{!Ep7HA7Gqg`IbsR99 zZc5NsnLafo=&R(}t{#3(`-F4UM|?pYe4UP^6trjSWVbE8pdR|^SW4&Ep7?-zw5NT* zS=tjHaG3rcK42njF%h=7##`u5#`R=e=Xr}-d;t4FA21d7row&$yqo=LxSoc5`Ce&j zEG5>5eA97nI_?eh7W*@BJp6Z-}=>`zXW3rUZQ({e|W+>kMKk89p9M z(BIJ)NGv78&87r>of(^^1i5a;rYS*go3S{SAeYTpG$qJo)AyzX$I|q@DZ#NceLa@o z*wM$>e6#hmb^1%|K_Bx`&?Dr0sd2#@wT->DM5}Rp9$AYa3sut|(=^SXtaK6+Kb+$J2%*bp!0Zx=UrLVjl;%_-a zhC@zmc23PU=TV$Hf2pB!&bO*MJFhl$-f`2`n|9n(-E;e*>dsjQsuo2TRV{LU(-3mr z{LZ2si?-pfx+FQjDvq}1cUFz_JI~)YKdBnA=5Uhp=kKGuEdD92@BFF*RnZ;Mt7ELpZInM{TzB`57d*^_0H zChd+p&!WEP@b^>vJ&(T^@b@DAeulrFH%v)RK^e!Hnw*L<$t3V@H0w-_Pu<-!&^^00 zrzSTiH=dRI<*MA;+G6hSa&IF!l?(3Jdl_e3R|IQkDf_<4U@gM+hm<)}OxH8jTngn7s_-4grE zj&)|oG_zxwohfD(<4v3{M%I~PVx1`lR@<0YJXbAo$n3adc6>3$dytr7f6drYw8ckR zKl0$Cte}Y_R+b$ji+q&$OLp8P)*~^N?6^v-N8%^hv6EQO>0%|Zp3}uf z;{0R`5{Y$X$F|})OAISJeiXSW@w5J#v5d&gkBhG4>BmJ^;Vii$v6<{xOyth#VlHh) z`srf+FdoY3V*I>UOdr-|$M88@tRCiX8=Hr`lvq!8Y$w{`bTN}YO7UsP?T@qm0WyKyqL4ahsnyw&unt~ywK&B;mE&2s-xk^ zzY)I{j{FDZ--jdg^KIh}(Z3%LVoIG1Voym-Avz&Pu@Tj-JGcLqUz%G1@K8SGW^yIlPnH~4OL9)3?jxB2+u9=a~vt3%v5 zrty1Grt$ku`3{KR$x6O=u$R@gEWeX%o5nHY+AsraLs`4YdT;#X&6{OB>z~7T;yWw7 zHiNXk6Xf@4zz3-}zem*Li8Q@NgG7J#%XeXvzz1YJ>z)h7Gsw?2n4fi`d@ZHUAB-pU 
z(Bm28FAnmT2KgzOU&gZz<4?Wn@x7poXB}+FapHG)GM@E88ozIo@wD^Fc-m<)o_1Oa z2VcXO;X4h@K}gL(gAen&7+}Le?A7WqLz?CwB>K?uJyNN`hh)t7J(T8Pke_WZKkG#K zdP?9{4nhw-W#jr+x-qn%H5 zw9`aKJB@QB=l$-VtV-Prk@>{;M^3xv633p^2iQB=q-sB&dp%CzPaw5FF}N<#oo4uYOj}zo!dF?xbJp9Zn4)Rc*t{!<_^SBdZY`o54o!HtaYZuh$*J^NZ$jr2D9J#gJ?lB-5}n|!WuqSv{R z(cWf18|hKcRbxEg?}_W&>s?js#eKHb2XRBkdRzQnNbfVtRpY#BU4K73BVOxm^RGgB zzj>}2@9ogv3-B4M(=~2{-(7UVy-ruVir-aSj_WH1xQgFZ@VPl_up8lb6IUQTYmlq> z-Nco+?lQzx{4RoP81|FjMPwnJ{p5EKopH^6vj1!=8?i#!|1L;pKiPlQ-wV%q?0;9J zv(N0m?Dc$9^&<~H%Ht;-BzO9M%8g9$cKT6_Z~q%zHPPFx`H}~|O!DsYbC8}_;;PBs z{hBZN;L8+mkDrV5{3WiM>h0AWEI>V;SL63adcks6P4o8YdJ0j`b>1U>AEX!F>Zbc(gqTd(kMQdF(!>iTx3`IRRc#r!1kUn(1t7du!G#|;MDB{PG^PDf_IXTa9 z&INbKdCnc-Jh{^c+#%;VcZBn-ryuGe=Q)pr^Q@;o>fu;$9?4j+o;=jUvEZDNv0y#< zsE1?0IVEEudp#cqUCGmr%eV?>=@W;8JF~nm`&VP!hu`5U{rg}@9|`X8cfk51edJxP z;_rZsMEd9|H^Seux(4Z^W3J-wSB*ycxO?3Qf4?dZ>BaZBioaJ?jP!{QxDoy?72BGM zwm46wp>EbS{Z9B=$QgIL5$2tVy6InMgXhdYH^@If$iEQz2cZ6O+g%-pnS3e$pEy_O zb7ij34u#+o=L+Yd%#}i<7u{y&3g@ECl_I1MN8Ow&oQpD7h9aH)<@m6zacGNmk4N3C zYvM}yV#vv>@!1186?K#Q)4+Y^pC06&5#*nV{G8JqXW8rdxav>7f1Jl(=0HB|F@6_5 zk0B?1)-|z!1Kwo@{3Hh?u5$tMO@7c-gCM6q9OMT-3xTJ79-q^Y(-W>Lf}HU;_-EzkM?dlD~5^9Fo6TGXj#offCM8nIG0(4?ib3clo)%xk_%f?DYiP<$PfL zXzum!bAof1zJPN~=LdJ`12{&U^Bg12agG7!BF8}I!yMpPatt_@90QIO$ADwSG59d! 
z1AkKc#(DKg?VG)3)W3C%-`UmcY)HM%Hu!M1!DgL3ELdkFO|P>du@-3g{a*R4-j;Rt zuwb1X@bbzQh8qUR5i9v*Jnb|YPdhDzg9|Wb_$^S)K}gL(gAemJ zOo0st7X)L5H1xTy?eC42A{~8d-RoHpj9HMMZQ%Yv)=5rL>iof&@i%4lm<9QZgZ!mI zeoE$-G2`#H>b)M=QpSwGLCUe7qZ@%K)d#@{iOF|+f@nAvGEW_FrAX8ip~w$0zH zY}xBs8Q1~$TK0O-Cv8`Zo#@ElNYwViJ<*ZB$H=t9NE03He4?YBCOX<_ALYFszFWq3 z!=ijA>`KPih3w2YwO`inkLBY1u?~3m>`d>S>Gb^IJ+$IL&In{_AlEs^^)n5=zbN-w*Kj!-i4GQTpAn(aF*J-7#SOp4sU5==XeYjrsXr z+L+`RT~5EZHhQ#qZ*9z&-AoVeqrUr{;$*STS6o}Xy9dUQ@2*9mhrGKsE;&x$AD1(3 z_udXqHN4+<_)^A#@Tm{^9X{pQ@9=fRd!S}4&hQ>z zhiAcqc%uXUDg0&i%+GXjJ}nF9(;Nki?_SBxtZ=q}U-7oa4n4CokK=v^+~0X0@LN4I zf4UPA_Z{aajXQse>)FR*>ezo>`gTTpzdi0{I#(P`fYVOjQm#zolsyX@tx!E01z7*F-cm+0v@S=VR!S##Rs??SA3JN_yQ@1QE4R}DSjWI$#> zhEOj2TzjPkx9^ZK_`(icXQ-#cS-+!vCY<$q%ICsa|4jK}IE&vWt6zq*g71^F_xW2s{4*`5(XGnSAbKJ~yM!Cm>HizJqe_KG)Q)Ip^(5PvjiG z^i}ks`5WP^*D1dp&U%yb$#B-MDZkIYK?Y;f9E?qKFgDE?n^)0?SHBf@UW0u7J7MQ_ z$Tz}bp%&HII~?hoNbW9Qf3;WN>|{8Qga&*bw*M#yPLiQiXfPl?}ExPTHb#JiOE z9fdF@em~(t%1cAeMU~R2>*aMDMBGUj^bZ9)Wf|bqkCu~!M53*pZL-5E`IpGuWbKZ+2zk38?a*S z>hb|g$|_cDC|k39K;_Cc%a^PuUo&9c+9f5ot}0nwUa>s59%i;+^YuDqgh?XBf&#u2PoT2xpvuwdxMg26>4x0h9{DqoYg z-kG*?&8_7X!?VX;H$HpXwDH*mgYxq8^0WJ_sH|MKe)xa^%U4#e*swTn$=cNeW?p~o z>H(`(-M;#obroxGDPK~Vhnv}hhAkPocuC>lL1lvr2MrriK6v23WlPJ3ELofnIk0Td z;33O~78MRHxO&Fe+1E~meFl@=v^1}RrWrK$&licg|(1gzIem(l2t3$u*R0_5?uoZadr6`6h=FhWs4zKAl+OU>69#-KDcaUWqCzm z!TUGkg!3$xck5E8Wc-5ZW3QWXtqjKbOO8869X>qxCl`U-*j{cJDf3BES(c`eVlr<< z;>d{p$9c=Pq&k_;mmuxW$>droFY{JpRF`h8U&;r`h=I!a+j?K-xybAwqtq|@i|m2> zVXM6O6_Mgsf@*Xh#<2diX&&)wjQ&;@eJSPJ zQnYRP1wr}zHWduQz6bxUZ<8+NMXtnsUbij(<)A$F>QeqmIgzxl*v`&4Im_fI4HC&y u=9BRg{%%5=j9rjbi@KmZ7SbvIq+Lm@#>MH%7h=DM|L)TNxBl0*{Qn0d9rRBC literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv new file mode 100644 index 0000000000..ad14180b78 --- /dev/null +++ b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv @@ -0,0 +1,3 @@ +dtype,hdim_q,hdim_v,mask,knl_name,co_name 
+bf16,64,64,1,_ZN5aiter42fmha_bf16_pertokenBf16_hd64_128x256_varlenE,fmha_bf16_pertokenBf16_hd64_128x256_varlen.co +bf16,128,128,1,_ZN5aiter43fmha_bf16_pertokenBf16_hd128_128x256_varlenE,fmha_bf16_pertokenBf16_hd128_128x256_varlen.co diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index f2453c3060..e5df2226c5 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -16,18 +16,19 @@ Sink convention --------------- -D64 (_rxy_sink) kernels compile ENABLE_SINK=1. An explicit sink tensor -[q_head_num] fp32 (AITER post-scale) is required. - -Sink mechanism (from common_fmha.h::fmha_merge_sink_rowwise): - After computing standard softmax numerators/denominators, the sink acts as - an additional "virtual KV token" with zero value vector. It only adds to - the softmax denominator: - new_max = max(max_attn_raw, sink_raw) - sink_term = exp2((sink_raw - new_max) * scale * log2e) +`sink` ([q_head_num] fp32) is passed to the kernel verbatim -- it is the +per-Q-head logit value the kernel consumes directly (no host-side scaling). +This matches aiter's CK convention (test_mha_common.attention_ref): the sink +is an extra "virtual KV token" with a zero value vector, whose score is the +sink logit in the SAME scaled domain as Q·K^T * softmax_scale. + +Sink mechanism (zero-value virtual KV column): + After computing standard softmax numerators/denominators, the sink only + adds to the softmax denominator (contributes 0 to the output): + new_max = max(max_scores, sink) + sink_term = exp(sink - new_max) denom = denom * rescale + sink_term - numer = numer * rescale # sink contributes 0 to output - In AITER/post-scale convention: sink_raw = sink_user * sqrt(head_dim). + where max_scores / scores are already in the scaled (softmax_scale) domain. 
""" from __future__ import annotations @@ -111,13 +112,15 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: "Optional[torch.Tensor]" = None """bshd-in / bshd-out attention reference, sink optional. Pure-einsum fp32 implementation; lse is returned in fp32 (matches kernel's output). - Math: attn = Q @ K^T, scale = 1/sqrt(d), - denom = sum(exp((attn - max) * scale)) - [+ exp((sink_raw - max) * scale)], - out = (exp((attn - max) * scale) / denom) @ V, - lse = max * scale + log(denom). - sink (optional): [hq] fp32, AITER post-scale; converted internally to - pre-scale raw via x sqrt(d) to match kernel ABI. + Math: scores = (Q @ K^T) * scale, scale = 1/sqrt(d), + denom = sum(exp(scores - max)) [+ exp(sink - max)], + out = (exp(scores - max) / denom) @ V, + lse = max + log(denom). + sink (optional): [hq] fp32, a per-Q-head logit in the SAME (scaled) domain + as `scores` -- it is passed to the kernel verbatim (no + host-side scaling), matching aiter's CK convention + (test_mha_common.attention_ref): sink is an extra + zero-value KV column appended to the scaled scores. """ b, sq, hq, d = q.shape _, sk, hk, _ = k.shape @@ -126,30 +129,29 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: "Optional[torch.Tensor]" = None v = v.repeat_interleave(hq // hk, dim=2) qf, kf, vf = q.float(), k.float(), v.float() scale = 1.0 / math.sqrt(d) - attn = torch.einsum("bshd,bkhd->bhsk", qf, kf) + # Work entirely in the scaled-logit domain so the sink (which the kernel + # consumes verbatim) lines up with the scores. 
+ scores = torch.einsum("bshd,bkhd->bhsk", qf, kf) * scale if is_causal: m = torch.triu( torch.ones(sq, sk, dtype=torch.bool, device=q.device), sk - sq + 1 ) - attn = attn.masked_fill(m, float("-inf")) - max_attn, _ = attn.max(dim=-1) + scores = scores.masked_fill(m, float("-inf")) + max_attn, _ = scores.max(dim=-1) if sink is not None: - sink_raw = sink.float() * math.sqrt(d) - sink_raw_bhs = sink_raw[None, :, None].expand(b, hq, sq) - max_total = torch.maximum(max_attn, sink_raw_bhs) + sink_bhs = sink.float()[None, :, None].expand(b, hq, sq) + max_total = torch.maximum(max_attn, sink_bhs) else: max_total = max_attn - denom_real = torch.exp((attn - max_total.unsqueeze(-1)) * scale).sum(dim=-1) + denom_real = torch.exp(scores - max_total.unsqueeze(-1)).sum(dim=-1) if sink is not None: - sink_term = torch.exp((sink_raw_bhs - max_total) * scale) + sink_term = torch.exp(sink_bhs - max_total) denom_total = denom_real + sink_term else: denom_total = denom_real - probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze( - -1 - ) + probs = torch.exp(scores - max_total.unsqueeze(-1)) / denom_total.unsqueeze(-1) out = torch.einsum("bhsk,bkhd->bshd", probs, vf).to(q.dtype) - lse = torch.log(denom_total) + max_total * scale + lse = torch.log(denom_total) + max_total return out, lse @@ -505,36 +507,6 @@ def test_fmha_fwd_with_sink_asm_ops_layer(): _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) -def test_fmha_fwd_with_sink_asm_d64_requires_sink(): - """Direct ops-layer call without sink on D64 must raise the C++ check. - - Note: through aiter.flash_attn_func, can_impl_fmha_fwd_with_sink_asm() - routes D64 + sink_ptr=None to the CK fallback (the D64 _rxy_sink kernel - compiles ENABLE_SINK=1 and has no "skip sink" mode; auto-filling a - zero sink would change "no sink" semantics by adding an extra - exp(-max*scale) term to the softmax denominator). 
So this error path - is unreachable from the public API — we exercise it via the - lower-level ops stub here. - """ - device = "cuda" - q, k, v = make_qkv_bshd( - layout=0, - sq=128, - sk=2048, - batch=1, - hq=4, - hk=4, - d=64, - dtype=torch.bfloat16, - device=device, - ) - scale = 1.0 / math.sqrt(64) - # is_causal=True because only causal kernels are registered in the CSV; - # the error path being tested (D64 + sink=None) is orthogonal to causal. - with pytest.raises(RuntimeError, match="D64.*sink"): - aiter.fmha_fwd_with_sink_asm(q, k, v, scale, True, True, sink=None) - - # --------------------------------------------------------------------------- # Memory-layout tests: API takes only bshd shape, but the kernel reads strides # directly so non-contiguous bshd views (backed by sbhd / bhsd memory) must diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py new file mode 100644 index 0000000000..8554908933 --- /dev/null +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -0,0 +1,242 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +"""Correctness + perf tests for fmha_fwd_with_sink_varlen_asm (BF16 ASM, gfx1250). + +Ops layer: aiter.fmha_fwd_with_sink_varlen_asm (low-level, packed/varlen) + +Layout (packed THD; batch folded into the token axis): + q : (total_q, nheads, hdim_q) + k : (total_k, nheads_k, hdim_q) + v : (total_k, nheads_k, hdim_v) + out : (total_q, nheads, hdim_v) + lse : (total_q, nheads, 1) fp32 + cu_seqlens_q / cu_seqlens_k : int32 [batch+1] cumulative (cu[batch] == total) + +Sink convention (same as the fixed-batch path / CK attention_ref): + `sink` ([q_head_num] fp32) is a per-Q-head logit in the SAME scaled domain + as Q·K^T * softmax_scale; it acts as a zero-value virtual KV column. Passed + to the kernel verbatim (no host-side scaling). D64 kernels read it; D128 + kernels ignore it (pass None). 
+ +Only causal kernels are shipped (CSV registers mask=1 rows), so is_causal=True. +Causal uses bottom-right alignment per sequence (query i attends to key j iff +j <= i + (sk - sq)), matching flash_attn varlen semantics. +""" + +from __future__ import annotations + +import math +from typing import List, Optional + +import pytest +import torch + +import aiter +from aiter.jit.utils.chip_info import get_gfx_runtime as get_gfx + + +def _is_gfx1250_host() -> bool: + if not torch.cuda.is_available(): + return False + try: + return get_gfx() == "gfx1250" + except Exception: + return False + + +pytestmark = pytest.mark.skipif( + not _is_gfx1250_host(), + reason=( + "fmha_fwd_with_sink_varlen_asm ASM kernels are only shipped for gfx1250 " + "(hsa/gfx1250/fmha_fwd_bf16_varlen/*.co); no GPU or a different arch — skip" + ), +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = ""): + """fp32-on-CPU compare that hard-fails on mismatch / NaN. + + Cast to fp32 CPU first to avoid the gfx1250 + ROCm bf16 element-wise hang + that can occur right after a custom ASM kernel launch. + """ + a32 = a.detach().float().cpu() + b32 = b.detach().float().cpu() + torch.testing.assert_close(a32, b32, rtol=rtol, atol=atol, msg=msg) + + +def _d64_sink(hq: int, device: str) -> torch.Tensor: + """Per-head sink logits (scaled domain), varied across heads.""" + return torch.linspace(0.5, 2.0, hq, dtype=torch.float32, device=device) + + +def _attn_one(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor]): + """Single-sequence attention reference (no batch dim). + + q: (sq, hq, d) k: (sk, hk, d) v: (sk, hk, dv) + returns out (sq, hq, dv), lse (sq, hq) in fp32. 
+ """ + sq, hq, d = q.shape + sk, hk, _ = k.shape + if hq != hk: + k = k.repeat_interleave(hq // hk, dim=1) + v = v.repeat_interleave(hq // hk, dim=1) + qf, kf, vf = q.float(), k.float(), v.float() + scale = 1.0 / math.sqrt(d) + # scores: (hq, sq, sk) in the scaled-logit domain. + scores = torch.einsum("qhd,khd->hqk", qf, kf) * scale + if is_causal: + row = torch.arange(sq, device=q.device)[:, None] + col = torch.arange(sk, device=q.device)[None, :] + # bottom-right aligned causal mask + masked = col > (row + (sk - sq)) + scores = scores.masked_fill(masked[None], float("-inf")) + max_attn = scores.max(dim=-1).values # (hq, sq) + if sink is not None: + sink_hs = sink.float()[:, None].expand(hq, sq) + max_total = torch.maximum(max_attn, sink_hs) + else: + max_total = max_attn + denom = torch.exp(scores - max_total.unsqueeze(-1)).sum(dim=-1) # (hq, sq) + if sink is not None: + denom = denom + torch.exp(sink_hs - max_total) + probs = torch.exp(scores - max_total.unsqueeze(-1)) / denom.unsqueeze(-1) + out = torch.einsum("hqk,khd->qhd", probs, vf).to(q.dtype) # (sq, hq, dv) + lse = (torch.log(denom) + max_total).transpose(0, 1) # (sq, hq) + return out, lse + + +def _ref_varlen(q, k, v, cu_q, cu_k, *, is_causal: bool, sink: Optional[torch.Tensor]): + """Packed-THD reference: loop over batches, slice via cu_seqlens.""" + total_q, hq, _ = q.shape + dv = v.shape[-1] + batch = cu_q.numel() - 1 + out = torch.empty((total_q, hq, dv), dtype=q.dtype, device=q.device) + lse = torch.empty((total_q, hq), dtype=torch.float32, device=q.device) + cuq = cu_q.tolist() + cuk = cu_k.tolist() + for b in range(batch): + q0, q1 = cuq[b], cuq[b + 1] + k0, k1 = cuk[b], cuk[b + 1] + if q1 == q0: + continue + ob, lb = _attn_one( + q[q0:q1], k[k0:k1], v[k0:k1], is_causal=is_causal, sink=sink + ) + out[q0:q1] = ob + lse[q0:q1] = lb + return out, lse + + +def make_varlen_packed( + seqlens: List[int], hq: int, hk: int, d: int, dv: int, device="cuda", seed=0 +): + """Build packed THD q/k/v + 
cu_seqlens for the given per-batch seqlens. + + Uses equal q/k seqlens per batch (standard varlen self-attention). + """ + torch.manual_seed(seed) + cu = torch.tensor([0] + list(torch.tensor(seqlens).cumsum(0).tolist()), dtype=torch.int32) + total = int(cu[-1].item()) + q = torch.randn(total, hq, d, dtype=torch.bfloat16, device=device) + k = torch.randn(total, hk, d, dtype=torch.bfloat16, device=device) + v = torch.randn(total, hk, dv, dtype=torch.bfloat16, device=device) + cu = cu.to(device) + return q, k, v, cu + + +# --------------------------------------------------------------------------- +# Correctness +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("is_causal", [True]) +@pytest.mark.parametrize( + "head_dim,hq,hk,seqlens", + [ + # aligned single batch + (64, 8, 1, [256]), + (128, 8, 1, [256]), + # multi-batch, mixed (some unaligned) seqlens + (64, 8, 1, [128, 256, 384]), + (128, 8, 1, [128, 256, 384]), + (64, 8, 2, [100, 200, 300]), # unaligned + GQA + (128, 8, 2, [100, 200, 300]), + # GQA-heavy, larger + (64, 64, 8, [512, 1024]), + (128, 64, 4, [512, 1024]), + ], +) +def test_fmha_fwd_with_sink_varlen_asm_correctness(head_dim, hq, hk, seqlens, is_causal): + device = "cuda" + q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) + cu_q = cu + cu_k = cu # equal q/k seqlens per batch + max_seqlen_q = max(seqlens) + scale = 1.0 / math.sqrt(head_dim) + + # D64 -> exercise sink; D128 -> kernel ignores sink (pass None). 
+ sink = _d64_sink(hq, device) if head_dim == 64 else None + + out_k, lse_k = aiter.fmha_fwd_with_sink_varlen_asm( + q, k, v, cu_q, cu_k, max_seqlen_q, scale, is_causal, True, sink=sink + ) + lse_k = lse_k.squeeze(-1) # (total_q, nheads, 1) -> (total_q, nheads) + + msg = f"d={head_dim} hq={hq} hk={hk} seqlens={seqlens}" + _ok = out_k.detach().float().cpu() + assert not _ok.isnan().any().item(), f"KERNEL out NaN [{msg}]" + assert not _ok.isinf().any().item(), f"KERNEL out Inf [{msg}]" + + out_ref, lse_ref = _ref_varlen(q, k, v, cu_q, cu_k, is_causal=is_causal, sink=sink) + + _cmp(out_k, out_ref, rtol=1e-2, atol=1e-2, msg=f"out mismatch [{msg}]") + _cmp(lse_k, lse_ref, rtol=1e-2, atol=1e-2, msg=f"lse mismatch [{msg}]") + + +# --------------------------------------------------------------------------- +# Perf (single multi-batch shape per head_dim) +# --------------------------------------------------------------------------- + + +def _bench(fn, *args, num_iters=20, num_warmup=10, **kwargs) -> float: + for _ in range(num_warmup): + fn(*args, **kwargs) + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(num_iters): + fn(*args, **kwargs) + end.record() + end.synchronize() + return start.elapsed_time(end) * 1000.0 / num_iters # us per iter + + +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("is_causal", [True]) +def test_fmha_fwd_with_sink_varlen_asm_perf(head_dim, is_causal): + device = "cuda" + if head_dim == 64: + hq, hk, seqlens = 64, 8, [4096, 4096] + else: + hq, hk, seqlens = 64, 4, [2048, 2048] + q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) + max_seqlen_q = max(seqlens) + scale = 1.0 / math.sqrt(head_dim) + sink = _d64_sink(hq, device) if head_dim == 64 else None + + us = _bench( + aiter.fmha_fwd_with_sink_varlen_asm, + q, k, v, cu, cu, max_seqlen_q, scale, is_causal, False, + sink=sink, + ) + # 
Causal FLOPs summed over batches (each ~ 2 * hq * s^2 * 2d / 2). + flops = sum(2.0 * hq * s * s * (2 * head_dim) / 2.0 for s in seqlens) + tflops = flops / (us * 1e-6) / 1e12 + print(f"[perf varlen] d={head_dim} causal={is_causal} seqlens={seqlens}: {us:.1f}us, {tflops:.2f} TFLOPS") + assert us > 0.0 and math.isfinite(tflops) From bd3537194b56e37e018f4210aa796c454c21ea62 Mon Sep 17 00:00:00 2001 From: tingchen Date: Sun, 7 Jun 2026 17:15:32 +0000 Subject: [PATCH 32/43] reformat --- aiter/ops/mha.py | 4 +-- .../test_fmha_fwd_with_sink_varlen_asm.py | 28 +++++++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 41e27af9ac..9d57019646 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -422,9 +422,7 @@ def fmha_fwd_with_sink_varlen_asm( (total_q, q_head_num, v_head_dim), dtype=q.dtype, device=q.device ) - lse = torch.empty( - (total_q, q_head_num, 1), dtype=torch.float32, device=q.device - ) + lse = torch.empty((total_q, q_head_num, 1), dtype=torch.float32, device=q.device) _fmha_fwd_with_sink_varlen_asm( q, diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py index 8554908933..875a719b10 100644 --- a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -124,9 +124,7 @@ def _ref_varlen(q, k, v, cu_q, cu_k, *, is_causal: bool, sink: Optional[torch.Te k0, k1 = cuk[b], cuk[b + 1] if q1 == q0: continue - ob, lb = _attn_one( - q[q0:q1], k[k0:k1], v[k0:k1], is_causal=is_causal, sink=sink - ) + ob, lb = _attn_one(q[q0:q1], k[k0:k1], v[k0:k1], is_causal=is_causal, sink=sink) out[q0:q1] = ob lse[q0:q1] = lb return out, lse @@ -140,7 +138,9 @@ def make_varlen_packed( Uses equal q/k seqlens per batch (standard varlen self-attention). 
""" torch.manual_seed(seed) - cu = torch.tensor([0] + list(torch.tensor(seqlens).cumsum(0).tolist()), dtype=torch.int32) + cu = torch.tensor( + [0] + list(torch.tensor(seqlens).cumsum(0).tolist()), dtype=torch.int32 + ) total = int(cu[-1].item()) q = torch.randn(total, hq, d, dtype=torch.bfloat16, device=device) k = torch.randn(total, hk, d, dtype=torch.bfloat16, device=device) @@ -164,14 +164,16 @@ def make_varlen_packed( # multi-batch, mixed (some unaligned) seqlens (64, 8, 1, [128, 256, 384]), (128, 8, 1, [128, 256, 384]), - (64, 8, 2, [100, 200, 300]), # unaligned + GQA + (64, 8, 2, [100, 200, 300]), # unaligned + GQA (128, 8, 2, [100, 200, 300]), # GQA-heavy, larger (64, 64, 8, [512, 1024]), (128, 64, 4, [512, 1024]), ], ) -def test_fmha_fwd_with_sink_varlen_asm_correctness(head_dim, hq, hk, seqlens, is_causal): +def test_fmha_fwd_with_sink_varlen_asm_correctness( + head_dim, hq, hk, seqlens, is_causal +): device = "cuda" q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) cu_q = cu @@ -232,11 +234,21 @@ def test_fmha_fwd_with_sink_varlen_asm_perf(head_dim, is_causal): us = _bench( aiter.fmha_fwd_with_sink_varlen_asm, - q, k, v, cu, cu, max_seqlen_q, scale, is_causal, False, + q, + k, + v, + cu, + cu, + max_seqlen_q, + scale, + is_causal, + False, sink=sink, ) # Causal FLOPs summed over batches (each ~ 2 * hq * s^2 * 2d / 2). 
flops = sum(2.0 * hq * s * s * (2 * head_dim) / 2.0 for s in seqlens) tflops = flops / (us * 1e-6) / 1e12 - print(f"[perf varlen] d={head_dim} causal={is_causal} seqlens={seqlens}: {us:.1f}us, {tflops:.2f} TFLOPS") + print( + f"[perf varlen] d={head_dim} causal={is_causal} seqlens={seqlens}: {us:.1f}us, {tflops:.2f} TFLOPS" + ) assert us > 0.0 and math.isfinite(tflops) From e6a818ffe715f3a0c4b66d0c5e3a12142b3c8c67 Mon Sep 17 00:00:00 2001 From: tingchen Date: Mon, 8 Jun 2026 02:26:38 +0000 Subject: [PATCH 33/43] set opt=0 for varlen --- csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu index 006c3e8b73..f01cb3e36a 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu @@ -213,8 +213,9 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( args.gqa = gqa; args.q_head_num = q_head_num; // s_opt: bit0 reverse_kv | bit1 double_q | bit2 remap_xy. - // The shipped VARLEN _dq kernels use reverse_kv=1, double_q=1, remap_xy=1. - args.opt = 7; + // 6 = 0b110 -> reverse_kv=0, double_q=1, remap_xy=1. Must match how the + // shipped VARLEN .co was built. + args.opt = 6; args.lse = return_lse ? 1 : 0; args.max_q_len = max_seqlen_q; args.sink_addr = sink ? 
sink->data_ptr() : nullptr; From f6928771da196dfd10298c823c833412601caccf Mon Sep 17 00:00:00 2001 From: tingchen Date: Tue, 9 Jun 2026 10:01:18 +0800 Subject: [PATCH 34/43] connect to public api --- aiter/ops/mha.py | 62 +++++++++- .../test_fmha_fwd_with_sink_varlen_asm.py | 109 +++++++++++++++++- 2 files changed, 167 insertions(+), 4 deletions(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 9d57019646..0e2968178e 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -2342,9 +2342,69 @@ def can_impl_fmha_v3_fwd(): ret = ret and ((gqa_ratio & (gqa_ratio - 1)) == 0) return ret + def can_impl_fmha_fwd_with_sink_varlen_asm(): + # gfx1250 ASM bf16 packed/varlen forward (fmha_fwd_with_sink_varlen_asm). + # Packed THD (batch folded into the token axis); no dropout / swa / + # quant / alibi / bias / paged (block_table) / logits-soft-cap. Sink + # logits (per-Q-head fp32) supported; sink-token (sink_size) not. + ret = get_gfx() == "gfx1250" + ret = ret and (q.dtype == dtypes.bf16) + ret = ret and (hdim_q in (64, 128)) + ret = ret and (hdim_v == hdim_q) + ret = ret and (nhead_q % nhead_k == 0) + ret = ret and (not swa) + ret = ret and (sink_size == 0) + ret = ret and (alibi_slopes is None and bias is None) + ret = ret and (dropout_p == 0.0) + ret = ret and (logits_soft_cap == 0.0) + ret = ret and (block_table is None) + # The varlen ASM wrapper carries no physical-padding arrays; route any + # padded-cu request to CK (mha_varlen_fwd) which understands them. + ret = ret and (cu_seqlens_q_padded is None and cu_seqlens_k_padded is None) + ret = ret and (q_descale is None and k_descale is None and v_descale is None) + # Per-hdim sink eligibility (mirrors the fixed-batch path): + # D128 (`_rxy`) binaries compile ENABLE_SINK=0 and ignore the sink + # buffer, so routing a caller's sink_ptr to them would silently drop + # the sink term -- fall back to CK whenever sink_ptr is set. 
+ # D64 (`_rxy_sink`) binaries compile ENABLE_SINK=1 and ALWAYS read + # SINK, so calling with sink_ptr=None would dereference a null pointer + # -- require an explicit sink for D64 and fall back to CK otherwise. + if hdim_q == 128: + ret = ret and (sink_ptr is None) + elif hdim_q == 64: + ret = ret and (sink_ptr is not None) + return ret + q, k, v = [maybe_contiguous(x) for x in (q, k, v)] - if can_impl_fmha_v3_fwd(): + if can_impl_fmha_fwd_with_sink_varlen_asm(): + # gfx1250 packed/varlen ASM bf16 path. q/k/v are packed THD; the kernel + # requires dense packing (the wrapper calls `.contiguous()` defensively) + # and carries no strides. softmax_scale is forwarded as-is (the kernel + # applies it internally to Q·K^T). sink_ptr is passed through verbatim; + # `can_impl_fmha_fwd_with_sink_varlen_asm` already enforces the per-hdim + # (D128→no sink, D64→sink) contract so we never feed a null sink to a + # D64 binary that unconditionally reads it. + out, lse_asm = fmha_fwd_with_sink_varlen_asm( + q, + k, + v, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + float(softmax_scale), + bool(causal), + True, + sink_ptr, + out, + ) + # The ASM kernel writes packed lse (total_q, nheads, 1); the varlen API + # convention (mha_varlen_fwd) is (nheads, total_q). Reshape so callers + # and the autograd backward see a consistent layout regardless of path. 
+ softmax_lse = lse_asm.squeeze(-1).transpose(0, 1).contiguous() + S_dmask = torch.empty((0,), dtype=torch.float32, device=q.device) + rng_state = torch.empty((2,), dtype=torch.int64, device=q.device) + elif can_impl_fmha_v3_fwd(): out, softmax_lse, S_dmask, rng_state = fmha_v3_varlen_fwd( q, k, diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py index 875a719b10..a2627c9a6e 100644 --- a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -149,6 +149,57 @@ def make_varlen_packed( return q, k, v, cu +# --------------------------------------------------------------------------- +# Kernel entry points (mirrors test_fmha_fwd_with_sink_asm.run_kernel). +# --------------------------------------------------------------------------- + + +def run_kernel( + q, + k, + v, + cu_q, + cu_k, + max_seqlen_q, + *, + scale: float, + is_causal: bool, + sink: Optional[torch.Tensor] = None, + via: str = "ops", +): + """Call the varlen kernel and return (out, lse) with lse shaped + (total_q, nheads) to match the in-file `_ref_varlen` reference. + + via = "ops" → low-level aiter.fmha_fwd_with_sink_varlen_asm + (lse is packed (total_q, nheads, 1)) + via = "public" → public aiter.flash_attn_varlen_func (dispatcher → asm + path); the varlen API returns lse as (nheads, total_q). + """ + if via == "ops": + out, lse = aiter.fmha_fwd_with_sink_varlen_asm( + q, k, v, cu_q, cu_k, max_seqlen_q, scale, is_causal, True, sink=sink + ) + return out, lse.squeeze(-1) # (total_q, nheads, 1) -> (total_q, nheads) + if via == "public": + # q/k seqlens are equal in these tests, so max_seqlen_k == max_seqlen_q. 
+ r = aiter.flash_attn_varlen_func( + q, + k, + v, + cu_q, + cu_k, + max_seqlen_q, + max_seqlen_q, + softmax_scale=scale, + causal=is_causal, + return_lse=True, + sink_ptr=sink, + ) + # public varlen lse is (nheads, total_q) -> (total_q, nheads) + return r[0], r[1].transpose(0, 1).contiguous() + raise ValueError(f"unknown via={via!r}") + + # --------------------------------------------------------------------------- # Correctness # --------------------------------------------------------------------------- @@ -184,10 +235,20 @@ def test_fmha_fwd_with_sink_varlen_asm_correctness( # D64 -> exercise sink; D128 -> kernel ignores sink (pass None). sink = _d64_sink(hq, device) if head_dim == 64 else None - out_k, lse_k = aiter.fmha_fwd_with_sink_varlen_asm( - q, k, v, cu_q, cu_k, max_seqlen_q, scale, is_causal, True, sink=sink + # Drive the public API (aiter.flash_attn_varlen_func), which dispatches + # to the fmha_fwd_with_sink_varlen_asm branch on gfx1250. + out_k, lse_k = run_kernel( + q, + k, + v, + cu_q, + cu_k, + max_seqlen_q, + scale=scale, + is_causal=is_causal, + sink=sink, + via="public", ) - lse_k = lse_k.squeeze(-1) # (total_q, nheads, 1) -> (total_q, nheads) msg = f"d={head_dim} hq={hq} hk={hk} seqlens={seqlens}" _ok = out_k.detach().float().cpu() @@ -200,6 +261,48 @@ def test_fmha_fwd_with_sink_varlen_asm_correctness( _cmp(lse_k, lse_ref, rtol=1e-2, atol=1e-2, msg=f"lse mismatch [{msg}]") +# --------------------------------------------------------------------------- +# Integration test: aiter.flash_attn_varlen_func -> _flash_attn_varlen_forward +# dispatcher -> fmha_fwd_with_sink_varlen_asm branch. Verifies the public-API +# path on gfx1250 matches a direct ops-layer call bit-for-bit (same kernel, +# same args) — the lse layout differs (ops: (total_q, nheads); public: +# (nheads, total_q)) but run_kernel normalizes both to (total_q, nheads). 
+# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("is_causal", [True]) +def test_fmha_fwd_with_sink_varlen_asm_via_flash_attn_varlen_func(head_dim, is_causal): + device = "cuda" + hq, hk, seqlens = 8, 1, [128, 256, 384] + q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) + max_seqlen_q = max(seqlens) + scale = 1.0 / math.sqrt(head_dim) + sink = _d64_sink(hq, device) if head_dim == 64 else None + + out_direct, lse_direct = run_kernel( + q, k, v, cu, cu, max_seqlen_q, scale=scale, is_causal=is_causal, sink=sink, + via="ops", + ) + out_via, lse_via = run_kernel( + q, k, v, cu, cu, max_seqlen_q, scale=scale, is_causal=is_causal, sink=sink, + via="public", + ) + + # Same kernel, same args -> bit-identical (cast to fp32 to avoid bf16 + # element-wise hang in some ROCm builds). + do = (out_via.float() - out_direct.float()).abs().max().item() + dl = (lse_via.float() - lse_direct.float()).abs().max().item() + assert do == 0.0, ( + f"flash_attn_varlen_func != fmha_fwd_with_sink_varlen_asm " + f"(d={head_dim}, causal={is_causal}) max|dO|={do}" + ) + assert dl == 0.0, ( + f"lse via flash_attn_varlen_func != direct " + f"(d={head_dim}, causal={is_causal}) max|dLSE|={dl}" + ) + + # --------------------------------------------------------------------------- # Perf (single multi-batch shape per head_dim) # --------------------------------------------------------------------------- From 204f4b3bcfeefe9d5715eee19565c41aef61b4f8 Mon Sep 17 00:00:00 2001 From: tingchen Date: Tue, 9 Jun 2026 10:05:32 +0800 Subject: [PATCH 35/43] reformat --- .../test_fmha_fwd_with_sink_varlen_asm.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py index a2627c9a6e..74887448e5 100644 --- 
a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -281,11 +281,27 @@ def test_fmha_fwd_with_sink_varlen_asm_via_flash_attn_varlen_func(head_dim, is_c sink = _d64_sink(hq, device) if head_dim == 64 else None out_direct, lse_direct = run_kernel( - q, k, v, cu, cu, max_seqlen_q, scale=scale, is_causal=is_causal, sink=sink, + q, + k, + v, + cu, + cu, + max_seqlen_q, + scale=scale, + is_causal=is_causal, + sink=sink, via="ops", ) out_via, lse_via = run_kernel( - q, k, v, cu, cu, max_seqlen_q, scale=scale, is_causal=is_causal, sink=sink, + q, + k, + v, + cu, + cu, + max_seqlen_q, + scale=scale, + is_causal=is_causal, + sink=sink, via="public", ) From dc4812fefea63499072de04d20356c3013622418 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 11 Jun 2026 04:12:13 +0000 Subject: [PATCH 36/43] enhance test --- op_tests/test_fmha_fwd_with_sink_asm.py | 53 ++++++++++++++++++------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index e5df2226c5..4432d9701c 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -788,22 +788,33 @@ def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device raise ValueError(f"unknown perf init pattern: {init!r}") +# (head_dim, seqlen) perf shapes; sq == sk. batch=2, hq=64, hk=8 (D64) / 4 (D128). +_PERF_SHAPES = [ + (64, 1024), + (64, 4096), + (64, 8192), + (64, 16384), + (64, 32768), + (128, 1024), + (128, 2048), + (128, 4096), + (128, 8192), + (128, 16384), +] + + @pytest.mark.parametrize("init", _PERF_INITS) -@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("head_dim,seqlen", _PERF_SHAPES) # Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). 
@pytest.mark.parametrize("is_causal", [True]) -def test_fmha_fwd_with_sink_asm_perf(head_dim, is_causal, init): +def test_fmha_fwd_with_sink_asm_perf(head_dim, seqlen, is_causal, init): device = "cuda" torch.manual_seed(0) - # Shapes aligned with run.sh perf_v?_d64 / perf_v?_d128: - # D64 : batch=2 kv_head_num=8 gqa=8 -> hq=64, hk=8, sq=sk=8192 - # D128 : batch=2 kv_head_num=4 gqa=16 -> hq=64, hk=4, sq=sk=4096 - # (D128 sq/sk is halved because per-head buffer doubles vs D64.) - if head_dim == 64: - sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 - else: # head_dim == 128 - sq, batch, hq, hk, sk = 4096, 2, 64, 4, 4096 + # batch=1, hq=64; kv_head_num matches run.sh perf (D64 gqa=8, D128 gqa=16). + batch, hq = 1, 64 + hk = 8 if head_dim == 64 else 4 + sq = sk = seqlen q, k, v = _make_qkv_perf( init, layout=2, @@ -836,8 +847,8 @@ def test_fmha_fwd_with_sink_asm_perf(head_dim, is_causal, init): flops /= 2.0 tflops = flops / (us * 1e-6) / 1e12 print( - f"[perf] d={head_dim} causal={is_causal} init={init}: " - f"{us:.1f}us, {tflops:.2f} TFLOPS" + f"[perf] d={head_dim} sq=sk={seqlen} b={batch} hq={hq} hk={hk} " + f"causal={is_causal} init={init}: {us:.1f}us, {tflops:.2f} TFLOPS" ) # Sanity: catch silent-PASS when timing infrastructure breaks (e.g. profiler # / ROCTracer drops events → us=0, TFLOPS=inf). Without these asserts the @@ -866,6 +877,7 @@ def run_cli( head_dim: int, causal: bool = False, layout: int = 0, + init: str = "randn", do_ref: bool = False, do_perf: bool = False, ) -> int: @@ -873,6 +885,9 @@ def run_cli( Returns 0 on success, 1 if --ref check fails. Prints a one-line summary of kernel shape / time and (if requested) ref / perf metrics. + + `init` selects q/k/v initialization: "randn" (random normal) or + "const0.25" (every element filled with 0.25). 
""" device = "cuda" torch.manual_seed(0) @@ -880,11 +895,12 @@ def run_cli( print( f"Shape: b={batch} hq={hq} hk={hk} sq={sq} sk={sk} d={head_dim} " - f"causal={causal} layout={layout}", + f"causal={causal} layout={layout} init={init}", flush=True, ) - q, k, v = make_qkv_bshd( + q, k, v = _make_qkv_perf( + init, layout=layout, sq=sq, sk=sk, @@ -1004,6 +1020,14 @@ def run_cli( "(API always sees bshd shape; non-zero layout returns a\n" "non-contiguous bshd view of the underlying memory)", ) +parser.add_argument( + "--init", + type=str, + choices=_PERF_INITS, + default="randn", + help="q/k/v initialization: 'randn' (random normal, default) or\n" + "'const0.25' (every element filled with the fixed value 0.25)", +) parser.add_argument( "--ref", action="store_true", @@ -1028,6 +1052,7 @@ def run_cli( head_dim=args.head_dim, causal=args.causal, layout=args.layout, + init=args.init, do_ref=args.ref, do_perf=args.perf, ) From d15d5f0bf172329f93b5b93679ad8b54a8e467e1 Mon Sep 17 00:00:00 2001 From: ahmed-bsod Date: Thu, 11 Jun 2026 09:45:34 +0000 Subject: [PATCH 37/43] set double_q=0 --- csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu index f01cb3e36a..caf9414067 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu @@ -215,7 +215,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( // s_opt: bit0 reverse_kv | bit1 double_q | bit2 remap_xy. // 6 = 0b110 -> reverse_kv=0, double_q=1, remap_xy=1. Must match how the // shipped VARLEN .co was built. - args.opt = 6; + args.opt = 4; args.lse = return_lse ? 1 : 0; args.max_q_len = max_seqlen_q; args.sink_addr = sink ? sink->data_ptr() : nullptr; @@ -246,7 +246,7 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( const int sub_Q = 128; // ts_qo const int wv_tg = 4; const int bdx = (wv_tg == 4) ? 
128 : 256; - const int tg_div = 2; // double_q = 1 + const int tg_div = 1; // double_q = 0 const int q_tile_count = (max_seqlen_q + sub_Q - 1) / sub_Q; const int gdx = (q_tile_count + tg_div - 1) / tg_div; const int gdy = q_head_num; From 0104bcee31a656ee5922b4c380f26567bd4665d6 Mon Sep 17 00:00:00 2001 From: yihongli Date: Thu, 11 Jun 2026 05:04:44 +0000 Subject: [PATCH 38/43] Route gfx1250 prefill varlen before FlyDSL --- aiter/ops/mha.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 481257c1c5..3690381296 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -1594,6 +1594,8 @@ def can_impl_fmha_fwd_with_sink_asm(): # (per-Q-head fp32) supported; sink-token (sink_size) not supported. ret = get_gfx() == "gfx1250" ret = ret and (q.dtype == dtypes.bf16) + # Only causal gfx1250 binaries are registered in fmha_fwd_bf16*.csv. + ret = ret and bool(causal) ret = ret and (hdim_q in (64, 128)) ret = ret and (hdim_v == hdim_q) ret = ret and (nhead_q % nhead_k == 0) @@ -2503,6 +2505,8 @@ def can_impl_fmha_fwd_with_sink_varlen_asm(): # logits (per-Q-head fp32) supported; sink-token (sink_size) not. ret = get_gfx() == "gfx1250" ret = ret and (q.dtype == dtypes.bf16) + # Only causal gfx1250 binaries are registered in fmha_fwd_bf16*.csv. + ret = ret and bool(causal) ret = ret and (hdim_q in (64, 128)) ret = ret and (hdim_v == hdim_q) ret = ret and (nhead_q % nhead_k == 0) @@ -3131,7 +3135,67 @@ def flash_attn_varlen_func( The output of softmax (possibly with different scaling). It also encodes the dropout pattern (negative means that location was dropped, nonnegative means it was kept). """ - # FlyDSL path — returns result if supported, None otherwise + # Try the PR3039 gfx1250 prefill ASM path before FlyDSL can claim it. 
+ def can_try_gfx1250_fmha_fwd_with_sink_varlen_asm(): + # Keep this public-router gate intentionally narrow so the PR3039 + # prefill ASM path can be measured without changing decode or other + # FlyDSL/CK coverage. + if get_gfx() != "gfx1250" or q.dtype != dtypes.bf16: + return False + hdim_q = q.shape[-1] + hdim_v = v.shape[-1] + nhead_q = q.shape[-2] + nhead_k = k.shape[-2] + if hdim_q not in (64, 128) or hdim_v != hdim_q: + return False + if nhead_q % nhead_k != 0: + return False + if not causal or dropout_p != 0.0 or logits_soft_cap != 0.0: + return False + if window_size[0] != -1 or window_size[1] != -1: + return False + sink_size = window_size[2] if len(window_size) > 2 else 0 + if sink_size != 0: + return False + if bias is not None or alibi_slopes is not None or block_table is not None: + return False + if cu_seqlens_q_padded is not None or cu_seqlens_k_padded is not None: + return False + if hdim_q == 64: + return sink_ptr is not None + return sink_ptr is None + + if can_try_gfx1250_fmha_fwd_with_sink_varlen_asm(): + return FlashAttnVarlenFunc.apply( + q, + k, + v, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + min_seqlen_q, + dropout_p, + softmax_scale, + logits_soft_cap, + causal, + window_size, + bias, + alibi_slopes, + deterministic, + return_lse, + return_attn_probs, + block_table, + out, + torch.is_grad_enabled(), + cu_seqlens_q_padded, + cu_seqlens_k_padded, + True, + how_v3_bf16_cvt, + sink_ptr, + ) + + # FlyDSL path returns result if supported, None otherwise. 
from .flydsl.fmha_kernels import flydsl_flash_attn_varlen_func _flydsl_result = flydsl_flash_attn_varlen_func( From 169af673783e74af0b38746fc731dbd11b90f525 Mon Sep 17 00:00:00 2001 From: junxiaguo Date: Fri, 12 Jun 2026 02:15:35 +0000 Subject: [PATCH 39/43] reformat --- aiter/ops/mha.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 3690381296..df9dbde931 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -3135,6 +3135,7 @@ def flash_attn_varlen_func( The output of softmax (possibly with different scaling). It also encodes the dropout pattern (negative means that location was dropped, nonnegative means it was kept). """ + # Try the PR3039 gfx1250 prefill ASM path before FlyDSL can claim it. def can_try_gfx1250_fmha_fwd_with_sink_varlen_asm(): # Keep this public-router gate intentionally narrow so the PR3039 From 6e27400cc331dbe487caa329cd77561afd8308c0 Mon Sep 17 00:00:00 2001 From: ahmed-bsod Date: Tue, 23 Jun 2026 11:19:37 +0000 Subject: [PATCH 40/43] add kargs preload mha bf16 kernel into aiter --- .../asm_fmha_fwd_with_sink_varlen.cu | 7 ++++--- .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin 83872 -> 83736 bytes .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin 72992 -> 72728 bytes ..._bf16_pertokenBf16_hd128_128x256_varlen.co | Bin 84024 -> 83504 bytes ...a_bf16_pertokenBf16_hd64_128x256_varlen.co | Bin 73136 -> 72608 bytes 5 files changed, 4 insertions(+), 3 deletions(-) diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu index caf9414067..42fdf2f703 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu @@ -246,9 +246,10 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( const int sub_Q = 128; // ts_qo const int wv_tg = 4; const int bdx = (wv_tg == 4) ? 
128 : 256; - const int tg_div = 1; // double_q = 0 - const int q_tile_count = (max_seqlen_q + sub_Q - 1) / sub_Q; - const int gdx = (q_tile_count + tg_div - 1) / tg_div; + const bool double_q = (args.opt & 0x2) != 0; // bit1 of s_opt + const int tg_div = double_q ? 2 : 1; + const int q_tile_count = (max_seqlen_q + sub_Q - 1) / sub_Q; + const int gdx = (q_tile_count + tg_div - 1) / tg_div; const int gdy = q_head_num; const int gdz = batch; diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co index 027171274bd7cb70fa0b674fa04823aee02e9c1a..9a15756d86aefec75dc72a062d99395ea4d2893f 100755 GIT binary patch delta 873 zcmZ3`&pM-zb%F+?#6->I^$aGA3}67G1t46IC>x0B0F@6wr3)a+7#N`HVKftw`k?wJ z`!fnE5w>G<6Jri(m@V%G9Lz1mn%{A<6XyV~c6AEo*VmM#W(^)7IOQBbt8AcYeRzsJh-h zsHx?(`TcSs`bD>s*xu}2#P_%=<4f-edZrdUCb?~6{jq8EzNf=){KwCHd)IvZ*#1F$ zok5>}atT72LM|0b#EY5x@q9Y8d^ev?WK#LiVj;0`FT0RS71QFAj7J=yrA$1TDvARN zz7Lp)n}~Vt|-DdpK+QPOmt(dTryVMBn5z15p5%iC`0 zXXNLm>L(`_Wt1l7rR$eu=A|cRq~_@tl4i8$fT~KG&M3oZ&Lk=}-Aabhj%kP3^ePZ7 VAwGQ4#`2e(;KB2 zi!UnTa!63U4^O@-xDgspiyh6Dt4>}jwm zeRrX#f=S~o2!QD#7Kya21;@bbbPKK^)+|2<8y?i5Iy~40E42 z%>6@Pj&h$k#(m;Az*WnjVuA;pU|#0|;v^3cr-=qJ&iytpQ`{$}xlg1QwhZDNvF!1L zM1XfyeZaFWih-iDTAq|cy44XE1OxPvTsm4nI8hg_0Uah-?-vBv6J4{1KsSK7d5GMH5GQ&hvz%(HfP_eWr=4xm_W53kb)4cCb8`2m_g3-hBe!L{9y-n-Ez9D|}N-MUW`K=@T0^eOr A#{d8T diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_varlen.co b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_varlen.co index 5f743893f66b5d88b92c8df25f48ca8ebfbdbb07..61bae611fc4698aef2cb5452feadc7d8475f6136 100755 GIT binary patch delta 1343 zcmd5+Ur1A76#vehoA=%=*{&iwQZ%l*ti030G{W8K<_r-^da#$qQ0J^UhqHp9si8w~ zkor*{L9@d%nXt-|I#D zYTmx+WBi>8O$ zB~VV9Ll7Zy2-@@ouR_VP%T+O{*5FE7z>IogK~)jeKJ2FDP-NF9yQ}oNxFFVud2!mY zp@f7X1|JuE%;vQ6qU^Rf znA`ZR#qQZ??5`;;b~bFrQvdwyVgJ^AfoprK!1ZA}S8S5u>|D9B-hQ$7s3fFh=U!et 
zD#@7DsSuQ}-$VWIZmli=pMl5$D0>tHmO(7xgHX-nS<*U4#zxQi z{Zf1^5sSv}3`OJ3t*A-LBw|Cu6B!A+ou`P{3G3ubCpep}AvL>K8!o{{r{=#czvk_N zEHKDH9*3ZbM`r5uN;@HF<*lpqutW$`J8AlpEa|7VCvV!8p2RbJDLK;(PO=sT50A_z z2XSO44Cgs351t6!)U?tKkbY{qO*3si5{n1Oy1~nua908Tc>unl!>1!4a#gbNsyxx5 fB!!qwnJh=3gR7wfrAht(Ndzjn6%)Vr7YFnQhj&gD delta 1361 zcmd5+Ur19?7(d@}ci!C%n+wEDmcrC!wnbD4s!bi`bt&7)D`+EqmL_nL zpe4F4*;?few9nE9Xc_IZp&Xztw11m1u}Q6LDxY^0msAr>9kO|cOgr3)XpduDLdu5Y zv1orfk+c4y82g8aOQ-k0A|>-B+7^Hh zgIR8f*>yACJlAaTxBCCvz3m9XBH)}z*F)fua(dJ3FqIJ8kc>l=*x|xV*Y$i|q9a_J z^nWOJ=vA&|UCUNl2}8fdcvUtkMVK_&3(^rL+jW^{x0uL$>2NFB0F1iKz@gQvrM!gwCwy5h$>nhp@B;*#d zm`xl;QhBy1)zT%S!eb$FnH(4t@ugyNj#A^x)T$d}c$8G{DX85&;IB-2YH+1{~NQW&9jilOk1%m8b zOSbH-5H}9&E+8=}43`Ro$St@MOwm|+P&Nha@X ze3^ZA=BRyh_E2kU^HA&4R;E~p8OyB~&mWs$?oPdr9GhU4TLW%m@AJYrn8-c!zz?7+ z9`Mn$1`WQ(@A$=e=k+>C04eK{YQo{p@p_D#8Hqd_~sgI6?f_45_2<5mxatFEDHn!4;@0=baf+L#oc1H(q}A- zcygP=)Xgpxo`QE+u!CC&Sq5DMJ9LQHOSh$jR0x}SZypjtXW!xXo8NrjeDlrsX1=AI z^!kmo*nnSqvW@X`c^lF`gHf5Tfsu(1?Yc%)24_FkUv+XbLQ31eJRy7(s~Cu2m`OAD zKeUd7puYOhtRD3s8hH%EzLZ4|YOo~sC}n)xLd>|XpnS1c3w3;<%fPQm<sC9#Dk1au(~KV60)qj#jIX+lonDcBhSH0 znykmbN5a;wC$inI_|Y~q@1Qxf&5E*vYSDH#V})@qV`7)xtUB*k)fzc(lKhH!mGn6p z)Z+0WjPppHab$e6fa*js3fHJPrd6tD#?iI zxE<=|=Fvv=$wrqMJMyxAWsG-Gkh616S}cd=;AL*f^m&%o zzT4?9m*qpGV!arMjgLhGgOSPDy-0i{kh~Kg8H`2afr;_KfxDvvW6{YGdaVMy)<7Z_ z8H!FO5XqYx>9Y#RWf#q~*9q1Z@&5IV5#sOm|2I9i;Dq~tQx=Nd&@87!&g?89H?+yd z8S*ln=DHy41(~jJEs1W$&b-I9BD7+oArG`$jTdI8(_9+u$w^n3tP-*)`c0Y9rFi Date: Fri, 26 Jun 2026 13:33:46 +0000 Subject: [PATCH 41/43] asm bf16 mha: add non causal kernel --- aiter/ops/mha.py | 4 - csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu | 8 +- .../asm_fmha_fwd_with_sink_varlen.cu | 6 +- .../fmha_bf16_pertokenBf16_hd128_128x256.co | Bin 83736 -> 54544 bytes .../fmha_bf16_pertokenBf16_hd64_128x256.co | Bin 72728 -> 43792 bytes hsa/gfx1250/fmha_fwd_bf16/fmha_fwd_bf16.csv | 6 +- ..._bf16_pertokenBf16_hd128_128x256_varlen.co | Bin 83504 -> 54440 bytes ...a_bf16_pertokenBf16_hd64_128x256_varlen.co | Bin 72608 -> 
43552 bytes .../fmha_fwd_bf16_varlen.csv | 6 +- op_tests/test_fmha_fwd_with_sink_asm.py | 58 +++++---- .../test_fmha_fwd_with_sink_varlen_asm.py | 116 +++++++++++++----- 11 files changed, 126 insertions(+), 78 deletions(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index df9dbde931..890b81140b 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -1594,8 +1594,6 @@ def can_impl_fmha_fwd_with_sink_asm(): # (per-Q-head fp32) supported; sink-token (sink_size) not supported. ret = get_gfx() == "gfx1250" ret = ret and (q.dtype == dtypes.bf16) - # Only causal gfx1250 binaries are registered in fmha_fwd_bf16*.csv. - ret = ret and bool(causal) ret = ret and (hdim_q in (64, 128)) ret = ret and (hdim_v == hdim_q) ret = ret and (nhead_q % nhead_k == 0) @@ -2505,8 +2503,6 @@ def can_impl_fmha_fwd_with_sink_varlen_asm(): # logits (per-Q-head fp32) supported; sink-token (sink_size) not. ret = get_gfx() == "gfx1250" ret = ret and (q.dtype == dtypes.bf16) - # Only causal gfx1250 binaries are registered in fmha_fwd_bf16*.csv. - ret = ret and bool(causal) ret = ret and (hdim_q in (64, 128)) ret = ret and (hdim_v == hdim_q) ret = ret and (nhead_q % nhead_k == 0) diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu index f118566295..e8537c032b 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink.cu @@ -85,10 +85,10 @@ static_assert(sizeof(KernelArgs) == 0x84, // ---- helpers --------------------------------------------------------------- -// Kernel selection: only (dtype, hdim_q, hdim_v, mask) — we always use the -// _brd (border) kernel variants which are a strict superset (handle aligned -// + unaligned q_seq_len/kv_seq_len uniformly). The csv schema therefore has -// no `border` column. +// Kernel selection: (dtype, hdim_q, hdim_v, mask). 
mask = is_causal: the csv +// registers both mask=0 (non-causal, _rxy_pfnr source) and mask=1 (causal, +// _rxy_pfnr_cas_brd source -> *_mask.co) variants, so is_causal picks the +// matching .co at launch time. static std::string get_heuristic_kernel_fmha_fwd_bf16(const std::string& dtype, int hdim_q, int hdim_v, diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu index 42fdf2f703..70972104c3 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_with_sink_varlen.cu @@ -59,8 +59,10 @@ static_assert(sizeof(FmhaFwdVarlenKernelArgs) == 0x58, // ---- helpers --------------------------------------------------------------- -// Kernel selection: only (dtype, hdim_q, hdim_v, mask). Only the _brd (border) -// causal kernels are shipped, so mask is always 1. +// Kernel selection: (dtype, hdim_q, hdim_v, mask). mask = is_causal: the csv +// registers both mask=0 (non-causal, _rxy[_sink]_pfnr source) and mask=1 +// (causal, _rxy[_sink]_pfnr_cas_brd source -> *_mask_varlen.co) variants, so +// is_causal picks the matching .co at launch time. 
static std::string get_heuristic_kernel_fmha_fwd_bf16_varlen(const std::string& dtype, int hdim_q, int hdim_v, diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256.co index 9a15756d86aefec75dc72a062d99395ea4d2893f..8cb5743a2542f7cbeeeef765c4d5108ba247601a 100755 GIT binary patch delta 649 zcmbQy$2wsZ^8^iMfr|_iwU^Z&Vt@b`Edb#%0x32S^BjcF@D7zOfN~j_pbV%!5SJKz zF#VGQ7zGsx+OfHb(T$IvnPK662KOcVgc?}BFzjL2%r9_|d2@@%2Z7B8ym)x|7#MGE zkBW|JjErE}|5tpoU&KENCWil;r%n4O#`Kqa^R%_E!2DCM*~D4cUo6-^jhWFxppaqF z{sx9i`y^DiHcBwu*tflhnK8$T$?3xOU$%^!7^gWfPSQ36`iYrQo{67T4iqd53{|`R z%wEL_`u{xi{a)I9)1`X)#TkhysYS*5(;qr9s_^O+Gw7G+7iAY0Bqpa$Kj^@yBIJ{q zmz`QqF`h+UBi)4Kgu#OImyJ>I4vd3z$C@c!qPG=#nLb($;`yU+{h#) zCE3i>$Rs( zwUBE>ueBJHq zKXAHld1dhO+cw<4e9f}ux81gK{rbJ@RxV$$?7?-bH>^a?iWTdSwru^q%Wqrxo!i%} zy>jtsRBGHgm5QEIsmbS5YT7xKnt4v8W}j23dFNDW(K(e` zdQPRv&Z*Rjb1HSmIh9&-PNnWWr&1fvsnkPfQ|esZ`X;$a8w*50-9 zo=F6EthjpUsAc$jXz1_}*ZB`VGr( zyX&WL?U@^+aeqAc!Q~IET(xd3`jA~w^y=W|4!C#STCVeJ*Dbqy?TVFo-w&S*HlSZu zZukk%?YG@?B?SNQ$Z+MAx37BW>Y>Aj=p)1fE7z@Gz4o4`^YnQH&l#uskIz4y^6@mH z&vT*DP{@vW0*i;wgV8XZ`uf++q)p1FVA|1>L#ABz#bdWI!6ZVqygl`tv+e4$E`YJ& zecJrbJk2^4p-wz}UOiQjW0yG21n1&Zs-xqa@AW*D%2}sf5RF&orSqOhr_*_OsQ-_L z9OvT=>E6#=jw!T0-52Q&WdCP6;DNZy<}VzKJ11%rjx*wZ=fzsjX{ar5epB1W*;DH} z-#XYAX~oXBYrCBvf0g~+>0&R4ciL0#F7yJ&i62aRUWb$E*p2QbUXbq$dbikfF+Nwm z>z?O2?s?t}7uS7F+Msvc636LVc;U=)@BA5^F5I=L(}jhzSciLo=N6oIpVbhk{F6Cc1IMLa9Fzz0Uee5iMw+!r{T<(f2^hBOlDDwRCMP6{B zNcZAA=OVO=W%9o$X%}`7c~Pf4=L_lLcns|<-d4P&xO`8%xX+e2ZFF$Y-toeNX{Ww+ z?B1FQX=h(;hoSQxNIT!Jt^COcC)3U==(~$APCI{G>lHY8X{Wl@ankA8?)czhcU(|} zd*&6!Uo{+^x~KN7ZQg(2Hph+Z5Xz!Z4hdy6l(R!w7Rq}=84u-?p-hId+BwcXVjGLQ zNZU%f^3KwZ{vBW1)xTr$uHv$?c**kRoBMWnX;3+MMA+FQA9Q~Ir-sSW`U-m05$91`belA9t zGL%`KUT*j~H9hqSls&n8>eQWarw;Y~5`TYzzc=uA7=Lf#?=SK9tJ>-5>4v*A(lbyd zod({CvUxM&Gj{e#m+p!K^iG!SDlI9Ef1&g{<)z8e$4ld-JE%|2`)S{lD}A!KqIkxP zose6KeSas@!+cZU%=Ap7H#0dCHoa8)Mc7c1E-^Z@(z8t7tddzfpY2pvJ1sp8=@U*u 
zqHHuzaGH{J6_PWv$BpaIYyS8k?T(A3)3J2i9TG$hhl_%Elw2+fQgM+wMm-A-2OYr~ z2VChC2UiX_$BjH3$`3;MFQIG-<;8iT7YXH{yyIGD#OeOF(<=||_@cKXa19s^_^2^M zdjR+7jr6gjX0&GvBTmWNk?CkH+R&NTsEd8iwkU>!cYcaC5qA#r)s8Qh7alBdUOf2P ztaxoY{Ve9z)%7UuOrExBQ~yiB%}c?v!oqmTdFO4O)A^-cb2>NU{t@IyfX_caUUI<& zo9A>#e)neF<;%rS6!$LPS&V)ki2mcf($05kxmcv*C6z{&7FRkRc-eK&<1X6nOw4oA zU27iaLNmshe#E^Gmbw=P9{ia*oa;=<+ph08rOZRS<8EQ#GOwTGBre&0u(UjQqA+^d zzp)MUkz;_*4>4|oawluE@`3IA*MPNe;?7&(`d`;3zcg=GvfKP!=`StVMZ41778;3p z^+YeungD7*YMCKai0VbJKvT|j-bmYi*xl@DsUd7)}Fi-!?%+r+yA3)ll zsF^Qc zLOnWPCZ#8tyh$aKFkcSTj!Ta-^99n(7s2T~!A{|+{K`noN0b{JNNnh#hwL! z8T2Zs3wq8U7jO*!=-P1|dR}|&m7E)Yls_oOIYG(!P1!uo@6~p7j-s#M$GH5u){L>U zzZPySo$Ot*E7`krSJ#psm3J-u)AFv{Z`pFo_FF3YY+G8<^$UB-mliKAU+Vl_ZJzVN zkC$#=x)pyF%hHR=UXlm@Ryg#r!oneZ7TF!%y9iZ0hJK)3Za#E8Z^-U~QT}e! zznk@APF=3ooum(T=l73Yp5Mp#>t{6&c67w`<)KZqk$KOCn0Xz;>%!39XWtkV>-fgd zfHpt-Mt@xQF}!ime)_J9p|_}fPkHfntW68%?kZk*)2`y}ODc-DmsAV~A4Zh?X?c9X zJLU0(@0Q1J{&jhL*#7c);LGrM@s5(YCGm7|vGHStC1c{}rN?gOfD}JDzG6OfMo{Ob zW}Wj(ro=BuPYrcuRM1!UhhJ8IJZjvgP4mY5WZT&I*v;e9wdJVF!TjH`l56>7_)qwe zmOi}AGTXDu3?DYUeC~p|<#Xe6ceJMsdCnULdppU4?X~H&dY{n_4|X)SyXn{1-ke|L z7SNCUUp{wn2^SE*O*i+KIRbq>hO0MfA#n~g1@8q`v89p`1^bOHRA7I@OKP<|AD`c z@z)fKO<*0>xZaPy8vGr=-`Ul9F!tH&NFV&{bu@G2=LaLst%s8x-EK}F!<#U2tNtqJ z)BdWCb0g{Y*bk0LU#^R!+hM;K>R0EjixhZbTUFlEk$nYn-|D=%k?-%5I;)`LeP6C? 
zIyUQ9cWl>a-r12^z_N*^aW@y?WsmRs!uyrpXsX4bgonV+Ph92&R%oQ zxVFXd1Z1siXS+tZSR0D4Mp2%2{b^y-DfK?19nI^LtUr$P^MixX$0^Ry$)W!jxhE>J zs!ZfQ&gWs!tD5V*`%@xo8blu8eCGT~>3l}I@#`XYM@1&fMDF2yrY$u(pC!F2 zDRN&*WOakc{hZI#uj~Cona`45StPPm{hZIVKh^s!na`qk+WI-4X@4E&jG518sQ+c~ zRr>OWGM~>-Kj$;`>+^nS<}+=p%X`brXWq9SbDQ(iv=KU-&%8d`v01;qW3&EI*s1#U z(BXV$JsdC2PnP91=ch>z=d(!<=QGn&@OP@8Lf@)>3iWe7Q$L0JIiIOt2Rk`GnO+Aw zIX^X>^Hb9~pP62ddQ_iwsy@?IpXubLgEHe##;iB1!u3Yvp{U5ZGLdGznG9X}FSFiA zdVNylk(9`z4I<5Y!#>ZfH%K?@jmQI0k*P9~X1$>;ne|4}>yjc5r$p8_h&1aB^{aY+ zB>a^0nj(>{>L)*Gf3kO{9%j7}z0=lDe$xIbaKP~M4E2+r(w83zKhID<`APliypIe& zXzaQ_UD{A4{GFY=9Lc}>2V^pKw> zJ>)0TlbFA%pG4oPeiHSQpVUvHe)5z0Rj`wMV|o?rB;PcheA9IDlj+r{NA+o^>N8#S znXdeti8Az`^pEp!%{f+u=LPl`ec#J?7N8FOnuB$o*LA&z_E7y)@3}oxzYhJS`l*gX zdw8LKecsR>T=Q9fUEW+X2CTmxJkZx<=P2C*VFMid+uanyhVH=Kf!SZVwt?f5Ya23r z=*{0$^4>1i@#>3{JKz$+ww&*=x;Pw{{}r$2Ci#aHAx1Aqr8zB11l2s}{n zRe8=8z*hig_y8LWCw07FgW;rN*kCxR7&aJADuxY)lZs)3;iO{NU^uB5HW*FaVQea-b4lo5 zKEW>=7~erU-k;(ZT;MAepKzVQz=IX1UFRy`s}$$uJ41knD9+D!t_HqZaY4Q_6nLoO z^YWc*fUg0bsQv{u*qlBA8*EOWfDJaMPrwG7(6l|YhXW5+jPV=+JVG(Xb0qLc#Td^~z@rpnJVyhMR*dl+13X4C#&az2SYVq| zHm}Jko7d!&&1-VX<~2EG^O~Hpc}-5)ye6lV*EXkeWrNMBT-l(UiW`m`5Ag72{1ktH zNV_M4$BcAh6-K(TEk=5=twt8cwn5sQ8iz3+r(=%sycYOc#Td`=z~dETJSPB8P>k`6 z0!I~NJSPHARE+VQ1UyMG#`8Mh>ws-e*}NvFY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Ge zUfZ0?l?^tha%F>Z3Xk#N(DC3YBb$P!jXW7VW26&%)<`#oeLwWP*e)XrW6wj{oSKX= zo~&bz@w^`Rdc_#eDZo<{V?3tlEO9j~RXU%k;+SmnSxGw{X=-VDq$1MgbFj{)<{ zz?&er0+?q8-bBG$fO%%%T_<=eFwYFU>jiHE=9z&vwfWp4emvitV>JyBopNBFV>MkN zcr!51v6`+F{21_M#8(Nf0OmPX)765v0P`HH=^DXXfq9PAG+gjDV4h<&jeI(DZjnnH z^xPtsHYgvS#P}4WZ@lT!S5EH&apOKBbHDH1A~*)j{l2$Ma2%NXeQ&woa$xTFz1swD2IhX>TPgT4VD9(5+n>g(adPgl z>C^lyPCp84b2h|i6!-hyV}i*k z?)SYdg2^fF_q}b|oXX-g#ypGHz*)Qo&f+z27O#P`cnzGzYhdNI&8b}3U~?*0HYle) z(>a#KDeiN<$7RgPDem{ZCj^sI-0yo|5ll{TzwdobFgeBjzW0=1a*F$X?-{}56!-hy zbArh!?)SatvpJQ;Ym9jouYt384V=Ym;4EGPXYm?1i`T%)YnxNKvccw5u53_Fr9wY- zPUlz_r?}7czAj@Z>NA~V 
zS)7{3F@9OboSd3Z{5`?s6wk4|?+YfU783ulU~-D*Sl$l=lT$p$^8Q3HImL4MoAdhumcMHG@dB0~d0(eOmY-1GrI6*PKGm&|``(Cl~ebd)txBTApvHw!1LVn}(hu%hhgY<{qMl&z*z9xCO>C@w8oGUKs zUbwUSi9yY2OW)bFct5^>KL71MdK2G88{Lf=o^SZ)_YF^3zT%>P9mGD9w59KCK)K^6 zw+rQ-H|a|<+&SBFIv!_RZW!8^xN5?cwZpa^eEzDduG~}$oIf~rauYD5gLhZs?y!L0 z6UE))U_Qp3G7XMX^8NKh{A<=FJiY~H zo1cenC%-uOG`wztF8ZNsr~J~OQ}3bx z2eQwb9d;K(76V88Q9&uLOL5)99~1P)b$?v<^v4AQaXk>az5MaPAY2bZdXXOu2IG1# zu6z5Ff+4sbg6ls1tRSQ_NN6Sa6JOoCH{hXt!?QKk4?hg&yi+VAx(%xfkF>Wj!Tyo< zHYV6V(%xeU_K&o;F~R=f*nXbvC*MBL_UjxN53Y;{rv~_QgD9?}xE|=w3nt-u5_oro zzaW^5>&ZwTywH+@BIz(;!m5PscUh^{Olq zxjQN{StfE%Tx3;J1>IRYWeY#wqa~_d`3l#gU(bVIW1H~}d5`&MbD8KI zx)eHHu5vy4`?#dlB}E=?Lfu`5yAk^TDwJoN@lAOTeiNS(o%${)-}M^T%sBZi}uSV=ulri;7OFOyr@s$U3_X z1t`OI;(PQSd+~i>QgpJ~l9IIg29ZbXHta)rwiDm5_ek#(Zk$a&e;?1;W}IMi8sD^) z$vxWL&-~Vrw%RtT-F810pd8zc@9KN-`!fe^FU0s2-{3~Z`EnY7W}m8~LR-x?a?e2j;uAqw)#;P^SM)t~bfA2*~w;7r2q@{H=H&U;5L`HGJ3C7kUF1y53}eJKoWkwwgSAw?}&i zeZh@f@9!|r4<~^+ANg!?JmjDG<`v)dbwxd#k9^j+9M_z8oVTBbd^39Z=8s$}`dxm6 z^R^rGIKMbQ=Z5tmWG!lL(Wxm#`60Kt-VFcQph40OklRDS zZQtJ&unb}ah#yBjp98qQa87cr7D?LfsK_MyxfC4ce5D^}n-M!e{J7}s;o75fHZEyZ zNs;@=jsD;^=Pms>+l*KO;>Sg&nrn~F-v&wBPi_nZ*Ex^r$7KwDXvT!!0}~@=fbq?a zlxcT6=%?8Z#2GMtx=hNqyB+k?YzJZ!^w|IG4;ePI0a z+1gD%&2}SRLXR|jbbIaIhjMH$Vk?M$N1OX$%!c0XdNcjXEdNf94+F<<@Lw=~+KdzI zHU1rK?vHZA?sC0Z{_bF8obg0nuO+WXfY&$re`I_%@5OJi`Mt3N|2P0;Myz$c*?uz6 z?~lppQQ-6(|3%}=c^{VHcgYTXbLt1^?|Co8 zl*st|!I%^D16=QPzSG~cPQ;tY`1>K41Dx+%<8)qgJu>xiJYX~D*ih8R`OP&>$3<;K zoC@RXZTmT|)mHj?)`u7t#@E}nvu?GOexLRszJ>Aow(Xp+YAgMIrat&q+jiSlt^t|) zz%ko)=Gper|FcfS$H@49wcoaPA@o=$VrOLhzuKQ=uU%j0|J8omUaiZvo9lvYE7yok zxTks6|2KJ_CqKu5pY#2_L6M{#ii)fogfSZTi0duz_XXS!RG_SR4&k%0=o}u0^5e_h z$U^^>ATDY3Ns&jcMEUVC*IVRQn{|Y9TK5Af(K$LEqZugTW{lea!Wi z`1{TJL7UC93jC=99X@Mv9EX8(oWDhqb|5M;#kEc6F5}LyJmPwowWLgR4h=*35%;)Xr zX}8@kgI6+EANBYW#*u3YG2(lewWLhyYj-=imarX&3DT3#{OxTA*ATV?@k8WU|18_V zXMNNEu-iQApQYVgOIRLpO5|Bz%Ac|SxrVU4h-uQ3&-qb|*OaH+$j$zrnrH7B@UP_l zOmLsi_iQ6#p7h|eJvluKV@qz%e$|Ai1YsNgR8GNocV*#7^Tu*SOsO 
zD1R0BFlifndsx4jJ6I=%Abs)^*mFWo`I?K!+5--|VfjLo<1;_&H~Ls3Xb0z5xBTI#SRd?mwQL9X1EyZ|IrjrC+tsoiTq{^N;>nn`qGh{Uwu5U1>qpEPvu3nx zSIc&A?a0&*zs&yCc5zLx+oJWhY*WiNaP457h+SjWj+SlGbGDXk;ab9bAkK|hOIo(A zWm`_#_kry#+h(^#>*ZQv;yk15PTWiO8OMIT8!;%m=U*5M#rRKq#`PTM*sm{loIOR5 z$g8>r=`)@WIgYu(b>vx;=Q9w`m8b{(eZ!zP^BXu1ft;1VzFNvxQzC!=2&B*cHukcR zbHD3)JYVAc&g@G>zsAfZ%-K=UoA*7}E0gjEMguSSW9(}o7yXIr@qCHx;J(DfBZMBl zFRlDi{_z-}$)Z?);z&an+ucdYOY)hE^V#$l<_6am)`9pyJ^9>4J?QV@y2<&?^^fyA zCFQFqk-v}Y8~cj>fqla@hJAwALS~I=5dCU%AIw?$3-%4uQGP%D2K$J8&OYHair|-BFSsj7Opoku0wn3dP9s@Mm>41#B!%0<*fzvY>dCXFxRlRo8`DtswBy7z$cz)(!Zj!<`e!{(T#Hx_;x+Z; zxl@DK(yA>?KUG_}Cb2INESQAFfG%%k)3bk97E5 zZ__WG!hUH&zi>Tby(vDA!FKjbr?6i-pa6!uA{ zuunRnPuK@d=mWJog?{Irg?-W~?2}ID6P{;j8`&nVOQwGGJ=dkvwNcx|b&2*P##T?B zSDmVj+9s|^tQ#@7dh+~=V}GhPYMZzwW$K1s=eVD$joK!zQn5k|@#8wB z?bG)Bzl`g(6z?W3y+GboWL&RXfNxQZm|9;3{<31k)LI6-Ofh0=l>wJ2Mog{cz{?dQ zrq->%w<<3KIS})vjiLXKIVMIuz~MmE>H{`_&(-B#jt_zV=ht* z8~8rvV#Tn5?_(}e3>)}9=FPzR{TpM8<;n*A{wUhBh z!%4-k!EjPBY%rWu3>yq56~hL@NyV_ia8faBFq{O=@F9mbWcZLn8{~V)hY;6mV;I+K zQyAAP2;+Li1N?lCZ?!iE64$FDkhor3gRHn-54+A9jPDv9??+tcZs5BWKk7R70N0v9_Cdd9cCe^aalN(%S#iD4jtv;&4Lar+&-;PzSB&v|0QdpL7|#cRA5@I- zd$NG^Y;X`fW@J35FmiJsalI-6iR-mB$cpO)PHn;% zZ_+Wxcs>gJsA7y~030aBc*cNZiZPyX;J9LpXE|`WVvOfz;LVCLo{s@P25fW6<~2EG z^O~Hpc}-5)ye6k?UXxQcugNK!*W{G)+U8WQY_K_%D;tzk@;&6nFs|38Fs@e+#`TH^ z64z^UfOCE0rzEb|)*vgc7dTacF|N=t$9Qf5-l7=exfOV;VvOfD;BAUAp4)-9E5>*} z4*a-cjOPyE9f~oYPXIpwY;(%yH92MTnw+wEO-|XoCZ}v(lT$XY$tj!HlF_Ug?>undQ}7x*K2E#71s-#+KDmVsbh}u{0i__ z6k|NU3j9^Y7|*W(e@!vQ^GV<*6=OV~0)9#{#`9_5rxjy7p8lF_quGi+^ z?a)t2T(7M`R$MP|>RF8OvpVJ&&*y-jQ;hN41-wf!#`AgL=M`f-D}gH&V?4hO{B^|` z&liASP>k{X2Jkn4ZBE&|CZ}v(lT$XY$tj!HtmYxD>1EMjTrZwuHHF_5Gp-lUv6{l~iW%37=U7eQ zcg2kB#dEBt@VjEh_2M~JQ}|sm<9hKNt10}hm~p*$j@5+kiuK$gmp16RMJ{d7b1cU7 zT7kateko(YxL(}zdv6G4TrckVy*C9jt{3BOicY>q9-0yq85j+u?`+e_!37!PZ{l51Pg0BPSe&74Y=9mHF@D0Ds z>1%;)PLBt+IXwZ`=5!R;=JZ5ho70nkZBAbYten=E0lBn6V+Q2X28l!Q8PBmUmUBsR ziu+veLm6{&iu--`h#cSX!UISa%F?fsa)BhoJxg$ 
zO5%E*S8`(D3nPG#{LW1hup;4EGPXYm?1i`T$eyavwVHL&v9=2Wh1usM}089Lu{(Fge9@EbnT;*^`&7ZUN zU>th*EvjZ1@)>_@I3(M_biB`bASwB&5lCmdcptW74&DjWeqlY-gZ?4jpD~PhU$!SD z}wTJOsW z_#QjkQ-k(UpS)lEBjNq>U%cay@2lSyzTfwcA9=4pU&nF&D^mZ%zmxjQ&y)Hezf|gf zs^4V=)YtK%KI^~v^|wsDcm2cr#$P;A@Sn!6CrZ9#^mTmphV{2)|0^NCtI6-M{YvGe>Z|?fu>Q8}{{(zgK2iTn{jaujT-)k*wfRi;s;z1ReHg!4 zWck$e)@x4PbgVhkppg&id(CV|daUbckX;&QS z@G-lSlAqKygY7bFhUD*={KT3;DbcB-M45fOpVp^q1lw-b2+^&kPJ`&1b_|Do!)RYP zr`UG(=U%J{T9?}9NF9}ww2^+6eO`lgp6f=Daq5-TvF;jV;`#jufe)+{B~&D>H1Lpe2ULK z@UeD$#Bq;oETkVMck9sZ`N+59AP&20N#Q~r1GJCt@wd$9zYWj1l%hTGd#&=rIZ%iD zbDb!L4&xl!YXklN%6M`4E5(Q(h}ef!@ISPv9yS#_uh!ndGKhgVeAw{vxeMlEyY2-4 zbO&P%4vtm*lg#_0u#bMdoAaf25^tVg{EBiLcF~`8Li+P>Huj^RGjkI5(2r))(N67$ zw4EOMm3r2X^p-IeKe6uP!0((c<|6en_k~`|IEkNF_n(ja)5T+?p7949k5kmc?;kiG z4&W%d4Z<8yb+Lo9&d`1YUkvt({bT)*=#EEy9&FFls8;Khrd z9ABZnFu52!TqDogOA$|QDPqfg8S=}JOoxv;R3!OzWk@eW`mK<+BJLdRs#^SFFXlt< z@Cu}_K>kv{CWuOzdP?LSS%th+$h*b=Nnm{P;-B^^6P=@XAny+3ec3+{Fb(=2+=;Y1 zagSwwZNPb1v-r2Y_)Sn8`FyUH_;`&8iH+BoU^;xkfu!W8Bu-vqLSp0{OVD?594Z&z zDn3&54@sLF6O4V3lCpJ_sOzxwLt}#9v#XC~e;HpXy7kg0umj(}NXdFOvM$_JbeVZ~Ue%^E?ipZ~Us1&uULp>Z+q`*&g+;N%(%2FB9FY z_Am|Qvf88mwF+y1@vl;@ReR!6_O#=n{OSRosNb!`T50^Q=w{j9 zAa!Qhul_g5X8`!mHQ?DAaIefy1^3|k9$YW?e~R~-!{;477eMb`DbjxL)c1g^5`}{v}~A{o+H&dkA@}{KLUUTyI2u zxBGu-^ymkwXdm=8A#W4(?(pl)_u4YY8WScCLg=Haxqd@e=2&Bb@egE9f&Ykmka-q{ zjntF5bu7W}*QNc93C`KfGccb&;9I3X8WWte(*DK-=V<2Hm(Lr}lm0lC;9QmVHzwGh z)H6PU9D|;1yKQUcSr_(E&$ivRHS>%M`{-+J+ihDj&$gT!(6eo~ZOuH>(mv?fw$mTm z_GX@E$$RM9_S^Ov9|QYryY*XZ+g^?_IcVE$+o}C#+iTmWoTH!DZ>i1tu^PN_Bra0` z?gZe@YX2?7W-)1dq9Uu}$crQIPX9OFl&=T^w;gRTm5y4&nYOk1AN*6Zm#wJ zHrR>lo#58JenZAz*P&mi_f_P56?ymhzYU(m^^>?>=l^}iU)Q4_sP{DTo<@D^{r??2 zi|c2hx501B`0Etxr61mfyj{q<-~Xqe64#Zue!%~ijK8j76P&B+uaCk;>d72xOmMDB`;R3!H`QOWPw9uHKN=I9 zo6`Qq1m~jqYxW`aq(2%HoRiZ2V+r=B`fK(f{jhDjZL9ig_969b+ihFbU(-JNSKD^m zR`u7kk9xN4wyo-~jeXFwZKuDs?NxtG8|klY`)zyauW6rcw{55Ksj$no*S632PV|>; zr)?MgwBZ_JsM1$!9Mu~yMGRN$2O?Yt7{8)LoV{YCo0u(#YvJ^f_)3gNvGmW~$V2|I 
zph(gVMMc&v#kcZH|IGCs_CGS5?96)c4LmtnCOU_2fzFo?y52_rKZ3ZV)h9)oc&TNt zyOBry-`9r#7i~b?mNhb ze>d?`=Y8LeaE=y9+JUIZ)I5}BTn)}q#;stR5id1j;-!ktq505ZtPRfHxTMu3MVc6+ zi+g>=X8Ul9a)Gp#!BE^W}H14gLM&erV++VRhu0t zQ&S}JfNeA5rLrvIrAGE5hC@_zQe`3!#YNWHZD0%wwiEGEBPL#|=w!7eC292yB9GW@ zUjRbyERy@5jjL(WUTrcLI3>Z&|?M1xQh{j7@j{H@>LQECkiJ5q*)0*R@ z-hr4(<-QwZTqQF$u#<5tZbg|p{@V36`(BJOmhME1q{sZi7~@y4elyn)1L8L5-TAKT zRru$}7(0n|m^{RLV2r0Vh&!{zzc9wVDB~h=K5`vktOrvM<4z#f!y?qf`N(x(F|Il9 zIByv<$<#@`Fn(Sc;$LvyE`c8B7w0EqQ?Ne7OO0r})LRiBgY)xd=yA?*9x^5c>q~Op z!A9D&0`W6A9~oc59s|ZpHFcsdHD0RPZ`;c_6sAt}9pg~g_A`!*+ROM8tPk;0BaA;` z+s}9ttOM~$7dOZDULc)>V!6X}namHOsb;!-$s} z;j;*1Af3`SyS^5-(H54c&!8V41bnJ^sl$+NVx7>BPnPHM6&SxYzjnQ?{>8B(N!t?@ zY2u~c0}gNVzZm0hDx5Q!xJ06}Zzam#^LMVd-R}^KOImeOq=}b$FSz}<-zmnpM6`=> zQ;Z)MotjlBfA9Zty&Zn%Sc9aQc&Y2a?I-*$F_uBRRPp1;=W{yO7tTq})gnpT9TjQf zrLF>pIbZ3=*=EE`6+bRId${)KoQ+FbRZ^shmwE@d&3Q{d&Nd@ns`zoyspi_F^S43L zOuW=P!FA4K`f(Y9ADS^?j5}h)OEtdPkuvRW2mLhLfq1FLPnSvgcDI9mn(aWm)QE|f z+Rk<`UaIMT*lpsao~7ON&n%C4sm4E_t=;s~Y&YViMohfacG}H&scbLerHX&wxAoxd z7_)W%=z2T-t}#0vHlJfQfa72ByBR-i#tHUnywp2TZo@yj-dFvI_;0Jp#9 z_cVT+_v75sn5nB#?t%Z~dQbXAF*|1JL*VsOejnq{c`ugHn5lQ7%tQaz^`7>NVU=Hb7Dr2VJg>oDJ)AgS9`^Q?wOyx6?t|9dI=AI~T$4p&=`NDOC zYn}Q6`h4CCF;g=>{~pW>`U0+bI_K&0StnwqW_(*e5Hht=PyZk}1Jn(Qb%9>{p ztR-clb9fWV2YLCC=l#L4xTMu5MIN~uM~BZRTEpvF#k_@VTDjxB;Bw94?Zy15uGFu5UVj8P|^G5i`}S zCuO2@XamYW@b5C0dH*c!=6b^Nh?y$S`%?al{m*rT?M2Mg9*mh9$9Qe+m>>CuKf*kJ z?|^?L_jiK(e8y)R5i>Qy_;lp-S24Ea=94Gf2+KYVj&t337V8F|?af%gCXJc8E3B^) z^|Aa5D8Ckb*nD2TiJ8j!&D_B={=G=AxG3Md4|3}z`5x=d#7spwKKHYJqmO55+QB)d zF;gE3u~{ENo|*X(`q<6zv78?jm>=Z%)(^2qfZYDC5;N7*&H039Yu2>`^MHPKC)Nb| zkFSR1pG0}C3A8=Q=PB6E`kqF8TwA&RaIK_|;M&Kz&b5K{BW9{u8%*5Cu)dR_9h`R> zGu711IRe}5cF_*I9o!R`deP_H6SQns%XV=Zd*ypkDPK*A`u0D7^e6k~dk;cB-7nw7 zOy&H}>`g>pVy0rwJ_NmIugLexr2K)0fp=Y%?`?#vye8koOl3Q`H!(3&p~rV!7&Dd6 zWL$sZNW<@syOWZaXZDe)~;$hZ#eq3aDXVy2oH-7JTDw`yBd>Z_tWRol4kux*H$D*x_-=(lQHnUrtU zHm*Nx8)BxKnB8$HpVc;|;ohemC$2$eoX{2%Ggb6kwIwBWpLU$M7O@`0Of~Vl8$`cV 
zTbO>TapIc9zCg@WvnD-)@q1xp{%K>TvfYT8YT|gGDQ2n}E7;ZEn5nGW%t83{_QXtO z{YD?_QCl%nIo~-y`Rs4*!TAN>ex{hIrv7kEvSV+yjJ3)2i1i|7s)^y`qB68jhRaO5i`}q@;+0{RMw4{ zsV0uM#ushbMr{+l$CQWgE3kT&FVigPR&-l<9UG?U<>oA2Cx+Oz$(rOl94O znQG#Aw;aco1?~%+iIJI?=X6F|=TGEY zloPPwQs7G!!-g)vT@=HH%YZLa3>&%vcU24JuX{dPCSu%SC}cVLYzz<8y( zvf)=dxc{DbB$qa5tO>?UEk@rI>v;9eb4q|q6eC_~DR8M`#4GIw+)pv$mG%ejuNd)4 z2LKOHjCiF3fd?u^ywWRxuK>>Q0X7&;>UhBh!%4-k!EjPBY%rWu3>yq56~hL@NyV_i za8faBFq{O=@F9mbWcZLn8zk1m$Jibn4`Ze_1(VFTk|%@fjC5j?jdWuYGu4Yp%+$hI zR?O5-TxSr*caV=7)@Rf>BxXxhU!HUzaa~1Gaiu3ZFA;3cv=jS_D17EGUAm14Z zJXG;{`OY=K*8peYF~J6#(W~vjL7jkO8kzPz+^0Uo0m<2e?1EU?Wfo7d!& z&1-VX<~2EG^O~Hpc}-5)ye6k?UXxSGYnxNKvccw5u53_F#Y0ZvBQ3Bb~xsuz=(sfDqun5p2@IE?W)9dnH5wZPXZ#(0hg9PIRD#mzD1D>WB<2fC8x?+sy4B#1xF`hoK4{USF<~2EG^O~Hp zc}-5)ye6k?UXxQcugNK!*W{G)+U8WQY_K_%D;tzkDZ`cHVa(K~FlOq>FlMR~I}~zC zVy1dAiJ4j$%Zix_PR+y^&(tx;c-{bfgJO*5EZ|v+F`hR9->4YlIU9JkVvOe;;5mvh zo^yfcD#m!;1bh>)%_*DLd7!>suMdBa_Vg(y_m#IEsSNwOa-UrVT|YLm}5NW1J75C@mv7B zKrzO1A@D-Q7|%t(ixgu#7XvR=jPYCoyhJg^^Jd_ifo)FNye6k?UXxQcugNK!*W{GV zYjVowH92MTnw(Nz+nmak4K}B8Wy9w^W@-eS@Gw>z~X3SKc8F<$TX3SKc8F<$VX3SKc8F*8h&n*}$v@_D028a%0rt%!C z=?cM&naXplrYi+AW-8CInywPen5jI+YPwo5W2W*PtLYlSjG4-Ftft|D88emVSWP3F zV`S#i20gdPr44$H#h9tZ=o@dk^c7>Ka?kJif*CWFdw%Z*!Hk*8J->IOV8%@4p5L1z zm@!ki=l5jvINH(V*1-3aI0Nb370o$C81KXS~2evuA8QA9ZW5CL3jmMNr8#EqME^Uw) zU!Ut(Gw-r?}tu?h;H+alh}~Ets6*e&1Uwn4IE%-@8vRImP|Hw_Y$g z#r?i_zhH8T`+e`hY))nI8e^WtYv3$i184CXIE&Z7S-b|$;x(}H+U8WQY_K_%D;tzk zar2CRJp8wR&hi|~;uQC}-orBHS8`(C+Va*F$X z?=ivT6!-hy7Qy5c_xs+qY))nI8e^WtYv3$i184CXIE&Z7S-b|$;x(}H+U8WQY_K_% zD;tzkpXnUS;uQC}-s3XnWKb6!-hy^Vyur;x)!Ri`T$eyavwVHEAI7r?}tu zzAc!X;(p(INiaFZ{l53zY))nI8e^WtYv3$i184CXIE&Z7S-b|$;x(}H+U8WQY_K_% zD;tzkpXnUS;?z8j@yjyivUrU#&*C+37O#P`cnzGzYv3$i184CXSb1%8DpxkxoXV9A z8Y@+$i5v1NVhx1fi;o(&X_JW+^6HDdoc$8VX%phaMaFLQ9)&FSU+|cYcX$sKNq${` z^g%cO#H)a0Tsad<?@Sx1{h>V}Vq@EU`eScdGWJr2J{eOJjp1(Z41J zl<2o=PlMFosy!@k<_`K*W2TY|6Uw_3lKWGi>Q-pS#Ut-;{MOJy@_ykDMwnQAX1vj# 
z8Z(t#=zYC#f55`u7nqo-c)5si^T@-iR|@|}KiH`-6EhY4z&H)O_rxu7@9EEXE9Cv= zyd?Kq{NoWupYy*8?WI2N)p!0!1+4d)`;QwQU-w)WQ|_i84K?~~Up3mtm<&?BNNlq#5;oh zj4Q(WN8c~?PuwQ;Pk&D8pYz==g&K#1<3)Ye-~IZxOubjEeBbzsv0Hv`{Dc2eC!??9 z!#E>nvi}wEQ^$k)XR=@8laPmuJ8~xbHBJfbH}Rs*V86yIsY3rV2FaQ1*O(>M=zr>; z$$stoGuds{4A^|8{?@+MKBN!hH;X0)$}d~5Id#9pnsZdXK_M3zmzuUP4mH!^^Y;`< zzQj!ZCDs^8-?y{-i9u1xtEPnRe#Vj1dgy14{t|17=+rQe*U&q_>qiljbkkxhX6i4& zL9QXt!5UJ@wlZEeeKzf4I(*FTq~tfpOhvoQnj!gnDlYohK`GIxqC}Z}yr0&mYXsYF z)(Fw9rcQ(Cn|9m}`!>+Na8A{u?d(s+kkY!;Hb?5Hq@<1Xv+VO4tY=&|iX=Zt+jtF~ zJyA)kvTf6KB8Byf<*1|fmx+EgCDO8N)ODc_&j2Q-pXi>k4>K{Lu=bl6ep2@t`%wLV z4c2|*w?o@b*N5uoQ+)10yaqdF>ZVVvn5kU1k#EO5-SE#=%v7|mWz1COXJV$(9{9bM z<*ULuP>1`s88bCqT#kQu!f6#Z6?U~dZYpe~Z*LVh6}Hi*W#XfT9ATf(muAwzH_qca z+%s*bqYa$b^{gN1r;VG6`?M==D(?#))iPFTCT=Rn0{N$lS(=HP$}u@bJ^TiO{*4@I zSKL(C)6y30htt`h{c%olQ{Mv@o8);rf|y?s!~^RI*%OlK@KcA1B)_f*=|xEI1K9_$ z!Hl1M4{=jP?{G2Fi;*AkM+H$SQ%{LHj+7#=6nQ=TF%mcRJ;Y5FoumDc*B^O3{c!=) zp#Q-@qz%M9dimoeZt8o#mAI+M=d*QZ{X^2`#zbeoe~^;0b(E;-cG zNdFv5T;>nX#NDexKbg3DQm&Q#j+8x6B>AoF*JlFsn;n0z5`N$KRwSL4e|14i7x>@aE8p>t0M}2G+)&b*VrCh7_#HH+M$3uN?HP%Msb45SP{*=^x%JER& zTZ#43_+HV?vcEy<%(7p7aFWjf@Ueryk3ryGksl2P<9aZzd;60zarmnEYyiC>$Qy#Z zKK|rjD6WTsn|=K$nK*pa=nv`*L*6jx75meI5x5?K>k@y4iDgDVkc7STi=&V?3VEge z%wPafPgG=86nRnP4fN+p+|&;dH&ycXO+wxz z@bLZHj1laV(WTHB*o`1$kHcOM(VTJ1`At(~vgUUn+4^ zKm4u4O+`N5)2);_(wN|!E0VO`QIScRBaI2pIX-`yb@W4U6VIMyqO(Wl&anjNVqDUy zk|OuX+-Xd3F0#D#56h=Sr&{``F~K?7AZh!hZyFPvo0;ci^pEl1qSJo6L%&^*ezN^` z4b~Ckccs2|w?qAQit7pdc6-~Qe!C9q2mP&cmhDh~U5|B`<;%{}ZuQqSSbyoS+iSP_ z>lB|;P;LhJGy~kc%KvgO6W24rts#C{#$VT=U#K?=d9#prwSQ|c8`raOJ=9;3@z?d} z2kOm5-dxmojlU|GhwFLJ8|L4U@z*KXOFz5-c?*y?+`ltegzH7P9^tRa`0F~_2fZc8 zTY~yV`u7-rEpzEu!ua;^nYJD_(htjAYE00#%iL*9aIWh4#ZlNuJ()v|3C>k%|FHz; zruu94DgChYM`MC>Q`+B{;9OLH%|4`_^haZYb5h!WEW!R%f6YFmAGU3`ZB>8GKBS&) zyKSrbYuZQuYTItxs{WexQO~yBwpIPLu@8E-?ey2Sz3Q)NBmK2)ziqGaDX`DB+qTpA z4A^DcYujg@&Cy@Bowi-(*_>Q!5jQo@`PRYP-cBCu9WOk10j`~?durd>=KTkLzB_JY zhfo%Ua)@($QF?Ja4TZ(q7jIcyzW9kg&Ys!==f&D$XG?9Lg2IC*us14m%HI7rFV9&{ 
znV;t*@$V>Xc3wo>nzw4Zmi(x^t8=ur>-Jl=+_L?aiay(xR&@Qsp7N!|OUsu!e^;C5 zeDLF?+m~*|U&XTYqVhQIwWzCia8cJqm5b8eLF{Xt^rA(TlwIOqICviHUR1uPym))@ zmf{6-cNK3hsThuX#236%9v`;9d?n5b_1|%5>)&wb-|?kg{W})#DlRLFmn>htxo?M; zcJ=MhjQd>j(yl(2G~?dM(p{xrdUR#DT%hSuBNT<_zQ`1wQK-rVar%v4&cj{2z zFY)&m_EosG(jC-iIX~^2a-~leR}{~fu@iDjvG4C>dYEtOo0*Xc_%?SL}UjTRc(Q9mK`h z?KnwF>=iRHVu}Rsi%Nbb=1ZC28cOINF#rAru6e!|hr}Ky6Z0b}cuz|5Gci0G1Xoi+ ze?Q|lIFeRL$vTPzCn1iac4g-wvtPq7h9FHcKld!O5?@aabd|>jRR}P zdnKnd&Z`}tll5p!PCNc4>(Q8-c6?3Nqj5Fu7@E}A*qL^WO!^KxhAjJBW5(LCVaYFz z1#8EAC1*9(s~zu=^_(icBkMV3JV&ITDt;sNPZb-J{_68+H+lVev|Bk&era4xI}YY1 zq-(rOJH91j^l2PRJAP$baR*sfTX6=-<+kDqo+*wXZL;GAlEWG^(~gnJcySsF(~f;< z$GEg(TiUVh$Xh#3U(1+wEo0bG-;OVMrdV~HU&l`qKag|d53Nm{H-Bht(m7?v32Ye` zux0$emht|$KDbs~zn1a*+KQ#tR_wHYa{3?k-p-FOhE})t@*|8d)$MQcBaAE6?Kk<6 zGsQ-0D;Am^OR#0^z?LxrTgC)5ytiWiwT$_9s`y-O#p60t{H-&^+iDqSt7VKW&c**p z?d3fEpVVH*dKZK5xjA^J346H|CH7{Scl(M2A2#RUufEtTDteiB_sRqxrR490<~;mW zoR{JGERHn(Mo5W$R_6V!q~Jr0xB0R8P4>4&KPCE^_p=%V*HfbW5j}h6`+epNSn_Ke z$nTnB(ARdRp&yt0)7n2L{0+Ps?WKMyte+C~ z9c~buv7cwRw5Rgr&AoUZzJJI5;`?*#2fhcl5NEnP3*$RP#`aK`@BEm)g>DtjOR19# zb@=`k>*IS_)ZzP9ui(7$O!a(%Gs83K@XYQ^I^+`1EN}*vIm6I%h{@A7ZOVG*2k&N> zGXqF-W+1q#Nbo+kgYUxdT@&bK-sRzYCcrh6JUd`J`3{WX8Pd!d0wnr9^G;4u@E+wE z-~XY0O7t`D<1`4arbPMu%Cj2$eq?wi`I+}=iUcPqp}$9YmilGLGs&-tO8&kw!I^hu zsITobJd^y&xahaCUwOv%8lH*%Y3)~@@qHiOhwtyOzxY1R$Lu$}U!y$Z`w-@AHPq#M z9;R=ht2|4EI(%P+_0@+uN1>xUYpWjRSz9{Fv$k}yeAavLS@>Pq_$)}{vjo=_2^OFA z9=IgwnRg({1Rtf$_^kIrE+NhMEJ%!P=6Ca?;AWo{z6X&K{mgIU4T9?_QNGz{;cVM* zN%CtP$0{OxB4#*|GH=tbN@72ahL#n>hL#lp|q{;!;t$QO52X&H&T74yynAjo}5Bg-^=E2iDtfGEZ8Rg#%Jam zxT1Z;-(bvq3w8M$3U&GWi1rbG>!J>S%QEvV)Zy_vl&m8vxQdc>lnJh;WF357 zj!Rl4CDP8?4&6s`FBrMjjg0c|4I+p)9U1F-qy2S34_x=0;Cf^H4FS(iimrDfWBmt$ zo=7jc&h^Im4+Xt&-Dj%nUF&ZQc*awFgBuy|ZwiW#UhKQx1V0FR`QkH2MKhU;!4U5~$A^Gtwk z=5Nrx9ZDr%{H?yc&{S-8xHJ?Zb)aF*%z$856)277u}K0_OmZoXEEZ~voE-R z`@GmnE`45XRn8Wp-v{07dQ<$JhEs#Vsj2=~gA$|W0xzYclU;xrb ztaZKFe$v!43iZtKUknB!ebjo_o9pi}`eUGflmDII3Z##D(DnH5(ev4t^NaI>bA)|f 
z3Vv~paIWbbA;0>AUz{VHYdS~BuYuqf=LqMW&Jor#2=#D|aNcRZu%5xFhyB7isQtov zhM*qy3+JHr3-yOWpZ&r)sQp4Ne;(~7uRo7=E63?i$AMq-{k_2;jK{b~TyKHDFBpgP z@#Su0q5n#7CDO;oT#x^L{CK2Kdfbih--{oN^yp)*$A2$Aiu5T@xe@;R@bu#|z)jXY z6Lqt$Sr5Z!L(YEGjZk+k>ZYHbhq*xg1)=_;P=5*ZuR{Hkwz=LA$jMK@2SZNznw$M^ z?}YWzR{ghj$h~3U9_JY6u+FhzNFQ;Z%rVYkons@AJ_hx0j&Tm_92e_Hv%`%=~{!d+jrWGhOI zKT;<6C?&9*nU4<7pO9wGpCGZPI*=5cq6BU}e;OU0WlDZsO7agk2(G7ue)IX$=AXT6ksv3BK$p5?n(hE%ozeC#&LI2aFPAEMTMu33St``23w7#4ouklEp4GHbkMgW79pzbDI$1tz zF}SoQ^jVPL#GxX=b(Fy3vlfF(l3q`V^dn`0k5U4Q&srRE32DY>L1L^9Bn77^ft!8S z;*d*{Uzd{n!wrJ#DWTu&vlfS3lKdJ+@(&aVPEoRc zg?1V)Nq$XS@>|)jT&hNU4VOg!wDv2PD#2}XDGAP!OI6^aIWxzcQZ7|uzK~1FP`4`7 zt%k00DH-Zig*w%tP7QRFOKsJoTxv^4xzv`9&85_8X>&$$0I8RQY zn>jPbn3?&8u`u%ueQf3%xT1Yj10I<97V6f8y7kc2K1zi;b)imusB;uL+DC2GqkYtt zj`mSoI{$w@Gv_xB{5GMO-y~edf4d6ua{hZy-!b1Wl;Zn^4)~VfOy574{Al<+#OzR( zh4NnKxY_IAtf|koeii+C^xM_H7rra#g>M90H(rvTzqxNud{@x38Jq72df+>P9?fa} zpo@RAb94X8@r^-Wysck^{37IEcwxNcqKh^+zt`VyS7|@wZ!WDUMP3|vdVkmH{v8w)JdyggVx}YItub9okXo!YsFbQikh0Nj-s}3nGidn*HwQakvUvRx z+L=8)QcF>9)?4lM)9O1vweReCTusS6To8}YloUf#Qeu`^Tu4nx$$!?ADD3~$lqf7$ zQ?h21i-+h+?BC-$=)nKI*7hpM>Qr|@9_9s6F)s*HP|tT4(DkWu$IA7=wY~PKj)Gyd z+tI#mJ?cYK3l6LUqP^%qcHWCk_%K%&7OYg46d@PmH?c{-s?aSdGEBm=h>scy4a+SVED?f8pjKx!FES_?ht27o*ImT5Q zd#0S?su+u>(%3WQB3EgBN977vZxa5_RWTM%HK=}zt0RPWxOx)b`%muU>=f|ycbuID zp8bKdGr;pdads9s@C#?>fR_$&b{=^7C}$UdS5I&@0K9&Nvx~q#E^u}U_}3t3gTNct zIJ*qI`6p*rfWv=tb`@w~F1c&K5tHV|%+BfY8Mw1Ays^r@u(xgb5)G*Q~RmDovZX)g|Ul{4=DIGzU3CbhTzu_{2GE^ z;~=;AH3Ywg;MWlR8t1shuOav~z;7RpqpzE@!@wh3IXeRE-@(~Y;PGD0`hmpnF(C1K z97z100205%Kk-Za6Tiei@k{&@zr;WBOZ*eR#6R&%{1d+$tp80F9eiB=wrse|-&b3n z@;8Y0SDI7iarwJFc$YsDYpp-&*WY!#12Ojg(a%pZ<{&&vpX(kYr_31(k`E(&>cxZed6Kf?i@_$4>HQ1k9pX9CFG~vTucMbXb`!?cT#m0_rrT0mX z`+hxX0lm9n?bkxWLK`b0#9FV<5MqtjJVLDP$_cTiYcb(O8!I84jLya92YaHAXO(R_jOXhoU7!aHM8-J7%K%x^r$UGr9k zBf8)1jRq>B-g4bn*;wZG`Soa2^OjF1Mjnx@^O9G@*dmL_QBSH(;)M(gwcL}4;fati zcZd@ol&#K0GNUtRWJRKFMVvXC679~I))r&pFWEC`=1eCeT1I2ejJ`Z$d28|JFwA;u 
zIvGl7kqFKX?0RE+Q>e8$K|PL^LskzUF|H>NjO}E!Mqmu~Y$=N@^wxra)|}DPzB22) zj&lNM|LXu}ON50QmM*B9|2i%D`#&V&8*Jm*Es`tZR#C=}y-df(FBS3b zF&%V^?T5$MWxT}Zhs3{7m-W~sw!2kg;S^&aj1X64u<`M{>;Rq^S0Od09Kp9YdOh7I% z`e6De2QUgM60~D;6QdLlqtxWNyy2T=_>M60GczpQ&)~jfpHKtK7lsK8o7n|+GA}L> z;of|}Wd;uutI+1Cz;~jXr}QZBF)?y(-Ztx&=;k|DHTXpSzg@7Of#LXmEo_=vgVoGXJvHm1(WnR5v z2L1B=#N^cJ2SpiGgnTmdvQvw!6dZkB6nuPK6pW0ftBWz}M_DE&Cz%)%V21&quL4K0n0OwBEg zOij$q(kv6rEiIFiQ_WH=Qj=3s5|d2QQZ*+pWR#hHL5z`;=?KI2+hUAz%#1T87jg+t z=apiV;ey5R5{Bu;4sVq3;6(rm^UnCm|iT+Xv-8!PjW+{QstO`VdpnaD7dC54X$%?OzPAurVVr2O`Xu@zo#!S2_n&${>O4q!Er9s&~YMd1R?-%1i%d{cGhi-M=$zFVM!h4&`)vqR$yhwx!#n!u;1p zt_&0tDP!LbS!d&3F66jtCQP0=$8qlddik1VD>s%G+)}Y&ZN=)1_uue`O?Zw<*OH$FI`%(apMykD$18FzJ0^0$_m_Bwrm5^7H?c%zO>@YD^{;tQoi~t z1?!eC-&j%UJXWxA)s~9(UoE(;eD$V^#kZ_lyXSh{yCLebWWwp&Z*S0 zb1Jp+oJy@er&8BROFJHB} zV%cJT%KSYa5&lkpeBpB|m7#p~sugS7f9pNzEb>C_;es{gn-?!%U0%8P_H`R>3F^kJ z7ZpCd)|RiScx~X~g)@efmCH(s7Y`~L zwz+6X$;9s#tlzNew(`n~lnSjnI$tf=xMKZ=#Y@+1T3dPZR|+=Xv1ZA-)vpzuWfKLr zEIVAVY3-`z>o%;>dM|EOJoL?iO&cpVE?#yAv|qJ!@y5#XrMG+^jy=^by7o5(x0l~m zv3$cij35Uh_sN3R?yujlj-T@DHY{GVZdpa>Yr%6tB}TQP@;g8)mae@Df`52mxaz7E z%Qp`y8d4|^5Vuuq*tlxl+C3q88o^UW`aeGRbPVGuM4sb<+k+r4!IM`!cpfVa(wSfW zy5Rag0@O~CJ?59ax8~1(ER1XZF^&J;)A(7Av2&LBVH4Bv8U3%HW}Vs~Z#;NjJz3GC z7dy^a=fbC-zQl3btL`Vua@J|*mBtT-lA&G6WHN+@`af-UoVVixoIh1TRwjEsa0RB) z#$<2YcOd&d&;bv|oog;?iaW;}6OL1|$$7L~+;EZadd;+o@?Tu6ygW z0k61uj?+8){OQ%I{j`qf*Hv{qKYIr2aL-e2R=e}vYL%_iLren>LuO??ihASF@mJ7k zYh&l?i<-tc+nVC;(b(Hg`732$24%TxWVSN0UAB?!+Z%b_`9``IhOjL7ViU`RKV{O+ z?_lHw9YfAV$!I)=enz)Pw??b?$D_Tr#;K!&Hhadio087Y8%I8Icx=)+(Ac49_H9Y$ zYmNK9^X7@9^L32fg%>8BziCuiPAKUdY;>Gta;7`RztA1!N6}^|JN~5R@#OuDFKt(U zLNmv->=4M@Ko$nFG>|g`Sr*9kfs6<8zChLo@?apJ4&=|BV;nQ~IojFuHBYYrO~3kH z@>pHpOQLnrva)zyd3jCm4v*FK?$C;RUHn*GuZvr8&-(nj{7*fGzt*&B=ZB4P=UM#y zs4?mM7=3r*J*)fd>XYnK=M;vVp4Ax7WVFudh_USxt;Th=b04lzrVM4uljWMXlarHo zq3ntB$&+`-ou8n-=kWJa{5_Aq7x4EY{{9|+KWm(toT|AzEjbNkl1bp*Xx5n)pSHVK zGQSRnb9&b2)#c~q$1lqNa&>-v{yq8e{GH6tIp8HdT`vE=Xl-=bwB3+fqn@{$>A`(n 
z-}K~koo{;mbm;V0pvYgQn=SW>jnQjaL567I>y1BSDj;8 zHV5+UK(-GZlQI#f>k+3%2=4fTBN4a+3^#n!h@x)5-FhN@-78S%zsaxdo4pr7NWGhs?qRo~0N zxy!(x?Cf}6yLL6#b$YDsx=yXQZv^)v!0p?|^Ugc3=DM!9-?bHYzA3sZ+B3R4it+4^ zaba6Y=gW=!fJnyk_Gy_P-RCH9ENA~cF8b_@3pvT0!}svRWP~&Im+r05)IHx<@LTQ> zezxQtaXrV$=RNc{?q>Tg@AYw<#Kk+B@~iz_*`=5NfqkGa90A-uz-R{LPBf<72e$7& z11w_`cU}UA|Dmz|(%E(OUFOs!FP&RQy^>w#X^Hu=D;clY$GMZN7#gSX%pA%ya|rx) zIyp{WMMXS+=+Jnza~SnKiNEh){(V=^zY2|?LfZH6*NDF+{5_4oXYluZ{QaPDbaJ%j z@tEWo&0}EA`7!Y^yT>NS>U)qntu$Gx^OV+?>UnS}>XCUcJ~>|B8=p5G^Wdq*QOQwy z9zg1OU~n=N;Ovw6i=oH_TkZT>V~+DG#`<;G<&8#dW9LvK+*Pu^XI@=>&-}Wayl++K=l=GW@` z;5_^B4)fr89QyJO%Dr<9dND3Yhrf6RePf$;m;UX|?o~ec_kI9}gQFN63-(e zo{q&w{1AUX!QTt``#Ju8iN9Cz_hu|MmU#~1`VjsOGXM8$LL2* z5ogH@^_RF^oQ`^&4(1wpsMN^AWkx;~!gxiIcOUZ338BwO$G9{wKjZ=ChdjvqkcS#D zcAZ#v$nEk(!|C%sj&YL7l+mp|v?p>P%e31Nnicul0h6a5 zd1ighq*MRvX9aEH`q=eBI@*2rSgQ_m-xd95Mdr?tN*7W3w)^z%XK4*k zP-f%-@~@co7XBd}JlmIRe3+mJ57G_kNFRW{#Nsk zx*ZI?r1{5o4}**3pY9*>kbg|4{^Xx-E8w3_5BMiKkbg%1J(_<~5BVqQXMqz zr^(OpWd4TGAN6=rxB5`WZk*5B?igE+r|uu}a6FkV^I5kQjHgZy##40QcpCj*(&H)h za6BcQ<0k0ax(}O-p{p^Ejw_`W@B&mmekaYGz(#5ZE%{XV$kG4;aPp+Ak z{HcCM!8(8EP(Gvdr7wE+Jg(rSUwwW2`kiO;3?-jG3m2)TYtQ4e zl<^0P7w)gl3ZAu-ZhJoSB`0FskMp@1&)-S+Z9cyycTRk7+Rt{rH_iDO=gPQWW`($p z4;_h|w+DR%#`A19;BJDmLQZ$!?t67hq2YxToNYLQXH>UV`yV z-5aAP}nI1 zE)*ONJA;4+3C;>TMZiUZ+l8IMz=MIghSKv2I@p{(4jpVxABPS$r;kGio72akgU#vV z(81>Pap+)k`Z#nDPS;?cBhxzkpeFM=gnzKD37=6uJ|4lPWBzqow)nHOJmFujr4yU2 zr5l^0rHakfGCMX8(h0Y6su(sd7MsJKLx6_}hCNGwO9aE7LxG11hCPP?4-*V~4hJ4C z81@_iJVG$+ITCmzu+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^T$6!RAz^ zbP!I(HAjy5czDx(iW^c%_k@49mQJizOEkq|K>Ou<H za}4kp!La98;IV>X&r;x0!La8z;BkUs&+)+H1;d_K178hnbIRs5Ic4*joU(aMPT9OB zr)*x6Q#P;3DVx{il>)^-Qxjn031V~Db0Y9W!La8f;7NjE&&j}(1;d_GfTswCJ*NUs6%2b$1D+-r z_Vj=~V4G7mugNK!*W{GVYjVowH92MTnw+wEO-|XoCZ~kgHm5SBgUzW-=^&hXT65%> z|BRL`{`a*!;r~EOC-y@v-Pp5Qs@RXT%#QsS(&p53*m%0w9QM2h_!_~m=M3N(f?>~V zfv*(|d(H%&DH!&=4){93u;(n`S%P8D>w&KawmD_A=L63d40|pBULY9uTnM~SFzk5)@D0E= 
zr)*x6Q#P;3DVx{il+9~$%H}mWW%HVxvUyET39oHVWl9H|Q<>6X;f<>4plaQ7%2~L- zTE6e?S)I2l58vx%W53D8_qSR29yfZSi_bo@7sc;iPw?Kjkp79B1B^ zF!Gu`s#@&1=)AbUqxQY`f3l;suIGQkYg52AKO;Ems+QH}K`e0}ZYP<~~--AcMC8b04c^u)*7axsTN{#Nh3~ z+{bDex+k@7kx3n7-y)Mb2p{f)eWDl}HPwvO{lHxFE6?CPz+CgIYYct>m}`D@t-*VN zx#m~b8T=qH*Zk^wgX@5~=2vqJeh8Rrel@R^5BI@75u~*QoV*{HoNNg=xd)h>Yza8| z05Cb(5^!=aFge*0aPmQ5aPCZOz+CUE#RkWLx!zah23G@fy|0!UTm#JYzN#?zZeXtW z)rvjNgkT@5>fLY}r|$x`Iqd`6oQ?t8oQ?zAoUR78Ib8#6bNX&z;q;CPX744FI=q0- z1@5I~Qils)_z?E7E)4ns8*`njRvDXTMQdS6u= zOippVukJROoZ@<4Z8eyj;(A|gPv=w`uVM2vUIVA`8aR#Dz-hb&PUAIj8n1zc*EXjz zrGw3>Oz9w;`at_w7N@w*RreU1lT%#pt6c_@Q(W(>Pa8~5alNlTV=y_z^}f2_U~-D< zef5CBOq6aDX#a`L+PAK<27ua#%tg-UIVA`8aR#Dz-hb&PUAJO@Y?27rgX45 zl_?#BQ%?te>YVnmEKYHqt3GRNPEK*XuRdomImPw9`nRz z`|3*ulT%#ptH%r`r?}o%UrFaw8n0pVG+qOz@ftXd*T8AK22SHOa2l_Hh1WKxGNpsf zsZ8l0occifSQe*d)5eb*o0C&>h`(wuImLY}^)-XZsd>bIV=y_zeJu42gUKoGW2tW% zOipnhOMS~=a*F#{>f7m@O5-(bp2ln7G+qOz@ftXd*T8AK22SHOu<+XERHk&WIh83L z;9a@@&n6{WwHFwTRUd6{MhQw*(^W#{w~=pf9)5GvRQuK zH%hWuzUM?!zTZ4keo%!eKm2x6e%w}5e(FP}{B@6+@(aFg%J-i0N8Rq=O~-qjCw z*6m#Xt)gtcvt5a_PFF5en;_ZO+aTFjw#~j$clMRKv#->hePvzjE6ZO#=%p-{?=kM> zESB$c-RrvV1Ivx>LvJ;@kF7SkPrl#ie(e`KXS4hjLrnRelTG=4b4>X`D@^&}n@ss} z+fDhY51R7VeZ`dTI`Jjl?v)j<>#-iW_0M{Yy~jFc^PTfgdLQ}D{3pGSdS2q4R`Rmy z-DBF$wHI{F-re>1fY!7{FIO!%g!6Fik9^@poRiaex1@Mp`TN&3Pg%b9g8v%8F*9k4 zUamyBV<=aLau4bBg(>cwZ8@>W*_JCt{}KboUe#E|a<@w*Hv?7+GI z5YC3N&krGQM=$EX8(g;_FUBFKV>sXM*fX~S=g^tvW#7*%O?|?v3*MA7C1 zUx4cZT=(?G`-QkJ#C0!kf?tH|BINJwP4bIzU5xaoH^nc(bqTKXylMV0To1!_zBk<; zf$I^--^ZKbkHYmRr1$k^`W(kjz@1)fPIQLs44DI&1K9<#%Zo=7rhmV;iRe~%`*dL==Bn-d%#)8FO< z$H(-yIl=KU{XLrC_?Z4SCpbQ|?Z;Vv^6lfSzs!L#;K~?qs-HK@FU55!uKRnl{qeXS z58hqr&GjeXdIHi1c=P>9xSoXTtGtE&6kJcib%D3YpN8vc$Uo4#$)Aqv=}0g1%KRC) zo`LH@-V%Q%u4m%9$Xn*m!u2fVAM7pnXXAP{(u=*7{#;zo#q|*HW`91e=i|D>TkS8z z^+Mzy>aEq>H}j=A!TDw8OLKzr%gmRf3C=GwUz!t~U#6eU3C=Im&*lW@m+9xx1m~CO zXLEw{%k;B3!ErJDY))`oOh1n%I4-82%?XZ+>1T6-<6`>RoZz^aejZJ5T*qu%8D z$5C%NH}SsbG^1yEhAWA)79a3TrJj56-|1&7gR@jqg7Z^G?k_d6q0C5g-VxV$mup|H 
zk&iGBa6R)tn)8mQ$wQkur^(}lUiN{u`man`&N8R@Fw-Uhz`<$3mo zXO$eB&nYu`p6Z4?-N(9Wgjeayc^HK{b0@lyk=||o<0waccxFjKw>db^a}4)8UFAmT zlN9D5@8is2w>dc9L%lF=4V_SLrz>4WUq!u;c7S?89=r_qIuCReeODIBvtKx;*liBZ zHBm3*In;tSa)!7O`ZW7|PG^+O8SEq>?HxmY-5%|S zGnw5n4!KvmYLvIx?}6)HlU+61+k)r6R(m-(8?2e{PBo zv${DrH`NpO@~?GOsTW6#iheKj&2l5-yc&OAP#?~Vc5`s1su%M0z1~&hy;`3<>3^OZ zx!T+2pRnu0Gj4C>>p#y`6TBVzGv;#OE<;_#&munS%DB%#zfPbYa+se@&3!8zA)mV-o&6_oIZnOa32VM~MLNfVUg8Jxle}S*yI7g3%BjhH11@Gg0sPPr37ju0d z{SzQpilPdAARL7zgt6oW6WSIzSF`$_m8sgFK|{!#oC zeG2W|A3UV5qQ4a1MEeZ@zx13yy37G`rvTg`*XbLD7U73uWx+Yiz#R?f<3Q zrr*g_+w?(I;J1EW@y#;{Z8-wAyx#k=pKH<%l^S{YN{rcv+g&x=Yw-C@Q;V|lOjBm^ zJVhIif{o{RPxu3nf7G3>n(H0Veu?v+&oFcF4C5d_=4%7{UIbf_qihdp2iP9uL4FPi zPalVks4Jgk+FAao81^Jb>7RHX=dHDWqCTkO5PhTYwis=d+-i7Ae?8k zeGb5G+E2ru@%csCBVR-(yKVX)J=V}KiRYEGZEqHA#rEWx<#g@Q_vrEB=LnurMnSJJ z)ox^-_jUg&jLDdotLA$L@wpt-%g+n=nK8&W{vJ27!24Uj0QpMqcGW`fkoGS!{`4)S z$T#VJH*$mbcludk8aPASPlxUKd}3_3`@~ z7>A5LRQ%8|+~*jP`{d<#@RIhN0FIG6lUCt52Xe|9JhMYi1DEK7ro#s@|BN92%pm_P z-E>thkoayqz`@3Zt&YY4;_8u5j;N>V(cdD!a4+U(q~-dI7i>O z0_(b5NZfBAPuzeX*{~~p(q8(2;Ms}!kpBRAN4s_jxA(h-&-aT^-;@Vj#dQ^Z0H2lU zlS+{vXN30gvyOg?pJkjUOuq!r7G>tXey%wd*!l!)Oy5M^`RpXVi8iIL=@@=HM&HEy zh@;S*>nzrT`u1NMJ{IHWAm+O)9Rq*ppV$|~TQL6V6uLQqZn>w>jed%KM68DH z^i#~owr%}vJ?N*j9iS`yROu<&W%~P#3H=q@m1n22)3i%}rN=PvSA#Jg(;jjWEfhA! 
z+%E>6{yD5cAZH{vPC@;F-zq`+%r9Zx0Xgd{K{@!EVZgJ$>ME|S#76XA!;wDsZ?Fb| zoc~Q%jf7nIZC8b}R5tyW;YG*pd=~l*_T#%e6OQ~dqwVN&I9u@+->k zg(Dr_>Y2^Hb`1L367;oW(ASRWEB#nouAWNowtD`xbfj3f%W@vcl@s@ zU!7&j-*dSse}A4S|Jk9Y{L&+T)a`D3>zJaxcVBk7Zs&m^gR{u#Vx;lA_#u$&YY8O# z%C^~8>dx=!s5|>g-Pu>x#lEuqyl=dOdC$JStXO`_0VOzI$Kp|X00oMjlBWn!=1A*TYkLNMa#EC*gL81Zyg0Iv{?cseVA zR|-ZvomIfA1S6i#&A>McMm(KcfNud#@c}w$PKv#tgXW}Q=%6_%7&>T93Wg4vlY*gx z=A>ZgpgAcRI%rM;r}&UT9a4PApbq9N)Mmu=*%HL`sS0BH_(4pcxIaO^+gszCm_D_> ziRrV=PmAeuhwH3{eOHUU?{u9tz-t8Gudzx2s{qY0Xm)zbg(&n96H#XJ`NphP9KL3Hm8q62bBK}?^xZ({n?;QJrO%BaOYG37Sw z{o0$6mkoQzoX*Ha{(<4>(l| z8`p}>Vb86=TLr_O+km$ThCR0fZx;-E?f~8)81}pe_#VNq=T6|Af?>~Hz`KBLPT9OB zr)*x6Q#P;3DVx{il+9~$%H}mWW%HVx5??D-ks&j^M+?*qP1 zFzk6h@cn{e&pp6<1jC*W06ze1bIRs5Ic4*joU(aMPT9OBr)*x6Q#P;3DVx{ilyJbSbdS>2bg|r^f@^ zoW2@ZI4$ReGpU1|7tW*(<_!7=+{X&epp#Qv=c?Zso0C&q@2lS%OippVul~zma*FGH z_1^}QQ(W(>|1p@H;(A~GKZD6BuJ_ej29r}<@2me!=TsW6Ve>Rz1E=vCIE~l9X}ktb z<27&^uYrZvHm5SBgUzW-=^&hn2kXOXEzfcv%ibSw= z6xaJIX)rm(^}Y(3-`60gxZYP`gUKnb_f?j`1_zGila*FGH)zx5fitByV z&0unh>wVS3U~-D#%tg-UIPoSZBAuM2b)uw(m^=&f%dU1PVJ?Q`x%>)Q{2Z=R~k%CaUV-vWiUC# zeJnN5U~-E4SZa{LayKYuG%E*T8AK22SHOa2l_H z(|8S>#%o~Vwauwa>0omzQ##1`UXj1S`@r?XYkJt{k(>FB@bE`_IGyx6-3R$zAK$?u z#=F~xa!vYSC*=5}okcH?T5t%lm9{m#igNpgFX`c2x2=hJkoPHxJHot-Ju>RfE!oT~ z?dmhI$X|;5_}-|2?*=lq4&N)Z#ysN7F(r-zRC-Ckt@3pO8e!G%+bzh)glIJB( z(GJtuPMOtC1Ku}fJN0NM$uq094QNaM_HXcentV@w2<%V{JA57e7TxE3 zoo9}{paf87u!b}*QFz_A9}I54`aK?HQ&FMy7{iPr1PCYrXQU8 zUQQpL3QqSm=NRmmFig{D7|gu}#u5=eH%txqZ1Nm9RqPnyG}}#a8fC!geT@AgJZ`{T z(>%sK@Zk|V)=ZhnUr&j9``gBz5l$b(+|-=5>X&-h7uK5_)WsM!!siCeP0i<1)Gx7Z zgwqG1tLF4c+9?g%p|0p-J=+mJA4EHv&!=fe_aN^-5gdSy4<;}QvGwNFGHTtV@y&Cz-yr*@( zRDH;|7WdZTUb**ueI|KA~|ql`aZkNfL!e~I@)pK01Bn|p^h;NAwbvDEvKul@Lf zM|jnAbuuTRYdf9?L|tZ_n-e^rD05&R<_PCNnW^uPu|ac!=M&AiqfMMo z6u#GE-fF&|LN6!KEB6$73IF$DE^Gb=?b^E8Itc&kp{M5mDcU{Vc*uOHN4t7HoTgow z4-NEP;QeNd`)1f^x%Yy<1=m|(*A?F1`(F>Z%KkXuc@^$eA>T^x=Q=ik;q1`_&lU3X 
zK=X%Vj=YA)%?X|}G`ww2@Lb_L$fx}<>YM{k8y>^9IA(W55wK&gq|OEeaL6Hdo;oMA!h*%XPXn8Cw#Bp=7-(SQ}xU4$Eox+ z^X~xd?THUKOn(m_uno4@23xH0 zUiWw4dIzr8dT*wD0Q*57uoL%o!hY+#U-`Rny&L(~d%w~7^k)m}L%w@)?_S)y)%$1v zK3w02>kZy-^=F#?tYUqb+k0?t59-_K{hPlR*L#t#(ra%0>|=e%SBHCbxVOps4}Tx7 z_u=|B?>}2V`&b|5_QSaMFzUPA`-ApThKtP!`Zjr%;}|op56{yXJ|0cb-_aLHtRKV8 z<^+A68Jp$=xo*a$IYDllu{fF_m(5r-C&*>f_vQr0()7JK!Lc-bJ(}Ry(Z|?)v-Pxf z`b+CUAM;VrV|K?c>fd~i@EL^XK-O4dVkh&QK(Pb z7)rCh=0?bQp6%s*#88Up7)q!YbG%_T>gBmpa-Vu3?Ev+HJU9pUc=nWYf#-bLFT_xa z=om^SkB*@<|66W^bHa{`G8bieUX}BMaZ%W(hS02tj-eE^&2yomU)#2Ay?EAFk2Q4H zaZ!S{ZT;-uROcCY(TTB9*tWzYaL%%Ap7+(`3Onl(WO*h^xdEPK(7_+E=&hJ4GO#}7YxCv@Bs!-rWBp7AZmz2!f1 zRorv%MpM5RRwDjDwddkDY+J{OXM2|*-^zb*RgI@&7z5-meHs1FeB>o>>Ax1>n*1ccY1cBu8X%t+ zBAxvwZ#hm&a2}m}y#eVQ3yur@i_I5X2fNJ=t3CRWzh-;%^#j1clf_Uf=DY~v060+}!&Te73uBB6z&V+Up=9#t7)oowi|yX0ViIqy z0_E2J+EqKe4l%}Cqdsk8C@sTzesYt(g7*l$K-s*8hvEc6yy+j1wR>wBsmk0B?4Aonx}*XB?#!D7WF?UA5cG!Eas*XZA$e z#!;#SXFlz9i7}4SN|dSmy{qo^BHFjem`WU_+o0QLyza48$amX+yXrnKSNj&j$2}2= zqqG_N-0$^@-Hd#j|DUV&c+nW+C`ozlzv`YZZO@lUkjYH*D6f<98_ z0r|5Q{2|}zABE?vXFclS7}6ig*wNqEoU_~iOSetmlc~1phem93|w3pU`oXHo#WoDBDBY0k#Kuke@}uQ^qQxu5IHeRl=U+D18*~BaV{xQPc-@ z9HM^|-d3V-xBZXdDSZ<4M;xVYI*!s=>cjXX+E>G;={QP3d*qAgWVcN}q{kZi={QPf z*&gGPuswNhk^Y^kJ^CI!Ui>UU9HlDg<%hzNhrEK=8jJ}JW~zN&A>w!k^#;!;KJwL^ z7mj?^D~hc}zIZsS9`=g0f06O$^GO`}wp|jAe9kM;&l5Yr8GcUfhVAK7jICeN&nRsB z-godU19{)^^f*c~CiEk$a}RXnbIe}&9&)iRsCOUg<+DnvUOcO?-iJ}|I_O!`F0A7y z@pDc551&`0e)^T$kbevE)Bnic34N&eq0P9@F(UUlCu_h#wB`{mw^8ANr!{;J1DznrXg& zSdX!*y&$Y^h1_;=SUE`7-$9uB>2Z``SNf#c^Z~*1Q?AK>fV`t!C63Yt)VJf(u#Tfd zACQWpg#7$2f}eHtTl_5JJSjywzb_~=_tWDj!N&AW)ZLDwM4QsrNE{{JM;s*+~l-hFjRC>47^RJ~NecMM}NBX-eeAoPczZ3L-zY|mm9(Id9UFk%?>2ktKzF^sMRz7jaa z2k4+VDfWU6nv;T|gXW}Q=%6_%7&>T93Wg4vlY*gx=A>Zgpg9Sg;zI^?NbwInsX@TPYjUaxSa?lN4F)E!ZBAuM2b)uw(m^;C z*Bm*=-;}pFH45W9N^A~$js_kr81@_kJVr3=ITm=VVA!)1xKuFgISzQ7VAyj!@OZ(n z=heVh1KXUkc}-5)ye6k?UXxQcugNK!*W{GVYjVowH8~}`wmFq49c)fzN(bSTIe&I6 zh@;dJ#8EmC#8Gl$*sp_Ma%0%**M7>xQOb^`#ZiKvngAP55SzoE6M-iRhCL?%PZA7! 
zP6nPV81|e3JVh|0qgQF3D@j*^O*I7->Ev^YxO z)O6T*y4W1{yaxCh!La8H;2DBp&uf9N6%2dM1fD4v_Ph@GI>E5#EZ|v!VbAMcx`hkQ##n3%9IYmDRchpSP)04C5WSR zB8a2p#C{q0sUv}(GI5l$V`*`ez^U1=@ocd<>^TQ`j$qhxF7RBzu;)DBd4gfj`M~o9 z!=4L(7YK$u7XmL7413-Hd;_q}DVx{il+9~$%H}mWW%HVxvUyET*}NvFY+jR7!fTsT znbN`LRHk(JxW`e7fKzI?*~h9zTpR8gsF4QO0CUekjW+mhVD1^Hu?91a688+$ID;8S ziF*d>YJ(X^iF*cWqQQ)##61Hwxpm)y@q;=ct)-vI!#GOZ$7;FKV8&76K32aUZK?u)&O@#C@!mAqF#!68EuMhPKAd$)palZ;?qIWFL!h zl%g0LHPwt2<0x^>uRMbpM~Q2Gb&bJ{qr^48y4GOEQR13kU1u=kC~?iNt~Z!*l(^z)9K1%A^j$hfM15VeMm8;mq^^jICN=?0pw7*Zb-QgMDDG z_tlLC$AG!sSBnjf19QEv$_=gt=6YW(HMjwQ&W@ZG>%@2eH9F>k7HX4>ZTUBEV{ zePEl@F<_h1abTO%)xb8VYk+M|-wiCBmNVL!)IrW@XHo}q?*9YsV+H5_$tkXL)hc6i za*FGHb&J8|6xaJ|jltv;*ZXRn!Q>Rz`|4JM$tkY))kcHKDX#a`CWFZ-uJ_gL>6}XA zHEf>7Yv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&%wV#VM|H)g8v>P~~nDX#a`T?Ug=T<@!x!Q>Rz`>NVt za*FGHb+^If6xaJ|tHIwWbZgUKnb_tpIdlT%#p zs|O4wr?}o%4;oBPalNk|O6OD>uVM2vUIVA`8aR#Dz-hb&PUAIj8n1zc*EXjzrGw3> zOz9w;dOGk^=d_PyacVkk{8?jja*FGH^*MvdDX#a`=M5&OxZYO@gUKnb_th5-Ca1XG zS6?!ioZ@<4J!UXD#r3}WN;;>~cnzDU@ftXd*T8AK22SHOa2l_H(|8RmytX-&DIIK1 zWl9I()CbzfvN$!HHh$dLoSd3N{8fX=DehybuNh2EaUVQ(|8S>#%tg-UIVA`8aR#Dz`|>rQ<>7i=2WJ1 zkoZX=bzFoeaeg^CA3JPRRh5pv@#LdDoMk3%2j7Y0J6OazW`{{X%y>9=A%0Jl9Y^U& zoSSAGC6o6(ag?gwwc;qD-BipOLcalynX%S!Fl^r)b9lKZ{P zr!;7Xx}u*4*^ZpCuSYxjjQvU4vE$0f8T$sbrQ;|?c|PvRM#fRv1UppPagJzg z{@%nqwD537t#!*6j=M+Z?b@N^8 z_ry_p?!BBo{2VyF`6O|akfzTtn0x7Ql%5OtZ1S8wjuP5UaT;Y{>-0EE;Jl8bgnQs@ zdK@K_zrM`eJ9Qi-%uO9b!m2+NM=7W`H>fKujuPgkjw5r5`X%m=aQYy0)to*_JEcK8 z)D?Y9kE4WkG@nn?j_~;~+L8HW^Ida%isPhWDWTre#!{lrXBbNGZ#4=KjGPq~{>L3uKoUtyoIXwH@Xq>oViqoap5BO~p;whk2so zrkMH;85=YwF7x{PX57)HHy{-^r51Ie}ier_d|q(=ng*90=OAb+dJlIZzKh z^&B`wyQdqEl#fTddQP0CU6~UN^k4An!!WMHV559*x<3NfBVgA)-i%Znm3{P2h>tP~ z_eLRKUvH+4Q(`!KG;z6C&>BmLV~)Ir$IXcxuh2KVZBAU_6}84vLY<7IWOxkQBCgzf zVkxmM9^@PH=u0?lvd%{6Iea`NR5<&v5r>g7ZVp z0~*dYCpb@dX3yq_-Op3?%kIai^fmMCz53bu*n03Y@uQ#zeT9yb^E>cmiiu4y23!~e 
zF7)$e`MD--f2olTrMOp$d;PuHCYI9g5KGD2J1`#i#)Ds1dUO2=xSoJ~1HAbu-_Yea3$@C+MTt4$>ZBJCOB;x6KLqr&KH@=qNtHaQSG0K1!bP8sIauPcU`t zH=J%x&{vs0H7Dq+<4|oG~Am8`wjGN@~7i^I`S2I zWv#K4SReAuz`Yr`H^^J!&&2giTo-xET4O1(KFsY|xHk*+4fdA%vvEBe`HH=jt)G3Y z5BcWe-dx-p;@#}e$Mt+%mw2mNKl@l8c)Aex7NWkP-dgRW3>TXd^lkJPn#Zg&h^1us zcr-zOM_(YZlnghU6ZCavY?>3~x*4111i5X-;%I_gHe=D8AeT+wn-d&M)A!~C$I|rm zXo6!$A7k^)*3;JMFRcfC%tt|wkn_c+1#i?NhS-mrde=m2uLwDR!Zz2Gp7HFacDUzE z-rxArcEz+~T6PFzu5)aDazQ+a8w++U*t(#4!LDA;{>Cil(Z;B=wXs)LcGGe2qRc6K z9qwE~NNb5VldVh6v zM|5j+?yS1#j=b6-Xd^!N}@K{~%4z0M?#gEnXy0{hhtk18@|I}mnYfY%2bEYFt+%kTlkO&o-8!OnI_=S2CFlO-@eUg|a8g zCr{oTcYcEUp2Ocy@%KFbUcldr`1^bO{j70naw^I=&a~t-lu0ImccWQnT725>UdjAA zj$hCEyt@3n{P;!rU#`xt&%Y->p1+g%SCox1#!o?qykwrvGb1@e-PQ2^pNEB+iz-iUrl@ zH{GegH)m(lGc)3i$>iQ9aws2MvG*@dxt0jl%To4?%3%G$HHVZrRUFq-@kDgznea#O zxkzcBm(Ml$0OLB9;u_D~`zX0KEHk*C5_muTk`v^iWS-n04<+-^XOsqcC{I?8)(P{D z=eQ-_nH}HEj$>xWFFRG7EXJ5PSzN4B#lbpN+^e>6u6UMOVvpIe#_X74jPW3G#QvJ` zqG*ecvVP>jM_E7NpdClcj+;epN_;FkE*AMHF_-LEORPuYD%r7=SdYX^vg0MOo|DB# zVm&8|hs62GxFZtZ%8qBnahAAMcFZVpQ(|WQHRBhNn;#cl$RFFUmPFSWykF~U3?zqZyS$?yp;G(c04EA;bd`=K1wlZ$nB4_ z{^a||S$~-WZQ}sZ&$>ZesO%uF)XC!eoGymU>0-vbUyPX3#e~VpD$H(n`o7TR7vac% zK&m6*$bS;Q7LNRZ@*l$y`uVo8hUnjq1#zTK1o5UMj*uOv=c69qgmdSU+BeRtPio&B z?0v{SU;UHVAHny8SW6tFw7-wdHTWs{{#LIkkj8!dQb?=?4wM<3`p%Slb9$flN$iDj zZ!gGC$^5xNeoEj&*I}IXel7G!@_T|HKjq2l&kpt{`Tec_T^oEKI}g91pxbbE9+2Om0Uw~={O(YXC(`s94HEr*M85x`1l}*>+3;L2o94CHnC*yfANaJ^GGM;um8BaS+ z#?wwq4Ruj2XXk(j0^>Wz6{fiRKpW$(Yp#Y5WdG#>~zq zV`itxnAvIem>v9`Y5Oqt+f%>M^Si)axR;H&qpXs2%q15Baqu8Ro6` zBZxm28R@Fw-Uh!LuDg$Q)d;T=-|_W(A$Ou18R^~TcSm~e)vg-lZT5TMy4Pe^jrO+q zT;oKqaU)~ADnA$LQO{LlJ>Tz%>-=k7RqDlkw$&H0LdSVEelMi=z1~&hy;@y=e>@{z z?QQdWBfbATS55GC=&KcrH_it!zAgR3TayER`5z?aG1y?!3j z3l_U-ig%ypOCk6&)!XCeBfW5`tEPE-H3y4OkLT6-eUM(X!d26~eY&1v)N_sZu-_Ny z#kaU>hWCiBrv&v}>wUrRhxC$luA1r9>w1Qvp6k3v{r*TFw$W9yy#1Pwfu;$ zPRUrXouBnVUg|qaDBfy>Oy)XM$V%$gE?kfG8U`QVY?(nz2 z1|WUZovz||w0>0@H9;%`=sLHhW6+z5ZOssQPwce{$eQB{icN%y-E 
z{uUM6nufMGPo|@8)-~e}_*%%BcexSforSvTUuT2o%s)5CKR?L75cvn9{_)#g9ebI4 zDgvK4SLkzPuFwv};1lNx=c3G&Vx*VcYUT>(qRf>Nq>n(|oGYA*GFOHno&Dwbu&wcE zi*-*x-K=ZUD)?f^DQocA133+Kll#-bedeDL-o6qPriSg$6w|^ zA?z_>7e0?6Cw;~>@qYu}We5Bu2PBqr5%NuWz*U1Gr#%$p2S1B}r+*Hg(~vU~t}20? z`6YZlL(ck2kPkc`2F%~P84k(cwiyA*->Df1$=^K*XQ}KDYp;i&6P&yJT;N_UI@{pZb@uRJogL&a4f3;I=BGq^t?TT5!8*HN>l&87yQ|mP zxF>5^{(i7t^CC^wuy#IK!`f-GhPBhqc&}$F_AT(+u6nNrQm@SnZs70E0-L>_slj+6 z4gK5K_BUhs+p%1u$$0X&cJ;nRke_XEEyg-gzMfL&2XFN{9(w3~iy(h#kiRU*Ps#kU zZ^7T*)q4W4b81fjHq>?Do{T4dzm{qIU0NAW{w}QKlkv3EWIXM(G!8DnnBn(7H3uOz z2Ms>V-z^0;99$5L8Pd?_hPJ;kT84D=sdcYsK`>@Pezt-82UsULMXB=#W5(Z+)ngXq zFAegS1^FqNU&f5T)vEV;U`rV@{_ZHpg1?U{W5(Y&Wg36GRL0ECCu3%($(Y$`_L%WE zBiS~8r!uwIvnsFy?xps6&?jwIjGgGn-$m5+!adQEzro0~!$=by?R=u6ohCZkX&>di z9==P)cfq232kdgj)rG{1X1$y*>-WX-@xE9GylZx<_sn#9VesBrX&`3?vMiA6on!i$ z2H)}Y+TN$OPq#jGeS6?Nt{!+N%XQ;<;c!jw?s$)@dn?xOZFR$YTisgI`XDcUAEKu3 z6?mtsH{OoQ#r<5|KmYuA-US!bw7y5xr!Kz_?$_kk=Hp%*_oRK->3YQJ5pq87TyVrq zhWJ{SQ|qWAml&~a-H{``lf4TTx8DW(4ff+kjP#zyJ9VY#Q*^|YUJ}>Nd1$3H9Y2kC zu=1kO_|tKdE_&2B!v>wUoj;~mq@6i!uT$BkX!+6gt z85)@!iTA&p6XB5~@ov~tsPB9DYs6m@{+`C)Gx+;H{(jInIyqXuGd3nUM!z!#tluvi z6Cd-w@2fFC-$xsp9IMOe_tnOXG4HF59lM+9!F|+spHrGF)%i;6OLup}81kL9DD;qb z*2X8t>-*#L#_!(S;i<+meRnTyEI5vMckg_>yLUnI0zDS;?%oCG{O+Fmkl)=?j{WXl zZE7q|@%~$S`N;lISLrxxssbX;avaT(rry0dgf*y!~G7p zzw=(;w|Zv(bSEV4^ZnADKi$@J;q0R^b@YEOc{{7U-yZj}oz6!R;Iz|zTT|kqPqlC7 zv_I1BMaOB!a|svPxCDECm%f}cWUa~$<7poG68M11xi;I+nbRJB=VPtg@s~Mx*HrOr zYUq9^3o;8bgmU5M+AB4reTS?e=Xc=xLOm1C`De;!!#TgDd@h{xZ9P&8iJ1F<=bIt8q z^4`AWc;2x~UPT{Tz7fuOo$_1ZoHr?-4Cnlc@_XzXWH2@@!Pv9}W7C4Mc@=$l^_yYm zHOSY$9d=%aeDk|u=MBhTHHDowAvrex2+6Ve6(q;z*N_~WI{p5V>HEWVr|nASW(Ns8w_lsYB1n+@|oL_yL&q9L=-}~-)HlI7PLQXqM{FXv{O8kbx zd6f9=1eX%OnGmMLZzY^hc}d8*fbvpQ9{57O<8FU<<&+NMF@A^NmB}@~hYsPpw^*b3 zo(qq?&@u6Ue;=_izzgX5-^%liydc0ify@fzpMv{=aFzw>Z=Fc4VuDH#JS zVk|DQz`UW5DinJ6Qx`cQ`~xC+Lm@mi;$Jw_!@W4Gd#H%u5BE=he%_$?%;?w=|M|+uUdOc z#fA~Nu#=CT3LXbxr2u<9kyg?@sPpgLy89vA6hYF(4ghZ%7-ppQV2PyeDIK=%ZHT| 
z4=cKI=D68cPnkGj#Iiv{mkuc}FRU0`QC>8B>9UIA!lLCvhAl4|S}}O|l9Ix~#TCn! zmlqA~#{qf2f7&i?d&e1~2QrnFsAIUDgTRDo{{{hFf@28r!0@GVw_xqM$_l4o*&S;) z-m%6hSh03f!OHTDE1lqfrdMuo3O1};R$f`|6jWAhuGG29*Q{EK*dbM=_gG!D!MjJjg_$L>ETnQ7||76}s8q3loQcUKjNF3eJ z|2RL{mQ*M6_F|;{IhkB1Z&P+mkjTOGtNH2sqLg`XlzrC=c2;(!9w82y9lQbNkNC2HI93xe{6sgma6 zn?d_a+N4W)k*iKo{<6*{2lmX;{z*BJ8*o=_XJyCILrVBFq%OC!e<0hY(U&_7Q%0Mi%J(jxC{+UPzF>V zh)aw4jgZq+wLJcfm81^u1{?B)jd2@^K2Z7B8Jb8He z7#MGEkBW|JjErE}|5tpoU-&->CWil;r%nAQ#`Kqa^RzXu!2DCM*u+`bUo6-^jfv4h zpp;?J{sx9i`y^F2H%cB+*M#Vcp%=@(}trlb}X>rXzwrp>EY%%ESM zUzA;3keHl0{h&RgijYrcUUq7cm4c(Mi-M1ji-M8SbbSX#{V2=CngBa`GbgCy+$#}H=^R~IW2pn#E?p`oR*k*T?*k*SHf zS(;^{xus=ta;jO1MQUB_P)_aWFKzoy^5yw_U-J@dFc6!{O-@ zof+k(8@MnEaKmHyEW>nf7e-Ur5721;08Iu0u;>OEHGQKCqww?%E{qb)K>6vXT^MaS wjn9E(7#Px~3%W9zGtWE+G{}|Fmf7Mw!}M+tV+N3M8pz;Oxd2gF3$kAx0Q14DkN^Mx literal 83504 zcmd?S3!GHdl`ej|`gHq+lJme)5h`bIWAa5}=3Mh)8fG@;T)z~~l zVtiu9kW}el(h}ly5^~dKn5IlH>5Mq(aZu4XGo9R-d)s+k#yiOa~^WpfZ)`LYMzBS?_5Q+Waqeo+PNm&CGu+2NN9Ud;=Y)dmA|{ zxS>j`9Bp;R1^G*I#mznl;@P2_c&McHO_fva1D0we) zKN`&c+Dff7klOd>K=q%{p%KXO*G-%}bFSk&{PNN}m)*8*>7YB7uU)--)w(AhdvVax zwYRSO_U4xdExl#^Jxfjvv&E?WlQc`yK?<<?V<=?q= z)tZ}^uKMnvH7i!ETfW|TanQPz8UUiHA~Yd-hCtmX^<%WCd=)-`vyqa$Zsbw@k8-+x)ngU?uVmGWqWsJb?fYHogD ziD8u{u7BXJ<$tnbNO`u@m=2d(X_bn7s!|g_RjDbTs?_weDfMY5*i5;}X{PQggKk~B z=AOHjtXqEToy%9RU$SE5{mYjv;e7u~y6XR7+z9+fR$=L?mA9@w_s8whS!6-opgWh| zzhuR#rR$g6w`T1fLDk54!-W@Fz4XrI?_RZJ(TrhB;dIsx9kSxi+mi$E?vJoYe}oA&i4kbyY;TM zOKw?n&+7HJe|ONj2kyLi&8l}teac%6x?@@0pnF!YT(M^Dou(a2S{qXEqe1tqTfT0| zvIo$bmA5Qew|?m@cl-?O&E6kP{nJ7BExmX7inVLdq3oB)D}!3Q<(J>O;{GekhYeQ0e(&sGE={bYeI5^(-) z{^NSoxe%*^uCGD3)GNKdeL4U{W2aOoD0%v=$z|b++N{xm~vh$QCpBHcr=sA6s*rg zp6Z7FSeNMo>_GN=>Owqt_1o0Fs`ygp-_g9GTLbx(Oa~CWKq!zs5fz-b0)l##O$2uW#p%^qqAj4=d-qBjIpC5jq`mAezs-kTHbcL(>3B#IAYoW}amd+NqzoW1oIme092<9xrq z`ls)o$T%;94;NgJasIU4EpiGnPHny8WHK{DWBm(5WBds2Sx}sK#mxW7yX)WF>XKW> zjJzD~G`jox6vrk!TdN@Och z!1hS0bZ1#aQX*(b{M?G%`(}R3d-}KCM zqc=S@9X7pKABGL3nNp)OBQwL~%_yC*bIh0v 
zd(9o|XF_A*nM^#B2o3gQhSL#05hJ%FemWsi$E;`3A-@yn0=UyT0q*Q~jv09C~aVSMc{!%-cUR^LDwx`;qqN_^Zd?0sN)$_bUE=hQFWJ zU!A$y@PBM(tl>YfnJZ%xV|R?pj5B$VCM}kU8J$=vX68=`>e2Z#Av3|`O(>m!`Ln-% zOlFLkKaghr2+kA;c8X8tXNDQS^XYxXndW=XPe0$k)U)U>{a!^4e$TmMe2(F_uO4$@ z&#SM#oO9*dg#+W9Bb1!)l&$0ZZhbfB2>SXxjLUC~f6h2RtcRP+q#~s|Q<1Wr-AaE{ z*{y6}Ww&iNZN6#SO;x?OF0Sgt@wUZV@K?1Yv!F5ot}W>1 z9$3(ALG^-+dw_h-ELc!Y8A(NVe%%QTuX5b-ZLW)zu=JgRq7bsm-YEd5$*bt*(kgiF zuSGYPRhfKnp6&RUI=CK>wtS3oA76)7j1SV`M_xtSsPi$dwS6jCZz%9XRZemFw=myA zfiEh=bs@@p>zq*@@AC}i*|(5KU&4ONVsM@JAby7Vz%J%?=vAb>J}Q1qh;l%-Z?pIO zasm8Zarwi=#e@0$wJW%H1XVnYexO~ULg={e;9W%{yj`e&7wgBI>ZaG7r4M!$_KSBb z>|^}*)0zi6I^nuoU=wX*-qQhQUZ>!?II#EW*GI%Vy z7F6!8jBdkPHE;IL==>XYMz<}hif${d8Uj8HE!|g{nD=&NV*Wdoi5q`YnJE8ZZe;rCq{1?o2jovRSxF=_T^mLC&GWikF@par!BKBx6F|8A(gY|&90oC zn7zFtZ76VFKhWDr9q6b{r_}q5cDS&kwcRbh31jONag4kmCi21xkzLSd=+7PAVdDcT8kWg~+`Lk+mt2`_dxonndoO z8{G35$El7W4RTjZWU4~s?u5vil*qklk+n@C_wAW0ZTu{CWcGaaI>u%295yN)PmDT< zzXtpr!e1l)4&(0#{@%x56aM}lf6e&&XZ#(--@oJUBmA|*Lj~YN6uQ;aEr1P@-k1`3 zI4$x>lgRgLpx=dc7lgXct-%-~9XzOuh}<6&nXV9d@Fj4Cwp3I9C8;0!4JpY#lor|8 zB=WEm=-2dK8{R{%O8P15F945df2#MB;XQ%nmP`n12M)9i4jBld%1(wA$)nGV?R z2Ku!HYr{pZ*j7{UWO#3p+_$!1cKG{yrOq1YxZjuSx=yY7wVhh^>tLtq*FtAD^l3lG z>xS7u-{Sh%4M95k`IBQoI<9AbBI&8#v%{*NLf@)>3iYdg3iZzm^lM<}{CTZ*o|rH7 zFg)0R4&&yrq~61g`mvbIU&KF(+AH}w9Y%x6ij zj)-hmKj$;;PxpRP=CkOXvVP8I+TVaVW9IW2>VFA*mA?F;%;z)I&-qOK#)2Q3`Apjy z3f?sHnfGnP+~)i=ZG;ZzGp~P&&QDF}{M2;LXQnrz9@VFvs?T)Q zXF9p*pv)MQG3(8WV7(D}Feb90LZn%5CPJ6~%d9t&-k1`3I4$x>lSs4Pu+OvW4bshe zBXWOCWV%A6S#M}dcD<4GhLp%dX_1XhBF%b3{hHn%3O^;iE+VpB{p2U@PxbCxV%8hc zJ7xXkC+)8R2Mj;YP(S%8efgpA^9=QqpVY4{_|Wi^w$&7LF5!B^`__W1=C+p#Ok#8)^Yx2#c2mCbY0Y90Z!u(bJ6#7>6Q>dT(q<#wZ zlb_VDft}fs!uyrpXsX4bmixCl%fBmf1HDB&aoOiFR;Jp z`(DDc0Cnis9IW%aZs=WJqWbCHvrAOJ0sW===}zS(ZlK>-P+r0{pY=Bs%r;}d`WrDG z>{C-GbT|gQK7z8U-`J^D{|M?={YL0;3|J3sW}g~6gE27a!5EnIU<^#Vz){stqb(c* zlaBg12Gpnh90R5|z)tol(;HwX`&84}r<%?&V0t6!QGME}`b<}Srt7-RGsC%)ezA3O zV)CYGnZGc5Qk(~GAI3fD6t4BJwcC@Pw#?(XWrmgyt-N9W4V5<}ZrI+DJ+nS9FIwy# 
zxc)ruon^gUvS@c@QE;}B37yM5e&$;6<~YwuaNd#$eH3Tf_G{ax{d#-*H0Rg!spAh9 z74ST_;H~g^Pok~BIKLT=m;PeFUsrZ}%rx$Q9#pe}Z|5@->MDZ62oZi5_6@Re+ zNB9MA^-+v7luLmx1*ZMH=W*E31!-MAe#+Qz95!48e34?9*qlBN8*EM=hYiZPoktsfzA67U6#jh60jyE}Ov905 z{wyO~{MkmH@NY2EiO(@I6rXFP8=q%naeO|cQ`pL>p%~+#I_4P9VZg%_V?2ig4_A!w zybAa##Td^Kz#|l6JVyeLRE+T)1w2YI#&b0AXkeRDHm}Jko7d!&&1-VX<~2EG^O~Hp zc}-5)ye6lV*EXl}WrNMBeA%F!N*InD^YQRz{1ktH$b?S#4;$&ktBefAHyi23w-{L* z-wJ7SY7EABjE*_R^J?I$6=OWd0*_UU@f-&{PBF$a1{_n2@f;64UNOdV0`LUI7|&~f zuK~6>W%HVxvUyET*}NvFY+jR7Hm}Jko7d!&&1-T>d2MqlUpCmB%9jnwDLlr5L&y9l zjBN3rH1dT1l#x#SX(K~%?E9hT#&;T79Df$l=F~)t@kAYSjOVq$*DA(%P6D2!7~?q^ zc(P)Q=M>;6iZPy3fu}0QcuoVJrWoVt0eiqUr)*x6Q#P;3DVx{il+9~$%H}mWW%HVx zvUyETDX(o#<;w<}Q~9z%Ih8gXIp)7=WQ+eZBTx80H`0k8G%^%_%}6)?3nPo;zl5|o zH63F-UB?{bc^&X|iZPxufM+PicwP^Dy<&{#OyHS{F`l!4XDP;b&IX>X7~^>Z@D0E= zr)*x6Q#P;3DVx{il+9~$%H}mWW%HVxvUyETDX(o#<;w<}Q~9z%ImH*B=%@b5$QJ)i zBTx8$ZKM-FY-A|@mXU7!Z6k~0??Bp|nu9T(qhpTooC`cxF~)Nq@I1vB&-uXf6=OUX z054FC@mvVJP%*}H5%40#7|$DlZv?hEW%HVxvUyET*}NvFY+jR7Hm}Jko7d!&&1-T> zd2MqlUpCmB%9jm`ZgLOQx~*qU-4^Yx)c1QLm8FlC;ys>XoHvE=zD^O|+lgL)cgBLV zD86rVf_dW$%zHg2kbeSt*JA&3oVqQA(3LZ)DxA3(UF2`8YMC$YY8~{1TZ<-5^(ZKU~;l0;N(-lz4b}F8Ykx-8$T(`;q*hmHm7}Ho6~V%o6`wk zo70uRHm5fM+njzFSUJ6IV*YJ7gmr=ES8`|dr0$tmvl-TQJmHIQk_Ym9jg zuYq%T4V=Sk;2d59=kOXhhu6T$YnxN~vccw5zHCrVCCsz=r##29IK_Rg`+$r&ImP|H z`=DTQiu--{A;IJn_xo;KFgeBjzFR4noZ^1peONF##r?j!Sui=p{l2?3ms2^s#+c{u z8aRj7z&X4I&fzt14zGc8cnz$)wmFqA8*EPH%Le7tXFA8SIK_Rg`-qG=ImP|H`>0@Y ziu--{Yl6us?)TlV3nr(y-*=x7OippX?>;4%oZ^1peMT@j#r?kfY%ZsAc#Sd7;Wcm$ zuYq%T4V=Sk;2d59=kOX>d2MqlUpCmB%9jnwsdV6{KGivv#VPJ{-EYX4lT+O9yWbQ{ zPI15QeoHVp#r?jU6iiNWzwbUTn4IE%-~AK8@@0d~seIX>occ`XSQe+|aExD)F(;?y5`RxHImL4< z_xpm$srkfzDwv$&IhOka!Q>RrvD_aCCZ~9g<^EVOImL4<_b0iW%HcJ}Jcrl7IlKnW z;Wcm$uYq%T4V=SkVCA*VseIXBb1Gjp=sQ!k+~^E=J3M-W`w(Q*dk!Di4|sdvjX!m( zAcx-Zp1T=x%!9|=Eszr*?OwD4a{4z%6@3kI&i57-v3#%b9~QBE-&ubsV)-kU9xGz` ztM2Yx%<|(ZFD_>J$xn1IX8G%%FE3{K`9Bz5%<{djmGb@POZh98OZk!aN%`@crTo-q zrTnZHrToI5Ncl_WerWDHh 
zt+btOrR{7hZD(6q7u(A6mt6T~5zF@)|8^0}_nq~gY5NsR#rCW27Td>FitUr15ZkYR z{^DYm?>0=z_ns`}`_Gl~SKcb+N8Tgl$8VMLQ=gIYv%V|kdtCdbx$kAm-!pwZdh;Jl zAA2u$F6K8rf9!4KH%Nc%Z8Y-|?`x8m8$UT_#<}VXJ&JeqI6km7ZSmV17w*IN&*#4N z?KkjUw9(y=<@x%5de88b<*UB%F9X?UlD7Em^(c1?<#wXnvnG8}mOE!#PRHYH%MC^Q zl2?qoyng7G1J7P@#pN68feQ!4PizE+bnxzKA~e+J_e630Zy(zScxaKHPNgD>RVe-_^Yv(3*!x3d@ZKMAf|po@O!+PSdI z@7z0bA?Hv>b=mfg>gt$=C-yoNe&gI-Jt2ESMj#`QeIWb1apA5gWE42;jquBGU54ut zZ!C=GdQ<%2xE_w{Qg50+0@ovOUFJ>qN8x%D^!s`<{4uy5gYp|XPe;Tf*LH`QxtNwIcPe=M-ufm^!>lwJd(!1H8iR+oTF87xCvvEBe`a`@G z{v2G-LHbbdHh&(j=iz#oce}p;*9&kx+*{=@!u2BPU*)Yf+?V;%oaFqH`O=)^{F3={ zG|BlT^QAe-`6caaPI7)pJDZc7U((K_NzN~6XLFMCOWN6-WWPu|o0IGpY3I=-`$gK> zoMgX9JDZd27inj6lKmp>Jep*`kn^8MyUF#>quu)5#A(0F&%?L;X70g%=YC5ce5<%E z_&!nO?wH7$3XyvgB5PA3_oYSFHHnn(({YV=y{aQ3cf~}eDn#y1h^$G8+?y6z+ayxH zPnYXYT?aYie8xH$PpAiCLNG4Se|1bLQ-2zdVg>L1WYLX_Di`VoN%{Q~zt?l{O;v^H94diM&v7All(*hbNLph`M|z@s#=GBdlC<}GA+68kkbAYa!DktKGhSli8z3M2sXK;xyIdX$ z(~n0aZGTK;x(mv7xh&+;uNT0tvCa5~yksugTp>CKFM`g+SA<;p`-G%5q(mNSLET-4 zg~IgzH7L(EX%XOhi(hgsOwC-1hT&@rNmR81KZ3)L?587+n z>`0lqh{*l6%`W)KviQ!tWDmYMi-}IULgc}O$OgL&MJU5|;(PRxJ@`H_B|156NlRK| zlgPt%8}_0++lg=3OEPzl7hZIcRfl zGGW~7{xf8r9pIjesUMPHx zw*~LxOMjZVhVS|=h2DVqA$Ov;4e#hnTTLFm+oQb$zZeQ%>uopB4<~>*ANg!?Ea0E{ z<`v)dbwxd#k9^kXhHK6{&f8A{z8O7y^GB{leqR{oyzLG>&M(f-*+G4X8Bj78c9H9S zJ}xx#vj_Az=Qs~!A9B?Jzf~*eB`r&JqE`D?jYVk$sX8f+i%W zPxYQOew=pc_q1uzsVhVI!MB9mY2MR*lcen@x68q8&)ey<3}OX{A4fi)1Gv6$PI9hB zByCqrWQzS<1`cz+(vP#vh#eq)Ty%DG?a?`#khGeV$i3u7KX9A#mVTUVMl1pGZIMA>}*V4*F@f z1F;E8_P}SQJJ^ozqnzn~*iHX@wszA$vpixS82@~>cGFL@-H4Y^l7WxzsNH)}j_pNk z1@Z4_b6<>E`K=*$x>ud!-^uZz;P`dkbH-1bae}?ZzoX6lP;TfQA$Nwi%O93tJdxMx z$?M_Z_4VGjjnC%2_$@ZSH+JA3`=iY8H6eGVm-6-dV{&=~I6cdI!T56Chh_L(vIAc^ z0A)t33%Rqs-M+^5;(bQpJ~w#Z@sGj(oBLq?X&fc~EMLQILQzaNA-K|jFtPUk!QJ?ligiLAdL zj5)yh&NWWwHP<6kAIAeWbB>jxKF)8haXKz)BjQvTUvJybd9Ak6*RwvvurR*fww-mW zt@Qh}5AiLG-?we&d{tZN_p|lEx7xPbwsH-~)(4K+wlmMRm;Rr1B0fge|EvABz4M{R zIuSb~>;KjM9DD8h0{^e}+xBW*w%uG8Y+JcTY{Wgyv;M!z^E~-E2K=1s?eQa$b}%Ng 
zVIan6%!46!p10TMexM3v&2tE!jYa3s7?dAd84Az$UiK4`)|e7`_;QpV8xOe)yjru4 za8B!fAT2sa#-jX$M?&F+-VgmINqc_~%EcZIxr@AgX8oYe=2->))PWA4H93w$!8y*~ zh@|b0iA-~C)49vIGc1p|9%e165S@cVQGWQ{GM5vQ){qi;h-;h9W3DA^H{yKs&;leCO+5GU(XBbTY&mlei6!F0X|IF3f~^oZ{`lxiNQ#p_$cf%l^K<@BfRE+;n27m7 zo=;kd{Q=~ZJ40dmT2nXH1Fi+EYZ~SO{p@tC2`oP&C_fYBxh9x-%V#Or&iZDfKCY`= zf4EN4M{w=qT<6-r`Vs5JtPQO9SWw@Izz)v4&V?VvxjvY>IY+QQ*zIcD4(6KJcC~E>*9_K=m@{V0Xxpx~?cmyxtsj1w{j2Tbnqap@>uuYn zwr$|r!8#GU#;hG}+ob1gZQH`Ng!e$48?% zm+Ui+{$>|qQ1&Q1-!I4bPkAcjI?mDGbaR~D5lH0K3_<#|X9JF7Zg3rW8s+&6#B(L; zL4WU1=uQ77&O;z)B(blS^0k!6-!~lTGyep8S;*Pn4Y@pD;{49;OGLlU%q7g(5zw3S zy^vcW<@b*Sp7*EN*FrA%QOM=_65GLjiHS!DJ$zqU{guLFaXyp9u>K^FhA_6fQj(YA zGZ*Kx=`YL;t}Uzs@qv2sxr=(x-_3QC^PB4*=XYAl*H9vVFV{Er75xMIhHDJ_1hIw8 z8q*~DwdOvUv-B728>XZDKKc#z5&N8d!nKBdfp|nc%~~VnbK2ra*?GtZiI(*fzvLGV4x-=(lSd(@)kmu0L!WVkhVe32osTloI{39w)9vtOxO$dh*<7l`p>)}+xG zzePU@xrIe;aT9(s?*3|F_;--*YlUV_V!IIoswdZ^zi0lh3d8@4@{Pi9*P_A2&Ak4r zLbD#3v4UM(kN%N$yj2+fXXd|K82(qvzbOnKrTi^?dN6m)9E4B*kox~n82)$We^40y z1LeOIhCibGw?eZXv3{eE_2?M&&02wc{Bdc=Cud>(`C?3d0# zzjQ{wuunRpPuK@7=mV}ttQ&E!dh%?FebPDTlg>e(bVi@B4_eR%YIhp_&OHnJq;t?G zozW*e&(b!sODSxr$)}-^ZM%7&h>I%=wC81K-D7pcppreawZ5VFTaCT%;H_@O{i1f%W@0 z#um$$4f_3CK5fwNA&Yt!ILpvC%XGXhEpV0tFISA1S}TB8C`L@JTY+y?jF?)t0pF$= zF|}3#uT+efTDJq=t{5@3?f||6ILimvU^uDc1se<}6~hL@NyV_ia8faBFq~8j8w@8E z!v@1i#jwF}5;)6;Jlc@uLmq9A?;-C;T(1p5T(6BmTrWR}>y_~F^F6-R-sDSMuPR^S zdTsG@;(9$0a#mq{SLt{^7;^3ezEkl-A!jx4YQ=byZ4K}m#g!rFF5tTqKOA!I2EJSI z=8&@%c&*~CA!i-%I^gkG+fBSY*kE(|IBc*veH=E}oIVa4Y)&7C4K}Bb!v>qv$6Ep0LIh~CekVhM`F$3~wgT$e@KZxtKA&Bd>F^KEs2XVa;zQpy~gfjzjtIC(SUR(T} zxL#<-dW`XU9dnH5J;3)U#(3Tfe6M1R=Y7EUDaLr-4}8C3jOPQu4=BcXZUEk(7~}aM z@PoiMr)*x6Q#P;3DVx{il+9~$%H}mWW%HVxvUyETDX(o#<;w<}Q~9z%Ih8P6*$~9_ z+URdG*!LecGT~PlxyhHfURA!t_1fa+#PtHFHe!r7>X>6Z9|C?zF~-ve_7!71$Sc_4ZNS?UV>}-Lenc_Gb35>M#Td^=fgc67Ic4*j zoU(aMPT9OBr)*x6Q#P;3DVx{il+9~$N_lN_Dql9(oXVFC%Bi&B%7!4W*Tx{OmmkFS zO85r@KP7R!s(gv-wZ+eg>jh5jz!>k)F~@j*4ftz{F`kbBKc*Pt`E}s0E5>*}4*a-c 
zjOP=;PbkKCJ_-D!VvOfgz)u0&oU(aMPT9OBr)*x6Q#P;3DVx{il+9~$%H}mWrM$K| zl`k7?PUXu6<&=C6xgm(_wK0h6jh3djWK>&#~kDN z4Dd6GF`hetcPhqsJ`4P;VvJ`saJ6EL=Qn`Ap%~-&9Po3BF`nN9{wA=^DVx{il+9~$ z%H}mWW%HVxvUyET*}NvFY+jR7%4?fb`Le<0RK9HZyvOxg3{JVP1mEN$&JxcI+&>Gx z$pz+_f&1sdH@Uz(GjIa$PaE{yBA+(sITqu3Ekob9zmlA6=6>J(JHatv?)Tl_ z3LX#4{l5D@1y2Cxe&78E!PfwDzwiF<)|dff@D0Ds>8pWlPLBn)IXw>8=5!3$=Ja@A zo6{43ZBAbUten=E0r|8+V+Q2Y28l!Q8PBmUkaJ0Liu+vm0~vF2iu--{p9GUr-0!>p zBAA@we&78+g2^fF_uYRJOippX@BUxGD$tmvl-2(YHcE~C2_uWFla#Ksi!%{{bkI_DV}4wmkB1P zc#h>>E|{F+IhK2cU~-D*SnidA$tj*=xkCh#Q#{9VhY2R9c#h>>mCLCdUSrI2cnzGx zYv3GS1LyD>IEUB3IlKl|UfZ0?mkl3B8~M9l6{2^9_h0hrhc54b>tFrB zWje~e&-hy%Aaz)SdSTZ^sjlxQS{rj zr%CE>H(o4n#vA?0e?Q!TE#yMH;1a|At(|%ljZTc-G&WO@ZA`);@4w!AND+De?3Gs) z%?E$Yc%whZCdO`Od2(UtYr_4N@BG1Cp25GV8vI-3dpsjv2D zg8ENq|Ht5?@`?Iq>VLJJ<9fP&SDVjduiC0M(1-DxMV3!bZNKI;OvRcrMZQ5H7a3oX zwlKaT)8X@XM#Lo`Z;x5H4{<_e4C|%_YY23(hE%hyjQdERO}i3EhmYBnlKhme8EltXGbDfa#79>R zOp8tpCCcpO{j@$^BiMGcMu=`Lb(%!qv|||T8%q0vImNcKKlflw(7Mz%N9w4iq>c2m z?DIOT^ISI~lAoe&yoS#1n55O%w&^;N#p}yb0gPW4-81%K zF}T5TYQ);law)0%jD4v7zYgoZ@!NrIr|LuX^JzZ!z{lG05yw2Zp_qP{+-*R+=OW*Z zgE;h#MaA=V4A4Hl$KN)e|2915QkwR_@3qSh=0F4P&vhaS9mYAd*9Q9k<%wwJ%TdG+ zMC`*F_#fKT2%DnLEA_Xr3}PS-DIZcfd){np*B$>KZ)2>%LGhY@lzD#y_R+6*cfRs= z@{Mz&FDtiU7yVghq(A#cb6@&7GbdpW{b)8F?bLqA*y({^X=MFKZyRIrW9vQ+{LZOj zE>bUhU+A@sllZZ9|GCINRXj%O8Gpd>I7vPH{(<8`e(Ztmr>ci~YEN5Rv>#4YR_{MM zF?;(Q!~z(Sh;HZnES+7N(C_?F`kpBa+`xf%FQb-wb&(;?B{onuRa)Vm|Z^EkpV;{Fsz!q(t7~706qG zyqmnA`optQL;$=C;JDceAax(-P{ zG$;8zyZTu6m+_UN+bDekJMdk5laxCw{c|+Q@7UGHvY+TL9Vyq&ex}p@h~&4oANV zUybY4xL)f04DU4u&pUiBfZkomy9;?Ydk6isxLylx-s1hj=$Uare^75d^43Fdnb+Xo zi|c!Fz1;f?6SIK)OTk|H#ru(WKk`<1hx`q=-hldU_5RZ6(GS$nKIm;k-bU!%<~5q{ zwPlVqCruoLz(>_`{f4g0vF0S>AIO{n{}J~f`z#C_sV8&mXp-NrOZ%IXoU_?yU_O7q zw@QCBCpl-O{mn_v(d@G?pEsZ<{c$wOxhm~%PO?9#XM6-X20hz$+t%!}F6^V8ZM$u2 z_8AxU(bw9x+qPz(Z8$ldn zy&Pk5(6-yQQ~S-f*S1eNM?bIMQk(T-C3xdVTqYmf@xh&y-kXTcV$ycUMAjsbmq6a_ z-eKRYFDqZ@RUvtMHz98m_;`o+wx5u++LXwBRmiJCxmDf~5!|hPNo#uK1zunPxsNZhDx=VlSoMk)IUpHbM 
zX8DSyFy3{yNR)6qMTzK5Yj#*LZ*H@4)pAaO*CwDeJEr&@a?`40(?s z?{4pR{^PiQ9M@~Tzt8&XM)U*qoCXhmEk2epu#GbCSMY=1y~x zb5;HI5!gsQnM2J<&Q)pu(In@l`fK(n{jl^$bCPpY+TWbyTvUI}KBS)XM{|;MQrdqs z$^KM-%|4_bwr#gaS@d{k3hsZ7=;b?X&H+?KD0WcG>pY_8H%a{<7_~?V_JHTtf_1`f81%di_O+;fno0 znCk%JSG0+<7e%^>*@CzhP9KS{#CQ~o|2!1F-#h9@B<)~KWW!>7E5G>9Lhb|JhlZ0~ zSTDYTCnqaJ=g>{i`Raj?yTSW+KOt$2DUl{#YQ<}z@PpnTOq?Xz#W*k~UaIIE`6|j+ z{5<4t^gi;NB<=krNW1x$A@?Egn9nkZmukMbo+usSfRo=OA#!g}#CJ*r*7~^Rb;?8XL z&X031%D70Jk6Z^B>%r8+xD$x=umJUNK5`vch-=O}&RfPzGIdffh@V%1_!peFi=fB( z#retD6s!;NQo|ZA^=8D!;QYK1dYp5dhm1+V`cj;Cu#q+`L;MWRN5+@1$AIxtO`YgV zjhCwS+x9XJg{c#L$2b(W{fr}{_A>qi>qETMFyl|y_A}lD>p;BJu*OS0nf;72k*x!< z8f^RN)9pUi`fOWm+Za=Vb=9JM8ZT9C&9N=uFyf_#`7FX1NGG+;uCI-4w1wsAGw8<$ z0-r2i>QJPcSSR%36Xm&l8OCqbZ$j=C?}B(l(ssv0ns}+J!Qrjmm*V_Qg>xnwmq>K> zE=T#*e;0DMc^AeLl2)4%Y2u~c1#Um$b&fMG5$$5!6ywK5r)~wx-}OI3?sl(Byh+kb zywtVe_M_g#ah5^6RPp1;=W{yO7tTq})rh3+iitGwQdfY(oUiobY%}7eiXRu9-CTQg z&L$+SCMD9uOT7);=DejJXPXf(Rs6W<)N<|7`P(FECSL08;5z3q{kV+556ze`#vL)@ zr5fMtNSO||gMOOrK)h7rrz@m^AXI&(d!CXO>62 zRO6q|)^7S~wj1$M!zNy82kmCORJIrKQpLaT-E!bojM>`%9ddVgUE_8hCxsQ2a@$Yz9PGhFt3vPei>uLNp@5i~NF;iEf+`a!N#^2kvvu?GOzMu9XW~%Z1w(a!iYAbzzwm$e*+jiSlt^?Wnz%$!+=Gpdg4Pc#! 
znVMY#)PCDu?kia*Vy0%-0JT5IUc0_v4N&`Sd$lgxZmtcstz0L@;GX7re}z2L^O<5J z`1y=CARdvlgE5f}t1w0zKMA=zy@Bxp@Ngo^nr9KLB^9D`Xd}w|1%=^fy+QGWq&22Q z9=;Rh5h%p1_6D1^g!5YW1ZmMZ;-mbg^9sY?@XF&&lJ@>;luHyAy3ct-%^E_Rxfft; zI|n*^uID(e2j@74Ba*g1CNj0HQlccmGlu@`#9IRHn4ugOf_qRiTfDTcOtNZ^G;)?nz}hhV7uKe+F`eYdjeB0`kZ@$ zw(V-$4z3lf8!=POTG6&$ZQH>$gY_e3s#!DIwySMBxOQaghhJv@YP+~5*lp2z+qS7~ z8@P6`PQ*+#Ye(BQ={bJews0-sJrFb1tR-#R*0wE-PnW$9Y;W5(yDeHT*AhMFpN@N( zy~gA6z6xWeuEqFo`%0nfAgvqURgt`!bx7ZSNx*T;4Xz_|P@c~~4r8W@{@(S_+tH&C z|Muve?>yG4(2Ys?T1wQn?_Q)oerch5ALNsL3r);a&hPBrMD!(QD(39{(0lr_LbpQ7 z?|%Sz=M{zS2FU6mg(hYy+rhnwiJ1yLzU#u6seC5m`jbE!et+DRlDrh3xj3Iqe_?KL zZDAdVnOb6Eri%Vm(~(^9^M67}un`o_MZe_-EmjbWc4W~y0ZnnYh>abnKW zU$AeOj`I8HH`qt)bM^_>8ukTZrb>KnDWB68N6PMsNM5_PaJ`XnEicjah8QtZO^j}q z!@b+JEhhEVP@b%9TzA+u#7vcc_d)dAwXH(Rw`&{MAGQrKQ%%h7gp|)|8`E&_Q;rkY zATv&Ai;0;k`t911mbyxl8KiN2OO=4dlW~y0}9>n-PcU9pj zW2Um*h?#2Qc%La|su?TT)zO%#tlP{%`1Fp%OlAE>AM4TSVy1GwbAIyK-`s=q3%>nK zF;h+b!J1^p-fSCdlj{-dMa)za!@F(l&9<>NxgN1@#7s3YyxYd!Y#Vb^?MBR0^Y1*g zjlan=Ep4O5Of~hR?>ibZmG&cMs)^-&rkJU$8!=N&9B+*;+P00_Cay`@y5ZL~zG&Mv zYMZ!DW$OnwHO45@?KavmQ&~S^rka@EXNsB1x)C$g#PM!Bj%~+{Yf`pu*nX-uYMZp~ zwr%4&rR~%9{J)Hu8pd}YVZ<+Cd_l%cEdef3jCiFzfqN=OywYC4y%Zx}X#_Z;81YJb z1NT;pc%^-S`zS`d(o2Ca1MT%j=#lROUh7FehU!oW` zbOr9J7&deR?xq+vVBvnNyJFbT1GopU#ui|_(tO$Q>+Rfsk3XDG8#LAgW2Q#YH&Gq0 zOADM*;8MkiS6T*KrWo-``vUh>jCiH}fcq&%ywd)_{S_l#=>XsXiV?5$GT_UAvwVOJ zhLbv8u)%OrF>ElLR16yoCl$j6!%4-k!EjPBY%rWu3>yq5fwO$bqYYU;c=5xAjWr~j`t@a=W^i76(0{d zgMbGq&V-yRfUi(oQ0NQ>9;~>q(76)$O2tKmPC0P7;&Td}A;3d`v+9*qlBN8*EM=hYdETkHZG#bT%GS9&O0RW6Glq665P5f2QHgu^?t@ zOAs^lL=ZF8iO&f*HP=WtE-_P!<2f-?(T<@QUEK z#$$BMF`icgU#%G9ITm=VVvOfF;BksEo-yEWLs` zsuRb#3%C}F<6IxojZ4hb;&@KXRB&n{#(1KRImYu^;A<6QJSPE9QjGDO3_Mve#&Zhr z6vY_NslZbeV?3t;Pg9KX^ng8Jn^QKg$tj!HvaqjycBj zI^gRRV?1X7&rpoa>0z5%5$uiD+DuU zD$lW6t`y9esXWJO86uc5Q+bZnGE6XIrt%!C<*L>gnfbIq&n@z4gPvnCW@;3D<4%>n zV$4+T`CU&iW2SP??_MXEF;ltccdr-Bn5o?JyR!r{W-9mm?hS$&GnIROcdlT@Oy!>6 
zo!`m_#tJ1TTLMlpW-2+^5^$0+Q_0DefRl`wN=~)}oMg;YaS8`|j5TlT+O9yH5xvr?}sDpAt+?alh|ABbc1xe&2mIms2^s#+c{u z8aRj7z&X4I&fzt14zGc8cnz$)wmFqA8*EPH%Le6CI`C7k8hMuISQe+IbBwRrvE1(qCZ~9g<^HK)a*F3z?hgc$Q#{9Ve#9*BJ90 zUIXXw8aRj7z&X4I&fzt14zGcg*EXl}WrNMBeA%F}QdOF`A+I3TK=8fzh%p;Cnph#P zywJ?6@3kou}we1duDa7AikaG9pU|ZjsLKS_wPIF4@FEz zx%YYBE0!KB;(f2WyK}L|yQ_W$@e2@>BLO}97FD+%`CTqsgm;mp9ZbjjocmLfpS~CA zY!~mtc4%ym6#9krP!IYCd4IN#_hoz1Qoex_`G;r++sF2@JsRty27SqTnnb^m_FQu1 zn?;Q^UNH4g?_}+ZN%;oKleJG{fYiM7Gl@MS`t90RA?4e(kL5YmwJ%8wkc5yAPQh&Slu)LW&=vR%IN-m76 zyttU$pZrAkVmmG#d4K)$<;CRv{2vTAvHHw-qdzrfD!I`6TH${G`F|)fF;nq!5##2O zhgU8a{*Sz`b8$9iD*AzO8hG#Vo8{h9pY2}E`^|b$?ziwK!;L=Ye+}A8ectQRxgQp> z-XZrKGd#ZLnTt)i8-84F^x3{zw2v_nI3KRM`wvEcT&37C`H9ZOyzlkTUtG-h*x8;s zw1@iSeYau4``(k^E+XIi&lSF3dFzKJrm2Z{1p66Rg!PZSN9rHHRqCJmjMP8tyB8O0 z91@Nf^;v(9Yu_~WUbg%_<1a>U{)6!k-iw`$zK##$jGW2-m%&dR59*)EevMB;9y0F8 zne5j%CA8nfi#mh-8n2`V{m&RAXR=>omeivEsedN>weQblw^=h_^O^cv`&#>uK8)Wi zniwd*YQN?*{0eK%5%~s%Tx48o+QK;0Ooz|k9g%#AnffcNF_OM_M~~wJW0F@(3Eh2+ zBdPV!&m8#`))djHV;--ex1ZM!Atvd@g;vbeUx9;ML!g5-q?&DIylnbx+QoGEm|ZE! 
zZ;hFXc9}Ip@^@E#;a>)(MW==mW%lxZTA!{FY`a+_M7NeYO`>nwaS!ZUPy2#7)rhvU zKN&+x>r&essiT^bHqy_s&+D+BaovbWeu}p78algUl2&8ert3r+>le#WNA0f={aQ+- z<=Cj}LIa)wOiVw~J!2ncV?tr=H!=LA?lbnG`u{qt`^IkvwwTujt*ht^rE^aDpqfg7mM-4c_KA|torh{*s#|^k=#!g2YIIkO7KhjSbHx>8kP~24B z7e1a*}%Z4FvrgIntrHsj#Q5E!q#KvO)XfQ^iev7hG(S z=j||JeuWVatS4koNT$P29gIkRLj>s&r1ydBgV#2Y1XQ{P40RM9!o4|)BN*V7y0GY$Ih4?x-g+@qH_R^q0<`#Xu7ihMp> zcR_xaH=2_dLtYG-Ksx-v{*>gWyCS_S(z`=;f5VEK`tHr*Cq@6Dw7EIi#p~y%rECKw z>N+I-(44%;8<35=mqLFTe<`|+(kHOP8|XJlxx>;wN0XO$gR*h=YS2$6?w*uuXTKw5 z_eUhZz5V)3fPS;%?^VO^8{aDBbJ`P=x*90kwnu$z3jUwvD?~S^JxoKnoc5@Xt-(5A ze5{mf*PeuwJ>__)&#lGUXnd~d=h&Z?x=%VD>U*oPUK-ykx;geYNu4?Ns}D}`IRHL( zAowv5+>3ZIe-N$*;kvgsAsdIUhR+7j8;rcc$m`=x^viKw4sKrRP0GgMt3`iMZz%GH zLNDq~@rUDjIIc^*X(pB#{Xh!#(l3rc-U#HCdDHz-xE_W2`g${ro_RK=eb5_&yfM)0 z=gl;6_+*YXC%bxsO{_4^!CJ22(3LsXob2Y6`!c7%e{X0u4qqK?q@K*JqsboLa9`Tr zoaCIq8Z}yp%yoauBziqF1 zmW6$`-TJ+?ZLfKjg?+Z&ww=be!Ylh?^>TdnX`o0{D2DH_uN{9BEE+&P60`S4?C| z=16mrbB@oSW*z+i+{CkIh3M>-xpOqhxtNf&nv}@BGIyGjoQo{4{loHU(W#X_YEE*F zHc8q(>6_*x=Vtaf8U17Yx9D`-?!a$1qMvNPU59nV_+6>5!|hPNo#uK1zunPxsNZhD z`ayr|oMk)IUpHbMX8DSyFy3{yNR)6qK6=K1~BRukgO=PsjCiaBHwv zk@eRN=ojkEK;8`GUFqHI&&2giT$g*xvi`ae{Xo6h$eWG&hIlLdIk=t!y`kQ1S$~~| zz4XKLkT(x`!@S%51-M>->*3z2tiNuceb8HkyhW(*DsQ#%*D{xmCXH_oo@pClBmJ<< zrRF4kyUd;DB5jKH>Lf}NzO&}*X%>; zNq;maIVYw4N0aPN_1Ek}`eEC4+gA10>_h6=w%fLc3VBvE|eJX|}Ich|qU)%|z;e0R*q3j-Mm)ezhxG4JilMEMUZm*cEZ z{~d?Z{TmM5!!Pda9**veR#YTPmoD9ualTW}d-N%Lac7^h*7R_uY$t8%jr87DdL&i0 zvkY~VS#?x82kR5gYxw&`ea87^J&GkFm3<%Wo9VmL8I1QzD`9gcx)U$!Inln+N?cbu z7xFvX*Xk=!W@%>WqnS*mU~*>iqbPe~>Ey{f5>5l^`xXBF0)MaL?-2gpz~5iu@7MKH zGgDErX<4Fc*>@_-Qe}^nCCau_pXI!aXUdg5 z9<7Q_o3;aTbJX*8Fg?gO^-a%AH+s`k(_zz#^2JD%>`P(LLz z1?l5XK%#7{KyZeVbrtLHF$*itKk!Boc<5|D@al|2J)bGasWNcK{*8v`2KHI9`8QVB zcfr}PmhDg(oH6h`LCfgB!(#7aPsbaz-9em;U5=BY#NII*GbSQeL*HJ=$ zzxnq#aLx0!1SIx6*%%-x!MoFvpN;9!B)FCm`uiBq!I89TO4bn(oT3EY9TS|5aZ(|; zmXdYQhb1KKQ`=$s1b%?Apftv^9aGtkoy_>L8Y|h3iEPJ0W^5~sZEMG~wPV-vUK+3V 
zRPkx;c(iTf&oT~<#woVr65H{H8LLQRB-=51?YOXvRdupBuy$-!-s@zsR9XK?W2YkB zjzjx-v6WoQyM?kjuT7HYFt=5{wq19abN9tovcS=blUMaS&zoxwBv2E z9*whU$JC_0#?rK7X3}@qF=g538bj8O6-$0;Y*;%6EIF&OU+wshtmkC$9$C*x<2xe# zWbquSf3jGa^jDupyUFX%qut7J@=N1n+Ho;AB3NSo|9g5sy>6owgRs{3yX!)J<>cDmSTc5K16u>{-33~U=C(D2@h z1=uzQ;K|~3oi0Atnc{JsDgIX5xLa*wZgDRDcWN)^>3^s8I@Y@xeDBS{J5Jckr75vD z%f9Ou5q!v;i@)+hubAj%-{q?ie1wv}ADZ*=S8#rY=d%RT_?sam_F38Yxl)1;GXCaA z<~Q2k8U3{AXW!Rq65L3M@`v>-n(zCWGhxZEb0k0e{#!)w!C4qnbKZ~kQ9ma7+4tKj z1UFKe`k}Aw%s@XO`KPphR`8p6E!s={bWlGf>O0gVIBP%8a%oTXOPhM}K71dK{l)j` z*bjU!Za&U>&(%tFr;^c-U1l#Ls6-T}h98s^LZ(wrFxu89cV%XaYH7`}S~z3jU^ zeD4Ifj*@2wY$xA|F+4+>IYWR%zh~dkNeSMqJmdR7)K80k_WhhD!L^hqzfXBqhu@P7 z&m=$leoaJhiW2&}m1pT+1w51dnwaG8tq`1jcZT}fPQx?FuTF@5JNuPqY_H*&=%3Pl zB6-GEC-Gd>Fv zW1IaQJter+X9e#?q(wjbTX~bJ@!`yWW# zj^H;_eaF1+gJ7PVL|5O_=5LE;zF{obCjRDU<{P-8eZ=2n%zO)U`I`!L`Fn}>5r6xl z4u9J+^DWTf?^4=Fr>jT%=yW>TN2k-FPcQB0IrO32GiCMv{J!9%XV(dw{eOlt=tD)R z&*z!B6RdxbX8jXf6A`>OCb*Uo`THsa*HHq?nK`b_dIt#~uq!1vMG3q+Ex3jfcyE*7 zT1sF!Gnce#N78mh1g9ujM@(=HCF`gVTuaG1_`IBuv}#JEowXghkK|r3e03;1!n?~4 zBmQ)FbjTg)t@TTA-E&;X9p$a}d3F-HHWVK1-Rt*6dgPjrJI1@;?}h6=lSA&+-Ugp% zJkjey;j!LEKZ5kA7jnmWzTX?yW!Hz?n3wQ*uYUM9AjW%}{60wUcSFdX;8mIW2jJg; zxW?P!UyAer^F!`LZ=0!eAmZL%>uoo`D{~*$LNzkPRsUYE;4E`RI32-g>15pwxk zH_t@64hx0(+xEps?>Z#p^0)0vaNYf?kjvk)c_zR%^SA7-NN1b*+jTcwv(4-a-YbHC z(}8`_9qDW{`-1iN!5Iqsq6gC1e)a|Hj3TZ*`-1zo&x@_((&xoiMv`%JX*meUV;%Ysj7M zRhxQ-qMqx#=lp(1A9_c~o#E{=^$bTn*L&ag`y+k$nvgrwOPP8`pq^RY3;qD4k60IS zXM4Mi{wV0*;C;ux4C$lp3%UIF>G|x-`Nes`Il?|K1HU*&IM;NJkYD}4FU}FpHJu~m z*8uQ~bAz8*h5F^tXTNX`YQK=n zpGUjN>(8Uz%5nPBG2qu+Z;w9^<1yyJkUP)Y>yJVD*ve3NzW1_!Inu|*LoWY4`LRf! 
z@JJ}ke?NW@(qj*YT>kseBX1{wx2$_2>SkS&R>HSKPPr578ss$8 z&3Q5%^Mv{{0{xkR{%q*e7jUj}?z68s*V(r$mpwBN>ZPsxH+b~S{Ew+koHu`LtmS!d?|UE0fe$}{u- zF736?3{oR;{uB+)pCGX}I~Wn%KnX18Pa|>uBn)4^oKc)TI^QZp7`BQ&6FRAGt zoImw%JzJ^nFXtwyK({8)t%a_ht)v2-nn0&E(5Zuto~@j&9z9z*osOQZoKC01XXaDE zvnj!u0VMdcJ0iG-5?Id6r-EmaUQ3DeeHDW1D1qh7d}_cmq?t1WNQ~pIl;9L4aO;`* z)PQG_Uz3*ny-k8^DWTtbWcqaMP3CV9~zw)dR?KM0T{ZrbnJgWom%^4o}Y|ik&Q*(xg`IbGy!(1|Fc!6$1 zpxX#t2#E5r_;&tSqs6XRe{ff1Sbwg1UFCui_cmJ zE=hVLCDIR92tGmyEIw;tz$K&^p9P7r+Mg1frUY*FSqlR$Nq$3G@((o$Zlr{MtIt{( za7ps(9Le7w5uB!E{mP{j+Gn^V`3*72KU5*Ok(luDXLKuavemtPIYtS4!KCthCO|R|fOsB)Zu%bBvjpZx{=70kQITv%Qvf>04J++&Sqg<4uw_gQ zx=1cln5(vS>iuFAI`d-`K1Yl~P3=?*u^*k7z#qZ+mDh~3Wrl}JXQaPuAwGcYMwOR$ zZtHY(@_3fAs=y=r+kyV7s;9oszCk-1k-tSe#ij?wQ`~c`@NvP1TO4r+mQrigB0hoL z;kC7hUD%BFwjggs?nd5*yd8N5^3JUjtO<5ZL!DJ;$235um<0tf2M>*>AbX0JsJH5E zKRceHuFi?4sIOm1^+J#K+)BiX*mjZ6BP&PRyncb0n(bRO@;ls_oG#Qg*AgHHVa zYk9Ydtd;o-U05$f!+If1gD^ z*27p^#+WvxQLP+`M=5Z7o9ntF_DVosh$bI{bYT%&y- z)r(xaPWT7c#GE|MB>4u{?hxMQ+EIM}Kf05%W5DCzb9Nkf@<+~208jtS*-2pU9?nhy z&;8EXY2bweoSgw)I>K2m@X86!&I12D!`V6D-{(0y54?7nvkSoMe{psZc=I35E&)xf zF?Shw$D%beowYtb1$XY5*Os_vT}QsTgt&8qI}@Cl;OsW=Ht_X^<=r>H->qG}MHCB5 z`*NCJb4|?i(+-lnk!$pug}H^!4=DIGzvDK)rr_5U{F;JavxnRKnu1?b@M{Ww%~Raw z*A)Dk;CC<1WAA#-_5t^A;A}te;3m!v01tO_b`VJX9s&}-hk?ZJ5g_qP{1d;#Kk-Za z6Tiei@k{&@zr;WBOZ*eR#6R)7+WsF^anIxOw{G=a{=QuIjKA}EkEOL{9+$sc=kM}o zVGs8I`~7zX??Q~ddtl8`#yo^4>2n>}%h(*k3yfX8;@EWUb!@s0IyPPKA)7Av+`Rf| z8w-0VGqQh3J9Rjo8y~wCuO0G1x4(|;EB1bdcNK*l-azk_0sp;mriJtlhdp772#XX} zOo%;T2N7cLR~I4neC34L>$QY%sKQDKhof=vIYRNQ`7wwr5};*B8O`62Wtz*UU_QvM z_0HT2OwYqHu6>sO;~XGZ)k(z;lnPN3SRinv(3c1~C-CYWD?`O8La&PYHX#eK59(1W zz@1zPR2AjcbAy?J1c?TNqTp2?i7O*rLkdPHK@tWRb#$#8`|?F|L~`m|iwgGml9`m@ zi|N_qJU!j)%eADNV@V_JYtO`@t!=(kONHO>OQe!|H1SS6nh1<%USCI2j~m$zr@9Gs z(RL%7%d{HlMiP?oK(H!`ydW4JADypf+l;iggH3Bmw;I_RPs8*{o@vu2d4eIY-|P3h zl+5MYJ8FEs=9XM?UXwSLN%`KGIWgsHYnz{Xr9GROYs7M1C_SO-SXEQ3A{^4g6`|@% zBOC}M;(BGQ$qx+Zp>SoQYJ5diaNKJRZ%&*tc~VV0P#FvBy59&HdaybcH!A$WM7Syu 
ztTaN^P2>Hc3L}xwgOy{6n1?0%`nYfrqBfy@rX&sHMGPD4^h?!{hEnMAAg4Mawcd0l zXE1MkVY*{sig}yU^SnvDBgw>Hs?TMaH=Bv;Ih}cP#)6z}t*2UIs5aWNnP|HnkK-z# z-e{WF9Bpe!Q;U7&sNDisOc`l7qo17K1WcmdDLKfZZ_fzpEjc4w5wfrE<6X$fPwMAw zjkD;a*)tlZze-yHKb1uMgMB=^MUn-HeFM_b%kd=5$(P2cG{;bKlE5iKq;X4qNwpl$ zLr{0kvNFAYn7@_v;Vfi6Eq@&%NA$mHKgvm6KV!4@WMUK${Vp(0;82Rq5}Rqq(Ki)#7Y zl>E6xeI1bMq<_2cZx{X_3Ofk{y2l#Z&hbmVMCm{9MjhxsTlo8>qrml||G5L|rN6|M z2l&4baV#)ryI+!i60^|uUAsKXZNh(!sH6Lo>?D8k{#}GR$(=B@!PDUMKtE>WC)uy8 TTZY2@{J#?Z?@2>}1O5L6YF1l# diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_varlen.co b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_varlen.co index 4a4d33430ccae1db2292dddfdbc44c80e40e8ff9..efb7b401d1b96a7f14044b8ab4a7e3a7c044a329 100755 GIT binary patch delta 648 zcmZ3mon^rlrU@F%3dy4n8^SEFj@e@Wdu@eAm(BSpW!enT>#}W@Io0-eM~?u zG5VnTC;KxBDiOA0a}(nyE=H-zr99!AzwlgPJITB_ zN0@u_4(Ay>Osqnig96@(Ztm$;;A3Ls+`Mk)Ez!+au4wQH{C~S(zfzM*3pXRf7obZ$ zCL27q5GZ9>w7-Gj(mqMm&5e=_H}-Ab^LT?E)1h12^Mn{LF-~s~VVtCG4phy|D9^;t zDhCQ028Mr4{S#CKlg~c4*;T+_ZhKx&zc?c?CAFwnfARq~EndB12L1B=#N^cJ zj-rezLOz*!*{MZV3XZ-m3O+tA3PwiL*NHOfM_DE&Cz%)%V21&quL4K0n0OwBEgOij$q(kv6rEiIFi zQ_WH=Qj=3s5|d2QQZ*+pWR#sAAjZhS42-_*(PE6M%uEb3r!z`2%1vJ&$tb`LkJZHt z(|1cUn#vYHqqzVY?Gt#wo`n$86{Q%3rz=P?N-zWEr&~)g+H!7&DomZ;D8*>bEU^S= d5Kx?X2axd_#E@7DR4ENqxnwEClxmRu@&Fv+mAC)^ literal 72608 zcmeHw3wTx4nf5;WtOUHYA|eJjK){o55km;!ehbKbBO-SYO#+0AT!o;BxX%fOi$JX+ z#?;cD)M6Vg?W9vW#nvX;*0xsd#VKApZD*#_NvCsR`i~vO!1>>It^J*>!$m-CkNQlW z=j7Y(+UxSIwchpZz1H6A+p!7LCc2?eiE_+e*!is!3XUmCg5$cYOz4y>h7EPv;(t5m zlTJ3yGp)osRX!bLCbBK!@{hq%rVcpeU&%!#KqLy&e@q7z9Oq&U9j9};j9+XD6ltfU zuJI1R`QmiBKFB71NpKL9=K`KjI!Q0}oB1!5OIg43EzfA{OirIo7WJ*67|o>EPIPpPI|NUHZbz-E|2-eu@MS+IP=x=rg#H&!fPQ?a(P zblJ+y6-!Gw-hWC{{dd}oz<#76l&@a7d~Mrro)*p|^N$p)Dc@YWY;}2M=^g7ftO~Nm znHLqT$lCHX6|W60T{wMEIgHMR;-Y11R+N`6UN&HG>H3NdmFrejtQ|+NV(H-G(g8(7 zHx~^W40dc-U9on;cM8^TSb2MSWkpJnR!yC+6l`3+enaV!b(_{!-umT&jd!kDyl(Yt 
z!`@p-1*?`GDcH1j<+61f*63Q4wpJtbje<=ZD>jxcy%Tj=xukSsW%-g--vf73#iOEs zQ*cN5?G?*5tV46MRdSy!Xl;!38`jbIuG>($X5G??&{qRvU5O^HsQfn2@+E7ph2RfX z{Mu`mFWWqzXi%Y8_1h~pY+Si+?Y@v)B4GMY`{VVfeHd2E23PVgK)-| zpH9*amlW4g40|E+lRNr&bN(2{F@OAH**ur__ug9Dd2h+kNxE=-H{r||MJ|_v)8(C;amPFvOS^avlpkc&#<4WVRcPcoSdRVH&E?1Uk^G1(K?f$aTYJKR9^-qyJ~ z`)cQZP`%=vfxKedCz|5UiN=KE4Bq5?p;0-%ZOn51v9X79u+eqC(9{!QQRmUd&TZpQ zvac_{RQd7C4_3MJl#jcjrleBsP9|eFxR)tE?DTsjs$6j6+E?5*uH&{*Q(YYQ)M5Qz zaj$lqp4k^qt5R*JUVd@S*2^!>p3XeniW$W+|!+^t(Sy_)F-*{8|WTYmJ z^4ll7Yx!REXm9D!{(|)wsrBfV?5fM}mh7gbG@#vojdtGN$T(x29ZhlfSnMsQ{FO3{ zD9Uoz$ZU*0;x^evwry+VMYu<1xO-{HxdeIgIQ&Tyc5yo+FKHigY<;3t2jkHm+tHM9 z=&++HH=f;;bbj7A;)x?;lFp&Vc15#qPdZ<1to`<{P9~kNfe)8nnsolAQDr%yq|?yo zILYJ;ceLNm9qC6=W+*%Uq#pm14mQ5DQ;}Q8wQLv2+&~rvvLui*0$CQw^?{5B^8P^9 z1+pQKPY3en&T+OG>m2Q9>YA5BO-#KyM;@!`9EsLM%gW+;<>lLw&X*cdX{TrYV>LbV zTf-yCeBF;d5Z=QI&#lX^$wwafRvuN(50J;R`2Dak>HG+7;ly*RdhO|z>{a6whMZjV zZ!{ULaoVHrd!e_J(JJSD93xE`(v&C5_4u5WoU{jNPnJ)bv^Vbj82LSi-%s#+9={jx zdlA2%;`g)0Dak2%3{Oo?MVe$1crS`|rpBl4?GfmnTbEappO+uc$^TMSeqH{(`SJYS zjL)&}B|V)k|Ndxobn4W-klUl4x0m6;d7a<1`THQ40^jKpAI^-qube!qQ>H6IC zyy<)QxBqeDNw z>$(2ZaU-SAx@EoKU&OHiF13$?ORqY|wQLULTY+pFIxcAiP6h#K}7vnZo(+j;0Pgu7VYUp86R1@;doF=u6xI=cN2wsMm6f@|5?@w_%|QXISyT)UB63&Oi}L%eRl zZQI84F1qOSctQ9Tw?y|ubEA8sXy3kQCzefK^CFy#=hbSNAFXvTDU5bHc(02#aK?t5 zWTzwd@*+Llnet2bHfZi%>?_!6cNDMRc}HE(aq@W%^^d#RzRPnX94B$vuBQAde@}MF zRsYF4(f$tyZW~|}gLEew)6N6i=br(Vwu(D1fzyBASl4xSOa z7r#dQn(%uXzi06K9)90%9F-iUc|AHgTJsuMkAczg(R;@v$LMpAI;ixG*>w3UKzB?8hK&Pu{x?IM+Dt_TIo#iCwHza#5BH04d{A;6Q?~Z=YmJ?pSJBq5qhH==)P3yy zZ6nNAvMx8TrY<+Xrc>TGsygK#uIjYw=IuA{y1BZ?&PCOoat>B4iY}^JFrsobK{)KkGpHNmyp3b2 zpGfBG2KJ}g$u9aF#+w`1pfHZZNb|Wi!#$Ss5XRZ(0z1NXOJQ)GWe`8a^S}yY+kFOM z&kv7X=Ti1fwQZ`*%R;bo*+utcXBYB5>p)O;9kjd$?LfWUFybgxcpz)2cL4bxVE!0W zo#Ys6Z`AETxOc2mxTm(u`{f+;xE#lw0-dNM&+QK|&s`oIX9xQ3e|~7}^5=_u>b(E? 
z-Z<{5dE=lA+OA6xZ+_Lms^~6UgXYexiO!o<6Wz72I=U;bdLZ~voOie?KKJFS_`FxD z;y1lf6)*bRs@dSnz<6|b-psssG8)x(EIV&_yiIb%Hg-t#{?XNQ5T}@NX0^s?n>R6j zQF2lcXKFQVWpCJNvCl(CZrwV2__ueCh>zGdI@wr-tQ?I0-4(oMM|sVDvhh@#{;p{z zr>7ZMG_Y#s+?iD~<1=@kPaQ(e^G!XRx~B8h>8$bIrydG=wAQ=jjR>YlxsH~X3h#N} z#_x*P{C+V5*=NzYw`9dBnF|xkQ$U}32JRWkKTE>Aqz&Ma~ zj01V_i8)5UcZ-vJ;@ok@ByoQ-{OMSH_z&>=F@7)L_jCMyiQlXE{VEn4!#E8%K8)WH z{GP%uJxx>W+;Ngk=Z@3bMn7zdIE!DXyWH*UwAby_kZa`O5+je48TnKQ?G;7bTEv|b zLY)zgcByB4$U}?|*}(XahwIUH9hi5>?fgXjx#K^McBC$~h@X7?-0>CKl=16B8zNcC z=vEin7dez=%B>I0jC}QwiBpF-GrwxWssF5*L0LFHJ}U@Ex$hir)nV>CqW{duyt%FU zoSfGho;=waPP@?SeC@*O^f7);Tf!NiWk)X5Cb$@jxv&Y8XC3dS=yb+-?^BOfdtk=6 z!~%5QqF@IQ1w0bXfuaba=o&(SiIk z`tQ^HlYGcO2`B#~oNehqr;#W_kLSU`csBBIiIGRjjMU?KJmRLtGs4ks^|?kKVtn8R z#)s77nRQH!=X1nods3Iwcs@sbwkPA)hyJA7le*P~+IQu6*8PsQWqa!SAr9M<;WD0e zSwVa1@Sr_K2ezlt|0Ug?k`LQc!r7h@F6}uDWsslb-)!oK<9gUX>PK!p4%@~!NLGouEOu6m5(k4khtb>HJ z4iYYQg>%R`6MwjKQhd_3smY({dlbz3cMsw{N^jbtXHUCFdDk>E($f?d6<5ugH>+w^ zeAe#sxrdVL&%#Bj>4uAVFJB56mS;4(_(rwFozT^b7`w8AR0!>#I9A5~GAqP+eduW9qJ5|_=(cN5$u%jiWv<@jg^a!C3LPw+0Usq!$ z2b?E34>LaCe8Ig!PA}kIg8PIpY(htS3%(}g^a1W8_}Y-u7r3wB{vqcY;A?3>`El1w#kTNx{%Tb5byL(3}(u9W*C_Q+&vv4k0K4AD(vK z!rJ0-e=O!8Nw>uxr{ziiIxU^pcrD%71T9r;qL$gQNs!KEfqi)!D_!W{e$wCX;AsW$ zwSrH$P62R%;H2yH2ktL86m|-M3k8S6&H&&6g0sR-5pa>ws)7!Alk!c;izb*4RgulO|3D+ne zmq&2vxPPOTE&fa`Px`a8bYio$bYpY0RI#~QX2<41I^kAM6{C-frO(lygMbGKMt=?l z9xNFBIRto!VD#ru;Gu%ipTmHM2}XYo2OcgM{W$`71hCC1o7d!&&1-VX<~2EG^O~Hp zc}-5)ye6k?UXxS8YnxM<(!u6brgRWa#WhEc`?z`2c8Uv9N%y3GkCslXT1z*!T}u_) zp=EY#C#21(k?7-*(&y;UQNW`Fqd!Ljj~0yn90NQ?F#59uxI{4eb1d*!!RXI%z~cm? 
zKd%G64%p_D&1-VX<~2EG^O~Hpc}-5)ye6k?UXxQcugNLlwauwa>0omzQ#uHzu!O7a z)B{?!`1`ax=|8BY6Wgz)8^in$@l>ov%k0?0kT$2rqmRc+pQAq~08bE%{+tLrQ84;* z67VF!=+DW(lLezcrvOh8jQ*SoJXJ9I(*yQ^ZBE&|CZ}v(lT$XY$tj!H|9vf;*blUHW6x@-Vn5U}JN6?; zn^V)!$J3ZtSR*D)zFL*|Aq3ZBETbAJ3LPM}N)%o+B9jITv`YVD#ra;CX`4 zpYws|3r2q~0A3&%{kaf$pc;N%0qtV@G>ppQAvRVz)OlT)1U zt5pV*Q=IRsH3pMYobRi329r~q@2lGkCZ{;xR~ro`r#RnNn+ztWINw)yq;skt!-Uu9 z^E6%qr|}v%jn}|wyarC=HE2q?5 z^L=%f!Q>R@`|56k$tlkFRm@;=it~L{WiUC#`M$cxU~-D{eYM?Sa*FePwKJVlX}m_C zr|}v%jn}|wyarC=HEeB|3Q=IRs2Mi{sINw(f8ca@czONoKn4IE#Up<`8sWe`r&(nAf zoW^V5G+qOz@ftXd*T8AK1{Pl1oXV6AHm5SBgK+BUz)roVbu5choad^~m_8?`INw*F zHJF^@d|!ReU~-D{eU&hnoZ@_6eZgRIit~N-MT5yH&iB=029r~q@2fATb1IG3=<_sQ z1E=vCIE~l9X}ktb<27&^uYrZvHm5SBgUzW-=^&hXU+Y*Fr)IN{A2)qYPR$|yioxU* z*Rj-B4JN1N5&wOz9x+Oxbd))A!}bh%M@F$f)-SHdgk1x!+CS zR@IQjt6o>zAxGYIT3iMy=Pai0Uw%A` z>4)6bKAY*sRCUZ|`biIT&Sv@>zEG6S^z*(xIGgEvOfc#D%rof+RG9R`?l9@cZa3+t zJZ#e6_?St*;9DmB)pP!&%N@AscoxgM?xBvloLS#2%H})Ul?dx_%|f*al6Acul67U- ztSfbAU8y_kO5Isk=Eb@){Z#{A%3}KNV_(i<`d&A_uIt{v+~_{!HlzEPDx>?P2aN7F ze4%4D({~zV()XBT()XET(hpc}(hu8Y(vRI~(ocEFq`&dYCViI)FX?ixsd!zt^@#0% z)@|%P);^o>oPXTw$am&H?se4T67RHwdm*rB5T{yT1^5vF(9wkli42A#)*nLiT*I-GL}%6gc7y_49F@kK?Z1 zaKAT>d*is9H`4EmTVuVM%$^Kv*55{qxH`O1C_aAh<&)yJFZm*BVr$9=uo{x}?u1MjZ!=KAAtJRafwy!rk_98bjYwcbL1 zGL9$XxWHTFPsQ<6#P9Fj;!nfzG=vv=W&U&=Psi~9Z?QiE$1`wT+F5A>G# zvvE8d;lJhbHs5%uYGIb zO$)0Q%vViciGIP_2P*r%jBJZNON+4oMekR#{2hc)$E678-X4d04HufdOn5r^LS&R1 z8S1U~b4}Rc5+jd9oHegRM!0I2x52MRdhWg9UL^44Oik^$NsLO?aD%W)(iU-yUxMBChCPahg(oa&LB5Jn`ZCN>4>yB16@V? 
zb_i+p{v+%sdg4%P*|vVCmTl|xRrH%~Yv}$2_8m#twtjj)&}Or@$JS{k(z0y!M-qG2 zoS@$4TlOs6=jnE3dhEA6Q`vTT#}Qwb$Ns~f%x-9h-0NI5(%bBJ$8pa|t{UZS!Tn!r ze>vD2m5p|cUhhUmdt3cn#EW{a8squ?+!P;Xc6G3Cst3;H-{7heFAg6SeO~CDtwH_jc)P%vHdh zhq#K@BHrstyU#(rP9h(2nAaxqKL>HiTV9_!;h6j+zuC6AZ-yh}b7zFJ{^Tv&sps2a z&DSmnXIrpcc#W|6V(Va+`9YOOJMh;mk8_39;Go_sdWx;6Z-}!{n}Pg zAHDA{uoZ0Q=!tNI+@!7GdF&50wgUNLtk=>`k(ZjIChP$9!nr#3c|Q7_oTaT`y|Aa$ z*a{QpAZ?g%*R~bpMQ`weJf^K+ov;Vh*ovTRTR*l5%1o7Q>qT3k+ZwuSTM?9P>o+LQ zJ;~2D+WOE=uw3?qwi6dvF71SFQ?V0gC|C9x+U3$-=yK7A+=ED)!SPy%@jAuZ=gXeN ze6(HR5?4+2_G{ZA{YQTfVn2$&8PBWn#YY7FKlDPnqUEld=GAIjV)(MJtN4YWO(9>3 z!I$g3NBrphQ>%L;P4OyMP4^CHyCUsLyF%RtL$@2e&-;B4Z}2)-&G729U6H&wcP592 zLZ2JGNBzEtH*}+`W_kzxB4mnnNK0|VH?sGO54%a*nG3g|4WxmyOXK1X@jBiY zToZP<#K+QG8_PZwqd&<}+9#gJers)? zs1NcuOxq~DEk;>`Z!j>^qMnbRARc>US_ci}ow8`k0tLA$RxGo3z@_GR~ zGaB*6-Rnjccz^2`AYREmu3G3F*7il(pSGn0@g_dtMsD){PTxyR1!vgz)6n<4KQVp% zlDxDxMzo)3NFzGO@j?${OLjb8A1G+h)-L~F+&?;^Uv1P*6XjO5ADu}Ngvvx zz2LXGA3FBN0o*?nqV2}-!8`d_2bMAjcSOD>+O) z9S47CpI8_8TQK(NEV?;?Zn3$N7moa%^2Kn3c1+rgb}T2WFuR%Oei4rR8`B&ONB*7oHQEy^ zZ`iQ^WcWXXBeY$-UXt&$&*VGpFL_EkCi&Bj(cZCbG*@7YXv;WO*ghO9Y#(x*?ZI|r zd%W}Jn{8f2S?#|bj=T=p^38DMSHw?-BfqBnZaC8J%^umTYx|(CEkRw|2X$?ay3&rF z%GI;!eX5>+EgfmwKI}Tu-i^d_u0`M3k~Io)>GO|djfPzP#<#P^>*rsuXH9_Qv$Bbh zeAYG%a&MOr+1EqvzviZFrZ0WwPm1Z6|KiVz>DT?^amDm^{C9huJ{G!4r{8&bmn^3L zbdP~qO#kqJAz4gc{;WyA@)eW*wtq3{@BCkrzADS4zxOJW{((G`{xd^N`XxvIq|4p- z=5a-R@450SUCx7p24<1d#R%iG_(71Y>tIOMm1VQ8)Sb`is5|RQ-C0-W#kw;6ysy86 zanHKGteAfJFJ4znzwRH6?sxpR(LEM=N7sGl&yB<$~c)XEE?%!SJWE1b7K>%7;rR zta*Z=1K-7(FBm%TU91Iyp#$H=S|}Jg@LjB%fMxFs{R(AD2ig0QNgZS_R926WvlMN! 
zRQl`ckW&F%AsGI2mH{sl41YSyftL%0Kb;l8D+I%z&Pw2wg5gi+R^VF&!=KJ7;8nmW zK0pV}N$D@>pgAcRI%rM`h7OvOf}w-vq+sZvIVl)AXif@-4w{p|DL!OShZG+&sDs%H zwHZErwgf(Xwgx_Z{J^JA+#j#s?cL@ZpFY*T@#(X}PxI+>r|Yan|E`w)zRPvi0Iv~z zx9hA0UMm=Hv8)4LC%DRW)&s8>e2?qg27H^~?XI%{c!S`buCoz%Bk)+<2WWpf(81>P z3Fu&R`UG^aIeh{;*qlBA9c)gYfDSgNPe2Eo(edE(-8=n8rS4K6~i79tr?H7OI(+BmaL?2g5pQArF0dEqF{=6Od zcERY+JAm&HjQ-pVyjd{%^G@J91*1Q=0B;eD{=5tLE?}EeHm}Jko7d!&&1-VX<~2EG z^O~Hpc}-5)ye6lF*EXjzrGw3>Oz9w;iffK+34Ho&^|xv4`^KkF+^^Pnn{RyjRQtxK z&kjG$rw=%_6@9!_`W*dvH}Ktp(VsrBFBtt91C9wsf5w61g3+H7i=2WJ15KfuBP+R;5 zGf$tTJ{@e|`TQK@_5AYsfn^QKg$tj!H0omzQ#uHz%wDK1flr^U zflnVl@aYrxjZdF#{?Wir8J|8o{4}3F;M9Kf@qX!Z^yfps4+%zp)&SQCMt?pG{IFp3 zXDx88VD#r_fIlM`{rL#+BZASNp9TIbu+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v( zlT*TLn^T$6!RAz^boi+I^jQQ>sV9T|c<`~pH3RjXU_Tx(*9_EmgZ+5GTr*Hj2Ggex z*9_D%2Ggex*9_G64W>^Yt{JFj4W>^Yt{JEwwXR#xm(6U1wLEU((5DaAv08#Xh4kse zb*z?PPa%E!a2=~9*i%TKK3vCY3HB7yrw`Y$T7o@=^y$NOtd?L;A$|IA9jgU<3T53Q zlRC({MJ9ESbu9YyS&Ft%&zZiUPan?t)$<0^rw`}+>P3U;(}#0@^)rL%(}#0@^$UaP z(}#0@_4fwTrw`}+>Zrl=>BBj{dZm>Q^kqX%wgjA{Pakr!CEz4|`jC??0VnCxhn#E) zI7y#A;1XcY_tifdJQkSqef7@U;T^0*8y|B zul}`lzwk)x-?ceC3fSiKXkeSuV}NZ=mjK(G9t&)9dK|FL>Fa=n)3RSUlRC(L;Y{ja z_MpGdb*x|yIyuF8uKI)Nb8?FFef39!$tlkF)qfaFPI11k{>xx;it~N--v*OYobRju zXD~U%`M!G7U~-D{ef7WToJ!+0`aF%-z-hb&PUAIj8n1!VcnzG!YhdBE&8bZ3U~?){ zItZuY!ThjF%L`n`vN*+gu6o<_IXT7ozIw-Ca*FePb;4kBit~MyG?<*?d|!pk_ch2V z&i7T=U~-D{eU)V}ImP+DYV)8YzQ;H|(|8S>#%tg-UIVA`8aR#Dz-hb&PUAJO@Y?27 zrgX45l_?#BQ}1gX%ialWrEH<+B_ zd|zE*FgeBfzPi$2a*FePbyYg2(s+$NPvbRk8n1!VcnzG!Yv43q1E=vCSa@x7DpNYx zoXV6A!l|bNJ7s+OT;MvE#i_mQ<4&f}$tlkFRTqQFDbDv*SA)qZ&i7S!gUKn*_f-#r z$tlkF)zt=*Q=IRsJcG$8&i7TXbWWx58hxI|Yv43q1E=vCIE~l9X}ktb<2A7G+U8WI zbg(&Fge9_ zEH%(za*FF%YLLO?6xXrTkaSL^@fv-e#%tg-UIVA`8aR#Dz-hb&PUAJO@Y?27rgX45 zl_?!$f3L{j;(g${VcWXf`;nXZj_|NYyE`59JKYU@uaECw5#!zM!?`B>h!b-BQO=^5 zM=m%F-%2~0UPZdvVT-#vH|}U+9K?M}{Ejd#eUFU1YfCocO1XLuEaI0SKAs!Z^W8xD z*5SK_EHfMLwlZ#QpKaaeWSe++$NA9sJ*)ebnKTWQ2s_L&Bt6ry9(8>l?fwMPIcLeA 
z;mj{L$cy>T!@H`wE`1hve**6g^W3TV%eN~TSJwslCAnYXEafnq<&;_F)Z=|qmQ#mv zlH5alhH~uh{W3nwsz+J++rPp0H2I$VAoN2q`r&J+x9C3SYusCG{I{@e=)4OR z4CY)teTfL4>!$>KHgOJ}&3BA&n&qZAjWpnNEq%WTkLxkkG>>r(d^lkHnkh5!>nL&V z;Hkc6gwqWeo0`*B{*n*t!hCasyy(M5_*{>%srh`C{KdD8aJm7yYEGY_oRXj%>WVtn zu^i!Z1Ip2SK1Vsi=OZXb#*@wWig>i@Yv4BdLEZU%+OvI^R`)se4Zgcg?n`}6{RX!K z?ks}Ojz#d_aSP-vkZ%X}i{&`rX&J)H5WX04F?@S0g6*#`ddrI%26y3?oTXF7I=7jNoax8J9j7jL) zhWi1Lmuctb1otP(7^uY<;TR}0`5iWW(464@MAPml6Z;c|?{yfrn(t@P%L(+#J&Rt# z|5}V?&HtcWTQ^$=;eQ?U)cik7x#wyR84qnn&H<+lkI}c-XDE9E87Gw&;ut<3OK`uT;d66>@_D*o1o63~dukUIz`Qn-jEErcTWX+A6UL4X|t4CS0ICVhieF>u3vDf9QVD*2ykg zY(WF`qb<15`il){KzZ5*oTEIk0Y_->VFPxcFLt0W)_AY`yKuY<$7{V`rECD}K^w3e z=XRt2)_K47_u_aj;;r|7tK;cw3-d#~`*7|)oV(5YC;xsN-;d)B-tY7^O<${+AIA1R zoZE-|HhRDJ_v3g!;#GRht=B&0hj=wOSA%n#ynpj+aa@bz+r58pz4kFbjO|Bo?h)j7 zhxeb_Mj0+PCurN`UXE={zdqbgXZUz5L3>AAAijPKH=7f*b*62a6Xd#So8|<$ZQ9~k zf?PIj(VQTcP2HOlY)e!3<^yUiPdTxy$>r_KQM&PW7QQ`>Sq*oaf$No`(;mi1wj` zd@;uBXCq(kJ0DILE!G91Gm%%X-0wQbhYuGI6vIrTO1 z%l)bxAM}gDI@O0}Mzjy5plt3775z>v+t!PFeRW$yckLG?DBIT0{-!$jxQkBojl#0U zAAxg$Wplr;ZddfP_Mvo!vhDKd6GfL7?DxF|?NIi8S8es)@=I~N_(!g~+dHoP6i9!G z50s@Zx)I-N@yifz>GSyTqjysKJu!Tk8Q~t^a-3WCGgrku2X8d>d0_?o4^(+BzG2(i zM%>%G81Yv8y{oo)Dn_2%iv8#Jc-b-T>rLf{eZ5N%@77mbRqeIKcX4+#%>(BCV%jd+ zAZd5nANm*|hiS`bf94}Dc}x4X0LSDf`OSVUgRcSdc_G4CfAW^?v>5x*$=90@&bDB? 
z(7xDwv30P^{GiIC9rtFZp)w;#jT;J6K|5 z{Zh2s>Ni}q!@D#_zW^MQDIZEEj`pFn7QEQ$eKIEg)+&&0?QdMQ%WD^-zcuP}st={5 z*w0UH(pK<1d?*=PfqY>PYH6p)OLCNYA?yJ4g4DhSmZ8tdS=tKL3qF*LtuS#8(uN6l zZCgQJtN<^_W7-PV2|kpJtq95{4@AFH%eM8RtF}|6=x`0{3zMw(q`y#VKZz$O6xIZ@AcZp#E;T4wBP!FcGYgLLyUd` zqz`RBN*lnNJzmF{%=zg@X*tqu_*Yl$^>XmdYvIhk$fHvT27#g5Ua{Ww5y~;k984`tm_9KbC944wo2tWEI+M%m2D+ zzt=k!LZ6LCTDh+&GjX&ZrLE}WhrGVA)ri0K9aq(O{j_bO4M_P>LVVZ>?MG{EfSv6R|$1J)sIpo`jZ@`jpBLuQPMVw`XG-nNUvk z(*C?Zi6h>Q%fpe+dV}@-#BOkg*Qveed)gG!*DvXN6qbG8+qjp3y#GYHA0=rM+7aft z4?6NbWouae3a_bSZy5#+lLdTwhI)_#%_d1Zf{sZ^zkh{Sp+MvC#L5zQ25dZ!l z{yxN~t>u`Zjj{P>>uKxt*V2b}=fk8AZP7IFTi+ASFwY;>qwT6M39H*6cU%@$4#M>_ z2y;H&j}rQoHfc6(Kyd$*YvLaw@7S;6M`;7{+jT`)`%$6|NcmAhd_IfdwT^a+*D{Wi z5`^=4L76$9?neoIOxr}=Z9huvQ`#Ewqr~&@qhx%%nf$aLC0++P#%No~Ve(1*D6uZ^ zqh##US#)EY=w}&c(T#SBb%Y-!?eC3kg7^n*{pd$Y^q`&6{QzCHAEmRDd#*O2y<)j? z@5Huyw{mH(bQ=ctY9rcX_f=sfev~SK_jU>6dn0RJxvx9hDab#tTelouCCBuvM+7I0M_)(HJJ0Cwv zsk~3|qr~ea`A+*xzSI7ar?g{|KkXRp9ot581-6K`jAMoE!?D8lA;;MsY)7`o2kA%Y zRIZ*)?^E^sYw1Ya_F>nN_HHYlHUHmtg8uJ2L6zXahxuDb=3e3B{kG+Dz|ro4;ln5wI9D)y81(?|As9Z4dII+p z3?D{U178iC@|zP1>2Jz+5WhCi;Y!50QZRJr2;5OHbhrxmD#6ep2RKJCbm#=!NicNi z4BS~Tbm#)y1z7eQ(T7n7gk?ep*>99d9c2F*{U}Az4pHf^t3&$x+j)ZF!zdr;^993) zQ7_ZgpgAcRI%rM`h7OvOf}w-vByfrk8Pp-ghYafQL48v`>uvK*`F_&h@0f4OUn}^8 z`KEk<;H3Gce1E|q^X=_I!C~|5?E!+b%(u6T1h+BY-W~{?+M^8}Y)+qm4te05aQXyv zusMAKI@p{(0Uc~kpMVZFr%yl!o6{$tgK#>vM>~T$r1ofMPzST`|E<7}((%BLQcK`R z>15zX$%)O@@94XD21NfP3hTs_*;xCa63$Mwk{=mX( za;gwmcuh_X02W@8Q$@hSYjSEJFnMirDpNYxoXV6A!l}6C$b0;zyv3=JXy1|2=jhK- zz@r4CKSu+P7L5KJ13X4B`m+SML@@euEbv&t=+AM$;{>BWuLHgg*yfbYYjVowH92MT znw+wEO-|XoCZ}v(lT$XY$tmHr&8bZ3U~?){ItZuC{2vhw1mFpR(Vr86CkjS?P6D1J82vdJc(P#h=M>;6 zg3+H-fu{;ae|o?ku+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^T$6!RAz^ zbP!HG9oQ+e|EwkOqjWOxqvXVlA0;7i=2WJ15Kfu>XU79SN-cpOrIUdlB`5aFz)l?v?3D4N zlpRa+qXbUPMjy|XK1YAf0iGim{W%wSu3+@%Jm7hP(Vz2y=L<%EE&yI282z~rc%fkQ z=S{#j0o$Cic}-5)ye6k?UXxQcugNK!*W{GVYjVowH8~}`wmFq49c)fzN{5fSAEgL5 
zrG}YxtSb1m;hKROVemF!t{JFN2HykBH3K!qVER$wnt>W?F#RZT%|KmeF#RZT%|J~s zn0}PFW}qguu3ON5PzQvy^f7VhM~UlLE!P-KKT2H3YPr^6`cdLKR!e__=|_p{SS!4n0}PFj@2^AVER$wI#$b&R=+u!)IruQGO2^CW6_UN6m6rXn6{!HCC>Sk zXE6OJan7%L7f`qz)g{I@VU~neK0HZYI&>Ao2}S0ZFBl=V4KrEu+8Zhu+8Z>u+8Zz zV4Kt1fNf6S11y}DJ=&SnLH1~8QU|l||9!4w1^fQVDb91%O4H}$6zBVDmBHi`=lg1n z!Q>R@`)ZxRNbPPDbDxRMuW*I&iBI-&dbDn4IE#Up-(jImP+DdeC5Uit~N-kip~> z=lkm6bWWx58hxI|Yv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&=kI)~^-SFSD)%K(GB=$|ykCKUdnjfXD z?^u46P;ScS4C3RtQ9a+g<2M{P1!c-U``X9NKKl~H!#m4|sxSGEeq|<210})^Qy)n$ z`|ax<5BA%iC4VQ#FE_}G`O04V+Q$RG8)wL0{JzLO`#R{SeQunkoRXj%>WX?cupHTA zUx#w^9{V$tWBZknJ@)k|OZ!oZa(~>Djr60m3H?xM`%yxkXY!*|`A6eNDHR{@6qy(d3P$eZt4pXNvDxzjm)3)<<{JJu8?(BY) zFgCRh2`m4UAEhAQ+#s(sKS~&z+KZ;l8`a6O3V-O3UICf=X!YKT74_}fq z2&ermGv^y}5T1kZ&XAp7w0tQ+*ESfJ%*(WMbE1RSJLNZ}7UM+wO)>c$Hhs{XxYFzE zn|4Q;UcZ#zlsb%E&H1zFM?PSa%D`^(|*CO4@J8UMIYsR)BNE$9*%zPvKjY;(jlJZ?_pc!j>7JlVU^QFYR5XbQOSfYzJ zIN)=0g5!hxW3(?N=9S`ebAsc;aJM<3#|Pr+>ksooJj2~%362lh4`?{soZvX&o;{l% zc0JElFS{OR)7Ol*)Ah6UvGw3J@x!18ZH4xe^9S%{vhhtY8eA9+F7)wc`ne|TV2P3S zB{)}tbA7$p#+T9`;7iGzJ2VdG#(`hgcys;nI3AC9{k-`p+fe@p^d-hK%fY)nwZ?ul zCupNs4#Ezw9LPGu+vWuAQ_7bTbQGIlxO^-@8zuL6^{^S*CYU@98csJSXsb+}niI5D za&OlFyQXcz1?nTVpdPkP`%((Zvvsn|7F*B&{j@Ko3oK7;Km*FtHsBoPi48bHdk-5h z5q&WceQ~X~(4UOs$v7_X7Nu+e>p>eZ73Zd+|N48k_|tGa4e<)SvQ}S8%n$LVg~)G+w^rLI!^P$VZ5!={<}vdOd?^_|9!t>P(H4j=CBx0; z1Z|yZo8|<$ZrY|fL2jG2IF=xnOuwlsA;mSEe_#@Kwb^|W>R zOY1=!^I^~<+ZH z7sQh|v0&GN?F*_F?CIegY|L`L&=_^LH}=TNZaM)@lsRRuycOcNoFT&@r!F_IW{2}A z_MN}f*eUNDRh^ty8$0c~dHc<~Zm#aJb5V7toP$-1qKm2)IlpNPIlubWqFsx2;8$Im zoL?13S@S!orum)b*UnF>Cd@gU8cVuUWmzs_82gakj^JeD7f6nfvXQsy+lga%} zBiKj{H0EYvIU$QvO3Y zLOXw|-yz!ff>5B2kRiR=2!P5)=Kcq5OawJ zO8ePtuE9^qvs^u=Kp5BbOCT{9I8iH?x>GYcIlUNtyI$sc{nhZ$vxm zwOinL^npgpOFZqFcoNf5s*h)>D* z(w_Bbf9lnM=YrCn_2@&k6QAKpdo~1NeBLJQX~&cHw8Nx5?XWZszJ@l#GY!o_NXBg6X$}VQSq9@XPo!_41a9RZ^w4b<#4icr zmj&@D8DH9r&!030(U;O@e3qiQg>%wobwL=P$4Hym@ubb{FljS8%x<%WKbW$QVC_Bi zy`Db={e^Sc7)zS>sFUtjw4Lb4wO-w?I43%CU72BAFBTo`c%q{nCOX<-93y$>yMD4d 
zeJw=B6W<>>ueZT7q4xaTBSjq{t$r@Tqn@kAc)s5Q z$N4w7s>F-?EUP#ChK}{N`8^TddzP!ldDS}qzPLxc&fDQ%jqtwnTs7X?rJoD%9;?H( zZiLS+I^bM~Yh1-=6<6Z8V}DoiSq1N#a|XE)KAY%>@SK6J; z&mwXV&U*6MLnj=wo~%F1%7w2`*1t2tSx?rV`S-+q9_!x);jA<3FKay?R{h9>4|Dqo z2g#j&pKv1+yxo2j?c47rS55TxYQ7YJFO$6c{5*sgl)7rNcfaOKA^0-I+vn#ayl{!D zrh5A|2aAx8=hgVV5MH$0Rnxp$oli0Hx!!xk?~U-{Rj!)u9nkpRN=4 zj=758uNsZ;are3re!r>!;U)LDir=d$LHNW6+z7u*#j>WNERK_D$eVdhzZ13=a>m_m zgmGsgZ`#+{;5p;Z4dTxa;x9z}{>XpaPFMS3CZCGHCyo``Tp26uhhp%FV})Z;#!4~5 z2j6DK3df?1mB9!fj=VWmI2L8B3`IEW%l2Ve<4_j!9*?}4*Tj{u#gLQN;MxN@6?v2U z)4+YkpB}`Y5yYQ~_#D%0XIbm{sOnF?f0Wx_#y}zZWBeXmk0B?1+BLp_1Kwo^{3Hj& zuX7RNO@7c-10knA9K;7di-D(o7T0OW=?Pa2hMe(5T%RFlemRH-o(~1)w{M0)@;f)f zA^FXk5s>@_N;pepe^6^ZyiRcJ^18sWN^YjsdIIipJkWnM*LrxJ;Mk=t;26{K!Cl$_ zwh_lX+lXVFZNRa}Hqh}f2H2Kt1GXjGfNjM#U|X>bK1lz-AJ@8ZTzy>YX3rV*w~q0h zT|Li+)bnhEkK`I`=Gnu7c{alIJR1^ofz{}s&WkXa!`ks=4r_jx6VB(w_Y8u3onY;srr(pv{8#ECc5cF;8-eQpXS4jNg>iZ5G5Y3F4Op@hKT!+Kk_A)oVTIOKCHHgOqK- zZ>36`@q4EX<9AG@&Fpy6W_FmgnH^@g8NVONviZ%*)LPHVpdWB9wbp|=>3&7qiH`h6 zqV6x86CL?IMur_hnCNK76CLd^(a{e3Ft7FS-7>x#7Uer(SJB5VWGDKm{gQrvEFbTW zwZpq-XM68VhZhF#p_K%3Mj*=qx!yUh?`iOir^n7-)xEm+s_EPv?{#&@`&+IX&kKi7 zf3GVD*Y9s3ZoU<_7tZ0E5!+hdo9b1Q-wSU`bmho5pvuSBfK-c6Bf7M3HvS9<%W;& zp2oX&C8%3;_%&V<$IeA4r6e6cjd!u~qS5%%aT6|jY;%SVIA=M3LODZ+c~#C4yi4{Z ze&5D>Yu`ERduylZh%_$Jgp*;s@0ARVNRGgJV9v?#h!J>4>?!2;UHlsHYr^ko{GP$@ zd-#37aa3}Yes^qiaH~g$KXM{$pQZqemOk~vt8^@%fbFM zM*-v6E4i5y&h_sr+0oRlM{f3UoNtHoyYB;jvq$z%c0=O4B7&oRccV%c3FciZpV3sdM2Fn zPn6GwbACtpTsY_VlrM&J_&iztBAgREPtM`oTZ?EDInZSxP1Y@1(0 zvTc3?$+oG{?=hLWKT>n*T<*s$8aCuI9uQN+;1Hx zm-2Ux^X4%V_Ry;)?4e)beJe(YyS3&sd00%H7#^5LmY##!tLZNp)`3Wb4KOhd6PzblG_zQ=+yO(8k z3l$Ok{J2{C)87wU^!hV*6~F%P2fx;T#kw^W{g;$)Sh1;m?ehMWE7vYxvZ7*b|MlyZ zl&)Icf6dC`!ovQ`)~qNmU3SOP(q#h%I|cnWt|(txv0lrT)V1b!|>d(*N@MgHf?-v(ZGVjg2LS1D=I73 zZyesg|MHcUD>f}ISh8+S|Cu*jx2FH<)wi#?cKwERw^l5vEWpX!fy0&zUA&}t(7^IR z#RG>8sTee1z_O*~LzXNqgd9*laL|xtLkAZRExKmL*xA=jo-ls+(g8!33@R@#tQc5P zUNmgU(u(53qGf}IE-M;RF>u)8!G!~hE0!%QFB;N^?Q+IH-7juiCtj=@G8KoYeYlMS 
zhn8u7LxC>m76{(=;IBmg$G?KL>nbapf~9w^-FW93r(pToO$95;H?DAk{|v9(;1q0F zx3s*n+$pH6*j%Y&m#@jNU%qrHdI#YZi#IJVUA=NGb4(qV>KxE~Ybw?v zG0LedUkted;pWIlr*z@;LFFqeD>f7t>3%)04xI2FlNGF5>XeROFn#RxQ?8T7c<++! zj#h^~5B}spWNHAK^D=%UI@5AYN^}ez^B2g;0pvJkS#ny&?_~)4b27P3(#tp%84c3Q zc}X7x!yhWgYis#3u0`es5hcCoFS0w%hpqHtM?{Jp39`|3kYmbT__1D+zwlIK3C`Jn zB{slufBcv})y%^Gz>vi+=2`3S6gUW{n3i?mpNy+`2NQ9<3D#}ILutQuLHOt(Owu0< z#^tdfeLFF@LHtvShc6qWe{eb_B>kxrJvIG;Abnvfr8yXQrHQ%nlyFHelIMARYWf2~ zdaT=}{Yg5J?B7)1SuuxpG%?D8F!Ge~r2T}yTM#B~7ev*fHb~D6T=t)oD`C|*I8*v4 Q)`0kTm;T@S@6`1FA7fzVJOBUy diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv index ad14180b78..8c5c8bbe88 100644 --- a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv +++ b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_fwd_bf16_varlen.csv @@ -1,3 +1,5 @@ dtype,hdim_q,hdim_v,mask,knl_name,co_name -bf16,64,64,1,_ZN5aiter42fmha_bf16_pertokenBf16_hd64_128x256_varlenE,fmha_bf16_pertokenBf16_hd64_128x256_varlen.co -bf16,128,128,1,_ZN5aiter43fmha_bf16_pertokenBf16_hd128_128x256_varlenE,fmha_bf16_pertokenBf16_hd128_128x256_varlen.co +bf16,64,64,0,_ZN5aiter42fmha_bf16_pertokenBf16_hd64_128x256_varlenE,fmha_bf16_pertokenBf16_hd64_128x256_varlen.co +bf16,128,128,0,_ZN5aiter43fmha_bf16_pertokenBf16_hd128_128x256_varlenE,fmha_bf16_pertokenBf16_hd128_128x256_varlen.co +bf16,64,64,1,_ZN5aiter47fmha_bf16_pertokenBf16_hd64_128x256_mask_varlenE,fmha_bf16_pertokenBf16_hd64_128x256_mask_varlen.co +bf16,128,128,1,_ZN5aiter48fmha_bf16_pertokenBf16_hd128_128x256_mask_varlenE,fmha_bf16_pertokenBf16_hd128_128x256_mask_varlen.co diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index 4432d9701c..2ae3c894f1 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -347,33 +347,33 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # 
--------------------------------------------------------------------------- -# Only causal kernels are shipped on gfx1250 (CSV registers only `mask=1` -# entries — the nocausal `_brd_v8` binaries were removed). is_causal is kept -# as a parameter so the kernel-call sites still receive the (now always-True) -# flag explicitly; if a nocausal binary is re-added, just add `False` back. -@pytest.mark.parametrize("is_causal", [True]) +# KV-length constraint (mask=0 only): the non-causal (mask=0) kernels only +# support sk (kv_seqlen) that is a multiple of 256. +_CORRECTNESS_SHAPES = [ + # ----- Small shapes (cheap, GQA-light) --------------------------- + (64, 8, 1, 128, 2048, 1), # D64 aligned + (64, 8, 1, 128, 2048, 2), + (64, 8, 1, 130, 2048, 1), # D64 q-unaligned (sq not mult of 128) + (64, 8, 1, 128, 2300, 1), # D64 kv-unaligned (sk not mult of 256) -> causal only + (128, 8, 1, 128, 2048, 1), # D128 aligned + (128, 8, 1, 128, 2048, 2), + (128, 8, 1, 130, 2048, 1), # D128 q-unaligned + (128, 8, 1, 128, 2300, 1), # D128 kv-unaligned -> causal only + (64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned + (128, 64, 4, 4096, 4096, 1), # D128 perf-sized, aligned +] + + +_CORRECTNESS_CASES = [ + (head_dim, hq, hk, sq, sk, batch, causal) + for (head_dim, hq, hk, sq, sk, batch) in _CORRECTNESS_SHAPES + for causal in (True, False) + if causal or (sk % 256 == 0) +] + + @pytest.mark.parametrize( - "head_dim,hq,hk,sq,sk,batch", - [ - # ----- Small shapes (cheap, GQA-light) --------------------------- - # Catch unaligned-sq / unaligned-sk corner cases without paying - # the cost of materializing the full [b, h, sq, sk] fp32 attn - # matrix in _ref_attn. 
- (64, 8, 1, 128, 2048, 1), # D64 aligned - (64, 8, 1, 128, 2048, 2), - (64, 8, 1, 130, 2048, 1), # D64 q-unaligned (sq not mult of 128) - (64, 8, 1, 128, 2300, 1), # D64 kv-unaligned (sk not mult of 256) - (128, 8, 1, 128, 2048, 1), # D128 aligned - (128, 8, 1, 128, 2048, 2), - (128, 8, 1, 130, 2048, 1), # D128 q-unaligned - (128, 8, 1, 128, 2300, 1), # D128 kv-unaligned - # ----- Large shapes aligned to run.sh perf_v4_d64 / perf_v4_d128 - - # Same memory pressure as test_fmha_fwd_with_sink_asm_perf, batch=1 only - # because the reference path's fp32 attn matrix would otherwise - # exceed device memory (D64 batch=2 sq=sk=8192 → 32 GB). - (64, 64, 8, 8192, 8192, 1), # D64 perf-sized, aligned - (128, 64, 4, 4096, 4096, 1), # D128 perf-sized, aligned - ], + "head_dim,hq,hk,sq,sk,batch,is_causal", _CORRECTNESS_CASES ) def test_fmha_fwd_with_sink_asm_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): device = "cuda" @@ -573,8 +573,7 @@ def test_fmha_fwd_with_sink_asm_layout(layout, head_dim): @pytest.mark.parametrize("head_dim", [64, 128]) -# Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). -@pytest.mark.parametrize("is_causal", [True]) +@pytest.mark.parametrize("is_causal", [True, False]) def test_fmha_fwd_with_sink_asm_via_flash_attn_func(head_dim, is_causal): device = "cuda" torch.manual_seed(0) @@ -805,8 +804,7 @@ def _make_qkv_perf(init: str, *, layout, sq, sk, batch, hq, hk, d, dtype, device @pytest.mark.parametrize("init", _PERF_INITS) @pytest.mark.parametrize("head_dim,seqlen", _PERF_SHAPES) -# Only causal kernels are shipped (see test_fmha_fwd_with_sink_asm_correctness comment). 
-@pytest.mark.parametrize("is_causal", [True]) +@pytest.mark.parametrize("is_causal", [True, False]) def test_fmha_fwd_with_sink_asm_perf(head_dim, seqlen, is_causal, init): device = "cuda" torch.manual_seed(0) diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py index 74887448e5..c681323e96 100644 --- a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -18,9 +18,9 @@ to the kernel verbatim (no host-side scaling). D64 kernels read it; D128 kernels ignore it (pass None). -Only causal kernels are shipped (CSV registers mask=1 rows), so is_causal=True. -Causal uses bottom-right alignment per sequence (query i attends to key j iff -j <= i + (sk - sq)), matching flash_attn varlen semantics. + +KV-length constraint (mask=0 only): the non-causal (mask=0) kernels only +support per-sequence kv_seqlen that is a multiple of 256. """ from __future__ import annotations @@ -131,11 +131,22 @@ def _ref_varlen(q, k, v, cu_q, cu_k, *, is_causal: bool, sink: Optional[torch.Te def make_varlen_packed( - seqlens: List[int], hq: int, hk: int, d: int, dv: int, device="cuda", seed=0 + seqlens: List[int], + hq: int, + hk: int, + d: int, + dv: int, + device="cuda", + seed=0, + init: str = "randn", ): """Build packed THD q/k/v + cu_seqlens for the given per-batch seqlens. Uses equal q/k seqlens per batch (standard varlen self-attention). + + init pattern (mirrors the fixed-batch perf test's `_make_qkv_perf`): + "randn" : standard normal (default; exercises real attention math). 
+ "const0.25" : fill every element with 0.25 """ torch.manual_seed(seed) cu = torch.tensor( @@ -145,6 +156,12 @@ def make_varlen_packed( q = torch.randn(total, hq, d, dtype=torch.bfloat16, device=device) k = torch.randn(total, hk, d, dtype=torch.bfloat16, device=device) v = torch.randn(total, hk, dv, dtype=torch.bfloat16, device=device) + if init == "const0.25": + q.fill_(0.25) + k.fill_(0.25) + v.fill_(0.25) + elif init != "randn": + raise ValueError(f"unknown perf init pattern: {init!r}") cu = cu.to(device) return q, k, v, cu @@ -205,23 +222,39 @@ def run_kernel( # --------------------------------------------------------------------------- -@pytest.mark.parametrize("is_causal", [True]) -@pytest.mark.parametrize( - "head_dim,hq,hk,seqlens", - [ - # aligned single batch - (64, 8, 1, [256]), - (128, 8, 1, [256]), - # multi-batch, mixed (some unaligned) seqlens - (64, 8, 1, [128, 256, 384]), - (128, 8, 1, [128, 256, 384]), - (64, 8, 2, [100, 200, 300]), # unaligned + GQA - (128, 8, 2, [100, 200, 300]), - # GQA-heavy, larger - (64, 64, 8, [512, 1024]), - (128, 64, 4, [512, 1024]), - ], -) +_CORRECTNESS_SHAPES = [ + # aligned single batch + (64, 8, 1, [256]), + (128, 8, 1, [256]), + # multi-batch, mixed (some unaligned) seqlens -> causal only + (64, 8, 1, [128, 256, 384]), + (128, 8, 1, [128, 256, 384]), + (64, 8, 2, [100, 200, 300]), # unaligned + GQA + (128, 8, 2, [100, 200, 300]), + # 256-aligned multi-batch (exercised under BOTH causal and mask=0) + (64, 8, 1, [256, 512]), + (128, 8, 1, [256, 512]), + (64, 8, 2, [256, 512, 768]), # aligned 3-batch + GQA + (128, 8, 2, [256, 512, 768]), + # GQA-heavy, larger (256-aligned) + (64, 64, 8, [512, 1024]), + (128, 64, 4, [512, 1024]), +] + + +def _kv_256_aligned(seqlens) -> bool: + return all(s % 256 == 0 for s in seqlens) + + +_CORRECTNESS_CASES = [ + (hd, hq, hk, sl, causal) + for (hd, hq, hk, sl) in _CORRECTNESS_SHAPES + for causal in (True, False) + if causal or _kv_256_aligned(sl) +] + + 
+@pytest.mark.parametrize("head_dim,hq,hk,seqlens,is_causal", _CORRECTNESS_CASES) def test_fmha_fwd_with_sink_varlen_asm_correctness( head_dim, hq, hk, seqlens, is_causal ): @@ -271,10 +304,10 @@ def test_fmha_fwd_with_sink_varlen_asm_correctness( @pytest.mark.parametrize("head_dim", [64, 128]) -@pytest.mark.parametrize("is_causal", [True]) +@pytest.mark.parametrize("is_causal", [True, False]) def test_fmha_fwd_with_sink_varlen_asm_via_flash_attn_varlen_func(head_dim, is_causal): device = "cuda" - hq, hk, seqlens = 8, 1, [128, 256, 384] + hq, hk, seqlens = 8, 1, [256, 512, 768] q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) max_seqlen_q = max(seqlens) scale = 1.0 / math.sqrt(head_dim) @@ -338,15 +371,27 @@ def _bench(fn, *args, num_iters=20, num_warmup=10, **kwargs) -> float: return start.elapsed_time(end) * 1000.0 / num_iters # us per iter -@pytest.mark.parametrize("head_dim", [64, 128]) -@pytest.mark.parametrize("is_causal", [True]) -def test_fmha_fwd_with_sink_varlen_asm_perf(head_dim, is_causal): +_VARLEN_PERF_SHAPES = [ + (64, 64, 8, [4096, 4096]), # D64 multi-batch + (128, 64, 4, [2048, 2048]), # D128 multi-batch + (128, 64, 4, [16384]), # D128 sq=sk=16384 (long context) + (64, 64, 8, [32768]), # D64 sq=sk=32768 (long context) +] + +# Perf input init patterns (mirrors the fixed-batch perf test's _PERF_INITS): +# "randn" : standard normal (default; exercises real attention math). +# "const0.25" : constant 0.25 fill (matches the cpp init_pattern=10 baseline). 
+_VARLEN_PERF_INITS = ["randn", "const0.25"] + + +@pytest.mark.parametrize("init", _VARLEN_PERF_INITS) +@pytest.mark.parametrize("head_dim,hq,hk,seqlens", _VARLEN_PERF_SHAPES) +@pytest.mark.parametrize("is_causal", [True, False]) +def test_fmha_fwd_with_sink_varlen_asm_perf(head_dim, hq, hk, seqlens, is_causal, init): device = "cuda" - if head_dim == 64: - hq, hk, seqlens = 64, 8, [4096, 4096] - else: - hq, hk, seqlens = 64, 4, [2048, 2048] - q, k, v, cu = make_varlen_packed(seqlens, hq, hk, head_dim, head_dim, device=device) + q, k, v, cu = make_varlen_packed( + seqlens, hq, hk, head_dim, head_dim, device=device, init=init + ) max_seqlen_q = max(seqlens) scale = 1.0 / math.sqrt(head_dim) sink = _d64_sink(hq, device) if head_dim == 64 else None @@ -364,10 +409,13 @@ def test_fmha_fwd_with_sink_varlen_asm_perf(head_dim, is_causal): False, sink=sink, ) - # Causal FLOPs summed over batches (each ~ 2 * hq * s^2 * 2d / 2). - flops = sum(2.0 * hq * s * s * (2 * head_dim) / 2.0 for s in seqlens) + # FLOPs summed over batches (each ~ 2 * hq * s^2 * 2d); causal halves it. 
+ flops = sum(2.0 * hq * s * s * (2 * head_dim) for s in seqlens) + if is_causal: + flops /= 2.0 tflops = flops / (us * 1e-6) / 1e12 print( - f"[perf varlen] d={head_dim} causal={is_causal} seqlens={seqlens}: {us:.1f}us, {tflops:.2f} TFLOPS" + f"[perf varlen] d={head_dim} causal={is_causal} hq={hq} hk={hk} " + f"seqlens={seqlens} init={init}: {us:.1f}us, {tflops:.2f} TFLOPS" ) assert us > 0.0 and math.isfinite(tflops) From 4eac00ab42d38f00c7869ceb43c8f87ca3052457 Mon Sep 17 00:00:00 2001 From: junxiaguo Date: Fri, 26 Jun 2026 13:51:49 +0000 Subject: [PATCH 42/43] asm bf16 mha: add mask kernel .co binaries Co-authored-by: Cursor --- ...fmha_bf16_pertokenBf16_hd128_128x256_mask.co | Bin 0 -> 83744 bytes .../fmha_bf16_pertokenBf16_hd64_128x256_mask.co | Bin 0 -> 72736 bytes ...16_pertokenBf16_hd128_128x256_mask_varlen.co | Bin 0 -> 83512 bytes ...f16_pertokenBf16_hd64_128x256_mask_varlen.co | Bin 0 -> 72632 bytes 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100755 hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256_mask.co create mode 100755 hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd64_128x256_mask.co create mode 100755 hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd128_128x256_mask_varlen.co create mode 100755 hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_mask_varlen.co diff --git a/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256_mask.co b/hsa/gfx1250/fmha_fwd_bf16/fmha_bf16_pertokenBf16_hd128_128x256_mask.co new file mode 100755 index 0000000000000000000000000000000000000000..f125b7546dc19c5dfcd1665f69fb4f8e6c5ed1a4 GIT binary patch literal 83744 zcmd?S3!GHNnKpi==gf$gL?DO=4Ky$_Ttr0V4nl*-?H~ej7sHGKilQjs4e@jj4i}La zuQ)MeyJIjJ32`S`vKh1N&?cBnR!nAHR5b4HB;W40D~Zsd}r;dK^?TQ?EL*Ysfpx1ltVGU=73+|;eEmITx32v5ZEM!v zQnuzhL)NZZwSMIW=cOU*S8rU|;k!fbFI#ip%H?;izI(;@L}mFc_pMsBa$U=+Up7^* zy?f0=ZP(o4|5?o)|DV;|?W}8VcSlFgSapSt$pBGx?J=pk<)P(fgfwx(L-(xw)~c(A 
zWl9Zhcd6A@siEgoYWO*o8huWs#-CHE=sA^|a!#eDpHrz>=TvIWIhC4!PNf!~Q>kU= zRI2oxO077jQn#N|sWsYj5dwc(sfJ$N>y&eg4NlAE-#V7xlywsmXoyJz|OmABos z^6m}GSFL_<<%;E8Wq!#=gg-aS7uKzeLfM+tx82?0hpp0C{^^hh%I;seYTa7&COaka>X7EHxM$s3uJ~)$Ex&8+ij{fa3!V`+ zpodp(_%YCJx88jP1pn~xaK#n3t$OgPVIzj>!^Hh7*R5Z@_U@VE14Ca`-FxxaJ?zPI>cl>TTzo zN@0v(h5es-nsq9IqxV<#_Gs*L#@#?&E-s9{WAuJ1VUgK5|8crqQkA#}0p=R1SnEpkH` zmn+^4ofmRK=eaXOxbA1t2EQ9BcAS2N7tAVmJIw5S!LCi6FDRVNIzs2Wp@Q=+2$j2q zCOwa7!1<5`1uvrBIBfho*tDg#Yxx%rPI9&$jE9cJK6c99Ed>K8%R(XxU6JP%itNxq zd?8&FkD;AK+lsanmG6lc_1zMujSlYFCti3k z?bO$f+gmd+?d+@VIBfp?Y3F;jl|TOAWZHQJeRt7CY3EOC-2x{s?Nrw~PC7j&G{L_p zG~SQko_U4wSIJjr+Me3Cwz(}`y;S=}*if7Pj`;B?xg7CRagjPky$TNd zoxmA#rE?ryIp7>O@^BzO2;{#8vMG=k<%wQ6kc0D%Yn@@I$J?cf3ED*5CCFDhzFb~-u)ulo z;A^wvwdwS;m|IuXqqs9=`ld|-E&(?$0nZ8x>!Y z{`s5d_CS7*X5975MUNNtDcV_tejkMXyNwB4DM z=cK#UJjTUjtTW?C=w4VFy1;kg&q5=(z7)S5@*Jmxd1!Y$ROpA8*WYmx7jHjUQtm%q z7`^o0*#`Q^vA`VyjN72x$=a-ZU_1XcVC|c@^A@=NH?_$x&EJ*mzF=4SOAB|=u5|ZB zMq*w)o{q1qB%NAZY6(S-fjEMTW;Qdb4A~6%PP8kVNdz8qGjdFoWHNl zb3XXdvhB;Z;;&+PdU1IiTwC1DJ-E2r;>yKo_n@hB@#0F#KFOk8Uv)wwD;#&&cGty9 zSNv{XK?qqT@8*HiUA*ZC;(jq}EOyw7u(XWu{`eFyt3gTZy)gZMe- z1G|{pq1TZ1#@N`kA<99SzRle8;yn1b!eNgV77pdJ$nN0Y5mfOg`hj+Z@}c9pLw6U9 z_I9KG-K-yT>N36VEPb#$e?aWA{JzFtKdX7LqZ6(#3v8l|%zHM#%*B+ugl}Z_m|HHUq-}>b`;Mmj;D)?j2|m39veR|J#I4xr0B^B6$_v< zk~%jw>vSld8b3chEzp@+L0>rlep&tT=<%C2%^&;YZR6tOHcv>`mZK^M^MA)muH{qU zKjBAO`tUZ(Y|kz;V)%&ic?;*2&x_C7(VjNsId2^7<0KEZ*QV3zeMUQ6*wNhXre9-Q z(;?y*c~Mkk$5N49&}Zn+t>&54K^hVq>O#8Ih3r6G$ct2WGxXc5`_r?~)%scYHykG! 
z5xFNSvZ_?%zPQNhq{#g#ku?n>4=f1o`JCfaMvw-%J1R0+DsoR;WK~k+zLd!729f*s zE|4~UmO9dVKYJY$(|GP0n~KH99>QN8{tn}>9)CyhcNBjg;I9FH|A4@QFznoNIN2%G-RW!il8T5t z6ct%lD)Mk1_)rAhO6ump21&0^iae4Md9*>~2UXDT!n*T9-4|4043Q2V)I>xch>A>= ziahi(xI$YhssFOn5B<8N6ygfh)FEnFoE^&+)o(UeLF=K7M16j(-01c#w|kd7nypvd_G*>L<~+ zs-Hyts-Hyt3j_Ts*tuw7vz;dwNj*$YpKMNFi1yN+YSg3pv{Ut&uKG;pI@Q0u>(t@w zHRp_LTO3b7*1C4KYg7npLj-FSENWU^G`9?oalQls-( z(yNjp_oYNuH;CNN`Aq$~K0lE8Ea{aIk*(_Ie5U=WK5xl<7QNHf&-qOI>o8}`d_F_{ zFN3ermp_pCe1`ftpQ&G;_X9JZXfoS&wR(BXXM_0dkv`t_Zf^^d|% z)vt#R=QHc!cyWHREU!5~O?oh&O?oh&nVy2bQ~ebBR`pY;pYxgeDb&yTO#M38$@$6j zI@rnisp*`bn$G#m^m^2z`m|H^nXdXwCpR6G8ILk%y;&8kHzE&3Mb?#yH0#Y2=+b|g z^+wX`lOm6#L>_GrY1SL|d1k#qx>;{T9*Bxem5Ma$4QVhK!_PC+Pku^YekA-nL;d6@^{ewf zGW?`%Re7Cza=qbwtHD+B&9o6ZDn%HSv^;3Q3^;G>j^q1|~^UN5q{(6iD`_$A49gYF7kD{#V*LP~xKZ^QQzaBaq1J*;E z*{8AG(7jBvr!pKY5KpSF2s`mfBM6z9J?MsZI%oooH;t@fmCmU%L} z%*f#*%WqtCWBHBo8+Wv4&#ceOOP9F^Z#bWOXIXETFWpmK5S*=~LmjxsPhSV#oZvYL z&Rf!Rab4@QU+rj}=KP92b<&Z7Jf7d?y&XRPX|xp>=NCPJdnzu-b9w>yQhZ*X z(;K+A;`8&c|IB+kqWD6b9|HGL{DnLm+2_68S24~|`T_R?rv1F<3E0pDXmnnt~aO`h)R}33^0QUf9f92W+j!&*_ z$nc>Le^beOyGX~YA5QLoixn5+gb%nxasNE0KX8A=1M{2#zylOtp63h%9;o<=JZBK_ zAjMbaIhO-p4xHfwY%rYE@q!J8lZs)3;iO{NU^uB5HW*GSh7E?3ieZD{q+-}$I0>BL zLk?}o@F9mbTnzt^3Vn>x^_`Q;jT)O@nkU z4*bg}_+~ASPf(2UoCrKo zF~&0r994|*oCG{cF~)N;@MOgp&uf9N1-3b5^O~Hpc}-5)ye6k?UXxQcugNK!*W{GV zYjR3?ZF4GDHrSlXl?}=%JjR1V$Ni^_Z1SHr@}&QakxuMcBSSIl`=RH?b{Sb1dmhr} z)D(>I6diMn=XJo>DaLqC1)i!H<2em@nqrLSbl~ZVF`hGkXDG&a&IF#R7~|;yd%!lQ zY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3!uWe4{$_ATLxw1hyl`#~kB%J@EC4F`l!5XDh~d-T-`qVvOe; z;5mvho^yfcD#m!u1D>ZC<9Q?SjlediY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3! 
zuWe4{$_ATLxw1hy#TT9Er~cZ=CjTuXPx^mjq!T-0WGMEwk#6iABMW2iLfV{~k1?LF zV~+7$0K7mk#&aR?Ld6)*MZk*`V>}lFFIJ54TmrmAF~)N#@KVJX&zpd60=7A2^O~Hp zc}-5)ye6k?UXxQcugNK!*W{GVYjR3?ZF4GDHrSlXl?_X8b`Ms&&1X*CmhLIn_j~%3 z7e8K%_jn3%-W0<7It6%dr|2TQGZvgh@qL?<%$t;F-s?Gu{FBhT4*Q=I)NRU#uAEU- z;LOG7B7b{D>-@Uy&2_y|$7`wUS8ue|)pLvZ1tD{0;Es`Vta4zU8MxyFZwBU>fqRYM zM}c`};7$}=0n9T4caq>Oz&tZ>uNAx%m}ds=b%M76^UT1V)_iUeKb~*Sv6=>oPB}2o zv6?OyycwA1SWQ<5eiZmp;wuGL0P`HH=_- zlT+O9yY~qur?}sDAIRp^V5TXrG3Hsk2F~I&a2BtDvv>`h#cSX!UIQzyZBFIN2Afm4 zvOzf&H_zthc#dUpiu+vmAsKUWiu--{VZr1S_xtW6g2^fF_uZIaa*F$Xw_Gqe#r?kf zs9`h#cSX!UIS;4%oZ^1peMT@j z#r?kfoM3W_`+fKMY))nI8e^WtYv3$i184CXIE&Z7S-b|$;x(}H+U8WQY_K_%D;tzk zslZR2(>a#KDeiOKugRE`Q{3;nUl&YHalh|=LohkT{l1$JOippX?|xG-ImP|H`z^ub z6!-h?OM=NM?)TmAWOFKu*BJ9GUIS9C#Mz=e^)R$#d9q8dxFWSMZ|w9n4ID{miv9ddcPVpSe z{h?rTisxAFkFq(H#cPat7O#P`cnzGzYv3$i184CXIE&Z7%4?fbxw66LRIY5$ccyH) z$r<;;zCW^% z<@;PGi{t@?##5@{_hm`5Dhk`MEDi`6WM+^8FTkWbS+Qea8!UziXfCYVLF6 z4~G@<8=4JB>vH*0_dZCr^?pdUmG@>_X*=6W+u2sy&bG2Hww2{Cz3Qz3mhV03odTBc zKlgpp_AAT8_EGnW?Gwwz_GwRv?Kgb0Yaz>DHbTnxnI`22E|Bt9-6rM7+$ZHHZIkjd zo|E!(za!;)T=$l_@8v7sH+?;B%O6Z1doOh^}}*XNPp~YH1iVgYm%3nK0R*6 zx#Geeg*$tk7~Gt;?437VD_1$EqiAJ z${k0!T`2dwNne`b&e@jJ@i^OZ!_mIPl@qV19lrJ8^H*MZ#im-|{2{TEn}8u5yt^6? z4fpvyQ9M-SFTl7{ronMazQ3NxA0K1;6KF?wZ#d#s$hGSXIE+5&);)iu-~Fng9Xa=( z$G5<2^YhT{>=pT+2G>o{ML%@woL}O1?i1%QI;e;BTZLBF3j)gO-Q;Ycs?ru!ptJrdW&-b{Zqu1Dj##GB=h z#r0U|_xEP|<8eJ6=>xnuKKro?aF;h56I~&@LUx1f2H735`&kF>Wj!T#acexB_o-#*Xw>l~N>u1o-@272@SD6XTp9^}pUC*yiD zcz3zC(4T_qDM%meE%vA4dMd83@Rs`1aXlT^L%e1FOkB@|{*~UB{aLu4h4i6bsXrUn zvvGZucZ)v<*K=?^%v<5l!}UDqU+t~(=i_=l(uaGu`wMZs5Z5ETJN(7CUX1IJ-Wq=? 
zu9rf8ly|q`zRZ`#1m~B`m&OF=m&})A3C=H>FO3P#FKK6Eg7Zt-*_hz`l6D?TaDGWU z8xx#g($2;N`$gK>m|(w1JC7yUFVfD&1p7tW*_dFzNIM%7>=$Y0u>|{toc}!9O|E|) z?bh!m+Wam*2jB9Wxd;E9{Vjd)t>X6J`$Um@q9UtGMed7>tWJvDpAuQqAX2_h$2H#d zs*H%-9Tk}@6}cxavMMQZUrJ6`jL9q0?(($Q|o# z@Z*wJpA>n-Mct9>Lg8`V{r<}+PrLa29>2p%iO$hpC?B~tzIiZX-fr`u{4FXPfa&c~5>5pAwz=t|;H_>X6HIp+VA) zT#B^rqe3p%2YyQ{W3a9#$73(rYuoHdnVN{m1GddB_{p;P&b;Sdd~+5Rom8pFLvfLH zb{h&%hV8`n=sox1`@p2=WVIzFY4r^vkJxS4hw^MEzG3f~-Y48Rn|}Ttp0mw3!R9o+ zX)Bd`w7Z}Ats~3hTlt=8v{`MnZB)DMel9>cwj1Bo_vH6y4%%Lb@hiGM6rSL1^6l@@ zA{d_{FXT@2e1BiSWqgC*li#U1XmcOrm)sC?qh8!M->n^$Z_qeD1{8$Clf2FT_W};% zd-t9W+SwOn2HY5OCwmn>xjyLpQ21JJE8fSK{xow9-}Uu_-k?PxcZ#9?;{Q<2<}k`OA3+8);L~iTp6Fhsg*MU8vZ9o0LZKLwmw$-+c-$a_Zf%c_gk8Nv~Z2^bLjdWYK+4Z%s&CGk0r_bQt zbq%;}Vgt1B;~UIe3HzHwJ<0oX7OzG6p{|W5Vx&i4ilv z_-04Sw7VVj(`*Og3>ZIMD&^bV4*F@f1F;Eu?uE}vwX+@HLpjs`u$%t*Z0)9hW_iRu zF#h>$?WUh*yAdy;XBs}by>{I)+;s4Ej zF#j|zm;=9gIm(TFAmq;Xs(d>J7@vFiJf!OfeZ9FS$`?I3vtmKu?+0V9a1G%)r+$F` zp7%mbiHyG=f;mAy!1Ye&JN-TDM7)WNzaNS@!1>NKPUkh(BU2y812%Jx4MTmL-(2H# zT+~LysW86Ywx9D_ZKbbgeTZRUe7$Wu>sDLo_h}#ETNuA@+s^r_w$krs>Vt2!ZMSXZ z8jz_E9J6g_o^3DvKkGz%jEw(R`)zv{L63DJc1FhktNmH_+VutgU+uT;)w*oExh~kY za*f!8dzxqcf0yTZ@^d`+xxm}&M@OW1D2`RK(xK}vM$N22`Lhh$ziNZJvuX*!p=hOoVe_tA4N z_5zORwA=2N!7CZ7k9vLy@J-&In`iHt z@UP_lEO4LC_iQ6#p7i9iJvluaV@qz%c_b8O*?HhN*NpjCGx%I@#sW6+xt{)dVNl;< z)W`BmQT|HsVe&Tk_Mm<XZp;~`9B#xmh)o@<_CE`bv5<}kkju9h3RWe-B=H}7O<|FmYjn}_?tQWI3u-@ZAeJ2Au zIPW^=e;nibVCv=^!TMmgt7SX5A29Wz&$%CH*{+uD;99}D5l_af6)oGIfolirMC=-~cC>7hp0l-V3)d3f195K5 zTGFy@E!%R^z7K3~**3c^S})fU6XzLacj8{M&p7t$-H1WiBmV+_7{-74Ga=V;j{W*F z$JrBsL|)a^NT2z9z;VnCt|QN)JfDGhu0%cP?;8%iSzpI_2;}Sp_SI6pniBc@M0#e~NuA@i4P%l1rQ4jiixNdTO zbN%D|PD%MHO62e3`o_MZe_-EmjbWc4wvbt48brU^+y`@({(^nObd=vuzrj9YpR-T6 z*03)SkEoYfYovTuTO297J0f|l+QRil#&uXPU2ljH%cvL6m00dnZHr2MRg|Y{8`mAS z4RMgnx>G9pt=h))Q?-rj58H;=NxgX16qoW@ZDShBo_3tL2AOd}Tet=#MgOeFiE9z- zLA<73Ja=jkTUxb+>8EN7*Ch4@Vmz5OX&lCH>5oEget}!qfZvR}zm^~VEu{N;zFCvl zZp47<#Wm?4nE#9X@c*WKGe6v|U}#|@um3vVtVd?7U>Db;e`XzT=ZF85`S0b2|DE!0 
z^TWp|e*>Q$%pEfa;nP2&{y*l2f5-d}^TU6j{I~q@$CUq(Z`LE$Z}hPq9jCroE0Aw| zJ0#`<=K$wB=O=wT?KkrazWvkR|NgwADA)P>`Qi5=n|_!d{($(^e6uE*`hzv;@0k9V z`QeVg?_>InM=ohX>tT)ByG1$(2=^XS+XY>pEq%-=2eb9tH;CjTm5%;PW&!*TX zor6B<9P~+N^a=Z*34Ne;r_k@*v#?J(2Yu2ReZuoBZ6n*nb;;C^zUR7hx;AQ?xGvFt z#MtV^^Qu#|QQO2diFG3eS1+DlaqLgkMr{+m2t}wNcx|bt+RoxXE#68<}pm zQQO4ziS;9{S1+DzwH!xnBiqC^iFG6HS1+D#k>B<>Y8$mpT$3_&!}gZrsBP3XY2D%M~N0Rw;0)V#L%c z11?jHm|C|0-=Y{XwQdEz6*v>eDKF3A`K)bm^T6I_iv0XmMa_d`?p-$px;9l^vQEppl?>_c=gM3 zRsye7jF?)hfLAF-Os(61Z&QqzTDJq=t{5@3Rs*kAjF?(?0NElLR16yoCl$j6!%4-k!Eh2d!-pK&kl{lPZIJIFA4FWQ zjX_+mO+j2QKZxrU_wn;RzSZ9BOI)uCU*dXg^|Ru7Jrr`*V0_o;ct0F+?gG9`@gpJU zZs5BW<4v};z-twkhn#zW?@|0{$hjBzUd3BN&N|?AinoQF^}y?aCt+W?W&A^)#V>}-PeiYc| zl+9~$%H}mWW%HVxvUyET*}NvFY+jR7Hm}Jk<+aVJT-jiADpxirr{sIcjX_+mO+j2Q zKZxrU_a&~^W*_JJ#!pFHudRMoTrY5{0%KgEV~+9M0=z{r#&aw1R>c_4ZNS?UV?4J5 zZ&!@*d<^(8#Td^Wz&jLUJRb*s9N6ZR&1-VX<~2EG^O~Hpc}-5)ye6k?UXxQcugNLp zwauwq*mIJFaFyi>;< zmIQ1;X_*orujOTN}&nd=u?gHMW7~}ao@bii>o|V9riZPyF z1OA#~jOPo$FDS-%ejWJhz&59BUXxQcugNK!*W{GVYjVowH92MTnw+wEO-?DVZBFIN z2Afm4vf=X{*J~L#<-RKCSd8n%GXwW81T(G|&kWqZ6wJ6@JTq_)3T9j{o*B5W31(a` zo*B4570kF^JTq`#7tFX`JTq{A-h6Jscvka~*7UOIFs>KRv6_PKiW%37=U7d_cg2kB z#dEBt;JaeR_2M~JQ}A6e<9hKNt10-dm~p*$j@1-=SIoFxJjZIncg1>ckxLu&+#;7Y z=s6bSdaXd;xWACGU|cWm`Q0}JGp-l+{O+5A8P|(@e)m^`8P|(@e)q2hGp-l+{O;cf zW?V1s`Q5h#Gp-l+{O-HWd|*5)aVtVD9(b-w2)r%>BOmzXVSP=6>J(N5R(ubHDHYQ*+FK z@%V<{=JYkdHm4^5+nk;VY;!saY;$@Nu+8boz&59^1y)XL%z#|lpfLk-X@kU}_>AXR z7s=2^T3&f+z27O#P`cnzGzYv3$i11qm>PUXr5n^U>6K{*u< z_J`$0p5-}~#VPJ{-A`o9$tmvl-A@IRQ{3;nCj^sI-0!<-!Q>S8`);268$09__xoS8`|c%z$tmvl-Al7MmBnj}c^0pMvv>`h#cSX!UIS9A_xo;d!Q>S8`)(h>>DVUt%IhK2sU~-D*Snkz= z$tj*=xg!LVQ#{9VM`d#=i`N+QEM5a=@ftXb*T7l42F~I&a2BtDmDe_>a%F?fsa)Bh z-}tL+z&l;ZF`IkaF$o%xRyF3u-cFHu=QlMT@qd>6#m0hbAXmKcLIKn94)39ezwNL8;4&TMKHz=V{^q#L`#$g=oeSpTora?*TRG;I z-V5e#Js5``ev7IZj(o-+8v)67Fdgr69!N@lY9!LxF5ZXjn2UD;wO?2d^`L)<_h$?v z-k0r3N%=ZTYb{6Q7K#Wp7HkoRBhb9DiE|NK>>3KoICX1vj#6XFwguspd?_PTI?^}By?SEliA 
zss{g7IeB>RKMMaJ`p?bkVHT5+g{$gyg=^#*rh!Rc)zEL<$hlqHOlC-y;W#0 z^?9#b-~PyDz3YE}+-3PkFS*o|duGJdMxX7gM*A2Kf%D;k|M-K^kL7)8=Ha$ZUn*d^ zulBjLfbX%hJvC?#^~w7sKNQ}V{rnx5d|&;x@crI@`pA6+`Z|saUXl7A`n}X&exB6- z*dkfY`(Fw8T}^%m z^|xjJC(u{>_kMC_`>C(?Q~yl%Q(x^*2lcmQ|0m$1@`?Iq>VLJJ{v|e-SW?;>kF5jS#i;S;GTNq!F>G1h`B9dRlI_qa(jgj)|^)=42cg8Q-k2s++hBY&SH3T|XLn_%;#(kvE zrd@HQ!^iASN`6w;47SUx8Ir$e%HwMWr$ncU5@q)Bep;Wd5p26zBSg2FIt`+4+A#w5 z4X1s9)AgbH z`4pdf;A8Fhh~ppLSV%ui?$)8*3y^QeK^%VP(!xbL252AO<8PVIe;b~2DMfqW_gdu# zbD$3Q=Q>dY9mYAd*9Q9kmGPqTSBeln5U~%d;D2aSJ!~p+Uah^IWe@{##PAX2^A^s- zcHQy+`F6${91^SgXPNg$VITc^cjrs*B;M>$^onvDcF~`8M*8z_Huk5VGjkI5(2r)) z(N67$w4EOKm3r2X^p-IeKe6uP!0((c<|6en_k~`|IEkNF_g{eg)5T+?p7949k5kmc z?;kiG4&W%d5^;`4UQM=XF5`1YUkvv^){T)*=#EFK#_FFlU0 z;KhrcoKT^@Fu4RgTqDog%Med)8Dh(Q8S=}JOoxv;6p{S8Qlyt6{T9eu5Oi|Ab*)x<42`TJtgvvtU}%@HM3>7Qc> ze#fppmi$v)U7ty6PxfwnzPI62711 zOGP)UJxoKntoEpXt-=~${Hv5})tUS%#RvN!6 zx>@!&NS#^stN%^%836us4S2Q&+$;4`{@u8~8`ovtPw-xI@Vvw40_fd?ynB#$i+9Lh zhwF9V=B?h(jGh@6^au4eAa4WoR(N&({kXm#*DJlhGBFFtza;FXUwjaG4y4=IHt&~4kA9$v_CaqG@-{*5cCX%iuPt+|F=65$1U{;o>o;^|jx{D2|3Kyx_>Z^; znP*|xNIjWb#}fR0UE1H6;GE4o1M~RH`Md!=>5pRx&Q)oD zV}kujJ>w(DG3eR0+qPz&bzvX%Y};*HGtaoNkG|Hn-L^IJY|FU;J==EM*32_4?Sr0e zJN>b3Z{~THyoauBziqGaF|g0JTfeoo?d2GggSOqao!W1~Y{OmNOcByD$8WK!lxV}f%|{ddi3a1(yCRCM;p+&PxuT#QRvRZ`?WnLCXM&PDa# zDfAD^r$nb(`lvC%Iocp;`=xIh6P%mszw6LH#(#@W`|S?=Hv7r;+cj86jNg^|+T9NI z+bOOm@Z0Tehx+Y0th@BL&RMoY{dGOoVU{mFOS{!y*I@mnzizMH>aSCLPC>aH;L{Fp zbFKGx{!U!)1h?+-8Z!R64*f#CCy@69^6vG1>pzL>Cvm;b`-hCbu17ym?`h;cjr!Jm z|J#2S*Uv(4gV&hx*D2UbKfDWhyO4LE_s@PMt}AhUzxS^he_coWp!WjuUO;^hc>iwv zwales3FF%Xe^?J2>4#-5H74lWW$rX4I9JtQABBz7lR4Cw;9QmVA4_m_h5Fe>5gIC#C(z66{a)*X%?3VcT}wR`u8HL+aVK+qSB|rhW9U zw(Yj9>aS@Z^=#X1Th(71`=DprPJeCNtNxlc(qG&5+xF65(>~j7+fL(CVV7;MZJ+U- z=r7w&+b;TP!!^WErLWdFsyAGM7_Qh4gt-ndenpEodqqe$FP^h@>5gimY3PZ{?T$MaX@~`^a#z3+u%<@Z@Bv=p4QoI$u5*ayNRv^W&0MpA>20 zrIx-P3P0@q!Nf_TU5o=`;-!kt(J!NX=}$xMChudvLDD{0jM;0NCu@X3!8D~$%U|rapX$0|7 z)n-S^)I>xcux)0%RF*}&)bL)!aEOXds#N5mxX3!Y4UA#Ib|PMC*u+Z}ovgN`B(1(d 
z@r!oJs(6;H1eq~Op!A9D&0`W6A9~oc59s|ZpHFcsdHD0RPZ`;c_6sAt}9pg~g_A`!*+ROM8tPk;0 z!;C*++s}9ttOM~$7dOZDULc)>V!6X}namHOsbu z!-$s}=CcT6Af3`SyS^5-(H54c&!8V441B71sl$BPm$;H6&SxYzYe)uy^CTI zN!t?@Y2u~c4GwSfz8K?gDx5Q!xJ06}Zzam#{r4evyVo%mm$d4nNE0vh9&r0HuXBuX ziD(z&rWijiIyI|M{+|C8a(8%LVhxgJ;-#(ww;%Vq##jdNQpJxWpU>%BUpOZ@S0j?P zJ1Ww|OI-yHbH37#v(1Q?Dt=sa_HgadIUAR>s-#F0FZFhCoAZ`_oNY$DRPp1YQ_ZzU z=Wm0gnRuypfa{#c^y4xHKQLp$71#34V!qW?X;WmQrTX_OBMgV zZ|lL^FlOuiDdg_-y2b2x*nE!J0FHmf>u&tC87J7Q@ltO`xefmka-ZZ5$bHg_#O#=<4}#ZEd3}vP=e<})W2WALG7tXWko&Y( z6dP&9Ox=k4JmZzb7&Fz}2XjczQW-P#PL$jDpCR{IZ$PYN%v3%T=^8?RZ|;fmcFfc@ zm@iyMxYnsJpwH*M5HmI7^Y6yIpfBK>r*ob@pLHT;YR2c^gL%L?&vj1cHrFImAIAhX z)6cC#eVpT5=X7k;M#M}t{@%8q{$6dRzh`}jnQHvKZ9D5$Tj~30A7Z8&-*4Maf3CLD z_h;&Zf31ri%VL4Qsj}lP#%Fo+)8h#SxY#tbx)8IoufX=Z$3Xi z{55Y_tU=N~xEtl-`T6b(-f**q&}Qxh7~9T)4xj5ejvK%^&f$op9f*odaedSI%eZzd zkC>@uJt-BPLmN>3{{N7<9GA4Zq{zcu-*g^xJz={MGu6c3PKi$a{V2cj6Pecyl6Hja zn$BgeBWy2XriM)%JJD&k-Hc%;WA#y3W7rWRW~y0FN~OMbw}a~m+ku#=CjNGN+rf2& z?Lf>_dEP(EcJO)M^grx2&--U-H`f!EN6b`t-k0)c?0>E!Y%gM__GHY|IL2#hr~L5O zy^-emdk6e0xxW+K=QBRrh?uEi#-}5vpTO9Xn@^qyg<1A#aGdMLvsgFyY;VQ_HfhY% zT|s@7sE_4eK>4-c!{+nyP0Uo*Z{`l3@$W%;#fACqy^vck&UaaFCT1$i@wuP%8+|-e z(+TeGemm@SCe-h=nCeZdIpQm6u>w6mYac$-L!?lt=f@>e=I@bo)kC>@uZ7^{kgZfSe zc5vQl%v4i1=Ll@K+eJIP4S(PtdYmE!)Ahf^{Qis#z;qwyR}3xMr|^#7s47 zM$2}!YzNnlO#Sf7>|bpc*95yQT5roXwQK{|4%Ug7sb=kH*(N>5Z`l^ECArkb^+ zW!qY|h4JY!_kry#+h(^#>*ZRa=lrv9FSFNpQr=f#%+z%l|LtGOcO9f%#&=aDuWCKg zcU&599CL%~$b6LNGmyiWsiMDc1N3(G$j84ude^&8^v-vqQofoJ_3ghO=}-2{cOQU! 
zx_`cjnacT{*_(*I#7xDUeGq!jUY_rkO8Em10q?pp-`xmVd3C;tnaXx>Z(?GmLXYpd zFlH*B$+-T+k%r$NcPAw;$!9LkXVYJp8(dph2V$o7G%-^}e-GD9&Tp=NoZl%aUqy-f z_Hlh~;$hZ#csp}0fVy2oH-7JTDw`yBd>Z_tWRol4kux*H$D*x_-=(lQH zsg!TkHm*Nx8)BxKnB8$HpVc;|;ohemC$2$eoX{2%Ggb6kwIwBWpLU$M7O@`0Of~Vl z8$`cVTbO>TapIc9zCg@WvnD-^@q1xZ{%K>TvfYT8YT|gGDQ2n}E7;ZEn5nGW%t83{ z_QXtO{YD?_QCl%nIo~-y`Rs4*!TAN>ex{hIrv6|}vSV+yjJ3)2i1i|7s)^y`qB68jhRaO5i`}q@;+0{ zRMw4{sV0uM#ushbMr{+l$CQWgE3kT&FVigPR&-l<9UG?U<>oA2Cx+Oz$(r zOl94OnQG#Aw;acoUWx$syh7DM_-|ns$HuM1Q0j#kF z7_T%}HvDP__urF_HNlvvMd+I%9j|_QPBCz?V#F&g0WMLDc%}V;`zuDg(gDB& z6eC{gK;VIj5wCO*@F2yAS9&?{<-i#}zy`xf9WU5mIH?#m7)~mN4Th77VT0kMV%T6f zsTejGPAY~ChLgYj@}z&Qkxp!ik)fExOm$-t zGqo_56*KjdkTV$LJ6Om2(~xro@D++rgq$J3Llmb&&XvGdD$dJyh5`>&oS*Mp1$>p_ zf_!Hf@G!;a^;jORGualkgG zY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3!uWe4{$_ATLxw1hy6%ROtkF*fKvMGp} zdeVQ?U?)~#WGE&vQ{9-vOf8IM#Y_dK#$$}f>zHFauK~VBF~)NO@C3ye&xybj6=OW3 zz){5*&q=_O6k|Lm15Z|r@w^uJT40+~Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-5)ye6lV z*EXkeWrNMBT-l(Ul2{YRgP5sJLCn;XLCjPqhI1EiEfmAKKBOCyn5l)ateC0b)D(>I z6diMn=XJo>DaLqC1)i!H<2em@nqrLSbl~ZVF`hGkXDG&a&IF#R7~|;yd%!lQY+jR7 zHm}Jko7d!&&1-VX<~2EG^O~Hpc}-3!uWe4{$_ATLxw1hyl`>p89>h#-3Sy?73}U7_ zu|ol;Bxb4`lbET6v8;M6RP@hly4jOX>h*DJ<&&IX>X7~^>Z@C}MFo^ycbD8_is z1)i%I<2es_o??vWjleeo+nlm_O-|XoCZ}v(lT$XY$tj!H!Hk*8GXr;$V8%@4nSpz)V8%@4nSpzqV8%@4nSnd4`P_oBLc1WXX`tvZ zW-8CInl2a2n5jI+YPv!&W2W*PtLaL?jG4-Ftfs33GiEB!v6`+H%$TV>$7&iOm@!j% zj@2}(IYwqKZP0UzT-u=LSd5ukguZcSNMA8#D);=ZCzvr)x#xGU7tENc-1ECP2xiPw z?)lxhf*CWFdw%yu!Hk*8J-@p^Fk_~2&+jg3<^yAel9NpVCmA!9oNNj>$(X6+WK+ON z#!MwAn*vTUW-2+^6mXI;Q_0DufRl`wN=`NfoMg;YaLN9|cxUYdof0+Mw~6 za%qFa`1*|JSV4?0a*F$0ceRW;ImP|Hd#7M>iu--{F2Up!_xtWz!Q>S8`|iDh$tmvl z-SvXWDem{(`vj9y-0!;&WOFKu*BJ9GUISu07OyepS-b|$;x%v9C#Mz=e^)R$#d9q8dxFU+o@2RxDwv$&IhOlRr zvD`lsOiu9}%l)BXa*F3z?vJuLmBnj}c^0pMvv>`h#cSX!UISOh>s7c;73_ju-I0qweiosPXP9Uq$=^#N>!W z55Gm#Y(RdO%a`I^WN8P}@jmB)q~xdWM>^ZZ`>-7v+arm7VLjA?{vqC`%0yJtM;)x$GZAui2)Lq@~zs#bhalc`K{Wcu|O(cmRKOvJ5_s9QvS5#rLjSh 
z=wA~9O7vT`r$Op()gG2Na|ivZF;mHfiRE1j$^B_hbuYBz;*s|^d~;YKdB5oUBTcM6 zGv4S=jhRX=^tn#BKXB0>3QWvYyj;Y%dF0_$D~11K9_U<{iJ6LiV4Mctd(sxU_l)Pe z7xI2{Uy}PR`O!$D&-q`4_EMks>bKyd0@i!=ea8)tuYInoDR<)!hZ%jguNv)ROa#t{ zQTP7A=ua#cJElF=xsdn0;hS9x`5rslQ-k(UpS-_pgz&!4w08=~_kjz9?^oUSk%?() z;vK<$#uZ`xWA2msCvB7ZXFMnM&;3rlxRLh~y_} z8?T|WCn{-Gwr#pjq_BRm9Cg(GQqixbL|T@Ox-Qh=8NkH!6WufRVJ0RN)_xPiPwGBn zAFBVa!Mbn!c3|7-`cVCRiqAcW*I>s?-SnvyGnMN$^6i+X8~(+LnTqzcjG4;(Ow3f; z1Had@d{r<9>Tv%yW2UBy%JC0RIIZHQ!mf75O@)p0?XBXb!Z!M}OnlUUBkU9U(o8z| z#(7+ad#3Glw1M-wp7kUBv~g2$pLWGf<$d9!TE;5P#7*T`ApdkROEYm(IVPv5hu=WZ zzmX&Dikk|1TH2!ha5@{bKh7y`>U-c~lRR&S5%VjIcwoICdqFZCe(F#}^6Mf&(>X#-{sB5L|4eJka48L9~?+ZeySVNyCJ|=l&jjc;DXIIE zY$y`r0Ce}mMSWxx91B%cG|V+VsDgTcLs7xjnWdI+xjc#|`6_^SA90KK8e z8;ZQX-V}cru7`n}{k*A}IDFOU59$p^-f-v@dDH!oxE_h?VsEC2Wkx@cguV2Oqmefn zc_rQ~e=M%YqQ3s#Y@=tMjcFhB#v^Y$^aglyOdLL$V~vS!-cS=OjB~J>YdCaejx{DO z^M?5{r@()2cqR^C4Q!;I%&lXI9^Ob_+TWPqoYgpd>{I$;>5s+)=d859F~K>SdH&@y z2lS*rjwLu(rTvWw_NT_-V;|Dz+qT=bW}bIpAN6e8ZCf+Xxv-DE*0$ZYHS>JSxdA=f zcH7p>b1m(Io^3n*wry|bnU=hVu5G_W7G%DtY@R zBX2VJc)7RGk4sv0Qsn+A$eV(4gT2KPH}ylrO_jWwsmPm(yeqt=euJbPn2xmRNE_lU zlenoL{#N3qA|LPRR>~Y{OmNOcByD$8WK!lxV}f&z&!1);{Se&5vuCO3?2);1EWx=L zm$a&+$bB+*8WWt0EU*2;@+r}&mOg4saE>-e+J5Pq#sueP<~bStWBj-1wBPQ)Z`Y%r zY`di*pY~)?#-Qv%|^&DIe^Hya1 zbv^oldh?Js5A|K`t@7vNdOq}qd$(u&bqe;<4=+UCLgbC`?(i4mdNHm?dTTQNx{mfi zZz=MYqP|hy-Ns+bTsoF8zCC!Rt%r^D!!nl|6ZGvecN!C%t9pKM6gE;%=1^mTb5+`Z zEWx>{{+fMCKP>&xnBd%$_BSRt7u8?052+{p(U{us_vbvk&QqZQE^I)nBs@ zsb|}6+p7MW_R+uEw%fLlgPv_W{k3hc`fJ)qe{I`u+iQFZ?6d8* z?KD0EcG>pY_L*mM^p|a?ZI^jAC)ZlUP0e$@dGOY^lL!063lE-;YiHV?+PAj3zr)XW z$Bpb5$VecEI>#5Mm&DUhSh9V|mL=s&9`EbysV#6`tSxf3)b=eXJa__oqf)2z-H-F~ zoHEM%JST~NM`5$`BI4G(RokujhvnUzqqW_(-@N7K?KfBS-L|Zv+ZXnfFDqJBzRdai z+C1ljA1&LyY%BgMmZuk&$8oR4-Q0tVyDhF7reDInOETUsR6ka;GD|t9`w;6lKcNWsj%R>AY#_X^*4q$+BtFcE+7L z)b|Vg{T2S+z~5o~y@|hH;_p|rGtx6q#&Kq*XQE6x4ZIU&^AP`SXWw+mt~fxSWbv+& z;*$6mO1@oQk}P?wBwn(E`Yh+AJyWja$)bv)nKO4nZYlD-olFn%O?|V{vy9%Xl{t zR&j+M-K 
zu^JoMj)82)K4vT{jb&@cueD><@?ILB_H^-R?fA1T&!V%l*q zHz8f)UE1+28KY0*SlaO`+lo8Ly4s2}NG`V(SMW@61Zk5UH;^3Gn3;BrOva1TSeSO~ zOFPD;9oy25ZAaePar#=uv}+l|j{0_d!866Ga&-cixZyt`K__$VcRFEr=j zuj0H6&u4L@@i#(B?6WfOZzTmEV!X|d&2O^5HTo&h&%B@2Ah@0q<&WsuGvDtsXTXwQ z<4AtyeYc33O_e_9mD0z0kcJdt-q~JZu zGrs>r{gmiu-p6SWTuq7c`;})k`2EQ6O!71D(?kR(DWShdd6xP`z%$9Oic0>zQo)&b zWvH+1G(401%DCvavR`?|_8Ojv{%P%3p7DJj-iPn+u)p{|&d2OGykDa{+4w9-Cj^tBDPlUFYBcX#Sk1v>oAoI3nXT)EU% zJ<6rFbd*bN>DXLKeJE|K`!L}ChtjsA_>ENGDX;l3m?x*u)%UXbTcVk77z?(Ezww#* z2Ciry@i!PV-vV9!hC*HbKB9fZ-@2&7-?Gen3v~E9l=e|u^=KcprK5e+mJWS-aeL39 z4`-h#tN-Ws1gAW^j^phAGn_#mE=Ybp&&-`*{ev{?pWv#9;C)fS)s)EJUn;nU5?Id6 zac$N+Nce!=Nx?}<;5{k9Rg}Q{8U$BU0?V1Xq*XeSwmTv?Ny$2*f~zQ5N2%ayO4h;W z<+!9(QX=iF?a+NB_k!VTLgCTgJ$@MRro-bx?ig>K-xJrpCWhRx-Ugp%Cz0zy;c?#m zelMg)t_`{4y$Ai?xb8bG# z5eoCS?k>>la(T$*Z`+sPy6cr8m%nZEOr+b0P?*1EcSU-)t3xh-%f1xX-A9F7{&vkX z0k)aHU3Wt|+sxmpFT*w4%)a2gBKS8P*caWA&Nj0zSbtxfp|CG{Af4@JU$D+1#Ia{z zaR2ssv6Wo!Lg4FRX7c~AJoNFTC1dAt1nNFR1v$era?ntFz#p6k6A`~gTGerL#??d>-8j6^**cz@y# zMEc0JA$N|KH1&)|J#)Pm{Xs|{y*}j5^Y$41vCzNK`?h~M(#Jj!a{2Gk^Vyg4i}QkW zgneEDesPX)uIU^hzXpI`oFklTI!DN_LEsnX2B`G-f6$Eo*}4*{lYn@ z{la>Nq8|1O=b-ir^@l;9{lYn@{X#B(9_=QtKaX}R$LUYUgI^20z5ZZ~$M}as?m};$ zKOX55%0uBr-YfnUNS_c3x%~IzCm?~6kbA?yJ$yLeJtwX9OE3;IW`*Ute10) zyqyf*vhFFUn{`cH4c`tq{VuF)kTX#?=gBP06Y9?n^ydWn^Po>(z`4q~&%WkdXWz11 z=FB{(m$vfX+|e`hKc+Tu-u$tRlg=rgnGeIbPJbrkUJW_(`GB+F_Hf`?U&r$SOfL(iW0c_{Ao;ZmMQslDak+FAh@0q`pxH0 zV}kQ1$**xF|3E}=ijwu~Sti?O&YvW|E-LwlO9j_cn)<;3o>5exo#y;W@@wLf-^%{X z`4ie}&Ywj8wDxDtp9Ti!PXpz=q-tPr{xq=pY^8FboSP&A-Ks#h8oGM6k_>dJ0-fqW zrv^HDw$fHTdbZM*j-IWwrPJ;+^BLgT^x(_@5`5Vc5nM$HENA93z%xm&rbPPwQo%Kp zz;b3jBj6d*%ozeC#&LI2aFPSC&Qa(n&uZGJM|swkj`FN6 zoh+ZV1YBAZ_$)|p;!s3z9VM{%tR>)*q}Nj-{Ya_cqm;nnvz7#0LYnbekQl22Nx>;f z;AWq-B;b)XXcnw%B4!o7jh{X z=vD=~)zDQgB?FzRK&Lv;sez7isjYgHOKs^Wm)g>?xs+NhZL3=yoL{e&wjEt3sY zw2#`-(LQQR=l{=V=KRKi-zF6Cn}kdGZ&yKH#((eW+vfX)5`4eV5#JJ=>H7zh9}T{T zm=nm-K;GjVH+vnNHTB)rzoLK7{<{YB#&-q1@r^(z6fe%t-`uYkzANa}jLmlhJ@Fku 
z&*rrL(8a&mxp}~4_{N|g-qw#GKZ5)VE{GRjc;V*e_xk(qD(R2>%_S8j$crOS?;mn{ zyzTVPbH3qR_;x6r$IlF$3dbGx|Fw5*u~AfE_-xPgVym*igY%*+rbG>KZ)`8yhIU&@ zg5j!>2Ygw!JGLD56XjYga<&N){9Trt_qGu%vC0}tj6=mN-YQgLzXu2!o~?$2ke-1Efk?ZNiS{WCvhThY!o z#2=xf*gm3;;-P(oPY!&z!J$7eY?VeObO~&TS5`u^umkPwL~KXwK-`768*vZf-u9`+ zR8z}PWmK732INVrV1w4+u{sK}C!IvKQEmE}I*O_)OGi;%y_(8JJ=(L|0!F~J3v39i ze%0jlduV5Nw@WQWtx;>%*G{Tk{k45Z`)D;K4{$*|LQ_%*O-YeaWO5-jB}GrIDN)$} zt0_@fwx(poJuXJjmDqpAbueYft4Nh ziH^KM)Z0U;GSs(EB-<(|m4*tX0RiKW!?ZN$Ol#GM`7>EKKU zXLo^jf$!F=>=*!lclP(>ljfDy-_$p8RgCXbyC}bntMsly-$};@6#VKxag$$N@T&`c zb-}NGh@1TCf?r+ms|$Yh^W5ZD7yRnrw-d+FxtX)0z++oDI|l69&e?I`$qvrCfW+?! zAn|(=Nc^4x62HVh@k{&@zr;WBOZ*eR#6R&%{1d;#Kk-Za6TfTC|E3BKjh4TSYwz>- z!^Y?Q_2T)J=9C#Ne|LKC^JieK^=JM1yN+id#y&az?Pe5{nss< zwoc2Yt;@1$gAduX!Dq1l$uv##rFv5R=3t8}k zIVGXSOeF2|jQW&Me>yK`lg5H|3+B|!eutL-{m+4T2kW?piy#vMGdNY*LG~Y6M!s}v zq?_!g5@!fQ>Zi0V*_`aZV^DU>Fp|<=_EU+)!e7=)e^HEenW(?s-@feE5*@-&`pf;7 zI05xM%U|9{B+C1UXvRDSnI?P#k&a8YFRxRH0n}xOKz0Uc7b5w~``o?j|3zWwquQ)6 zEL(yq@}*-igX~uU%TVsWyiZF0?83YXNM+JLCHzyu{|h=nSz*}bF{ie*|5+wxQu+_S zNQe6`5dQ9A4Qv+M*M^l#e~FDF_`gzPS*&}4e;W1j++~|?75)oFA<;l~l0SLf-dTc>!RwlmY2NvCsR`i~vO!1>>It^J*>gCL5wM|~#G zbMozX?RELqTJQSyUTg35?bzg*Q`}IfR5|7^?EKaV1;-Sn!EtVm37wwBun|s2{O{y^ z#>vKcrj>Xn%O``(M0P}6{xMj})Bz{`d-6OJAQFY?Kc<5Uj&r_-juSaao)Z)((n?2N zolAoA!_wu3AR8wrSITn{{(jO)da2)>f3B|7^?RS?1=={*Ae}Bx^m(6>W$E&$F#k@H z)q!FnrR_T)>J2!T3pwGcNmFOfbDTTAS+TZq<)(^))m0nURjt`{@7<3SRBT+a=?$m* zwr>_xEUVsJv8JqI*|Mrln;zR(RZ&@X%f?mJRX9^wxe;Myn>JJ|tNQASHS3pFtod5O z`sK?vRaHBW6l_|xwW{OS3vRAhv$?8l^{RE1-!M_imTq3YylUggSwE_?UcYY5t?!+4 z$3I)no&Ib&=X~It-*-djo-yls>688@szAPsfy1k)rhl7HRh~Rm7Z0qNoSR6+F7NVc~+@rpH-^4XO(KpFhtUExgfeEs2qwG~^+manO(F1uy@#??XIIQ4?U z#cN%~+NxKJ%NEZXR)K5X#^S=|Ygbm3EnPmOq-;ag#_IK}tJX~P)2(Zlu3z)&@UtnW zV0Go;g3aqzEnmNJt*${?dlf?8F4(-OYExO|t*FPUWo4VHE0(SPAxwO#a8&ZI3T~;m zxoY{w^=L~rN$%qX?G3SE<9c52*KaIayS}n2^o`&Sp&IR5RsB7n70cFL0l^<`9j>@y z#qupfiiQ=+t;Ee$8#k?5ziv-R?n`jLajHMw2XzkPu0-zZf=DHh7vWAU9^9Xm2H}h^ 
zzitdG#jm3(W!S9VN8eeUQ~XRA$NVvjfA3P^Y?|9Sn?&d%qk{Ec-_1JpK}bBfU$r8n z4v${wIOCo3pM3HnR(1vZKi+wNt-C<^juUT5D%I&kGIq6lq4LAdpqHb{MgLs!vfIIR+zx7ni{rjJ zY|zVYp5ye*K6hrV>NunGxeYa)&&{62Jlu1Xo7Lf5w^n8A@DRg*!;o274+m_S+K8kwz(?2v6_$BssxbFPu@`5{aTzTCnz;m??` zb2}M%UgwZ=K{6VTp`Otl(QVP%{qbm@ZE@=8pv>O!?3SeSi{{af9Uh-_4m5WvntOB7 z`9|};@4ay%>3kDycmDZF=dYSomJ>=k2b&!ynVjv8^Urt3_)(M@%8oy-c{_D~^NTyw z|Dc#-T6PL#ZXgQ-SsKXMfh-T?hCs#xc~2l419>ozPX_WA&M~$b>m2QB>YAtLfTmvk zFM6b*|3%S;XnA=&ucD%^Z>L8Z`gUr^eJ*^Yq0fcwxOZcIL;hzT!LL26*7)eB5q$x+5ie!c6?bPJd zyO8!o#nh?0pvMcQn=SM>v{hiaL567I>*7CSDa&7 zwg&R8Kz0lrlQa>h`w^#C2^BN3PcG&gM1sG=UgJ$fU2^oSYnr;j2|-jT?3n&a&) zU3grJ5ehx^G4gfJahxAQU*aw}C*|Kzo86M-Jlyistax)Wxff$-$S-ixnKZ4YrvD}2 z+$G>oc6L0kLx;L+x;)ZwO_z4uKZ5fS;Eo;RdFPx{cTIPk@7|8P-VnVj+B>>CiuN3c zc41jb=c~=UASB~?`?SoD?sF74mb3p(7j z*OI&=uID)UJcs(m-E80Gxqgn5xNv7neyx93cIl=6$vV&$jsor&U=)LNCz{jF1Ka1H z0+zOkJ1>I6f7jf2@!W>SZu1(F7te2?Ude6?w8VJ1D;cla$FY;F8XjlonK6`S#t``J zba9-#s;YSY@Zs@V=P>en9KY{j{C!`KzbcKNK-drPYsRkyzbEl~3cnxX_oL>q$+4Qp z>O%_xk@(n&TDAwo!^j?_nq3D{J*Kq z*?Il8>vvvX-)F~?`kV{)*Di@Jsa@jyra9!i@!cgmmu$zczAU-0HV&>W%uy{1a~AGf zm{ct~&xH&3QTA?(Hhj);OX?j}v{NZu(ehpnWw|(&|8fYNCaG2 zdC&1sPDIp}c_ zjxP&zqK-VbH^4l1QE;3c=)3p15wVM&EApxH-sk$`xR2(IgZk5UosW16YxmbicjB5e ze@;Vm!L<$1or~+EJM-#?fe$5le^VQu|59yy!OOMr>t3&o7ayvf3%(4CM|b7T$%`kW zQEkVv^G3xxBuCe=L!$SLtDlEBC5&@zdz_AWQ{v|&rv`Cm)YDe>hg}wXJYq~u&D>Gn z+c7#mx^7&uxfWSD82`Jfc#V$o8vS_l$u|5w(@aZGGpu-6?VR~@YUjk~?0P?S2szKS z^mZCs-mgxljrSq-P|%~j-fgdA8q+b?(eixZJ@2a=guxU$JO_zOfw`}$dyjKJdXICG zp80;!aT=K~+>eZU zG8P~8WBi`M?|J-wf#0w2dj-EYVzKdza}dXe@H>p(6Zoa4X^EXVPO|09aoXGHCoK_Y z>GO>jx!s)3x}6T@8hNPH$iwAEJ`qBDMG&#q4k4AHu=2eYr+H#Q4CCj1Re={3Blv3jfX!pZudPO~Su3#3%n4|8VH9HUFsF z!O)AEe=PSfxJdr#`XLVa$8hRT{^_y;{^{_5f1(5VXY}8r`6v01e-cjqNjTfmflgyk zh91u)!FV?EP^poJ%Z=3Ic@pBL#xugvZcVvH9$#~CO)ZsyUiVkc~qyLM#JtZHu zr-ZXTC0yEbCdwc`$-lYO564Zgf7FlMdK9*eamX75yT#+fp`sqNNsNCmG)LEg@eiXu ztb@)Iaaae23nz3wK^=5>PzTAMbui_2?m?R*`LGTW&N@iA*cHwh=S}&^j;Zmfbu*Gb 
z)AuNt=kFTMdzAjPMNhx$9_2mL%uY{JQe0Ad?SgA-uZ>^3>;2qA$@ORP64i3`IlPxL z_MmL>{@SeIUOVY_7O!!q+i1YZ+ zk;plFP*-5w&-MWBAvi1K^aSoHxI@V41>8$8o}uRg=L$X#YcjyS1z!+y`T+M4jC<<7 zz~@xcNGjBE(N|+Fm%WP&Jhe9E(5+yFm&h! z+)Xfa=nmW+nC+Em9oRpa)*;1*-XZis=txxht1o79zj69C9uPz8pBk2k4+VDg6Z)km5rIb+{1r;Ys%`tR)`vCtwbebld!iTAuK))Y6Gf z($bAh)>6f$Xqg?G3h7)J*q67l(uMvVB>nvkR_=kX5PaNq3V;g)CtYVS@L<8Auu}+J zC^#H;h5!!{oE3J8fQtlo2s=Z8hXQjBrNPap)kNuERP zMt_b19wiw4IU0C0u+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^T$6!RAz^ zbP!I(HAjy5xOvldiVIRn_k@3kmQJi*OEUd$c^^->0P$+pDD;!~75N zRIEYE?AQa4Hm4?`k0(i=qdzADPZo^+oB}*WF#2;U@KnL*&uPHZ1fxHv15X!>{+t0k zLooW&1NMMzPT9OBr)*x6Q#P;3DVx{il+9~$%H}mWW%HVx5?82xz-@HK+bpL2lc2u6Qi3w$lG%_*DL7i=2WJ15KjF{nX4u_Icl*h^Yw z$6kiCIW-r3JXiW0{W%YKo?!IneBk+l(Vq)|7YIgwE(BgE82z~jc#&ZA=VIW+g3+JX z0bd7fbIRs5Ic4*joU(aMPT9OBr)*x6Q#P;3DVx{ilsW2W z4Bi3Eb*#4Gds6EbnbbknEi$Qt@ZlcxPZVvVrkl397npN? zHh3>E=ltp#gYO6CoL^mQa04*s{A!-T4*+w{uNJiP;U4r)1YvCfC+`I&C))x}?g1tz z+X7DB2TV@31)SUqOis21oV*{HoNNm?*#JyVwgsGg0GOO?11Du2E0a11A2O-KN41Vs zbDmkpQj1J~-ww?AzPip}ADHufb-lqcV9xhdnZa>j&i7S?!L`7g@2h17*8y|Buc{2b z1DNxDwPKGmDOksDz&APW!+%r(?i2r{lmjr)z<2PS*k3oW28CIK6X{S$oN( z4$tGdz_qkY>Tv$^AHh1-`9VF<$DHS?Ri@9$DbDxRYJr3!fW(-8n1!VcnzG!Yv43q1E=vCIE~l9 z!fTsTnbN`LRHk$gPQ~@T`B|=GS)AfLSKVs*oSfo(U)^RfImP+Dy4_%Mit~LHGnky> zd|%ZXOippWukJ9IoZ@_6Z8Mmh;(TB2Nas` zuWe3cN(Y-$nbJWx^`X|WEKYHrtL`*?PEK*YukJFKoZ@_6eb!)dit~N-IfKb5&iB>5 z29r~q@2mR^CZ{;xSN9uCPI11k9!Td@8n4mkX}ktb<27&^uYuEe4V=bn;51$X3$JZX zWl9H|Q<>61IQ3*;r_O2}%iTBtoO5-*9JdM}DX}ktb<27&^uYuEe4V=bnVBxjR zsZ8l$b1G9h2&X>OI+n$$x$NUdO`nrf^N7E0Fge9_EcFe8$*Bdze`PQ^#dR$8ErZD^ zu4Ach8%$1d9ZP-3U~-DXx+| za^@FCWqlTM?$;M*F@3KIf6QX~e%Jgdi|Gef9Lr+*;Wu^8X8Q59U9*{f>b>2vnf~f8 z7iBa3f^U^%Gkx#LCjEc~CjF2qlYZnaCjEqMCjImWO!{jcG3gh5*QD<|?~l6Np_`9o zvAiqq@2bnW_B%z{d}q5FVO=g?tTsckt~W!nt}L5%rS7aNb!T0vJL}54SXZXMbjXWY zOy6t5OIb|c@0!~bRS=9bf0>!(f#T#cg<${%Z8csy{DS=1Lm3ZLsppd zBR8A$6Ly&N)9*Lwulbru-+l6ny4=gFUej$odfT6L8+(s*&gMJkpY}TPo%v6D9rd`x zJFVnp%{#|*Ki8kvJ$rZe!JR`t$x{ 
z5ZlazEqSRL>5d^?1JXU9!xyKx^Fh-|e|*q%#i(Cm@c1j5i?_EtFnI74HO;``g4l@~ zU`Pk=qQ>1~|1pf)xEuB7q2DQ!;5a4US5Ab-#903X>e0_33LVXX55h}+qV`tJnCZHS9@$mtx;_dEB_?Zh$ke&e$4?>DaWX=GBb=OZt4 ze5fa6Psm)zT*y99Pvi@`8dwUaSv~l-yg^Qaop1z;}68~K*a6kjq?ZL zco4#Ky;8pb#|1df4%N`>=06$@-ITpJe@I42%O;#(`4zRY-OO>lgfdbTDwzDzw^6C7Wr zo<|cLU#6a|363vQ&(;Ln#niJk!FDnAJepv;n0mG**e<4?tqHb^sb_0~?PBVAG{JTu z=Rb*hlk1;Ey=C9Td+yVWe!x9kN$j=wkb5fi*n|B}-&+~%rJ5G(pE7cPsgX_PMwaqXgos^#m^FI)RSW#5;P9kFL= z3HHC}{YsX9gAnRiiE!@iak$rTvDwRnr<2b|#=4Oa-UdI{gdHk1@^Hji`*LKot44Yo z{U)U6-W%>!a_$d=H~Wtw9rfX!B?aB)VL#6? zobPgl8=*~77>7KMJ%>H!VSf+xLc29}LB3rscNJ|F^+MPI>IHf55}fNg*j2P$SxC=% zVV`1;dDz!Py%6V68_LKT=0<4K?EN`ikv3vD2m5p|cUgbu{c{P46;zd1I zjrV+ieu@usdN|lO)f?yXuXa_b7l)6E0nhi(aw8MGI)6csANGs(aImMU590N|)>RX| zdY?QQc#a#n(%bHzu=B$`ZePS3xWHADyq)?Qb187Q;jZGfi1)hE?(#8P{iQ}WMbU19u5;BCZ@0D; z9FyG3r)`CabAY@k057I`cc*Mc9?}(*xoVnskG2)mNALR!Yz5mndORE-%V@$V<&p6ZR1G!nsEFc|Q7_oTaT`y|Aa$*a{P8KW&(B*R~bpMSt*uJf^K+ zov;Vh*ovTRTR*l5%1o7Q>qT3k+ZwuSTM?9P>o+{kJ;_fu+xpN>uw3?qwi6$)T-piU zreY^fQLgMYw9BQv(B+~Jxd)LpgX6Uj<8`{X$Co{c`DnYsWv-gx?bWtJ`j7q|#C{Zk zGoIJri;oEUf9QvFMJrr2)7z(QiQ&th9^w~*HidjC24Al79`vL4%&6~=G{vi3HOqTQ z+ZAa~+7;?n0^P3mzT^);ypr{nNoqCUvu5N)IIwisoV++=u4dqn-Q zC%flkxOe%W`W!&NX*&&j#`_m3k9-lG?6PTtbX!BeB<@!}XnAwcS1eENSn zA#Goz{b^fD5pT-9Zsa=eZ}h#y3~+{hKNEe=`xDdGFY5afmOblM+{Zx9zTJ(;{XXwc zq)li?nCBemNZT_PwufAtALP3b`SLy`l`rfG^IeR52Sd+^J6y%JAYRkN{_tK!@~2%X zLHtpOPy0i=LO*1*p<;(d;5^%i+$S$5f|u;iN#Gc{Gi4R-b0DX!#XURZ3~-4yXeMkB z&ko|xL44X;jv3k*n}4>RwqAcJeQ0++PWsRm?FPTi{m{|ZAHw}ZA=+-zU6_YJ zPWhax9OvlkmtkI)3yJehs8Jj6dh-mzc1g**D)!{_=%$Zy(x zuHw9kHh}j^v`M9ik3B;Bc&($|;wv`+vpN@e)v`?%H{4E&!bQ;~9K)2k} z=teumI>J{&PueNQW7)QTwjQ)ox*wn`?NsS$%4PU_wF&JN%awbl@-vi6d!^ejuvbIT z9y1jImz~Jo5{fgFwzou$_YZ1G`m%@Y!F%yaRI1*MfAgH6wuMe%)1^TS*_$ zevL%<{J+8+1ajfGT{RkV@poMn&QjU5UxpW*d-7iBw^)zw_Eb3XkC5u=aO8Ky&xRxa zLis{CLOUjHMmv_1RhZq%bH5Bn{*7slgd_h>{3`8)JV}YgnvBwXDgId{#CElF!;^LhkN9I{PZfy_a8?&GcnY z{ZTRfieLUoG5z{~IHs8XmjCXo)5k)W>hwD<>Yl~)pX)s|i|HR2GCYgvE1ov#SG{b~ 
z-}KKW{jL9N(${90^mkrr(%+kB(tm!qNx$sKA9cB#-aMwL?;V$1s>``=*w8F;x)@=6 z7C#J0eFyTi-P4Yde_qcV1%B-`mfm zzwYT5b-Anm`8D0vw`KjWF6XYii?hsi{nK4XbFc8}uA_`czGFd7@*RtrU#J=Mm-Okg z1n(s+Imf)GM4vv_179x~{&a2tzCkej>68JN35GwNa^P~o@TXG&Tp<|#bd~}y6%2nm z%Yc^wr+m1CLJr@>nkzn5pab8 zti^(%1K-8E4p{cS(63OYbdbF-nbbk{LS^+1IhAOeO6jk@A*TwsN-+HCEC*gL82)rt z0Iv`Xe>y9HR|3>`El z1w#kTNx{%Tb5byL(3}(u9W*C_Q+&vv4k>7Sk?ot7hLN)8-OEqBrIGx%roIxE@`-L;8gV}?=CGhF9HSp3pEm>FEExTH3-B$1(Vts@w+Kdm-U@uH zVD#r!;H`qupSJ!+x;}3KHyXh`nX2=9Q}Da@a=-p zpFXfJ82uRojtNG8#)0F4(Vw-zwSv)~b-;Ck(Vuq!-vMlM%H}mWW%HVxvUyET*}NvF zY+jR7Hm}Jko7d!&@Y?27rgX45l_?#BQ)VyJR{vhjotnU>k01E-iTlQ0W zPoM36nol2asvdn@FMW>w+y=Z&F#2;l@OHuI&mF)!1fxH90`C-z{=5_TPQmEUUBJ5p zqd)Hgz6;pql+9~$%H}mWW%HVxvUyET*}NvFY+jR7Hm}Jk;kC`FOzB{ADpNWLr=HYY z*&6utsR?}g_<>KKxc_5)zRov3ed>MV(`UP%=Ff$tTJ{@eq+M=<*HKH&R+ZBE&|CZ}v(lT$XY$tj!Hd(p>xrO(ly_XFQA82#A*+#neJ`2g?(g3+J*fcFVTe|{eL^McWz4+1|Z82$ML z;4c8%oU(aMPT9OBr)*x6Q#P;3DVx{il+9~$%H}mWCA_vdl_?!;PGw4mPr6T^CE%2L zJlKy1A1holP~Q*s;{kKcK>Z-tj|a>(1Jzf&I`t;$NfqL3t z`t;$Nf%<9tx&?jN%tct+qb3f0`fweqE!a~?pFUj2Y76!h(x(sCvD$(?h4kseb*#2v zPa%E!a2=~H*i%TKK3vCY3-%P!rw`Y$+OVfk)-5urgREO*QU_VbqEDYnw2gY!^aXwT zaL%uuGnhVoIOkU{7)+l&ob#)n8%&=*ob#(+8cd%)ob#){GnhVoIOkVK45m*X&iU2L z?R=mw8*;KO;3R$ekdtizC+X9NoNNm?NuNICWLv;V`t%_u+X7C~rw=*V7I2b2eaOkS zfRps;Lr%7Vlfs8g>L7f`qz)g|I@TEM-(7>YRj-=<9t+I*zIxr@aloAKtG_pRJTT|` z>Nf_L0&~8v{=wi0z?|=^e=>L?Fz5T~pAEhenDc%0ukHJV$6)`i&FQhgHmAn{+ngQ` zY;(F4*yi*EV4Kqufo)D-2`rqJ{lb~lLG}x0QU|jK{X?!}1$)rRDb91%?@gbRQ=IRs zKNw6-alWtq!(eiX^L_PS29r~q@2medn4IE#U;RIW$tlkF)td&BQ=IRs|4rvq8n4mk zX}ktb<27&^uYuEe4V=bn;51$X3$JZXWl9H|Q<>61I28}(hqYRMz;!H(Q=I3jw@sgu zQ=IRscMK+{INw*t4JM~J-&aY4$tlkFRmgl_gPh`gUxf`Or#RnNSq76+obRg+_c`Kw zjN>zn*T8AK22SHOa2l_H(|8S>#%tg-UIPoSZBAuM2b)uw(m^=&q1Lf1PH~>A&M|#X zPI11k&NG;t;(T9SU@$qw`M$c)U~-D{eRYw+SBY*DbDxRB?gmIobRhk(>ayK zYxH>TNJN#reMKYcM&*`M%0Cn4IE#U-e7p zR2r|*=V`nKPUAIj8n1!VcnzG!Yv43q0}HQhPGw35n^T$6K{)lH*0C&3?PVViFnvx= zaUDxtZZJ8;bu4v-!Q>RzvD9FL$tkX5sUZfFQ(VVVLk%XUxQ?ZU8B9)b9ZL;Q=TsW6 
z(dTKr22SHOa2l_H(|8S>#%tg-UIPoSZBAuM2b)uw(n0q3iu^6!2W}i$*UR3I+{$-^ zM?T!k>7w81KFIg__zo5^-rYWwYr+pZA;%xjM3f+%IvOav08X%B^ym@V+U_X+$|m?jb%!IrjH{8J}e}p)CFF-{5OnQ{}qkrL7 z7k$_WpPMi?HJ?wDzxcKhP9KD>n$xE!r!**sx}uJaEJyf!5ann-pP?M#^I?=Dmq1pzi!W?diTttNWb#2H)K#_oY53e}mfrcb33s#}fGOxB>D8$hQOg#c~|* zv>f5(2ww`h6uvzwAuEBGc!zz)mGT&OInFJ|x$C{}`73d}62~`qPv||AqC0tcBhKB3 zyvn?0e+`b;AYQrmq>h)$5AoLF+&Y}A@P4TGMDbkXBJg>PvBw*5egn=g^?vL#Oxt90 z?(jyO+lVrjc|Y;B9bfctuUr%7iE5myM!HJx8UJP+-;Cob?`J8l?_0FA*F4%{#HTGb zzE4^c^AUGGeD^S1+c0zfpz(pynlS!PjwY^=F$rBea6cgOGVR=&;QmAz1N$&WI0nj1 zeuqpSv?jPe(X>0t#QsF#dn3lJ=KE>%ass__PotOce;>xO=6_JGt(&cb@V^myYW|<5 z+%vU@jE6>)tH;9`%9ZiZMB4@4Z$Z0nK_4ymp7*!ncq{sKh4;7qHv_J+J`Q+ZgL5^A zx6=EC_6=Y-do;m)g}feU{;isC^T27tWArWd8Oq*3 z#!2OcIEK$h6WniT_}rS{_@Hf0<(1-dYl7p$aJMy~$A_IC;u-E9O>lh3UO>ay)&$21 z-|M&eVb}9?^|I@6I(^M}d$)eJKDHjb9)2A3NL}kzfiLEp^Nl{Z;DZaRycgl0Lhm2m z&u`9u5#u?Ib8(!z(fgIQ4To1f+>76#$GHP_I9CULt@d8>>v3FWGhZI#%BgRpDbCVW7B#1=Hc*3lNQ{?L8Dt&?50*n)%5kG9~0)?aMEL6oO$z!}OD z8*rHR9yVY*`eHl!Vy*X@zZ1thalFoZBV_|v588lTIJXP^x8D1;zZ=K95pRR{TOChd zTbLi>-Hmg1p>gyanNIK=P&DTzDIZu!hN78`9+B$T>K30}utTLr9$tdIk4wJqs$0E3YX1XWm@oH>g6rib&J))o&J8WD+UosJ$}bA{ zotHoDMsD-|r2V2$pObwk&HaWOA?LZbm*?R_DWZKSAzzH~rn$(M`%cMy>V>ca)C=<9 zJe=d+Q;r4h^JTr@Ln)$tD496ghtk6DxDk#C+b_y|q~(59jt}}pVV#;nb0XS@QcyPc zg^GSBmu>6Cy}r7wp}Y2r5|nN0XMaRJehQ?�N^{3vR^s+Wd0Ft9%Y0e)LXg zzbA$db0Xa1TY+=Sf9|Td=irT|0ne|5|AAW1#W!r*+lYI6mm=QEzjIZcr()#Ejo5#F zhnF4WzTQ-R*w?!Z@os$CRrOv+d>40n%K~8TFQ)CH4U%@J{h^Nma+tP^_GcmDlDD*9 zi*QVSlHcsta`+k`pBE#X^(SxHPD`;LoqW9x;cN@G3+;=|7h4Cr%#W%(+L6Cxd9?L| zz`;}bP%7rQ2>bvzQEe+4>7QZ^_L6V&&W+`ou>GY*HdUhC*1Ybj?cVt@`UT*aO!-hU zakLMmb>PJg?=vy+w^oI8>we>^onEIH{jE`-lYJ;vVn08*Nn645@S$XE1@eVG*hf1> zUXr8K3ts*ovTR z@<8-Exolf6+6vv)&|TY#plotd^wYLtw(+-S>qJ|@vc(sG^8w4ItE^LPFM`;7b?44fcnD|jzj`rK|Pp;bKb&1hWfb^m5M`TWNhZHu(2_))qUx_!>;8C!*T zH~*Kb?(uT9Z83b@6A?d3TcFRqUZ2>Fh_~hcxoVFWjnR*iR9OJm4jg)af{;UIk$amUD;W_i!fPC17w1?7mv^O^A?DGHIWz+U#s%+Y! 
zG2pkpXDKoFCA>ClMPJ_M<;QYO*r8G*53fd>ZT(+Y?e+S{Lg=$eNGta>DR$Z9vM865_*7Xg^9D(O2Xs%R|@!mIrx|*COF5eU(txll>@F zqd&<}+9;lfA0=(0s1NcuMEfYbtw!B${%^xm+9c`^KT18cAEghd5B-y9TMe70{U`7w4q{$w%|P5h}`FxtOGCE zzV+Z3xwHNExYvQ)`5(A%huj4&(FW~?4PyMegZTFZ@%JD;Z7s(PZH&!7TTffBzmz_- zJ0B-~Xp3fo-};_twt4=r0c}@*URd1(x&6YhauBYcL74ODew5I!v`KSm1A_afToeBQ zdB=VgKS~>s-_DD}+K&=#K+2C2;`3PquXVIryq0mClp>tZ3(C#;bU#YyW7;O_Zu?PU zpVHQdA0?iLA0^}C&E%*3DDgVTF-F@;4wFyfM~QWTA0=a-PNN&!L_fqkFIq6h7i?g!|q{V1KL+%vTa?G?+FdndNtdzDLjrQ0yDSDVltyDkkY z@uO4?yt{iC-y2!`^4-1IPC@>G-MSg!_w)^`TOjxJ3(~>XYysYTd06rONcxEO>sEv} z3=XTUko$&))oqXu4iBgNC>dUK)_&mLj~^vzv-jgiDV6t0ew28dc@|1Q= z@~0i6y<^*GuD}-2mT|1GeK=OwKIAytgYC%n_$d7-oy^tK>3y=Ee<>Yl+dl3((%#kJ zS@ZvWC+Ppa6I2Zze4M|9WbPF{-QRnX@p$q#g8q_zlp=T+EP_3a^f^L5NKoGE-_8>ZA4d5&pD!3bjQRoh6AT|l{ek-nh7Y3wzyk!s zhtWXbfr8<~=yKr8fm3{d4w{qFU(i8wQZRJToD>WlG$#c^2hB;r&_Q!jFm%wI6bv0S zCxKIZ$e<1>K4egbkLsK9S#O(f$`6wMe#d-M{tCgz%{S!>1SicmPap+)k`Z#p3Iei>D z*qlBN9fZ@VJ=z)6A+<+4gF2Xf|8E6;l#T^{l-dG6N+$w8N=|I9en;QMGa&jWQCKIY z%*NU;{=|QRjTE0#V-e+~m4CK&x$0$d^({W%2;dQd(VruMM+!!NjshMf z82vdKcr>uhsX+)6UXxRJdGd(xnw%;C7G9H6gMo$D2vhwWZ=ny(VtU*rwB%WP6eJS82vd7c$#4J=XBudg3+HdfM*Cse|o?ku+1r(*W{GV zYjVowH92MTnw+wEO-|XoCZ}v(lT*TLn^T$6!RAz^bP!HG8Q3Ya|Ew+WqjVzhqvXVl zA0;zDpe_h&8(`wl zj}q6h+AcSkew4V5)pmu!^rOUethT`h(~lC@vD$_hOg~Co$7&mDF#RZT9jk4a!StiV zb*#4G?S6AIse`OrWKsuN$D$vlDB4C%H*G~fN}Tg6&tUpd;+$VyWib6Han7%gCnlWhSf z=|_p2YzsI^KT70eTfj;BQ6eYX0#4G85;@rxaFTwM$jLTvQr59Dse|w#lRA7<>sU3| zGd&1xs}`C5z8#qJeRZ9|J}~F|>Ux7?z?|=^GK1s5obRg&gKL2~-&e~Ft^?+LUsV}= z2QcURYDK%xn;PtywmE$}u+3>7*yeN$*yeN`*yeOCu+8Z@V4Kr-01KyOk9H#%tg-UIVA`8d!L3b1G9h*qq9g z4#KH;Fh8u-vMulje!@5Yz@1pV4tHb5AGnGcf8g0M?GN1I6z93>R@3L?6zBWuHiOA2 z&iB>r29r~q@2i->R@`)Wrzr_y+hK2PH{a2l_H z(|8S>#%tg-UIVA`8d!L3b1G9h*qq9g4#KGqwT@+Rit}7`r|ENYit~MSm%-!|=lklj z29r~q@2k%lOippWukJONoZ@_6-Dfa4#reLv-(Yfz^L_O|I;YZjjXqD~HEd(UZc;`cnzG!Yv43q z1E=vCIE~l9X}ks&UfZ0?lnyqhGNprX>O-w#S)7{7K7Q2nIXN|t`0ECfQ(VVV-!Pb* z;yRZ4D}%`?u4Ach8B9)b9ZP-NU~-Dzz+O?>Z;-*DU< 
zlqvh{_dRO%*_R?7-dR3Sf8Ku#DmQ5kQX=dS^^x?l-@ftDV88uo@^^y#a)Z2>uk5wo z_h{gE;}rRe-xt|u-w6G*&yCZRQyP>*T~W`2EJya(H=-Q9$Nm)M*nVYXk9`x$(tebp z+#mONGyN!SMn6>Bew2{ssr)Ea|H1fCO2x-}xtasW^HhG67@zMNO8k@`CF+cQ^e6ji zV*GdcQ9|Bl^rM9Q&dQGx^5(nN@A9Mc?7KOA_*rmz%PIUQAx!UKFz3?!C_NkS*~B@s zA0?EV;xy8ruhab~f%Dpr63&6M>3)<<{Kj%~?(}|?FgCRh2`m4UAEhAQ+#s(sKS~&z z+K@)y5D!s&z1Rdf0j<&*~HP*>D3-H#H=(R@BbIl|||C`ZPV&3Dc5X^xZfrG$LX z=u3$@zfWIE5%_k9!0$s($exfXUrMxL;C3#;a}nMLvJZSfL?NTV5pP7hKM9n_xcNAj zk8?e|QGS0M_s4NhZ%n&C3Fe9S%Ln4zK;+fS8|M$g@gT&@^-9})DKS69E5Nw|oa^mP zZ1<(~D$eaTHn|Yz3vs@WH_2z1w#(++;Ub(XLK%I%DaM!5tMH{{;yh7|bHzv(^``kH zI4;3)o;M@qFJ<4Wup9KHg!r__T@a@Wd?`Grs2&ermH|G!LAUp@*-5|TYVEIyl zt{pHgnU`tj)bQ;&hulzXQ3NZELltH;C{%9SzEMEeE1J_7AJ0)3S4 z&Gbj%coh1zpEoPzM`a)F6a1r$!MQPr*Wa71{gfEa9!*^86}0bjtqIyFmV>Z|SPo>P;caVz_9^8{ z2|9{RFkC*GppBAyye8NTZ4*o$`wgdC6SP&PPOS;rD!I2i2)m|j!UxnxY(W!jo%W>^ zlxOQ?mo2v7AoSC|ls;g2VgnAMJZ%HcP@dR;!?gFX0aMTyQ_vSzc#HjMIG%>%0&hvm z2CyEq0W)xJ2KsNXcY{9@$1@SH&?|5ErNsOYZx+tY!nq;dQhzp%XXCiYt8DkB#QZR} z=iuBNXT*NE(R<>XJm>=TJ$GQ19H_W@yUx?#{I4<$lv|sy}A9%VL=N2Qs z;odrJqYM{Y6SQr#7n;Y+Gw`Kk_;@rydq-O!zLX3%TNAW(rfpgiCdePZOq3(kC5}_mPM~O!iU&T zTl&^T>n{sA|AS?&X}#mwEgf*qnYzFE#T|-a$F%Ge$Xw^x!sMcO5+@ezT(oUb?V`K- zIQyHkoQIpE&bH=0S=lYe!HaUI{N=Yo{GKypIOH_;&TFW19>(7D7n^hPzEhjyywaSr z^ZISq@4UXg&yFSaIT!4&T@qbVyTtiTbI5t)yGwR1*^Xa*S#n`*9Az!cQ7sE|7VcY^ zR4tfyILUbZ1`uFq9FW|59zd_)u*X=2LS~{;+s- zSKgewxZF#g?2EJiMUOP}zbM)eEiaGfRaDgV?es`P-%jng&xMaP^trGd_ioH@$p6eE z__c@CIzMiXJ5S^HljfxJ(`Kk1?_JyPu71gW4Nf8UWz<5)WVFHQjCo$aXf2Lw;Yb>D zzNeeZk)|S9aaS^#3{6c=y$fkiR7{<^JMKJ#{GP?{XZSsb-}CsrfZyNZ_w(lI$>~Vr zI5UznkS3V~-i>0N8Sxpr`y}%l*nYhm^BVH=^5Ylef3-HhG5^l|c>XTNXF4zG>2&$` zMC+q7X6%OC7WKT{3=huh{AMO+>Uc97XF{h(nm+>_@{)Nv&aC7teQs9XtlfJ%Khr!d zISt|CPe3BA6EZkSNt`Wf6^m-mZMjW>Z_cikr)I^QlgYg;Wb&=BEFQ04h0s3_+#WC)=_fc|gSZ;75CGdXQB`1hO z$vC+|97@Ka%_t4xP@XCutrNx__i>BAGuywJ?Z?daUv@e_S@bb+D!*8#^MiFdzgH*w zx#C`G@jYhy8nbxEw%;srQ~YDuezC|$@wsIC zT4FxpSIPFJ#C*hOlI<^v`JBo>67xBgKO~M%`W+GfR<=JYwzK%HvVBI8o8mL;FX_LC 
z-29~IN}hgFbQR8$JK~SY_P<2#oXXEq4Z=_5=Z8K}PUY9lD$ z$V>6>WczbsKb*=>(#Ods4Y~bE)}MU;B?9vI?_Xo&L{v`(-%tZ;+EFyEb?py8z!%&}BZlw3p5c=bGSaj$wQj$}m3Tl;=Kt4lD8A#`@LCY55%TxzBg%FO^vkY!DrE)cuu9~W{~!CKt4kQK0v+s>`=ES!t@*s67_vZo_|pS z@0a#$dNyd!AU?}reCCPtjg&fm(4N#ow`UN)G>Bgw#HVC@Y0oCKKlM6@XMxh5P3S|m z6QA2jdmaqJ_>4{3(~c+YX@^OB+F@xNd=+hm=NOuUkeY)AALjEHV8cPI&+0ZqnC2iP z>X3SNRBrGgX)`{1(i{xpvkb;(o=AU?61bg%&_lOb5Wh5tUmnD#WPE8eK6BC>L|;mq z@%f477S2hVH3ngP1|w}|$CEa*!=%mZFuTnT{@#>*80+n+Z}j{==r5eh##qw4N1b%P zqU}USuJ7u8#W~TDYsn1b8nNhT#}ggxFwxNt;~2?%zw0M!($_*{Jn@~8Q?9whv1aul z)=su6t~q`J*Ls}5o9y?$;_#YR z8pNSIRX%dR#{09#ST{1l+u%pwe=aiGRU^HPeh(b?9Pg@8UNxTa4R}6xvKtxg-R$>7 zco&P}GBVOrk_xmDz-~v}o@^(BiA;69J_?~ZWRne~^oo{y`3nn6?`}U2;RR)`n&#c3`BDhJO!xNq`3Nsu=BgRqUd_QG~Oa-L&HIL~|rA|G;|<48Eq zdjwxvi=2M7#*cKd9(iXDT^GVQ^JpH7!t8kV!aTK_7t@l;` zas;`h@#hEe z7Y6YcBmQ9IKXHeveJ_(wMc@<13T>{874}0h_{6cou_$Au7~v&1nX$sLC}X7r;iHf@ z#|p=yjFk}xXMNc|ENddlV&0RGH}jgZ3bq(>+FD$DAZH+Na(^bc&-k-~__Krfa}b|n zn(ZuWJ)czl$@fok`^y+8M1M@W3)f@FDW7wV|KEUj*#SSv0rBNrgm}~LbJbAD84m>U z!OvpgnP0$l8gf>`RV9$Kzk=&CxIMl5N1YWE-%p*amDXw!uf~6Zq3wH;$`MYu)TQqyD}zzOAe0*^qjkZSdh-gUvj9 zWH8T0n4V`tVlI&SX0LpoH#N^58O*bT_)ZX?dFpw#!R_qSkHM8CUaOjp3GtGFqy;JVefmbXFAp`@NHMU)&r^M zW(GI$yR*P%t!H}Bo(Mzz_MQCASbjT}b2MpBers2+TLkf02IpeT6X_c%b$swv&*Pzo zUbhJ1mj?06gZPw;FY6Zk_O4zNKtHF}1ki^%FPxM1MLG4wLq@ z!_qjo2yKS%e`*dwY7QEFnBOf0HXK|Ov>C!s=cbdtFT%Y*ooj4y4*Z?)>R9`vQO8NWNqw&3?srOo(_Q-<-|rP5}0 zJZUpKOxny2v)hc{jAYsTPGxGXXI0P-IG0-ML7jBJqU}USeiu>q7tV=}{01Y#4kJu- zwBw16c9`gBhkcyadiX9G-vx{E9k5I3R~Hg5n)PwMs^1sO$NOTP@UGeE-ZRtTg~5Ag zrGcCs$nrpLaE|GF8a(6av!h>qzaIS>`uD3+oN6>`4loOi@chWJ{SQ}3uEml(cn-O;1HQ@slox8DW(E!N{kjrN|zJ9VX~Q*_kj zUJ}R7IVhzx9Y2Y8u=1kO_>*xHE_&2CBZi!@oIj$R5hJ}?=P=$OdmO*-;eEC5pZ0yV zcj<^UF4BaPVZ7&+42@2X#`|B+iSX#rcsJ|`kKJGo=S7Us>k2XFzUZ>OVtBo6H-d7txemBE|^T_WWr!-lrs#I-g(LMbX&-~d*_|? 
zyL;**es@nf_PcxaskS)H`+J?90T1FW4)~|=%jsR1?P4!l4)&rs3K-8^$<3T_u77vw z_Lfe)bF+`(d?%dWbvN*vy|aI|3lit~e(A2CZErb$?$MY!`rjA5mDSPjh;!La*CPpV z+UdBxC2_%LI(Be69_jFc<8W0@Hw*j`*6;0aXz7NcaW`r7k2&`^50$#JO2Xt@2`iQe}(+d-*Epu*Gjq`c>wjEJYjpw1ILr+BvGFv z>Ycs6bL^Zj{~(T1o)r@UF}M7SWP_k*GCov!{Ux+C?5~!{F?FytQ%y|Hf=%Mv;}R` zhPHVHb$I35VdquI*S;HeUW0t&`(fvG$X~aFoi`xaHh&Mvw)r(A+vYcrY?}uC{*tNt zgAHe{`(qD&dg}fM-0w)g|JNVRT=(REeR}GC6xV;psd@d2xUS_={?2jUJZi%3f5n8| z{{~(%fAig8$z$&h8;AVfzR&d&7w&8+9LMv;uRMhJz(dZjzsq}}A%*XL_dJ{T9a$l# z10}wt(2)|~P&kJY-%fBT@y&!VCBBt#F6Bib=RC@bk$GSX`Hs8&?8+$}!fpHxpOwim zpF@Z6>@C)8o^#>07dj^X@Aoll1H6E)|F)cKQ!F>d9aSTPL1hRh#(g zMK^u(-}(>e^UGregEy_LsI1z!Y4Gy3D=W&DE+0}-wxMcc_4?IS>n0MctSl)m8&Wi4 zOVO~Bvb7bPRu^n?2H&!NjrOFzpQNan!#&V6&Dtoyq4cm zS%w^(nXA^VuG%;%cfwVZa%aw*lv`9(P*_ly+ka(s^@dHO1`l4bs(R(-r3K5@uN^$+ z>MPd{UbE)rwO4G|xcnLzh=BFD$McGGgS&<&`6cR4y$k9x=SA zxUzCt$*`j0Wy=efUOs!m+$*O|o;0c$0YxQ4hKw9qG;H|DqG83uOO}tU7(R03vSn2z zl_RQ_RaRCkEnZ$VKsU+h|LJ~lJ37uV(|8h-sB^f31IL7EeWpb^(rtwUmzQ(dtXawWpek&#Z>;#tEgR#jJR zEG~S%T|W7&5*4hjbjl_znl<67=~qfad^pK=N2|lO27hu8$ZB+z{U_r`!kCs_Bgtf( zio_NU{l{_2vLrhhzZW9xPs!wZNiXA6WK^ea&tK99!SF-M@!Eb~#sMkPJ*;X zOv^gxKN(k-n1~yKV6l_Zew~8wagYp?^c#Zo8-n!roMb0YDjq&$jQ&A(DIw`krs>J) z7X|4HQz^~Cw}bMRofIzVMXowc`mS9~46L1{{Yg5J8*x_p&W?CENM9ZVlBbL(?I--* niZE%rAgUHQSV!R>Cb(&TQmzEn|c+XB&joUm^RAl zfd5}%ea*^6KFeyomNMa2rez1{@;@f)d#Z#M|BGHK30mT2>OYo)36^!4f|k|2g&g7E zP)nm6ZM7!&`76@pdcT@fe!exuXM%TtJki4SyWcscHgR1r5jpS{gSGB z-P$z|wOwqXbx?v4&#bk*(c=)V7DH4nL9&E=X$qeRs)c~o`VLn{=k zbmFFm)|dXts$oT`QsdfPYPC@+{JBa^{#>P|f38xqE~eDyonUk1CT&dJR|nm(VcmV} zS8Oc39lpBHnNGX)8S$)Ua4nJ;{<@{2HjKg;EGji zN;a){VBLnh{i>1krUNgsw&b4D_r|SQHhV+~oX&>fqgLH>XUU4&Rt+7wVtwg`P3!J1 zT|0^3&Xq%pMyB2akU3wRfy~aA?tpA=<;=U%FxA>UC?M%+kvRmVtBr z$MvaGHdYJ0-uhbOAvMeVMB*$U+b|lKH#T6Ezia^|Cn~p`+t#s zx4uYW3>s`|-+$_9#9HDjC;aDzbCo^ybv%ApmnRY(Evtjm?+*SKqDO%X6=>{sYDh-|5RMw|@0#)H+iev#gQ#S##3% z?3_z3v&)=Zm7c{k;B3g8oEK1U^k}DKU_S}gDcMWO&tpd?!;VfDZO1reN6%!Qx_{4P zPbDqumq)*Pc}3aRzxJ)N%dKZ?zh*sG+cg=AM$lKGouTblmVNao`!Q4&isEm3e(2Jp 
zS-_T6jeh@I^z-&w>P)nD9F5wiBA-|#?-rw>lqI&vTu0<3xgtAs5P9ikBJInwtgoO> zmdXB_q+ND}$ggzDvP^qIWtGuT{`M&Q-NHSC(cGg+tG;&Z!I}w4>rm|#MGNjvS})gD z{PcsfN$VBx;quFq)}PioIaXHEs;;%HWO9x@-n+sc=LK=ktla3UocGqW%G$SfI^@=A zC9m*h(3eAe8TREIUl#jvy)UD_eB76DUshYE*+*<+sEf3%peyez?dTqOaewzfXn&}< zI9gCrvNdUax0ZbAUHIbu-i6KSfn?$SC`#ocJ>N(V#tZisqK-nNjxy_5ZPa=lf4`_r zTE9fwt!S{U&z?TXKKrd9wiU#9hLWNER&R`TAB=7?RA!;yLnu>>G9}3pH6N!Xr|m)6 zvnA7}?TuP>sP9+!`wRTNfxqMUdlP?uiN9ah&PdKsb9QEOCdwp}z|VL@TEYvFgx3gd;37DfwqQ=fC)O}eUF;p3t5(9D^8A-9KIcQ4cZd{y79~U+a266}!&!opl&mY49GWw3+!ehR zj`x!GxJWV?Nk;7iMd~r@nRDFh$hiRSbc%vIN37FI9{1%3zWf(o zHu>`MEYS=2a$weJU1z}R@wU|~%L-Uuc{`BI;&52y)|jF^;JkdKj~z9$J!2TK3f>OP z;C^_=(ayX^T~RcaZP6GDru8$liMX?$ukEL+er8nmm2W zmL7R8?(dNYPUhxD3of}N#nrjso3qutaHjANdm5&-Nsvr4^iW$n7&&D&Cik09;O@mGt#qxehU?=}4W41Yhby*_!p z;{W*Mc*TEUHCM(*$M2nxoS^a`Ra!V1RyyH$Sk0e2)T8Imq~s)(H>qF}=FgGZamjIN z{y?hvBRH8Q*vdVZpBbV2&gb_P7n<)qKmUCHTF;!n^m^sgc|8}7^EigzxqjRgJ+Ht1 zTF#a4WDksRj!<&GQ#OzDd$rxHlj!UBF)qJR{yAy=uoiAE84nihj|U6)cPsc&S+~N& zW!-k&vi+7_x0L7aTwdO-Yh~H;(DJh7*5B1;Ss(ml`L5+V@K?Sfxuh%#t}W^299`0F zNyU<+bCi5eE?H7R8H|Vaf77x@mRnBIF2})2SnzIEj*YCsceB80@+!2wpd6n28=>um zU@f8-99Z@ukHJxax1s!+n8^*?~Af= zosBZzzGRHc`#g<#_HE?Rm$2Vb7+mK)h@WOYu!Fg6zlO9o#zbzgDf_4THg(UhXTjg) z7H!MT9m40Y1OB~(sA3!Xfp*#1&~cn02XaQa2T=b3){i;WO<#ACJ~)uwH_|P;xANcn zbRO*Ji0f{?O|+4D`+Us2j{bG7Z|}Z0MnyWlQRLC)eQ)%|b#KKR3+<=xx*U2-$|}o3 zyRcR*nzui+c>ey-u4Uz+T?OUCz=z=lhs&ai-YJVNezz=o^KZ(cML#TC0KN>1hISXs zD~KjTA?3$%3&un*Nsish0SP@mzI-8chEr#LvrdPCsnJW5(|nzo<@A+(;it8K9yM;u zmIY&ex^rxF?AGzg+A>sSVgBzf<=Q?O{u6$rrB83O%&zn@!-|HL&093DY+iKU?)J1H z%X;Hzz7;>(UYpLV_XX{6U`KPin|>3()+cBwd3jjm6~!VuqtDQvJJd6+g)}5s)P;1Y z3)z9Xke6%St{z+VIY{vLmg`1=?9ox5J({_etzQkDdC;f*9IyHF{JzEY z>G^&-`uVfdembt_eJ1Jg{CNSbA4lJ6{W$8^`f=31$k(reor@PW+j(}e)Wh`T+2-^` zXfN%lMm<`ec4~d5Ykj73o$Axxb?SKfnsdRmjVBYvx^}T^l#R6^h&77xyz5U3o6f2C z1?^~FpJe^9tX~`*h(1nmj!yRdzsSn4$f{zIhd7^yL$^lHXGyP)i#(hVS<@i$2%o#PGFHrwW;H&iI4`n`IpnlG0>epxeP|auBR+sgbn$Ns% zJ?1v&r)nc~IG=favSYJ;eaB|~ldx0k*F%T%ne}kII6qmI*PNd!-Jj1Y-Jj1)Pr%=4 
z{RH|}>nBh@=QH&asGswh`gO3A^ONayu#@vsr*nSlbk1j{*P|Y-Pdl|f)3rX+$xRDo z#-WT_Z&vy1jmTqRk#)r))p|1-y7XUay^-|#xX2R;ktZ8Os`Z9_o?35^uGSloN5Uc# z#Uj;uLt9eojilGbMIKLxtZxvh)*I?q<$omnl=PaQ$X4}}pR_-o-ziV6H==jm`pHk) zUj+^*eqNw{@>BZqBjM)->L)*`U!C=l;wNpZ%IcKI^@jJY23N^9)kf%$pS-T=*sNdO zv01+cc53}<=#ZbRhvP-Qu`I92HFX-WO^L)SL?^ow^~1r`pHk~$5B7|N&PC= zNxm_?3U-ojI-Pve>EtKVt5J{Er=41#=~|!ZnxC^!hW?ZOaRIJ5$ExtW!2Y7|dkN11 z)S+Lqu+HnHN({Df@KL#q@9|M){kAZ3zII8s%XbZoZ;JGhMISJTqK4^%py*MW=0@ znfwd2C&hX2?h)LRPUl+xdaFHYn`ItPFEhMoc-j2L^ULN(=kIRMo>@OHFI(;$z3Ec! zon^gUv8=Kz$3I(1+8wyZPfh`E&hVTB=PgP5lL*sxPidX@>)ox>tY6coPCSv5#q->( zw*!|xiM9gc{2~uHPve{{t0!#>)*EKk~11`|G04IFFg&OzC zvibn`(YRlh)fc$0#sjjfe!%@SzBbG158Pkl!CBS--~qrXKEMXWNj+Y$L2*)J*q}J6 zF>Fwr)EG7>PHGGr6el%?4T_T*!v@7k;1nM+XhVt*8MNW+@DB<56a40M+M9?orlj5E zO;Yl#cY~5v1V?sAw>TFyLXp6SaSV4JN10zy_1kXJCWL z=`*mw8lY%n=}1~!~ATPtq9Uc?0kb zz$T|mUXxQMugNKs*W{GRYjVouH92MSnw&CuO-^ZEo1Ds&4JM~DWrOAv9^=8G)7}$G zHhE7fdDeSMNh`8XNjrjlKlGf)ekF4w&p?`-nv5}?tj8SVIR$u%#u(43z*9BGcuoVJ zrZL8II`DLjF`hGkXK0M^oC!QrV~nQ@>;jvdGI>o-nY<>aOkR^yCa=jUlh@>w$!l`T zG4)7d}F`jdQ=W2}coCiElV~po~;Q7EN zr%YaxQzoy;DU;Xal*wyy%H%aUW%8PwGI>o-XS?ax>v&cUSy(!rLoS|-0Hgx5TsvKu7N*DRN%3J5x?P{*;jXGXSUB7;# zwXQz5h+bx^GXrO|oMV*%^US~*D|jm~&kUUF1#bi9nSnDwa5*r~44jFAw*&Lcz_~&2 z4q%=cI8y}g1m>B6Gp+gDB6>Poontli6P+?(o?|r)5WE$b=U7eG3f=~MHSu7<<-k10 zY8ondJ220&nuZD90nBr(rV)a70`nZJ>AELV=N6f?L7!V>(gw|k$1y%3^o=t^`sxW_ z?)e>8@RPvY^E)>RehQd-e&;5^`+&LUcjgLy8kl>2XTIS5z})jY3k5#|%ss!exS0=+ zV|)TgYw|hy1TZ<-#U~;m_=j2ns=D{_wIES2$o1ep7M=VrkkF!%e;ErKJ!-0wRp1V@3n z-*-v`mjQFX@7ykUD=_!_PO0E+z})XUcRYz#KGY%n>MDH}AWqUzcFbDm=voZ>#$c}T{boZ^1p zc~~$x#r?kXh+uMx`+X-On4IE%-zgJJPI15QY!ggQalh|u7feoZzwhi!=TsW6G3IH! 
z22SHOa2l_H(|8S>#%tg-UIS}ho1Ds&4JM~DWrOC_7dpo>IK_Rg^QeqDImP|HvqvyF z#r?kX4Z-9T_xsK_1(Q?U?>kQjCa1XHcb*bVPI15QJS~`<;(p(GCY@7hyvCTP@ftXd z*T8AK22SHOa2l_H(|8T6d2MnkQ#P2K%9IV7QwiTseXesXgHzn+I^U8pC#SgIcb*eW zPI15Qd|NO%#r?h$6HHEVzwbORn4IE%-}w{4v$(Oiu9}%lWZja*F3z&QH=gmBwp~c^a>Q(|8S> z#%tg-UIVA`8aR#Dz?#=4r!r-O$*D})px>D?$zl0EbKh5OA6z1~Uw5zAKA}u(pZ0{~KkC*>#Zl=3s4mhyAIC*^xgc}v}QKG*B0cDay+ODhgzJ2Fk~aj`LvY>Oo$M9ix(NE$xKq91 zxE_x5kUQNQiR+QLE^udhqi{V6*M;sZZw#)-K);VW+Z%`LaY*m$&hglfoq;>Q*%<2r z*#)vIWLL=Uklo)r6_fTg#yY!wJ!xZO>?*gvC+$Qzcc3S2Jr%p!9pp)SQI9*sZ*OC) zn_J|!7xiLXrM;(OJ=~FgdmCfyA8Bu6jQu0+ZH%#hq`jwN>>p`wV~qX7vHddJPriMb z?bmZ)Jh(C*oa*P!^TN0e)E&->fYwf!Sx(m7r86FdAObj{bBAZZvn0s zAbq%dr?&{#i*P-{y~|sI>m|4z>8|mX;d&YLuXEQb?#p~>jB$R+d})kve#v|}732Jp z`O+BU{E~Jy#yG#EosBWhFKOqg80VL?voXf`CGBjCv0tQ}jWPC%wDVMq{UYsbjIm#& zosBW}i?p*b#(t4@o{F(w$oVg$-Q@b0(Qf_SM4R8`XW(0YHTU4Z)4!$nzg67jf1fC_ zGAy#HSmdFo$m+Pr!wHc!4I<_HbX?2m$K z>mX;WFIWfT3H3lsi2sh5QYuze0*_)@@BL)i&CAM`E^&@N-~V|d)`9+(ROA)-Zf`lh z;Zfi088LmbFs>_+&TqUdd>>^Ul<)cQi|p~h^>$#CyWR^*+Oe?6x`1`hyMeK`GuqwY z@q1E!|HSY0EPPW{EIP;Ypwn}L?Tm3Zc~MELkBdCvpzh!lJ22M0-+Kw=X&1lW<9ApI z(K*=@<%2ia&N%l$uR+p2=!LZ2(`@H@ceBSb_+~s$#Wz4c_)~Kl^>)724$zMWCGAL9 zWTG?5b{=3m^y^vhYiu*VAG_)UC5bn3gHeAi*N!*!uS(oS5BwC>m04%Y{MODkirA&=v65bZT>wxmo=P~;KQ zW(WLaS$t=ncM#v4g+(V(Eb>@XWS!ZD9F$=@@jZIpL3|$=7oD`WBqXiALF5Uu4Tn&k z?Zh|idC5b$xSn^&=Q6&*&*OJ$7TTPT{KA`TC+tQ& z_1)S@`38;iqi>EKnCNcxUiLYR@7?n(w6izL^qp@zliYHTT;{2TF*N2z^c?)3|x!(KJY&Ac7K#y~d z^KicAFXtU>q)nkS*#XW+J}a1GaN6e%;tk{-gpH>CroEr}yhbd7yo0dOwBNLMkzXI; z5#$}jH;bnIi~TwfUm@=xzF|C{{b$WOutzlQr~fx?)VwuqHErWJk!o(BeM#73+L~sY z&tY;S*_Lf)eJyNL^B(2tGq`tM18%F>04@CZCN)=lKYm=+DnBkdhYC=B&BnUZeyGNT-vbjPW`Od|mXv9CJLsp`4#XKye!5u7x4Rwm(`*M~6XYF)&q}nj z9WSGt>VMcx|9r7_(?7F3Vjn2~e6e=ZPqW>Kmynl)k8ZEshft2~MQjD}?`U%$j9Jkg zwlmAENb~RH_;7IiM)z6er`0&YUgh7>=DsL5{BGNs?H=$(L>W)y^;+_JBzS$3`yJ)8 zc`ts8&F_sZ_{V-IGjg5n%yHwM{{EPp9tBR%bze}vocCcFewS>)SN2DlQ5$V%o?GeZ z*j~KP7~E&R`(5ue{J**n=AVuWX2EX`K)Epw*vp?}uOxaK3Ym)AO3^ 
zk*bg50h>9;iclZtH`h2lF4{)KsZhS&w4d`@+e%;0`VhlH`FhiK)~#)&-=}?uZ=w9Y zX*=huwv~QARUdq-X}f7F*ML-g;FxJU^Gti`|5+#EW2F4Qw%@dOG4xm`VrQiMzqUWk zUb8;m|7-hAdv#r=-CP$;Te(JT!9CTp{=dodJoz~e{9NcB^n#LhEG)8aAjW9i!?v@? zJ>+peP>!pF7UjoBY-fpEt=19FX}upvh|bCJ zC_m{@JFwLKq1Pa39}Ggd@HX37<{nn-2W?i*D)6Tkboi{vaU2fLasCD+?MPT;f@_SN8T%QIVx#&agoQlw&{7ywS?_PoR6N|6C^~Zek96|c}V7U zgQT6{nx^M6*ATWB@jmhnVlQBcPP^@X3A~cA`Y7*8j3d_)V#N1QYe})x*Y0+3Enzzl z6Qn1f`PN zGx%Ju#sW6+xt{)dkzd~u)W`D6P<}A@Fli@zyI;SWJ6I=%Abs*4>^UK)e$z%2?|#Q^ zzkDvr@tL3XD}AgHw1ac3dv*u)%s`#9IR zHn4ugdQocw>pkt)chs&38^tPf_pTDF7x0aY*hocn>6?P}Q$t`)2s z@nqCm(Xw4F+rc%1^&{qtS~FU|c9odKSd%QWLBEpgCg(TTKhE!jl&_*h{vob!>?`^Q_6^q<_6cGOsWqlS^sCi?8I$`-E!^`vUQZdaAWX%BQu(lClSalGmy&TyJDti+bwyh8VGodh%R} z<<8Z%u+&#Yd9Jo`-C^4h2T83v#iHM;ZA?E`+qnL)ZHS%JlV?p)DWBFhrlIV4$BAo@ z8Yi@cYfxPDFM6D~7O@`0YwF2!rv|a5Ra=;TuC{PZVqYM}lUkF;V*Hl<#CEcCoZJTd zX59U??7;6Jo!7I~n#6V^22@Y3Nq^7$Uu6gW59OQLfv!12avOR5SJ`SkQey?XxE}oz z>v%gm@Gs1NFFWvWlz)>QI7Rte_;i2ns5uCq{t@;6Av^FN%>OVu@G<4TW(Pi@{P%3N z9Dy_)nqTnkpZ)&#mz+eoPCv*Fybsy*RfHqZQ`25x)Fn`C(o}q_UCG&ZWGs}RNe6F9QSjz zQMZZfRH}Y(ljF`dGTm&WZWGrh){nSeJ$bg(avXIV*(R<@tQ&E^dh&dW{5Ho?w^6r= zYf`Fi*xqs+bsKe?blv2%IeuKHbo+FB{yXD(Eyufw%P*C86&cs-7T{YnMog_+fp66q zF|}3zuh1AVwTgj@HAYOW65tYz5mW0n;M+7tOs(62ZwF4ramvcF_&(+W9cKwP@O{jM z8p8&@kGV)=*ueKO7i$a~_&(+mjbQ`d$6Ts0Y~cHt%QS`!d>`{>VEz3YV~b_V2L1h8 zCT-B)L+0dXSu4>uEA@C?lVz0xmuifdTC0FpX^fa!cL3j^F=A@n34EuFwr)EG7>PHGGr z6eodGe8`{;DL!P-2KgTHLB#di?8o)m;>Y#!{J36G4?o}ITkWl$#PuroB(B#EFDazih2^)Yb(wS$gOfu;(G1y(&Bod9h)%5 zoAj7tJnsX(Ph*Vd{lNEYjPZN`_yLVEo(}>)s4>R#A>fBJ#&~W9-mEdk^I_nJflW@C zye6kiUXxQMugNKs*W{GRYjVouH92MSnw-+SHaV3k8%$1R$_CA;sN%|IKd#pnZ>z$d zw@t~YSFYq%PvUx&dlJ`chnE)D3!K`5G2Wub9OL;2@FN;yJUw7fV~l46IHEDeGYTBl z7~@$6T&6L`b1U#xjWM3vfVTmgoHBV$PMN$Wr%YaxQzoy;DU;Xal*wyy%H%aUrFm^~ zDpNL?oXV69np5&Uazih2^)YpaKIedVVluGbDPEv^?hRgN((*JFV*A_pnm*>azih9R< zKP7R!$~}qewZlt`>jh5j#Tf6^V~+9s2JkmD#&|vk{FufV&u;>MQ)7(h$TN;+xJrv*K3ED7S{`$+J`aTr^g)Q`84p; z8e=^71Mk-uw+2Ai)RMTFPqOT7|&_} z(wbfp9me(IIaZVZT`}W&@f@qk|E`#Ey?Bn*S8T6*I0E 
z&#{{P?}{1Mi|1HP_^w!=TV&D(eQuFS8}vCA<9e+`-#EXLv0z*;?)jZJ1T(G|_x#SA zf*IF~dw%EFf*IF~dw%Dy1T(G|_x#RZ3uas|?)ja!1v9P}_x#Sg&3s@yD{`{Q=Op8L zk&{h6CmGj^oNV$r$+%wRWRuTH#`Pj6n|w|(t`|AklbR2i zv_bPBlQ#UJ&auYf8~!!uTjxC)@9TlN-*H1kC-u^ACb=0Oo$*`N!s%0psuuzsc$AflW@22R1o90odep7}(_WL|~KC zlYmW5-vF#Rtz!mc(gqzfAd@yo9EvY^j&-@5OOjLE=Qql2n4IE%-}zU; zS8 z`_37`;(p)B5ll{Tzwcb~l%?Yja(<@q8aR#D zz-hb&PUAIj8n1!VcnzG!YhcZ5lT(?p!Q@n?Y|xzgLg!cpr?}5`E|oDSr?}sDz9N{M z;(p)xs$g=8`+eu@g2^fF_nnS{$tmvloht>CQ{3-6R|zJkxZiiKPUln_uQBFnyarC= zHEhy8$tmvloj&QDO5-)gJdM}D zX}ktb<27&^uYuEe4V=bnV9jfjQ<<{C#EQb1IG381pn<1E=vC zIE~l9X}ktb<27&^uYonMO-^OX29r~nvO$02uVn+?>57lu+RKbd(1^6E(J%C}Lh7C0 z#5lzNS^j66bFPP6`Np$3OvgLC$AXeyHy-J0e)H3u$tq6J`#GmjzIyb}dNCh*$EP5j zF@~l>GDcBY%GFaM|HLe$@9i-*_eRKl18&a!1m$Z+|E^cD=$+*KSG@L-!~5UyS06h} zN4XDp-*vw|?eM-2{ClUIxp=4HB+6EdzOC27xjT+Vp@-k1YK9}9@yA9$vK>sv`(;)iw zv}f@T-om@lY}Y#u(^38e?YQHw-gjuny5D}}u)OX|z6Y1fI@rE`TYDYM-Eq{C`dYP* z>1{hp7w-eou6Vx}R6W!?SNpQ8~KUe!$o?~6z z?{~eDXirqiw`vd5*`B!Mw`vdHO;qFF@3vliH<5bhYEMGSpLe`ij~aLMuX@K(^jo#3 zLF#WcUM#Q18~w_EKiq;X6xWD?{kDbya{!LZ?-zp~$@BIhi|3m-TDTnXSvmfH<2Y%bj zdzaKwku&i2%xJ=7=fm;P9IU-HX$9P)kj z+rsyI|M4T|73k}6T=eJR=;bTFJ!N_Rog%x#%~r`J~5;9no~CeYtD4}28CQ?d_~&A z_=-%2&#w$heiiGipMf<-(htpgZp*;1x+tnH&y4BQa5Pj8-5wLGK?epgp+s^(xh&4girERmMjtWZJNI%Oy zufaOcbt5SGaoWaf=v0O!t;)1buM-J83$Pq@wEe}RUrmX$G#mB0P={v#<<~{`f_<0^ zZg8CHvG%iETqH1TjB{wN4fOw|(NNhdA;b?v z?87SfAKFw8n?lyBwRf@%VjvDH8df%M(L8L|E$^T1WURqKk*a@^d4Cf2(XV&6u6!r< zW{1!#n%l68{;U(ypLw&f5B;2)ldy+=G?k8a>V8O?>Aqj7XZ=WT8DsHN<31Mr&iP_4 zQZIF1=(UWK_^EOKg~&f&JVxp%f57oLM?L)hf#X4b9E9!XtA~2po|d-gemGxQegApU zdAk=N7QnD*XgB9)!MuW~{?0$QU`+IqEA4q!%OoHptr$caC;dEq$RE^PzWqCDKA1A?+^QV})DmabDIe{ar796BI>0pQ|N4 zUSmvR<2A;Z4qtF2F8K+Glh+uN7ilw)Sj}-l5(&ok(V;>}>Y#k-)IxhXt z7~}Ws+Q+iLl&=)sdg&9`f$!QIq}&PVpHne@$F6-W`-%S2l5(x=XFBZ4Q?TJd+ z^Nxr1x7An|mA@7JH2V`$_c_Ny``rqxmCEmmZkqiKQfHd|+W*G+3;_SR20U8>?iITU zZ!NCZ;=08B8QyF5pLh6N0KN6dTaUck++*GbTyFq3Z+Cy8^whYZKd83}d7GfO(yjCE z$MyZVE_MGx#VjEI;;@%~@j>K0h`d$qac?uOH>18g+`m+M^aEA24|-dWw*`84y7lUN 
zZJA?@F%<{F_fgeczo9F0tTD#;2QsI?f5bgVJqyD|>dD+X7325o(*DL6=WOa3n9m>Z zt_^v^Djt3;U>N+HTsK zdd7u)^tGn#rmd-GTh0yWnYNp@rk-hOAM{My>5ol&Q_r*HJ#aKahW`D#{+j(yKf;ji%P2ui>!(wFN(ao z+!LN!Usk`+t61_5ZAIQz@bPZ<9WN?r)p3!B%aK=(a%+Qw$UT|x@+mQ0tb?6uBJ%+r; zkaw^9JMVE^KaT4S?%${Ubv^oldQT$nNz}K|{XgD5TGs{;(c4(htjAYK+mh%iL*< zajt5AeG)cOPv%f#jB{1me=5egsr@zklzv$HqcO(0DeZ5JaV~0q%|4`_^haZib5h!W zD#rfQ{+fMAKWy4=+N%9E`;dC3?WV2TU(-JNSJQUWR_(88AN5SzO|1;Zp$o)uh zvNP+&H}K?SvFIGX1vHtIe9C} z7ysOLwz!{o4U+c33Z&ikOWS$GJ?*g!;-#u@_bueZzpHqu3tqMZoTEWWI}#R|Sb(yO ztHC+SxD{+O;-v;uyj0OSwh%guwZXX?m9)CJNEKsr$&c*-=Pu_A+l+Xr0nV9(=+rMl zdB*7AoNkb`6N{0?SP7iVjI$?Wupyw%H2ip}+Gb12)C5HyF>PkNRF*}&)WAW+a0rV| zqFCgysK`3A4UA#Ib|PMCK*dWHowT+jB(1(dVHEq;(oBhmq6>K-+r3TD+sVm>K1D@NI7EiAjMAb-`b5mUvrA}U_$^yYY}cOs@znQKQFS4oWx>|`8^+fe4t zzqXyNt`lL5rMnO#X`7oHVf+f#ujU$JK->7ii798H`el5b^R5)i+afw9d zP$|l<{X5&)7ii9^8J^?G#~LBHG2cDawzFPR%NmU;n>tXSdrq(jaLn zUg`#Ldym^C!ZL`LDt;XKd`{>3!a2#g8kDpHVUa3c>MC%U^Ob&_ZAQFQ@#CUX$+bt% z*{Gyd#YL)ksds|goVWDjY%}7eiXRu9YOXzc{x(ROikEs9xXyV@KQ3eNLp3IhaYu}J zsmeE7Ql{PQpr2+t5HD5v>0&A0?sm{mvmJ<+8c^|4+u07rOI7_3yH&i@i?o~mndK2L zRr%+OwVQsL?MA%RfQpyePP-W|mF-2mRPpbJb{xF}W47TRZD+6BHDboY=5x#@aQqu? 
zcjc$mIKf^WFZE88+w{-2^OzeD|BjdCbj;NI!R>FlJ(b_){W!OD%+%E=cmMyjoyXl^ z#EhBxAb9rdfC(4^KQ`ca=a2?@Vr+oo^KJSH?sVSeo7W0C>fNP$f z^Yr zSs!AiDt~X<&bqa&^!>CCF;kWAH*Kds*S6C4r|N@$HElO-7I-)rW!19?){ zB2V0d@(2{-RJcRbTEcm)_XG*iIq9MN)=RSk-*Ss04U+c3T9k`sXFJcj!_^u>o4FTY zY&e6Hs>ZUW~xhl7%KBrGz)^-a%T#|GUiPsHD}!MIPt+ zrspx&6Sf;MQ&s%!gy_`YkMf&8m3iGDX(za@>AB2xgzZJl)PRa(Cpzu6n=$NUtUe0p z7qb|9jTtbcf-Gu`+LECKI5~Eh?yE-d^&Ra zF^ny_`S=+-z_L$*<6JlPVcp=fy&4PHq+_P;_v@=beJuYh%C7?-wqBC0Vy3cwHFxlg zzaHu3U&(gvh1~J=Y=`xxVy2=TpZi(A(#JD3?cf~KF;gG(v05K|o~ila``E4Uv78^} zm>=Z%jt{X%fZX-35;IlR&H039Yu2?J^MHPKFV+P5kH`G-kE1--1lk_w^Av1neNUo3 zuB}{uxK`3faP8w<=i0#f5i?b-4Jz)VU*B2Z4$eCrGgZ~iIRe|wcF_*A9o!SBdeP_H z6SQns%XV zd#Sy~RUe!jV@4njSIOYb|kp(EvXCR9)Q$_#KCg|<$ zk&Sy_<N|Wt(jUJj+j#);$v)XCW-8}*YHuR?5;GNZ_Ce_F8<6c3 zOZg)Y0q-B2?QDju7?!PKrm`K}o2Zzn(Br!M75xMIhHDJ_1Tj<98q*;95{naa zmi~f$!*rBCOuxZCVxO~5xYn>Q5HnTcb4&TOwpdd3Kv42pwT0`AjB8PzUT=sIGgZat zW;xuuRolW+UlrxK+QxNi5?qGxlc7SesmrSTACxsuV%v9Emn5im`cgt~XIc{8&Qgy@j^R-d8 zN!Q)7ZCs~x`*eH$J7cB>@ZCoM@k|!&SgnX$%{>0C&+CHe3yS zwZ^caD{xnhVM90IZW_Y|EZlE**BCbR0PX>-V+$}|X{K!W^=|IJC!WZp4La5YW2T1C zHz7S<*JN1*zy%s3UTGn4p~i?;+6TCg#)wzi7r3v+h*#PVxSz&|SK1%Azs87HIskY8 zaEcGGL2*)#7i>_R)EG7>PHGGr6el%?4T_T*!v@7kjbVf0q{gs8aS}MihYZ?~;zI^) zkXRF+V0(1hkD1!!O;X=Vp7m}}(uz!0(vC>XR3{=aQ*$F}F;hRat$`TdfqJ|@v#o1^ zuhsaBZ4Ck*q;b-=1_KY)I4j#40z5?H>}+c&@KB9&vaKTEB8@M}wuS)@15U+bf(<68 z&%g$g(`R6V$>}q&!Q}KA*kE${3~VqteFip|oIV2^G^bPXm@;TXDjrh?ZIBpWpLlZ= zXHNSuQ=9ylsb~F|sa9lxq zuLHhLV~po0;87Z5JVyhM))?bC26&9d7|*f5V}VUhnY<>aOkR^yCa=jUlh@>w$!l`T ze58f=l}&!k)U)0;g{?@rl6FL5raBRcnVK6( zio-nY<>aG_OrgWy%JVQ<<_sb4p@Ooc3d; zHu*7A&-yV_tq9Itz%@I9bA3oBA~92QBWW>H!Kuj@r=F`iR^r)Z4voC-WuV~poC z;At9TJf{Os*BIkD19*nU7|)r&Gd0F|y1*{5$tjc9Ol|UGrk?dKBpvRsuPizskxD~n5p2@ zER69iJ?0qC8-Z`s7~?q`c(%qE&zpd6(ir182Y8Og7|*%Db2Y|z&I6vOF~)N~@O)sC zQzoy;DU;Xal*wyy%H%aUW%8PwGI>o-nY<>aG_OrgWy%JVQ<<_sb4p@Ooc3d;Hu*7A z&-yV_t;h+VQ*SHjL?mWvZX_*cDmb+OW4u6*ImUA#@Is9-o{NANX^in)47^xljOP;I zB^qNqmjW-<7~{DNc$vl+&zpg71~xfm@|v77c}-54ye6kiUXxQMugNKs*W{GRYjR5S 
z+T>KGY%n>MDI31*F;fHJlrvh+vC0s`l4l0aSixI?d1l~TFL)a;&kURif*CWFX9mth z!Hk*8GXv)a!Hk*8GXrOeV8%@4nSnE{`P_oBLOUa^sh{XDW-8CIng$4F%v7FZHC-#1 zF;jVt)ihWzW2W*Pt7)iU#!TfoR?{%SjG4-Ftfmoy88emVSWVY8$H>g24f@<7lQ!sc zEXGU?p>LcS(pQX`%00j13TDhy?)jY?1v6$U_x#RHf*CWFdwyrGV8%@4p5K`-m@!ki z=XVwgX3SLX`JKhhd|<3lacE zDmmHYbCNMr$;l?4lZ=^4PBwv)`W!2hHfTO%(uP0OIo1}$M;eH}b(YF_KLX7CzH_r+ z519LX=N7>cVD9&w6@sI{-0wRjg3Exp-*;{oycL-HeWz6LHel}eojaQ2TW&#oB$LyR z0GpilfK5(EfK5(EflW@A0h^rO3T$$E8?ffIj>nWq8+1ITOxhqZzP{i&mLKDboZ>#$ zSuJBuPI15Q+%1@#;(p(`M=&|X{l2qKFgeBjzH_f&a*F$XXQN6}XAHO4%R*T8AK22SHOa2l_H(|8S>#%o~BYm-x%vccq3rfkrhimGSy)BeBxbCKs* z2B)~sbsmy2C#SgIcODi@PI15QJR+E!;(p(W2qvev-*?IclT+O9JKF@4Q{3-6+Xa(T z-0wR((>ayKYm9jsuYuEe4V=bn;51$Xr|}v%jn}}M*CwYjWrNA7Oxd70^@Yx{3{G*M z>pUuBPEK*Z@9YswPI15Qd_yof#r?kXO~K?8_xsKhg2^fF_noH%lT+O9J5LKHr?}sD zo=N9a8m}?tX}ktb<27&^uYuEe4V=bn;51$XYhIh2%9ITzr!r-O=2XJ>Q?Ds`k>^+j zr)F`Cza?W%PI15QJSUi(;(p)xwqSCK`+X-Sn4IE%-+5jzImP|H^CyDIDem{37X_13 z-0wTzOXpM?uQBFnyarC=HE zWBihgIXShE`1^v%DV}3FFAFB8c#h@#sbF%7=UC1U1d~%d$8vron4ID{mh)r5znlh4oMm`p0;GwvYE^dlFKwh-<)@8&Y4Z z_A#C93rc>g_UV`(@t0mu^-%9z?F&o!I?8jkPsaeMdg*5pdqnhGwXaypw`w2DbF8ah zk{BRSDc`C+OlN!IlHaO5Iu=O9OA-r&dgp3SLdu_aymV}kIQmz`fD-*y?P-wuTeXMf z)!ae9>X@nI!i2Iex#a$|C%WgFaq-Ceo1QPqCGQviV5ExGr^Xxosbi*+3;9!o`~4Pw zoTFl<;^iX7%_9$omJ0tzKhP;R6*Cq6z&H)O_r&dT?-|c@&*lB*z9{!w`je4LpYy*8 z?WI2Nb^DCIaWfb@zU(^e2>w9n+rZl*{|x z^n90GzQ@k?)Sx}oC-1wB5Z>ocdnbo{@3&C+KJ<={R7_J9?+ErYt_bTNeV^1nai`Qj z<7ugX?)SRn>Nq4EFY2@Y9#h^@^$sX~U-^r%+do$R!F{ol(%0j|I3pLb{}u34j|cTH zWWSD2LLM^i$c60JaY|^viWhYO`*pmMD)c{NkX*=q9kZkw{ZIW1*{}QlLUyY)12$i% zzja^hKBN!hH;XC;%CB0lId#9nnsZXVK_M3zmzuUP4mH!^^DBdrFELYpg*8Uf5AE%7 zW?)$Iswts+m~kX^J@hjte}y$gbZVH#Yv>)}^&^N$x@D;mGxb;CAlDG+U=68YTNy8# zKAUzi9X{qjT=JV^rlMVH&5-=c@~`~sz=Y^jQKHNt-cQ%3*9f*GUzaZ}#|7n|gHJAjy90mK9A3E2~p>F`s>f|6esM0ybEy&-!eHkk6W?;&og=p7Fs zJ%s##JIV`7nR-gpaiS1;g~-cu$4K1N_YgN#bWZj~USH((bjNv2gZ>Bok=7sg=;e-= zxT){`PU5B_pU>8vk>B~v##k4~E|5{A!yg=pOMap&(z_zPJ7o7ajku}r-6nog^p8oK z8)Kc_zFtDg)={FawI@oDh^*2pADcl1bIV{*V~=!72&!F+`Pt} 
znu^0$jsBqCaO4e#UdWy9jl}gxTo<@ARV*|5fjI1?UmS(JQOGNFXL)0AJqGplac3(% z^=wT0pf?VAB*RXzKZw&m7Q`{x}umT$T1W#@L@a z4j=oFKHs$6v^Dj-3;U>N+HTsKdd`J?^tGn#rmd;xTh0yWnYNp@rk-nQAM{My>9@w{&?NiUTu!YZJ`g>~ieET7I zvrFPLjR$wegFF4)d0tS`D#IeH!pI9FufMxM;--FxxT%tNXcF=!fsX^+MP5|Ws^cOL zPe$HklpE+Sk+`WJB5tbW)l5a+RODUjF7p~B?Z|YbO-I@wce%t({qT1ZHx>DKPq#wm zNMnq1E+}aS!Xo1`M;c?CbA0|(>*$BzCZ0WuMW<5c&Z!vZVpP(q;vx^p+-Zz)F0#Dt zAC^yuPPOz=V~lgOLDCLO-!#TJH&f5a=pW_3MW_9C`+mC~{bc&>8muGA?@E2`Zin{U z39cvb+wEKj?3*i)@GX*Y#M3S-$up?biOf2J0{Vb$jjB{yM?u6qK6@KFtI- z2fMd=vv55N+#2E*r~Gvt`h|M4kvAK8L*3iFIk=vK>mqk$%3s%`AE-AEdGk=;Fn5)= z0M`qkH{885<*yU4mwtE=@)jX)gnO5_1lLP&Jaxw+!`N=dM-$TISNJ znDXuZGi^O=q#u^K)EJ|0m$}mz<6PC}7bjsO^<)k;#yD4{{ikA_o7!KqPw9uHKN@44 zo6`Qq80Vt)*X%>;Nq;oPI47n3r(*0+?XTH~^uwm@rmfmvvk$3f+HTsa{Wa~Qe>H74 zZPoso_EFEY-LzHvYh@qwOxx+NO?$P!rj7L1rv0Y9%BR3S({9sF49Y7 ze%h3e^n4>d7%$vkh&l?5I?AkLwNdMJ{QaUfY5lSm#iGHoK70Bk`|P)d;Jwl^*qjXQ z$IE(Fs86U2*Jaif{Lc3E+G3O`NtWzMCX-pylGFB}?AelO)AmNKI@I?o{QU*~-oW2+ z{Jn|4zr^3KYiA^9pp0eBOwL4^WDZb@wve&sX)$O3qSxv*NR0(~GqM*ieuxP&%`dvsK>g zg4uibb*ihKo}7;K31=ZuHk>6mNy)l$_3trD%Pu?mW)660?LPY2>}V~YDaffpaL4?O zhUW(6S+e>!R@is`*|09#t}-}d;CX^BqyHTib06ClZ?x?e;$$4KtT-k1j;WY2LBWT@ zlAnqJQ!KcK68cBfzsG@Vp0`CIvFAy}0Er8(Oh|qzrbmO|YD(xIW;_Q=(kdufM^JE_ z61XxfI2GfhSa3BZ>!1&dO4{ePL-h�AoSv7|Ui%Wixg%D$+5M%^1CATv*1cI#(Q6Gqx)4 zb*@;dtpA*`Q;}}Qq5ZPhN-lj_Y}K4KW98=XA=5R$v?0CbN%1MtI=NSC;a*;QQz?f!72NBmP>mo zUfSA=_u>0^>@U7g$9~{@af@-r%d;`QQ>1M7b@>jE>Raem;rx_3abJh;bFn_Yr$rsU zfAtE^FE3Qjr#M5rkPgrAE~G;)@ysFzXBPTQL7ziRp1x&E+B-maS3{i{K&mqX!Bs)Q zhu99j8^d=`pqF~Lhwq&L*HH58fbHZvF^XqMQ)dW}==an+I&s03nrD0;i24c9PraYh zAh?Rae)o+W%8 zzQ4ly>V2J)(9t|=s~*j>wsbVl+R{n$S?|GT;df@`vmlkv5?mJ)EI#W!a7ofr??e;} zK1rGKS?~E=LYnefkQm$4@91&C%|6S2FCroOso%;Q1lLode6!EOS-0Yn% z$Pd1UKrZn;0?j4#HSrTeI@dUPMPrK9_(Egkywg7%(6A5TA1*8ZR0 z7o79#I*POZFK`BZJSYC;JTtfa^$$|5e}b!mf)9lSS5qSYaIxSTN?MCJ7G8K@m_uLZ$M0Rw|c#i z-gmz3OmfRr{r&N8K-}Q&@UB66|HZa5+1;h;9EiC0Q{3I^cV+J5I$vuC_}h19=ye`o zJN&KtDqMFNY&-m|n`a_jN7w=Ww%rBkU5D8Yf7`wq*WItP9sZWhGXb`lzh!qtI@`?O 
zuDjuyZDwEaUP1hu4(yBWNN1bb7p%WG&QRDFJ&?}!voBa@2yyM%7u>&nS!^YjzAUzC z&W6zM18=sSsqS9IsX^e>H1{#D0O^BP*v@qKamA@2;M5HFNv{y;LvFX7neINtsUmR7 zb@zLHkY03$?aXp3R6WB{&yDW0USFgSzuR_Zy9ZP~BT>&y?svR?NFTY*cILQoRnI8Y zGuM5=>yPwN8*OKvTdDNNK!3jbU2g!=$2?#={P*ek?92JZdBHitJ}(5nI7c|w^c*3- z`hs7aBb;k`j*wsd!7t7c&O1FvSkFMz!#Tovr~8HV3_?BZ7tTT5FRW(>>S4cd4(fiP zei8K9FPwwAU&!Szquu27m(gy`ar)D7;MYR;pf?cXG45g8S>zt_#vy%tnH^Z{zT#br z^zjkf;lC$89_f=FwFCV3;|C!_ zIgi)@>dr&m^wSG47pT9;*I(l6FN6MI)IVvb?F@mOyazrQa_Tqj^nZ)Tua~y!e|v}A z8xHPqj&Tm_IW`>WBkz?t#yPC#*hr+0K|P#foWpvKjY2x>_^AZI?~a~9kl4m|5QJRd;Lj@iyg$T@$4=K{!i-}Ch_ zM@9kj-#s1;$$#5;3?%=J;<1qYcZsueoLrs{cYiHA@H=&hn`NKKRe&HNvzp($#0?V2C zX#e~PY3lq55__s6alr{n;O6tE(f(Pc8n=$v<8!xSmqg4-W8*q6+O)=TDMf6P5f{_NUID&|Y=^B>LyIKXv}p z&p&_aC+8(q{rvN%e$8hq75(JgB<|~0`MTB6)n_YlU#H5~srGehprg-L+NwvNt+b`1 z&sN&fY4@4=4Df8ae`Wv)zElPUS5X4XnfVOxOwy|+_S-<940_{^gll-c% z?bkf30q@lr9{8-z@W4}bhKKo< zI>W(u)?C!wQxR?|j3nrCh4Xr8sDljgIQf=g?Bp9Kj{ z919ArqXZV8wG>>E^ma)>5BKNK-xw5@U5FE;vC6-0ZWK`dpIyx`gB( zZxCEh3H@fDwbbX5OWW#J`{&oIrEMoy8)xRL{dsZ@-PD;m#!SsO zjD?zS=wmhCz!lv`HQ<4oZ@zAwuUijY-A4&ur_R@@_jOJ}NB2=%_2@ooOGo!nTRQ)L zJ~QVx5Bydk#BUU?=D%eH*^U1m)OXeQ42Aff;R<|PaG~!ZRDRh1K4Ok9i+#D?I<59P zIBUw^*{8fuuRi;`_riAuz3@GOZAS~Tvs-^>;HRta37}hO==MP#{w>d~&F}H|*-5F-B4DH&M$SMG(rs`292IKq*(1C-o#}zjOKjGc(`J znVp?I-#IlHz5nQ#;)w2=o-sABuR+`8<;Bqz72S__kLekMbP;K}A1?OTE>@|qHLPNn z%Th>gzb{{1|$QPwuFvxMg4A z(r7h8pTMs1#ztrtHle-Eh+7bQ5w{|4L)?z|>6Y=#CqU)l|D{Wrig{EfP7OAOdwOZ}^*3qrY z-*#`>a=+S=o46nzp)Dzgwxq(Uu(^=hl8V2rEm7Hn)t0DiptfX0cXbcxORDdrFL8f? 
z>!67Lzn1i>h+37u)P?y%G|U&mROCzirF4C2+_Pv!bV;wf%2V2pdN=Bqtw8=lRq4)U zK-8D*bmqfYTg8|*qjZt>Vi{sN-i?j;PJ^CFr8(V{H(3d=1gNl)-}4=1Dl4P0EO@uD zjcei^gGRrn)g4@;buQJfxJJLH)o-{a#^h-1z%z$9>jR$qk+b8#-+tlj1n~EhoSg(-Jj>ZB;HC4Nod)** z!Pyz0i8%kH_luVh`xg^;Zg6LUGZUO$1zrU{+p?thGWff) zqi+alVQF1X^8>Dlaemr<%5UTvy|XYk)A0cXzvgG$=GPSbnu1?b@N4ekHovCe*A)Dk zf?x9|Zu4sjeogSZ8^^JG17~}Hdq3oCFL3`Sob3Z1?B#4fkoY|SBz_M9iQhv&;+Oa* zeu;nLm-r`siGSjk_$Pjef8v+;Cw_^4;&+w(A640|`{i%_svG>hxBh?rPU1P1=9;-* z{;r(7!JmaS*#GO--#I)BF}85un!}8F2#?YCIS-Ib_SqUGq zSqY#13-`9Mu!b@t`@6K$g#EevmTS@4VM}}cO=KV1y&BId3Oo2AJy!<&x9Utw=@|}d z!VV!EqOdYTtN}Ze5Np4>2(jiXC&XH><%Gi(RzWxtjf?LQ(z6cKAhKwHrXgiCenX~d zET4k$AUifVV=pi~5Bs?G|M)NG0Af`;xXT=@5P3~tiNMvOzFg!N3A}L4%2M(rQLl>f zg(5Gq*P$7y5?smkKvhw$Jn}G85RjwJpeU8fc%->f=^9otii#QQ+c&EtT;G4b&ObSP zb>q_Ky}qtgTsQJvzGONTkF_NOb+Me0FJwE6Oe*(~Q+e3c8(`Is>9z#oPYI~-`$TxUercLxr zojTDI40`=uzvqcmp^)op@cCx97gBTDyoqeu_uPyLXX!i33_CN_jn5UdLXBIAO! z^^st0sJ<>4iPuLWiG)$7hmC};$J;_l<4K#EJAc^c$Ayy+b4ZkylpDc^Ff6chQ)ohN zIHb$bp-ypB)_OD9g2BA{f=t(fH1p2R%<-n;T`49$sl1S9-h5V%7vjuYFyv+07 zfnuXGpN-|>x(>2YZnVvr9qVk*P>a{oF}nqjm^LzS#&!zvHed?nPRcL>jpXK#njViRXhZ<+QqEeiYq0-Xn(1>D0$kO_eg$&M+iWxtZ<(`m;Vr0TB;oo24!ne zMZR literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_mask_varlen.co b/hsa/gfx1250/fmha_fwd_bf16_varlen/fmha_bf16_pertokenBf16_hd64_128x256_mask_varlen.co new file mode 100755 index 0000000000000000000000000000000000000000..8992ba4f17f3f32adbac4c5ba624beb4682cfee2 GIT binary patch literal 72632 zcmeHw3wTu3x%QqtD*;g}AYy<42AqU@2qA=9fPmaLB60`OB#>~Ct05?2+%pN`B2X`g zF;(nDi*2;DOl|9>u`?@4MDslQn?|sCCrm@I2w$ z@7n9~t@XX@o4wZT^-Xxntf{uomoF^!&u{(K@_E-3`QG)>%T?-(6sC=`+T;I@)@Q6# z+-F&xueE&At4z!G$jg6B)@{m!R{upVPzhS1G5L?>pn_#}kkGQaC(1|%)u5JUIqDke z;@!_ll$*S2A{TlWUVAp+xs=o8b^qr6v8o(tg$0M0VrK>i7e#cic zi&m6vDOy)hv|>fc=FN|lmJ}5i++4b*tOR$8i%XGKuz6F_ijps`TDNg|(Yh~XZd|!? 
zb4i)?Nap4>+e+GhIrFBXbz4db)~?x5{1ug@VELAnD@#gSSN*80dgF$5x4d`F?f+#p zcl?*toOa$dpYuQm&%5e#9q9i5vYNBbT62}=(I}Nwm^`XmeoKL5l};?XWmCx)Ru0dO zm&!ZmQfrJ-`R}XLwHSR-w8HHHS!))_wmeCr5m?wD%f1IYJJIuvVxUs zww4qZaK8V9uKNE;Hv<0=&rq~(&8iLUzjazVi!45xxxQ#?!OC?-Wd%2HEM4nWjXN(2 zc##c7>q}l6Td;KYh$1+h(wvbi*RL)rSiW*tZo#IK(z1|9Yty2x_2taXt2UJutk}3^L)i^q%G`X*`sEwf zy*B;>G?%%y_-N*q4Qp0zEL|`AQ_#{O-#0V2Y%bYcP<#t|wPr=Z=CYy{YrhW`$8|uL zzm|D((M=^QOE+R*IX=P1Gh2pbQ|U&!;fo*pc_`c$~?J^8@N!fRRR;}1D1cHC? z=R=09TDf&t_J}O)-)|}@-MnVwhW$SMVSx{UR{!|9)Y*?u3H>?i-5&?p37?Gl-sg0_ zm(Kk9PZsg<6S$+zv^QsdH#j=n^b0?(`FEyPna}zF1O7qE!dPtmJpak35o@tm>Phdp z;Y^hrzr?a8SQkC{WGBmNFS?#7%SWDeL4Ks(7xV3n#bUm)Sn!^1n6jH=eSs~={`Yi* z`TAFOuS&hb`X4Blv&)m0R$kl~u}(HbEh~46^-zPbe%p{@{jQ;pb*RC%9%}51w2<|1 zL-+QP$7$zI7YR4g=}?8eNVs?^YK#fd@l-5)wS9?j{np@DLc#_ohP-08vn{)wm}%p> zuS^^KihYG;^-b+Ct3tG&*{MTyd8ZDkvss6Ifv{8Bb+9W$s!aDW4cHHvlJYR>jWl+S z1*#)xzjLg&lzT%jFRH1y5J_-(7` zl|sxs$|B4*%2c6byHq9Hw^#B4JR|eIeUZ<)5Owm}{~49mp`(%)cJ^7OKB0<3kx-u< z7|IBA*x4A2q&CK^pEZnq?C6A;b-1Bp_WYY-)>j&8zVqg(nDtfg;i8LT*4G+Disg%0 z^$nI4i_NjeyB+O3H-t8QsgcL!T%UHR;iX-I+&Up;M^6SlndQlRPtNgVp(i(aGUCa* zJX!0>dQU#-$)8y#IA-i~sH^I0dKxuR{pucgq`G?`R2?cTjHDM8RmQ9@HlWj1-;779 z`)0JH2Vxo0j(w2c$4C#>W>jaOjtrxY3hRfc<7xc;s3B%OgR!t8!HRx+`^EZITUkCU z2>XU&p=zr$Y~K&oj)f|$yKs#%g(y=LE0XhbT5Q^0ls#26ZQ8zw^(^Xp4u3z!-}Cr; z0e>&z?Ydc5Jr1 zH#>dyz5|_~ZI~XLj`RtqAW_!xDIB9DP9>-2v-85SSU483v)p{i@t_;Y zC)b1SlMyAg&3dK0;9kJF0WNipfJ?7hC!}ohnK(1gsv%tlmB=U|o1D5c6@k zttx9wc2D4*eULtO)Xa0SQNT(+7MQ{H@6N_9ysm&3f}Zjk^;-CR{66$0?t*)|{p%}I z8&j-@8=smTX^6!RU``GD8E#sWrkDlOKYHB3CUAs63uL0MtA=kY09=(vS z7jXObk@O2LI6a@2e(Ck0y`f-eUkKwn5aYzQ$!mT9$0F%9Qf7o|EG!B`-45MuV+^c` zJ}cJk=!83zS_ozzm5Lbj%8)=9{L}#Q(c?)##&bNlHH9N74F{D z{LBB!KGFY=0dDVMG=p-d8WQdUoA;jr)?*d1UIM3o(NNoSesyiH1=X>h3#+MDtk)tb zF@N^PA|*APOR5gJp?sdP1`z;~;QWu=#tL^4K)31DxFMV)pZ5Tc=Ztmu|;q@vsI>vmkX`?{(=yOvdTOFLAtEVQg*nf03npY`Uq zm+fA*6Mt0&vBeb;aBXon(YUzV;+n-V(J1R&ytsxkSR1Oo)3S4`ERnri2z=tDzv4@= zaVz5$A2>~3g?6M@!6Sb*v?HTR-UsK|k9U{{*Avl~cTnz~tI&&yUON28Q|KG>yn}1q 
zKP_3W?fIW7D>eH;%s1QfL4I8OQRcyRV;r_~FXq{Uo*&`3#WA?fHi+-#ePDsPZ9j#y z=f{Mvv?&M1$2Q*PB|iAM)a>o4sabr^I_R}s3oW-}9H^J=M;;-v4yKH94x;{ptRHi# zo4&^08+AMA?;q~w?<@WC0eug8bi#EvPbccgdj~wsd!4-NR8QXn&yNasdOq8w&Ig|F zkL$jYHx}BU@45*27FQgq2<^sa(89UZp+)nmL%Ww&g?6V`4F?}`(vMU`7QS2&S@cRp z`ke zfjsos0;S)3<%vCZ_B<0}cs?2PWH>VBhxmIIe=p$gXZZUi{$9o3o8j;T=BdZ^5&Rv+ z-xK&tEYlc1d!AV1+4HoF(T^Gf*76r>JK5c>&T^dUgGwIBSMq3~l27qz+@(g?0Pv?vK@-J^!N^N9t08{IN&Ro?l>0nZM3g8b}dJ zw_4x+z~L0tZk=y#;46n!o?7IY`xTW={pZc~+QRjTd0sl&efLC*4h!Ga`p*q4TG&$0 zsYNa6u~RMS^b7sY)i10|oa1NpC0y|te&l?8f{nQtgioMsbH2CI>5Tb4pdKy$K+SQ> z`cdOx*sz}b3wiTZ$wT={))gvwnEcD3y*2-k4xZHnl|0D&z_rW|d5HWYU+Xph&XS+} zqb_xte`m>0{xSbi-!~-xs9U}7CCNXwdlXzG|78D=hx}tY^(X&iTOR*py2n4Q1No=) z-!J*6>mmPiI{By5IhGc5%0nA+KIeM#S;-^$N**m#QqJee$Qz%}NXNL<1(iI^{J{0h z4=Lv}`xu|kXUWg;q%QIKe3twiPv)=l{X>o?b*uGt?#cNq?T)eKc*_1E566?~dOpjx zyz!Lj-gs&qIG#%Xm*jZrdN`gso#Uy~^?1%g8{{YXH=p|9x(@!2`jK0Y!nZLGc_ZMr zczx8D-IG3v`RjdiWgnRTDEh-b$U2dSePFuggsjKwgG~4OpzCKJRJ)yf(kJP9*aw}? zKInAqSGa~;F!e{frbVVz&W!z7KBHjWzh?xWQTo#tJ$>3U%6pcXlUODvJEvmaqInhb zBJ=i~%QKYz{8_q8G+uoHpQV&PC|G)^BE@^wj@j+`%om%2aX-oDW;}n#?6>*+8rw7F z^t7MuIX%t#Dd)<>U#9rDuJ;`aT(BQ~1;+DiPvD*!r}(U1z`ZnX=d*eP_ty9VAD&Nq z$ATJP=(G9&_tE%bpVb$*uf}+$z5@6PVCv5{PeO+-NbB;>JyM60(BU%R%QS`#U4gr5 z3>_{9zFcGIkOrKlF?8q#+)ZQX&>gtD#?YY$a1UUPSF&}WeUhz1oDY3`uz~MbNZacQ ztmJ^xHBQHh4>&{Pem<)oa6gR)_%LmJ$NFnL$Y%`z9-#3MpEVG8pvFUe)*#?Pz;Ql6 z2gyloFX$jSsWEhroYWXPNKR@D9V90;h7OXG8bb%kNsXa{~r4^nmr5&Ckr3g=zGBrF6(z?X+FYjQd3-%qX?fovk ztpFaP@k!gt1kThrW?MsnhidHeTUo$a8vFg$FyLVtr}(XG;B1ZC`K{r=!-2VmlJg2W zn4CTd9ZXK2gbpUBPeKQi(pHitb&0FTfZ z_RIy&)fo012|Q9`*mD%{D2-vy(ZHiMhCRmskI@+R91A=a*yNPSYjVouH92MSnw&Cu zO-`A-CZ|kZlT#+I$tlfilT*pk!Q@o3bkLlNNRFIv@$e@76gQ+|_9=I}lvcP(N;|wm zN)g^EWomdAq{*o~*f>wy9QGUsJWgZSb3E{PjbYCTz!NlvJ@bL{HHJMW0#DQ!_M8Mf zNn_aaO5iJjO-`A-CZ|kZlT#+I$tjc9o-nY<>a zOkR^yCa=jU&1;iW$SKuRn8Ln-a>(^87?kEBcu zKLcrUY8GrfOWPdwybAa#jbYE(z_T@mJ+B77T4UIA4)7d}Vb5!TuhAIxoC`cxW7u;Z z@H}9XQzoy;DU;Xal*wyy%H%aUW%8PwGI>o-nY<>aG_OrgB})gBQ_0dnbLwZ3BPZOS 
zOWEwcB;_gh7gAc`UrK3*k4Y)QFH4ykeg)Fx)O^@@zP35+xd3>9#<1r?;Ds8)o{NAN zX$*TV241W&?70MZiN>(!QsAW;!=BdyUkhw<%H%aUW%8PwGI>o-nY<>aOkR^yCa=jU zlh@>w=C#SGWa(gXDp@)#y-qaNicf185uaYGl` z`0P`AQT&ec6z@&+$?thj;r=P)n}YSvN#J&`cn2`|v6_b~yc3xFSj{68-UZBktmcvX%=NxlwcncT?PHa{>rdeHZNMg{U0{>bVPKQf z5nz+k6~HE^D}hZ;ZwJ<#-aT3Ey(CkI7x1~jy|iTNaM24N!amkTUO!-Cu5-m2Wpi?h z>wU3SVRDM=eX(9)a*FGHu~A`iitBxGqr&7A*ZX3#!sHa!`(lg21Yv2T411InrIDyx|3A_eQ;5D%3waKYu>0ojySvqJ=MdY*j``pJeIK_3YxJB8V zoZ@<4+^R4+#r3|pO<{71>wOVcn4IEzUsNbePI0|2wku3dalJ2gC`?Xqy)Skpaw>t> zuz3QnffINQoWN_~1YQFt@ESOQ*T97Y6Ff%dTsPH~+pZdW!Zr?}o1 zdle?9xZW3^RhXROdSBeBFgeBbzPMXqa*FGHagV~}6xaLWUWLgiuJ^@#iJVH{HEf>1 zYv2T411InrIDyx|3A_eQ;5D%3waKYu>0ojySvqJ=J?Z(W_qC5@aEj|(@i}F4a*FGH z@qohQ6xaLWL50aFuJ=V$VRDM=eesaOI3a#8JwC=8$YUSPEIW#{<6a4 z6!)>jR}?0v77>3!~_?E)t6!)>jw-Y&)z-!n%f!DwZ zyarC-HE;s2ffINQoWN^f&1;iW$8myJEyYzgo>`IEI;k;?x`$)^+VaIEWhaMxv4DQXNoF6V38_6tVER`eX}Y*afd2D z<33gXnnzUmCEr%%uUPO8+3xTyCsNqnmG^d)?acdDb}HZ5E<;+EK}*FJNcQz6NcNR& zv#->heWmW~D|KgISr_}t@|O>LDTU>GPkcFr<@;Upy6pSVBBlGt8Li9*IFU{A_TwLekI^CJ+j>7dQTxU45+%dQwgZ%xR*=`=L^N`-(nd5RCy8w52u_@XW zvMXd7WEy05$nGy5kE;GPMY}ltUDd~?=w;48SM?L+oWZW@>+$I2PNu8+i+Y?aufI*v zZcet>U(^e`s{S62_Hc5&{x(H9KB~V>QI3!5Z&Q@xqxyS1%JEVCZHjVyXxmS+{^Z*y zS${nT#)B*4!KnexTsI%r`M4hF%y%c@dJ=dy$XV!4#`R>R4|W#2Q*k{N*F&78?sQyF z$91N&%$sd(8athtqxSoycVa{@Q4zB0mI@>9B=i+)U@(*`by7O^8 zAL%*HYIh;77vg$^bA!7W*NbtT>#TE^;(96ak90Oj?yLFI6y^L<^Q9@u`K9K|@hIn) znlDXJ&M(!^rYPr^>St4w^Go&fc$D)?^|L9;`K9{V6y><6el|rpE~=l$qZ}92&!#BH zMfI~O%5hQsY>IMRR6mbLIWFYBrvaV1`b>0!zc$cdtsN_NB0j^~pNOj)vEO}^C>nwRJ-)rAqdhODR zC5uJlmqQO3=RozjFD2XK%+fNPf05^vjCcn=^syM}JlkXOtl?61mI>cZz7QB^2Sz!Y z+@MN3lCR{^fVKXWz*t+1c1qnkl;_zSo>j7NKBrLSd7>xs^qOFcF;1DQ&%+4R8JuDV z#yU5-kD?s);h7}?-4@_H&k5Y`GQbjubE`w}AUq!u; zc9?oW)?bEuU5DC&zAFXg*)N<^?70Bvny44@9BD=yX(Q|aeVTbbrz^^)4YvjT+hLTI z=Z|on=&{2sZJYYFZrjxBE3lg!Yv}$M&K>EtP5tC~psng`kEzpKlx5qrM+|4zEU(|^ z+V(s==gDzpd7QUAQ`=^HCy-yZNBiMSW-p9G@Jd_cIa}S{xb8d67UP_4c>Zg#mxZ%Y 
zsTjx5Rd!&!Q|<!sk+1(eTTF7QT=Hb#1$N*{XQzA0tPjt)S0LZOMYfpi?3SM~mjibnX$yW9 z@mW`o`vUaq6zU;|`PoGNry(zS%g?87xF$czZ;oy7Eq{P~?v8Z!pSla$a8<5 zui!X`PWl7nCVd6(<9w*{6{r_;y@q~@yp$YOX$Pqn?$y%f8L&AyOJBi$;Y_LW6)Mjm z`Y_F1(^rre{lN?Jn7)F2!WmTME4;Q%{WvCQGv2nT7kz~sYv?Y0h1a&J--rm$BtP9? z>O()lc4-UgC(g57`UyFv+E1LJU47QjY?uB*whJ5b3?h97=W7<`>kMbVtIs56VC=G1 z*kY!0K>7}CKgN5|{v#Wlahz&b$B1D3hkhuRy~-A|oEqs%6kqoD)Nvu`Q^=Pb@Z~D! zem8X2%&Pt#|ujuilU!iWf(CupH^X>rT%iU;;IZmzgE4ps3oyp-*(B~TG zVRs<%joNIBxy~Us2LGe$qfeoK)P9OSg?1hY9@1CQUuxe(`wa%aH34vz{!}!!e{U)MH0qWAe>x|6koU{Z6vlrVlCyzvc6atDZ?{%Q3L!Jm-sUP^BHo zSMul}jMm<4!- zv5+70wT^wyhONj^wuiLCY!9-YpF^6bkHSXOmCrKm4F8k^dy=E{PrQ%w*3v&wAJlP# zzESfw2W{ovsCY_$ME!9lyVqlQb~#^t4#RHJPs5+_`9-%!zG$7yw&{c9SVO-Uo>$Jd zy}7Uz+tbf1XKRnXM~)XiNAQf22ffBu*nvgPSKT2Plks6&EOzSgx$M=;&kOjO@yIvn zb~~`d`G%W`eEHjLvD7&t{fi!d`j&j;n|ik$xYqf9@>yagI78ddg6;WyqHO(=d_G~@ zvv0w34CI{K?0|mW=ktjk6Z#R>ITt$8_soayAr}{V^)5!ed`^ki3xC3Tm!jUG&~wr* zTW~LkpK03v@L5IIPrs6j{9}-x{)c{ramnaIwI3RV`y3;3pS+v|UecbE!7*}Y>KZ)f zKu%weXLiV$;1Ye%EchVipY7$J@={PeY)GxRYg|4cniy*?^^=yyI&`p_5c1HaYt z(D648;`t#9V>fv()*+Bn@3e(w9e<-6)^$Nh+^-`~Y>ywQuq%Di0r~*%*@^j(|1fz+ zyLR`tcYF9dxY?+0`aQPbx{5x4&r0-3`N)qmLN)xXqu=6Z8RrSpJK@=)P~Df$H77h< zpMs6)o2WaVowRSFP3dbo``-@JH}O8=DD>hwi}j$sLzntbg!wthIY!?~4wFwOz#sZ2 z_66}4lz(bNH_Ov4*oJQOQ|u#RHT0sNVm`KQ>SyXfKPBw|UFoOt+h~{R?=>d$S8P{5 zI~AU#UHU6IhMvC~j`5gzpN+_*urcO-4)Cl8um*vg9pyNA^?QCR7wL1pfOQAt+%I|M z;A=(!&;PP5xVF+ZqW>C=^o3u;8U%9jH*GN%a_P5i;ZG5%^k0e>oqO?F=(pI9@BWlO z@Owz{v_J3%;^+KBn^a^kejQ92?0M_#*l;&J~Ui=L*M%9Orm&9628EzV%kS zSJ77Iulob9LpFcQA9$1aaev^~l;86QI=m3{5(^|jgSYiF;oozYkNvDRE|OYhct zepEWrw|(4oq`%9<_gu@qyDeoLWbyO&r;LYO_r`ZpCd==?UQd|<$?wXhLh`$|S&;jB zj7_}?^1z^LQ(3;?secHTU-k2U3YOpayAy)tZ~l8{Sw8H$T$bO}sYeRS-`Qt)3d`R& zY-9?{7d@@YuX#n4zwwW%{4M{h%2%YQ^0!~E%HN%?%71R8D!<~`KV-X`-#Q_v@Ak_s zm+jm$Vt5KUor5%f7e4}$ea(erU)eVMO5OQA9d&14sXP11y4Y8iU-b2tFz?ydmj%nO z`uXdE3;LymF{8RyRz@QI$bLJerKP{Q&@h*9-ThSgYoC5ew!8L^ugkH%HRXS0JA2bFO;MlgpYA@YXN6C9AN73XI~L?5 
z-?5nWK>6U0GN#WmyqC1>0`;B}WBObNe4WOKr*l2<^%^6dP62R%#)zj=2wbQ!;^`Ct z7io-mI?I8VYm9h0D}Yx3$78tod=}runy+K5KnK2ywLoL&z<040Y78CtF4iKAp#$H= zTC6d2;Ja8$G=>g*7i+1;(1Gt_T??$wzA&y(vUJdAUy`YVJ`0u7$7dB|Y>KtLuJBnU zz$F?Zp3X|(l^P?S&MM$l8Y7<0YT(rxBc9G0;58Z}p3V)xH)xD_I%|Q~0>}9P9V92U zy`Y2Sq{h%ea#CaHAUUZqbda3X7&=H!Y78ADCpCr+l9Rx3J|t0xI3JRzgE|Yf6)}Cb zc`<#;y_i0(7t<%=PL}WXR=O&tPnD};`s{QQV*1=-TkBxob=uyy+SYpD^%~!1TN{8k zXpFa5HUe+dxWcwJ0dLZHyKUVFe51xYY^xNwRO4N?wHbIb@I*WZ$ap%?!Q}Kw=wNdC zBy=!2eG)pDoIVL1OirJK4ko8hLI;!6C!vGpbo{(<5_O257fzxM>J0i;FQ(5nFQ!kq z7t_b}V){f}71O5@-~TXHMiusnDR*M;7yoojAM~RPHZIdPhds9dZ_ya`yb1UwjbYE5 zfp69r_S_1*Rb$xm7T{YnhCR0dZ_^m|ycPIXV3SiOugNKs*W{GRYjVouH92MSnw&Cu zO-`A-CZ{y7O-?0C2a{9D(m``7A~~|ni|JGDR!Z!;DyC1wt&+IXRWW_4Tou!2r<)Md z2b?O0jmx#oVb9xuZ_^m|bb(!sVb3sdSYy~T0vyp8_N)M|&=~ft1g_K=_S_D<9oXcQ z$!l`T0ojySvqJ=Jt?`e&5P+%?#1+Ry_h}`_lNR+rK@84RJkgq&rUZXrVlu^4>sPX zZ4P^W7WlIo!=85l-=Q(=c_;9l8pED<0pF!D?0Glv-5SH5`+@gs413-Kd=Ie6DU;Xa zl*wyy%H%aUW%8PwGI>o-nY<>aOkR^yn%5?$lBI*msbuM(Ii=1*ZS!LKlzTCKTrZ|i z#8okUD&1qApHeY>cDe~MeZZ*$u<-$HbJ+7<;CnTOJ*$DMHHJO!1HMmV*s}(>Mq}9X zbHJa|81}p$_KS zbTB!UEFC`SF@2VSQ{r*&JRV}KaL+(|*E^2~%sm6~J?}goF!u~Zqr!~o!#xA>l){YZ z!#xA>1BDsWhkFL%X@wcnhkFL%nU;ME#(Y-y)eh z=zT24^eM*Ji070o7}JMqe(}7*jOoKQzj#q$#`NKuU;I>I#`NKuU;JEQ#`NKuU;IL0 z#`NKuUmR1IF@3n^7q7JNfw64J$!3p}jOjy8HhY|8OdoQx+2bT*`jC^&9w!;ohn#Ho zILVkkCD9I*hG&P1$=KFxUIy z4TZ-8bGJ0h^+{g0H zpp#Qv=ZZfoo0C&q?~A`EOippVFaAqma*FGH@i&FZDX#a$-xVgOxZW54tuQ&o^}cvZ zVRDM=eeu7EoJ!y|Y@Wbt-~?U+C-53Lf!DwZyarC-HL&Kj$*E-NU~(#1I%rNsy!By) zl;^pRWpIk?T=9;wIXT7kzIaz*a*FGHaZ+J&itBw5Q<$9MdSCd|?`x1#T<;6N!sHa! 
z`yxeQa*FGH(e55g#~$PSOyD(e0KSbTB!UEFCnb zKF~gv!KnkZ@c?CWa*F#{Vvxe*6!)>j5QWJp?qi9e3X@aZ#}dO7Ca1WMC59_ZPH`Vg zj8K@I;y#ubnaHUGUc=@IyarC-HE;s2ffINQoWN_~1YQGcUYndsmJTMTlBI(_->c*G6E#CUi6NKmC8wS1O4&RX_z-jXATt+cc8 zRg|k4y}Y+|&CW*VLEa~H+!5ww?2)`%x1}<#ZdaaxMgDx`$M;5cd^eD>b@*-}+f2o~ zt;}09ptARZRFx0!I3J$8cirGZRi>U2X-C+GF3)o8N9}-zdq0M9RvYy*o%IF1x>)Zb zysIkvGGKY{$MEhj@3pR9|8^zw%DzCq7|%09b!bce_OJJQntV@w1niIlJA4)W*19kFD$f?H_$?e8#&F45t75ps^Bej<)<%B5 zYpCcg{W@mLJBZn$$A;}UAU|Wc@LI=li6M@Q*7p?Z)-heWTbI5ZeX)Ir zaa}s&`o0&N`Z2bPzUKSax^BK}t<(9=Ak*t-oX+W^Gr;No>Kua^6NYK>41>B?$5_{r*CdZ|4}3Ui#+oTq`D-b0?@;U5Gn&)& zn46N*M*X@T_J#EZy}B5~M)SE2b5ruUjrw(L8_nr@=qfpVhIaD3cBm`*Sj%=apX4ivN4!In$J&GZVftNW)UFOy8G4D#;TZwzuIp1+t<9ao& zuXmo1XDGGqQU%Wo#+kkr;aIeVuzC07fd$miz=RD<) zH{t#!++Xhe&}EwR$?D$GQrs&=8!Mb2xzdj>dAN5_<$0nE_sUSN*m>5y3D-B_y2SZ$ zoa;49miJyjUyS_p#VYnmQ* z+^>3EWq&O2yd3w+k#Dv0GZ`B|arStW=L-3GAo;^FM_$F_rYO%DD&96ld9Lss6^~(CoHNvC1DPjY7xE}RACL09q2hB>l=FkWIbK(s&rMOz55?W4sGJ{WeaNS{ zdpyedq0a&;&Nf9kPxxNH$q%!iZS~9SM_c-;`F6T~raq<~{5? z!37swSmV5e_!RQ|@FD)@{DYX!5!{R5-VM$#rEfU8=HcG_9eUh5T#0*?;MZE`Ww#2~ zRmiu_c{T1E>eeXVz;^InPmS^)O;P$Nwu7|zZ`&*0Hbv>5v`?sqj@l z2cIE*f~xC~;&fA#zDo6}DN0|ZeL_9_n)C_hsgL#rb?|lc1?)d`KV<4;wyk|ZJ@lh5 zIN$zjA5f3>qz^btd)fyarN4&{*a=(gge}%Pue-Z(y&KmXoHye>fc>Bk*n@j}V84yd zuibsP-iLgfoZrfP^0S5YA>SRicL(m>==|Qj3)gqyy43lD{7jRdRjdzldq3{&M}3=} zKe-2ReE|8&oTiq~KGuhP)woxUdt03UaBFZ~gX^1||7`i}V||$0_v7CEsPAUyuhK^; zE;dE!+w`*>$Cz<_c%Dx2@pzQ}j=n(0`cd3$iqhAqv1y8u>uPM8qU5$3i{nvpS&cOBQ%_T;f3+U;F&_s#=68Nb{^omt&mcSp+A1ze z2P^BHoSMumGoPAvOU0dAZ{6oec=)!t=UKCs}RC%7b z4tcI`w8b{(f8udbc<#LLX*+PM^G_KUh5EFPp)~(1c7UAc*! 
zUYw#!ymuWDKRn-?9Uo6J}hLg(%DOs+=E; zi^4wD`Q`>>3?;8^o(t9bwQk$gi)Vf1SVMOi7sYGa)X)4)b)Iq8Ix#j1+t%?2tn+M} z=Y8e4!p<^=(iz$|+ha@=*`9aa_j-&&;SX$4?!4_5;ClHpwz$nXA>$Nid+8V`#V^_c z*J*YOk+1l9{P3f5O2$1=e3%>H8Q&t@TlrI4L>vomG!1xRHR2CcI5vL6wq=ZXws$%5 zt^S2ADjg9fPj0~Z^X*P*nCE)q_2FFa3go-t6Fhsw%W+zc^XTO3wMge!a9rqLOum>p zm~DPo?a_~Xli4+aO%6hkS8^TLY*V1=ZwsAYVLbvR4D!|4zXsV>p})C*F^8dwRNle6>{>=$AvDPN)T9HI}?+%^6^wQk$gi@rjRHFTH0!fTtH)cQ$ZF-OH)Gj*b`VB0ztfOVd2 z(^trG)xP2kZRmHLTbCjH9#)F>@I7(&U%x9hMVa8EfjWT6_vBe!uK>8Lv zraF$&P0;O5r&o9l^4;_|TioRYrEgJu+#k?!l(s^jyPdw_8<217f7@ce6ACkqlCF<_ zg+5aID*6@L`UY^3{)#?R`zP9P9XQ5$K_98-0r|55{2|}zA2rWe&nDEvF{D4#V@H2u za?Wi3U)?r+PqNykAIby28s(>WE>^0J@Q5CWVTH|B*z;1$v8^q*&gGPus!|UqWjlYd-OeWy!csyI7;Qv z%k}vK_c@v2^%xTz%oH_F7UFn&^?J`IF7j1g;17Jx$qsKozKGv1?ssyef6?R5=aUHX z?d;?aJmBQY=ZQVw3_qv#!S?hi%GNK*XB4)5$2)kIfxPQvVjLwsCiEk$b3b(CbIbww z9&)kTtG5RA@>wNbFP>Fc@BOHEBlN6n=a+Gm__?P251&_b{q!q0A^$eyr~lD=C-kA( z4{gPLjuE-fIavu_a(t`6F>+_;pYf~%x%O#M`5pw4xeqkY9eg~oMC&p2N zUFnnN(+7CZPeGOcFnLG2>NrZJsBibBei=uJJ|G@P3HkY51V8KOxAoM@hzetDH}H^<4V-}75HA^ol^{NiTF{r$Xh@HJb3 z4-E1PJ|Aft(SO~7^y;C0u?@0jxL@1~dH+a%JdTp$MQ0fY?p)$1=`lN(I7;!lTg6f0 z=Oy`0|4hEq|B|QlW4eC&G5R}>jpPb^5q%lw3de_Yh2ukxb38bX9FGq(j#6u`wxxG# zJwGZP>DxZ;I?~^jAW#Z?j7zKfY8Y6~LAK*S3BZg65;Jz9ohS3$k zR{+Q3=J1HHHppz-byohi<^#G=>h{fxBxA z9eM!w0M_S?7{jOw(vqQrK5vvv9rXD##!(7k975V&SNP=bZ>MXF7)BYmpP?~g81)10 zr!is}^#|^+F=7}E03M(*Vi*ks9;h*57!3j*1RUoBbda3X_JR(QlNv(@$w`f&gXE;f z&_QxiW9T3`sWEhroYWXPNKOLB`H(~%;(SP=4j7Y5K&Yzv|;wUwHag@fEFrJqu9lv2Y9ag^YvCd0;)wasDADZo=ShCQbOPt_RqoCZ8iW7u;#@N|t~&l$io zG=@EA0?*VK_H=+9V3SiOugNKs*W{GRYjVouH92MSnw&CuO-`A-CZ{y7O-?0C2a{9D z(m`|TNzYHI^JmRo9HmoU93?BP;waf+6-P;gRUD<%a6%j! z*}$_khCQzazFK40a}MwvjbYDgfUnUQ_M8hmS7X?79`HP1lT#+I$tjc90K7nB*mEK9LXBb1MZk+RhCLSpFV-0LTmrmAW7u;k@KTLo z&uf9N1vWWl@|v77c}-54ye6kiUXxQMugNKs*W{GRYjR5S+T>KSbTB!UEFC`Sag+k! 
zlo+k{u__SPhI{~E?P#2^%4^Vj+M~VAb&4UzX93}2!H4jmkag?}^)jU*T#!=!vR`W20 z8AplxSk1!~W*jB%V>ORZm~oW2kJUV~C2meKb|i4juJW9>~WHDl*q|ukCTj}L{2t)oMap& za{N4$SqwSk)5qrW|LcO-|niY;xKKHaQ&zHaQ&uHaT4Z zY;w91*yQwfV9jZLMmw20=rh{M)Ipv5|A6~g-noBritAjlM%kR4;(A}KRhXROdS9$p zn4IEzUu;yEoZ@<4+^8@)#r3|}tS~vn^}g7mFgeBbzPLG&Qwh9=%@cSHoWN_~1YQFt z@ESOQ*T4z92G+bbIh8COOim?B2hFL7w?3?pve}CVe#%wxz^!nVOt-@-9=Hgrc;KmF z84ujx6xX@p7G-mCitBxGtHR_I*Zbl&g~=(d_eEG?a*FGHQK2w7#r3|}t}r>p^}g7l zFgeBbzSxz>sRUlb<_Ww8PT)0g0DX#a$JqnXkTt>uz3QnffINQoWN_~1YQFt@ESOQ*T97Y6Fr01vJ*FKiP zsadq~=akLKDX#a$0}7KM4wZhC@o;WK{GM_%j?&{eH_bRoD(`7=l*->V z;wYirc+45($M;5ceD99G;kd6||L95}u8 z3~`i@CeJXadx>$Bp7Z#u@|-=6655S(8f9SX#5hXeyo{rSd*Ezh93_>%wou(`JB||O zri>wB)E|$dPm>Cgt;l>$h1+vjyt3|T@PI)r_az%zSj^uCaN z5d$Iw83GPCqgvugpgrczz`YFI>*5sc0PZz{c>I&JFv6PTb z|68c;*QX&p4e8w>yT53}Qi87SFfUn`8t0~H7pH$bZb}X2iHw_~>N}!r&=kGQ8R)8U zN1M*zc-)j)%w5U(HuSPQy@GA%75C|w&vFiU?V7roI_No23q9o=XrtY;jYr(aqg^>C z&eE=)6Ls`o@av;6uA^Y13}==*2G?U?*M83IcpQ})`X|Ik$-})oOuk#R~CXOBlO zcQRXIDRIn^SMj(hn&xD=inmSCZccVfEG5**SW1e=ur1=sofb=pbs>-9^YLg8C)eY1 zQS{}hj<1Rb?cP+UG9rH|6jcy;g@(kG}o4k=DIMd_Xy$w)`xrxac?2+jc{&o7vp*{ zu5+DrEuVd?4?JCpdrMK@NN0oeQHqOAQTjIe3&~^F>BUk~d^{edzoReEv6K`yo1*k} zYHXUK~zHM;~MI&D7J>>0hk} zeay!}51;i=hZ;iGj)p!dsf{PWi9)OJmA8HTEoVr-&#DckSMRhQ#<}yC z8oH%_v!a{zYD2f(*X_7&_jOf$b}g&wmUgIOS!h|sGV3=DKI_eIFWbFrC;qAmVv8#x zXlrpd(YUzV;+n-V(TFvN6c}wasIY#BI-bVgj~ZguGYu#f30Cym+b`Cy+RE}-!3yXc z3sqa4vF7U+s=##xf=Xlk_jE%c$`r+l_Qql{-?Z4Yy(oLCXxg-W5$jph_ZV$d7Gu=A&nHlTMjA>@ZGHWYl7UNBv zDMnUXF|pc;fz>+Z70*@cIAmtrF*Cjx<2~q@VjpGfDB9xVtRH#san?_B(2S{N#?T@+ zb*wBiMi%*~<1d+UmspREv1GoQ~boGn%l^S6%8Ltg4wPiAZ<+TlzwlRi%IX~^wQvi{`zCs}_z2U^DjqMx|9}+7{DJ=@e$5~FE9I~J0s8sY zafj&NPk1q-N<3694pOX25UVcj8BiCS@<=!py zi19lEFF)m(>QD9dD*0Wm{EZuYH@gVGry$#Wba`)C7w**|?i|zjy(rW8eW(5oh~LTT zeD7c{t94m^C)qlUW5%^%3f6{t?WWgzlc$%L>+!6A4&#aMtmN7Z()>=4-=_f|rr!J> zQI03lP?CdQezw8mNE)ncmeSp1_ZKC#*xz}TP{sdC`6NT%73ah;yTuVO!3635tRJfKB zSnc(Ac`Ppv%S+$k<>6;uzL$scO!esJH9nsO#@T^U&L%g2SagB0wixY{x;=5-Yl1Dt 
zIA!?0Z@>$|DRyA2bCcT(>A@>)k>_l6d*izAG+T^wwz*v6gs!p!h*vs0-7AnjaFH!0JGyNfQk*JY3`_+7;S);=!*2T;kMv+6PM$<`$${xy9lmf z*iU{Jk%n~klixjb!!`TK{ub3k%18}&F&wc8Kr*{f_Z%c+s|I{@jq z8*MSisg?DNLOs_w54!`AK5DZq<~oNYAIYN-;>VKnoG;`#InQy<0C&iF&K=Epa;HDI zL(X&VXwI{qfvAU^=RDG!XFY>a566P@NRI{U$wWOI3(hG$7OW==^>8dWr}S9py`E2k zuH@+_Wn499=@Z9*JM)|`x`Qz8V{W#E{CzN_=Yc!?9k9Vj&%4zY{2j17q>rz#1N=R! zAxIw|wgrE`YCO^>-EIf?`&F4p&);qf{$5o+(x={S2l%^GY-=Xk;yjs!x>?ukTi|OU z=iFuon0GGfrhlCeo-_YKFaKgM|5D^1iuxz*vSl1*@+lj9;#{H6)pLb*$N`@?S2!2- zT**Ot?u}}$a4zb(l8f{)sGDZS?~3HQuQa_Kgr{-=Rg+hF?laOk0GbtX{%Vk9`8~; zev$(^u5&i>O~1z$!y#wh=j8`KbAV?(fX`{j*-=~MLeBXDKA$1ye#y%Ro{s|NZ{LiD z!f#h%2jD_THp!icn>W8)0!_NuMU4AZbu9BPay&jLdoDYm2&AlFePH^th7jTZr z{NOHq0LO@Po@2y0&N1Lz&Ly=N_5g34J)mU`%irLY>ulW9Ygqo4uw3&ZO|N0ie0mLQ zrs*}TnRd>5Ju|Rxf!}$Rdp(eHZKiM?e}fiS?e)y?#uI7iUrp=ZkLB;ka*d|PlfSzw z_bt5qY=dht)`{}9lrlefE7$SRL+)F6`SZQ}g>Yy$IQ#0@8vJ_@>4Ru9y9)KtK92> zE%li3H%K`a{H;_yX8gTVrtx=7^_ZFY^q84xdd$o;bIkbrk!+j4SsCB!S>xFO_u_j! z=##W7#!l*2d)d^aq_cfu}bj9tiX zj8pqX`Tkf2-XH6TchA~-?@Xp=dGDd+dvcB^3q85XIw7BF@EuQ|UHz*1_3l^Qy*J+L z>W%leY&(+f_n-b=mzOTz-$LFDBX2+4!*51Zw!AmhuR5b2-j?c~5$RmnuPUP-^7ljC z9XdqPFTAj_Z)HYR2CgHx)_T}hk7HJEpY@=1;W0brq}i z^PRBYVqb2|Sm#N+YnP9{g~kkWVz{<0Kr8u)_({Brl^zO3o{Xq;tw*IbYS>xZ`3Kq= zHQK4Lj^bUi$MN?aytnqf-EQBTN^)K zy|*@D!ak;Z_fg+nR(>pB=F6|m-`5*s$amL5&_lnwHYqkq-k+2{Y2Sg4Pc)qCJA4Ua z!EwYpd>!x(--WRYEa`5iuCneXs*#(SV@EZTUFuj8}eL8Q?F{{;Th z`edcrIG>h=^J$g<#&@seW|}|f-jTnvv16ZL>IvNMi2Hl)0Dh}a>W}w8;=W}))41oy zxSoGJERO%Z)7vTS-S)VbYIQvp1*fg{I~$`Hf2MsqtNpQdFIrYRo>jQW#JSk}yY%I> z5gSCRA5Znj7wuzNX;-JZX$#upuLIV+oqw5zcTfe-tNQM?QXo?xeJJODuDuW=+ILJD z(V-*P8R98_+V3f!_NV=U@;QInpD17Sr}6t_@pFHg_kD62zfTsg`P2A)viOxh?YDTY z|LqU_*6$#H|BT=IJ>(yr_gnu4`KO=ytv^6E{lahk5%ND?@mqg_{LeT1*1tpk^*20| z&pnf_$L>SFr%c@0c;Cs`1u^s|hJMHJJNBa4#2M~_DHu2Op=0`Namt?=mS@D!_l{Tp zGu&~(+xR;XX5Txa@6G6QGx~fI@+9OtDEIDjP3@Y~-|loW{Y0l%(TC=*`_o>h{FXoM zP0GjpX}_lY9{UFAjZL#RHqGAHG-GUDMIT=Mrr&xE^7U`~t=A#n{I1`61M=66e(Oz0 zj?J$iIX1tB_rLw+?0t{@?bFlu<9H6R ztnxP={HY2#7fJNMXb^pf3;S>t&>=hcHg 
zyl8Cw`rCXa8kTkXJL#!>{z&mz?I`j43hgQJy9yUj;)Qsd62GJ1r^N3kbfE0yvo55( z6qS3vkng^m?<-sR!+5mc;&*2Hn%_tJ@Eu%fgZjP;kH63{`hS1_HwO63Q~%nxSMov+ zr+G5PlLx%}9Xwp_$+u3$Hu7?gcVBqvWghlmTui`K3fMjpQuuuDe&%A!hkrnvFg{;z zUmmXgzTWmFDZQAC_1{kyx!3;fv1f|EkoUdS&+We|bLi&PMa3nhn}@Dkzq+Vk`O0Cr z1)EAr%Qmhp*)WMpN=?hP`*Avsbt>=}_&a#}Vx|p87T2A*ea{jU{JstJDz65FijKwzU z@_PPi8S={O`?|cBj5w*B$1Uya`L1Qq%c#q1{k80k`+lRm_Agp$|Ke36`=GBW`{9rM z()GuA+nT@3jDPv^|CaxT;*#a#!>s3n=ink8c-Q#X3XZ@nmSrFKPtUVR7nO07N|s~9 zOFe!az4Y;3nl69bo2SRU@*TA!_VTxC9x-l|{@(4l(B)ebwRQO=UiqweNp&&&GL`d& zR_VIDmTTH5|Dad?;91I-;=Z<>nKi$wYN60eBu|-7kDuo6Hl*pX^Rh}&>y_uhF7uz( ZTc=gw;!NcSV{eH6cF6x*{%c+S{{zE__DKK$ literal 0 HcmV?d00001 From 86d97cbf4ec88d477ed0b910809ab61983ea4d2a Mon Sep 17 00:00:00 2001 From: ahmed-bsod Date: Fri, 26 Jun 2026 13:55:45 +0000 Subject: [PATCH 43/43] reformat --- op_tests/test_fmha_fwd_with_sink_asm.py | 6 ++---- op_tests/test_fmha_fwd_with_sink_varlen_asm.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/op_tests/test_fmha_fwd_with_sink_asm.py b/op_tests/test_fmha_fwd_with_sink_asm.py index 2ae3c894f1..5ba1e51644 100644 --- a/op_tests/test_fmha_fwd_with_sink_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_asm.py @@ -348,7 +348,7 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # KV-length constraint (mask=0 only): the non-causal (mask=0) kernels only -# support sk (kv_seqlen) that is a multiple of 256. +# support sk (kv_seqlen) that is a multiple of 256. 
_CORRECTNESS_SHAPES = [ # ----- Small shapes (cheap, GQA-light) --------------------------- (64, 8, 1, 128, 2048, 1), # D64 aligned @@ -372,9 +372,7 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): ] -@pytest.mark.parametrize( - "head_dim,hq,hk,sq,sk,batch,is_causal", _CORRECTNESS_CASES -) +@pytest.mark.parametrize("head_dim,hq,hk,sq,sk,batch,is_causal", _CORRECTNESS_CASES) def test_fmha_fwd_with_sink_asm_correctness(head_dim, hq, hk, sq, sk, batch, is_causal): device = "cuda" torch.manual_seed(0) diff --git a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py index c681323e96..5b2892034d 100644 --- a/op_tests/test_fmha_fwd_with_sink_varlen_asm.py +++ b/op_tests/test_fmha_fwd_with_sink_varlen_asm.py @@ -20,7 +20,7 @@ KV-length constraint (mask=0 only): the non-causal (mask=0) kernels only -support per-sequence kv_seqlen that is a multiple of 256. +support per-sequence kv_seqlen that is a multiple of 256. """ from __future__ import annotations @@ -146,7 +146,7 @@ def make_varlen_packed( init pattern (mirrors the fixed-batch perf test's `_make_qkv_perf`): "randn" : standard normal (default; exercises real attention math). - "const0.25" : fill every element with 0.25 + "const0.25" : fill every element with 0.25 """ torch.manual_seed(seed) cu = torch.tensor(

Q_<1-m7<^eCuUlbvNYWSA=!0RL<|*zC`pTS1RW0J zxo&cPbN%D|&Pe@AO4!@Y^^JYS_`ts58pA$8u2j9o)QP_2;KZC|ykOt39Q9vi++ZKE z&)FwjYuFdal`46;rG8#p9I3k_Cg+;8h3k!sYe_CQH!*Uh>fGF{hif-!TU_i_Ql6-7 zTzA+up}FJw5?R?H)$K!AGQs-QgyEGq}0!A8_RI*uN)_?L3*6f7M&|q^qaIL zBeuVCoVXUz4sxaHyxn!8-=r-pKhZdGO=4dlSE^o5RjoZd`%Cyh4=MR}H zHHz;&qR3mqZ<3fRwHz6>~*8#5GgTB59_$z2H_E(X5XVEAwe@FfPrhf9GkH5fj$0&Zn6d}s~a+F-3&&a((b_B4Mv{Q9>6_-b9{ggnv-U{;DhF*!SF$I z(qQSEY zm_J%eCox7#HzB!FRYG#57A5j>rGDW$Ju$vL&3J$5I=z5<8GO`rih+v_&bm%-;NAv@ z!cHIHJ_d)wPG8`@21mk932=$Q=Y*Xrfv*J4W-3Lgxob9tBw=tC|KQvrRD++LsiQ#5Ce1i4Zhf?TP`f?TOiVn)EJ znOdrZUAv}f?TP`f?TOiV!tkT6Ot=cB_vmBQ6eu_DmXO>V?4=>ImYuE;A;%VcuoeM zY%s?2THtF9#&}Kvo?CF`l!5XB&+1ybky}gE5|Sfae&D@tg}h*I6PR6fUQ=pmT7Qu!RKp_gFhO67B`hTejiE0xc&8u|)m zu2eq9YPeD`bEWb*RzrWm%$3ULSPcV@=f*6g59YZ=A$>5KPI14l?hs5)alfxt3nr(y-&c1ECa1XHS9j-gDv#F~^E_Sy=kXdikJrF? zyavwWHES8`|3M_ z$tmvl)uV#RDem{xV}i*k?)TLbg2^fF_tlev$tmvl)l>PL%HuW0JdfAFdAtVB<27&| zuYvP;4V=eoV8d&hQ-$)u=2W44Fr3N+aq6s|V_BS<#4-M^j5#^Q{l0oeFgeBjzIs+L zImP|HN(m;XxZhXL2_~nw-&fxkOippXuU-&LPI14lUd-oI9R5W2v9zb1IM581p<{1LyG?IFHxBdAtVB z<27&|uYnD(ZB7-+2b)uc@&T!ukf#CPT?A6+40#QC1|0duFd7STrM~uj2j^AE;j|9< z;i5y=s{0{3drvEt;~n1pF*#r3qrB(!KUd|D%qOSwh`fe8cA|G+Bg&ayZWAQ)-o>R} zEhX$6+=B8+&kTut2a-8WKgWA!)vqD1o#-9n^*fCGB*N=oG4-#kOFT!%v zKS)2WnEGLae)OLIiLP(@lJCLg@-w}JoCeIdA@-WIkL7G%OwKoHpUL-;e(8B_hk7S! 
zUtH?fP@brLCjUp}OK(Wt5z%kbzEY{*qPJ5hTwQvWN*%j5+~qknb&C(&=xo;tDLq&=*!bL^mBO|DdOVMN&_Mdbd~kF_bX z^Wl;A*FINLMBdN-(Ey#JPmeeH)8tAe7dnm=?suF0Sw!bb#mhy^mq#Ac?6X@w(SOcS#Z7V`qD+(H`oP_pSR2?>k=o zUW9z_HdFZCcgZI@m#NNm1pk>+g!TvDDfUM`AoeFbDfXwncuA4TLBjE(KJB+1`;NBT zW6_5?UJTv%nT`+M3oW$186V~tIhFsffS+bOsDCQ|O>PqMkU2+A<-f^MLjQHHs8jfF za+Oq~|CxW}RQ{WMB~|Eu>YvJg)Ay(HTdx`L`BeRF`r7m%W0=Vin0d45np5*8)|^A~ z4GOu)eAM)X`KMWqn7=b7=OtI_n^tG?y=xV}bKum8&$D zFO_3*0z3Q$g7J+UIjwxD@TajarXRk_2h$&CoG~+uO+e@Rr zbgUHJTImz`;q~j7h5%yjoV}5YZ|ei^-D!JuRSb7y}b6A_*#iIK*v|9*Q7m3sr!}VVd8BS z)Q?+R~nF8^L7{e#{xoErwcuHF=#e^2IEeX5n$N9PFR z9IWE{4PBXI^{Ligi7#^s{P+6h^6yo{N9xJkI-F|j4e+J?^(oF-lYftW$~Y|jQJ>|*K+pEw_BD4;OaGu}`_7nc`R9V(`M#l@Mg2*Wf~6d3Ga9Ot@w zGbCT?$H0KxJQa?t%R5@2Y4(G<+3D>r{H)Bj!V3Sxp-ZRexTkooSO!FS9**6891H+y?)*;xpQkJn(*DCK&P@}q*{6)d(jWCH&P{25eTs9@#B25;^`t-QQ=F61{=+Hu zr-|3>L&jm-ciUGJui1yxvwgRHHSwDMF}~Wq+rFB3P5-E8`)>Pc;83XL)v*D$yt;C(ar-$J15HfMms0W z^M>*h<#nU}iE`#K55+gpZ}R$^=(piG`DOCu+4=F-q1@!bv-93DcbCa;XXmv$Sw1q_ zI$8cPa`|NW#!i)Aj6T`<#K>WjOV7@o$2?LdN1mM%&(3{k=e)CXUXizU{;tNkt{UgI zqQ0GX>{L0dIKRFsuNddXA6uI^Z~oZYWagBeKdf=Su*P}88s`Dy`rummz8dHGI$4gT zljT(UC#UNH^=>%I+&XPO2uGQBrp@1kqs%wc=I_GMQ{_xLS&k$-$5`W>VvTc$HO>{L zd2i(eYn%)0M0tBomZ#@bd3jEiho^D=oyNI$I2ZpXwU_hse^Pt-`>ca}XFD7xO^M$D za=HIvf_KN|d@h$?so-i#=<&#!a2+|q(~X5@S>H(Q7xm_XpnhK!0Q$NVG5a-cwBQfw-K{z0XuC z_z)$3N74O(WBz86goJOocUjVc_h;mM?%k9+!L^joKdAdd%BmeH%e~(d6TCkz=X3At zlnSn;qz&C4Qg+sUIM#3Yd_PJ*BYo}p4*FT>Yj1S;T>r-Q+Ya`-So%5Z33ToEnP`Xa zBhe1uC!!s`7c?8sx%hm5?^I|zF9p{Obop*WC7vNsCmrZi20DCCh4%RV$t!qLEw@zgAr2dX*a;Z-xsD!(0$+mCS75O0moJ$Y=b8y)K1<+n$9Y_zL}dH47oaNOx?S6$_;_4(|e z^EGaCxVO%ap}e!_su7;=cf@g*Yh4xhl0Nq{U6Esaq_^Jhgz~P_T{X%p*Y>+3$M|S( zlYbe?yU%vj7;m$-*%Nt{$9fOx-!8a6XxYn+^7oaN&}-SlRs22WVjN%6+g1EMg?sH* z{oN>kKe+_ut*&$xe?Pet$883>iocg|ugNy^_mWm9XPf!^NNXIk&Fl+aD~6om?29%i zXPemt0s9B+D<>% zxyF0i?~3w%x4CMvw?o?*06W)u-}Aene837OJpwNBN-DuA1iU)cQl9 zKi&INzX!^P-0dp<+sC}`<^1Bj;2dF}cLBdRM>yBa93j8Df?u2?oNH!|kYC-wFU}Fp zJ2OXUrzh-ij&RT 
z--hFir^CQ6{@d6+F&@M2a~1y$>|rP$Ugk#mZ(sL9`S66R_-_>tNBO7+-6;QU;$oD? zH@J%bHgO!~;~sOP{I`f1$0vfDv^@#7X>0Pmh}n=+?sucqod(;C(=#v^s6Q*vzb??9 z3;o`(Kk5Nj^?@AoFk&#|xJTXmf9oNzOJB{u?LhAJ1NS(`IET#~>xc3IcgP&$95!=o z0Lq8J4(Ax>u$g0nP)@s?W9029@Rqj6z&34-TZ-5YIsSI6YmgIRoAYE6<_YyD2l`V2 z{b|r=EZ|(_+-F~NuCs4hFZV1quuEV0Z#-~r{IRu(^X8AOO=eEq7?S^P<`78!`{W2Ua^6wY175YAo34E77>Hv2{EVg7KA zvtKyJ*)Qxr_6z%u{c^J0_b1DB-#AD5@9SCY|CIJ}p7L4j|CILX=K@afoCH!oClOp3 z6TCYvxQY_zUo90}O$jW|VsWgWlR#pRz9TI-O$od+Be;?hcz2!PDoS8^P9kL$j+E_) z2~JbeMqF?uC2f=nuA-z3j$Km9DkxEQ#(wDj=o~yJfkgZE#{}0<0w0JAuB8M%SSt7s zC9ph;#j$=)0tr9%qy=Xvf%j(w*H8ij z17SG7=s7h$h%u0uBRgY)D=C3@#|2kW0>4@+xSA4J zVhoOTjDZBdcccZUDS>xp1Xof5@2(SEMF}i1M#?H2DccbfoTj9WxZp}k+9(xVMM)ca zPDxosQp(QQ58WTjf*1pd_U(@euAu}z5Eooa34E|r@F7ZIi7`0VF$NO8?MVyHPy+AI z2(F<7K2RsPmJ(QEjFeS7Qnn{1I73Mralti|v{5R!mXbDfe@Iz%Qp(QQkGvQ&Cx|g~ zf*3O=h%s}H&!LJrL2NjIu9-s>!Td@GbFng*i&dD5W)4*Zb1@z0RtCCN&^2=?9q3dB zI#q#AHFV4zI+-0ahfbzr=FrJ>axrFU5M!1GF=lBHW0oH8pX#MSj5&d>>7VML?=nF@ z*984si+(o!QyuhkCeW=3bZen&`X>|U)C4-UfzBc5nEpAL9n(K2(=q*XG9A4a!Ly*T z=PN%sw!F-pjT8!aH#VzO^JCe>weAJV4?8{ISJ+J>!`l1qIj-ZLqb4GY;QOVztmpgg z!;Vv0bl#*g)okL}^R}-WdtT9G+HlWRZseTv+%i?9%R?*!4nsyF8Q4wkE1goh{oHA4 zd$XyL?JA;@YS^%3@~W%WyJ@vuO~GI3@v_Ae)b?`~bSH>zI9s$miFbV6bYy!`q$pWb zUbMYPL4Wx0WOBra^`>pE)An7bL&$m7`PRE`HsmzNi^S!QDshSNZ+2uE*o$yZbput|5 zvjg>>#ozb#W}OtS?cjZ7%LkkrhI3bCuX;F}&4xN;JD``HV^?+PuqElFQSUkYJ&(Wd z<8LSaUcld<;_t=19kU%#$8kDkJE2ZC3%mt&L&#gOrE#B)D%w<5lrDM@z4s9HS?{v! 
zWx8I`R#mP#b=m^CQC)V~7M2I+VXt|pbGEbA>zwYqWeR*xqAwKuh-4#LuS>RzKHnwM zWy`ka>Ahu6C3IfG-!8QI2Tj{-`Ug70p)(>o0(P>Y9@!qS;~X2&qX*i)8}(nt-z)h0 zA^xiH_aprM7=J(6+bi2kU%NP4tgj6WUzLM@Yx<;Zr#{=(JKI~=(`_#F1{IWg>S|k1sv&9 zM4q(@$NWdYujUu8^5j*d3h(dIhZTu1u~b9A)1jyCEziN`y6jKLYivtKeei#Yu?gDJ%G#~Dl` zl8)yPNyqbuq~ird(vfr~9Z6@>k#r^+_pB74bc}|M&h>{{5OonyUt*QyfSx>Yoq+oe{*WR@nj_EqL@F4f`CeyK{ zjbfGxjlACDhM|}H+%3UwEY(0Qt{;oH0994%+aG}{H1u%80^Y8ov8smZx|%1ec9OH> z)YLbjvBy7o@1wcf;g6CTFsi0kS@lDU6sP4`hbqq*h1*|4DHx4{|=MYnBGGWei(Rwq~Z{ z7&g-RteVqwJ(U^2xDgu~P}8Xl?Xgr=ojst6rjbE1wqt2A#3aVul8f-~Ponx_*WUiF zUbTCFf9E~YpF(-9Nl6SZWt?xOP|3p zG0xiD#2gQc>!;9<4=-H%*gYqLpX2--=f7XkUWJQml)A^yn<;_dS9Z}#{z1<7m6&ef z7;pbjMO^R&rv8Ed#BMj^$X)n(tS_CrvfLA#|0<7BKczgOpXk4DU`%MoQyuWOyY1t{ Y>J}5nErM@j;qUmLaXvo8Zm~-KU(EH`EdT%j literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co new file mode 100755 index 0000000000000000000000000000000000000000..a166a10b007ffb7064ac560b79cf889d851e48bd GIT binary patch literal 68776 zcmeHw3w%`7wfC7bXChi#fm)3)z<`K6LkQt1;_wtWh=@GIN0R{IAx}dry}UDgyKU|9hQ1>`X!kc-#DJ z<@XEgti9J>XRWo@Ui-{Bd+l8|Y5HV45QsXK{0mwQR=~d|i2B!69i((>h-D+K4*0*L z^%*M*^=zx@GGWqZreO!r@*j)!Jyk)*f6udcie1(euJPH#&hsz)``JL-(oVP6`px=X=_2aa*87PxmUSK4@qTCekM~mcq`>@_ z0S}!o#TshcpM&CB)I|WtT{H3OnR6`b&Zmo3E?T;_C}(+b$*STNYwy4BiJYR6C2QZX zdTxC>r)Xj6x}p^YMGF@euU)&Rq_}8N!R;l>N{dmmXi*8u3f8VET3GzeB`a1hC|dDE z&Z?r7#ZRm$Eh)J6TRE#2FJ4<*YCVy&cG>a;CI&%ucOZa7TgaVqI8z$C~1AEE}5VGx_#y1#63MD_Bvys-Q^o zDLe~4Pp>U4S+=OSK-i^OtvL%;Po-IvE;%XG!Pr@_`c9hFQVx=|SO<5U1+(fjvjs(^ z3zr()5@*5fi6yrc87!*Lf`zvEa@B<=l}nxlFKwyi1sRsAKMQ8sQp*c6E!A)q+@4sy zrqr<1k+Wg3Vr}tB7B2*>v*59J`TFKV<%K}UvtaeqZN8ZoEn8`d6*&uDd)G^kWw6RU z8&uo&`7Uoa-~4^M$~gvtXw+UEOpL zz7U9=1v9Pb>WoqNLSV;PaMPNuZmwzKOxT^>qd*nswd<}gzD-XnF9fRPuK(*#9qLwY zlUpTEdn86ld6z83=Ze|;-`TU%*n(V~-vYB;U*XV)j{ZZuSryB+x}-A#4N z^;2i0pVVHkt<&D>*_ARq3tY%2|8I$_1-eymsZ;-z{hPqMDp_tClTZ 
zU9wVXQ*ct9!1r_3tu0<#u;>oxv}|F)+R~zh%b&+nUs?nx{GFWJi`Ew}E?Eu3(ms)A zb50s%P04Eho>*N{uyXaH;=oh>FOX6gySVg6z?LjrH3)!z_-Qg|(2~U)hU5*))jw6% z7niJEwtCeA0sXs2e(p5;pTDy@2l1Q|Fq|8-#*e&9J)Z&mir zeZLy?{r7v`8$>bx^gmMswzbaQ=pUge`b!JcDpbFwt&Un~{kyt_Hb*Y7tgEc^lNF%p zpnpKNK=*MfIyV}t4x|G2rc#i4u*J6r?^}bcci;x^TRGM{5x^K=E+OipfYG4!&P?JR zt#=BEce37D188-&-ifgs_zu7%V1hX4s|it`WI13xp!Henod!biIRY33gk!y*1ndYH zy9D1FiwIF2wy<54eN%fC=K{4_KPAAG`!{%Z_F34_jZ~AIOS5tNiim zirTofwbqU~)c1S^JNhuf8rA!f-*WqP7Fd9qaDe zbFX%Gc94cn^lb;`p4I8#O-`ZI_%zC6YIHhoBpBMA?|*S!QJ~eB|`@~ER98J8(yE!>xf);_xjoPE zSe9~}jwe!O*V-32UJ$l+92@;V=vDh1TkMSM4yp|O&%8gZPKRlqIdA8t&zzSv zgFNhW9XoW+d3L#zrOILb{WkC*U?_Aq_{R31JKfni(e?dk9Be%a{J zvDg(?Y^Hx-4FA^hrY2Nuvk(_;^u1Lcz-%)05c@l3pXagv9QwysSr7QIWwB4PtP{1P zqZj`fUBaf;NdKDs`yA?G*1zHJP;Fw`oSlhl=I%^Qo43=#AWmI#gMv`x-c)Q!rIl-2 zsU=-wR!8_;xwFMdg?3uKz!N@>>B-^AayYI~-7s&UsV)e9b;N&(-Fwt-%;UfVBCU8?E;Rax&;Z$;BP z(R(|`(lqc1r9z6PPpXfq>l5m;{h`i@+DIyb@~ch&qHQ!Fc#4p`vdXP0-2X}Z?SZ`C zR`PaL_+FGfg}ses^kArAjxyoy=BguHLmvae#+ zzvKGLwO8z^xhiGtsO^|Hdwt5fzqay6Z=6V34qW5 zg0p}Ag!z^8Q_gzVDo31qE+*F>;QFKuUa!3Xe*bCP@h-sF z4bI8(Qudg#RH`f$vvWQ2joA^8v5znb9)!BR9jf>2&`Vx0^t@-!aa8%&LQY3JG-eRn zel0k#jBN?omQc5ijZT)uMjyam9sUmDuO5Gg@OK!0Z{V*1f4{|FBmVw~za#j22Y>J5 z?^v0VZFDuRpTS=Z{`TST40%Gf*hl1{Z1NF#Xq#xL=PS-rk0DlZ4no*KER{Lyv2#Yd z)c;}3PmiGv55{VKO5=z%>~G=^vmUte9mGQWIh6l=blC)(uz$Kgn%w(>fSM!k%*x8; z{J6`PAp%x+!Z#_O9R!WzNkq z{(d zmsyFn-Q#a8SHSxD{*yM4_-a|le)*VXB_aZMM+H_D3fvnLSe+2~Oj2M?gTQ@r;NKRm zF%4Hn1n!CoOcV;-9TQlU5V$ufu)0CuGkfNIbRMZaADxGl0*_wG9?6Kn15tr>g#r)8 z1lA`69!d&4+#v7`We;4#9yJkx`=SDqg#r)61lA=49!v_XZxDD$*+Z_+nuoH-N918y zN%ZZ%?5ADA)`EkHPIi|7`nxx-D{);F*t-8hmIv%EPgV&{C9bU}<+`e8Nm$b-dOi@= z^i`c^g*APmQ%TtI>8k@J;gBQzs{#*%_lAUjbzoNbsl7f=&^S-Ybq(5T`sz+6>1)7W z(^rFL7U(G_WxjrvFDI^#UhkJf<`0kh<+z^pp|4lZSz%3|fG(Oo0sfkv^3V0@t03pR zxhLg3F;DogJayt^`CQnO@>GM5rl*{mp5>aJ<+T4m&?kNS=i|D@w?F$L>D!+)HNO3M zUDxw_%KoHJ_Iyd%pY(OG740uPzo+ca>-xa=ls!pb7kEk8pY-+U3))lh1P$%a>%*N+ z(${x7Nq-ppHGMs3Xn*pd%(N%l@|yNk<-Yw@xo>}#C*d=iJ_%hkeG>d>f6^zxpY|tx 
z9pt1vSzZS@X-{2Fd+Ku9pXK%7qvD|_O=5HvI-O?O0(Vt@k?v4tqDipXkCa^jo@R_8*ng)R~$KYDcF#>l*1ttmw?v4qp zN(kJW6jj#=!_F#-=n1=bY`JQx#LpAdK`De!QEK$&B3 zt>zek`=SDqg#r)61lA=49!v_XZxASRj9k}51fDSuWsi@@L(eZ`Fdp^%G7;B|)toOF zi}d_L8a==8x~k{fD!!3E(X(?mO<#pr#P}vW-&XOB*VTcyReU3TRiJY>&M)L&jabKc zrFepd@r~CtoleqMcRER51OA%68Z?Y=x ze5O5VD?PuEM$a$2uIriCjq^L{lRalC`;)#7wxa!oXI?kX@4T)L*MpCyr<|Ie<(i)5nZITF4&1b9 zZvQ=Z4TufcoRfN5{g%P|mTg`6TV@L9mnWP3mT9%kj;3w8WOpf_eZ%bXw*1aAf6KIb zkH4{8_;t@;N!jn0v25F< z=4HRw*1XL6#n%0fHSSO-5U|2l;N|eSoh^mqcXc=5-87E%kllfI*Z4UBs|WBN8b3E+ zMSw>%{;7b~6L?RJe>#BIjDeSXX&k@pF9m)naLUhn9)}DUqU^#CKC5In4jDcN{Bs(I z3>N{vNaK*)^o4y>)+Ginn~gFVi^I#r6T-N8^_VtjmF4uJL{Wt1s}r8Xpj_`T_5!@j(HrKk)t< z9~`g-03QH6?GKPa`K0bI$e?^uC ztqc74@&UePpnnJI{{GO$=hMK;gEW5J#%I*P%Q+fP+4#&FczLkK13@bnc&^5SL2C%` zAsP<_tvuj)8b2p!4Fx_F_&6P3AcN`C$0391)5jr$>C?v{gXz=9A%p4D$0391)5jr$ z>C?v{gZ62DBU59$9cAFRw)U1G_~WhnF-Lhbm5&_tu2b-sH%q}2-t`JvWwRBu%jPKP zl+9Hzt85;i6+G!v`RL<(-RJ1fVZeuJ9Q`>Q_;8J*KSux`p>g!*NZ=zij{Y14e3Zt~ zpQC|~);Rj}3gA}&H+{c;MqTj{ckgd;)OOr%b=5PnmvA zpECWLK4tnfeaiG}`jqL{^eNM?=~LRTO`mEn8BCvQFB!B?;im3 z_!Nz!Kc@nps&Vw^G~m-Tj{bCkyTDDKGX0u9W%@OJ%JggclDbug%Q>I_jr%b=5 zPien4eX6}=Fny}MWY9j9R6cUldrrY)-t!8c@P4eIRdzr@yX+?lI%WT&U{={r0ZpHp zjy|5Q`yBmw4e)C;j{ckhe1^u+pVtDvR^#Z;nZRdi9Q}D6@ar^={+tDTmd4Sa*8{&E zxam`-U(=^dzot)_eodb;{hB^y`ZayZ^lSQ*>DTlr?boJHwU-R0Pqmi}+Nb!Dm~rYw z1&?_zDR{#BHwCS-LkikuFDvMjy`o@N*{gu2Pt8Uj&(?j8{+t7Rj>gfSbAiv*IQnxQ z@Oc_Xf8GH6292XX=L4UwarEbnz;Dzz`tv5>Hvu<&%JggclDbug%Q>I_jr%b=5 zPnmvApECWLKBfKI^r`le!St#2k^%3J)Vn9bO;-O`!dGl`z6jXceH4pp`oA*prXM*K zfceW`ceVhI`NC0WE8xU?yM(p_PXFrY(3b#bfBVJ|+xHmvc8KjSzwW&d+Yc@}8e;nq zw{^~9`>V_Ct!L{ixfe{kSdCe(EFA z{<j0_idO+&RdsA1+ zPF*QGb*1domAt4c+h087r4ZZq823tu?JvLXb*1~@B9VQ>Z6f zw!dVUwC{PfwC^`Z+7DSG?MJPX_T%o7_ER60_Sbz&+IOAwlDhAJ;@6d}uh{aQva$O_ z=PZ72|9jVw-)jEebyVXL@80Q`n?5|M`nlp$U9+}#JwEVc+0CzP>Nyc>M?1XyHB1I^ zMY}QW=cRvmUHK{7SA6PE18FlUyZMz;v^$D+JJIeDReodIch0n(?vFEVmk<5ogRdG? 
zo4u&C7uP?6q z;<~###_Nyk{-EvQj`arOdLYUpZq&=cbq=n3y5qfET<7AtmpjqR!*w3$FLft-`MAzU zd2e@$Hyqc)aebLP%^Qj9k+|;TPWMLRdNk-ScV~EGa6Ja)echQJ?RX*Z3tw!EUj%p& zU^ZYjU>CqHFCK}DevR=9-M*ga*cktu+usvC(as&{iLOWD7rQy0=nFn>uCH%n{1P|M z*B5-zucGgfcvpA0uWw_V_7Qy>dnRVTwD)xZ}o1#^$oZl?ym4|#PyA!AK|W2zAxjYG0yQNab!iJpyd+C}tijMFZn=aD$=B6>E)X&2G6F;2UP zo{e$ZMf5xpr(NjtA4j?A>mNtCGrxg!ej&z}(`rwC3t=f6bM^-BE#6g#!1+ z1Xd>mK9dw!(;!gRT;Uq;?J6SzcSQvz3I*uW^PoRlACU+1 zi?TEF=l&Xbdsg7}AKrBHP31SvclHnK`SljoTFdXoj#yKBGuDT2y^~%a$h8##^yMOy zbM26YwKvutStn*&{SJn&w8JCaHC{x@4nzgkg{_sZhOe-lQErJ>h4x%q!}U@Y*54Eg z&B1P<>3)^%jCM=Cn3UBg1Riq0J2J@*U*WFzo_F#>YDYGSQY9a#nnKC=@Nw&p$=%kQw06f+qIpmT+iF<`!d#YcjMY83p)2ieV=P>C+fyLwHE5ItZU-<=o_-bxyMT`4i{o>a&o6=XbaNmVeZAKQK{Y#>Cn!T5yeNKVxOmTO3F)6D~2z-XVoeSTd>OP?2IOWo7HIqV9(+BNy7uwD=_aU!A z%J$K>^WfXAyVGME&3?z;V&2G*; zdd$Y8tSTXJFMXpge4FEzahy6MH$mdK&{T8o(c`y4%ATQb^oOr=95aqfAAC>s3D=qu zM=pbk&6c!jvmT7o)C2hsDoz(l`!?&rI88l}Gtq4iVpg(^dOU@8%Kng>@%c>UW_)IQ zadb-|Mz>Y&y=X^$k@F$(9Xel*KFeETJJa3DCh?s{{E!2aj`g|le~xv|Z+c8~E>iqBh9ED;S04CrY;%6oeWK+{HDC~Ku4a)=SjzYEq@bvO@4mt*Ycb4YF?(?oD)o0IX`T|J=Jsm zpXFJezBvZIImg}OMWpONRAAje^wF3v*v?#cugBj46=Bv>$(u9iH!g&ufsfH*(M}dZ+E&=su(7 z4$7>aQxHon(C~Sl{g@A*hudU1@H@Z~nl{V* zB>YPH>g{fyML%*LADHIEbuzc%Z^d4zf(&!jt__uHxm=MUBi@J`t%I+Eq!z57ws_H&Vr9~zL<^qg3t7-FCY`2=^3Zz`uuJHKeoRS?FYjj z#@~gw?(LN6a(qn0_@JLpUWVTWfKyi5Va8g;+n)=_YZ}G@*L9?p@B5uEop);TwjKk|pv+(5oZeSRl=IXLb*2j4H_{GfPq zj9`8+^~%(PzXcRu*qpxwGWE*TgL4IWBS%Ti6`6Wv>cKgK{E^3`=8R0eGWFoxk>-!M zO#5oRI4798Xug>`W$M7WgFKNBrRI)Io%DOPOkFsa@E*vWQgcbBZkf8AFz*A|Gj%g{ z(R?|VsJ#l%c02ARzcG&dau;&-y9Uqm^3eZN9<&|HI`Yd)ENgcJ5Oq~UQ9kVv-^VdF zIFCGp_I&=~ZxPages4bLrhgUhLI7vP@ta!OR}-TCnc*m(`3?Mz1)TLQ+u^$qu z5}~hAV+mt+BDRpOdcGl!9J%g%r^I$G=@u1!RfH|+#(9UjA-_(|JB32uOgEOdq#NfS z>V}-W?tIS_llDz?V;S0>vY$8yseXbkoP!cVf5!d9xrls_2iTqOoEk)yX1cJvC0#ft z(H6)ZRCCf5=)W6(Xgk4>lhuIl*Ik|qhTjBqeiBr367@!IVRz0+zh(W;g5m!pd?6Ul z4&`Pw^7_S~nvYapK`zcmzbB8EgW*52{P%J_&vfu1;g(X{w1j9Bl1`Dn2(NWI@9q`x>g5fuaKO0nYlH%{rNxx?KKL^7d-|eaF(%HAmG1!Ik5&0rlvODJ^ 
z+NHB^m(H*YZPFPwp$(3~2Aq$`8~K#o`Cf`P>FnF2vu~5munBE&3^vemBj>U^f3wgg zoqd~hhE4b`OY2CTI4>#wus!FcQ|YL6;=Dxpk-OQQ@2XnTQR~DxiM)~9*`4pM*!L~z zsCD9;l;(}N&VFx6N39d*sWgB1Ci|T_vfR{B>%{qq{E;u(o$s|W`%&vioj50vH}Xll z^W7Hx+w4cJqt=OYQkpkp&+JF7qt;3Drcayw$9YQYr}g<;=49TC^^rI0^^(Ykyao6z z8b?0l=YfA-r~RR4z*0V``wKECpVT;HP(G<~$e?^u0H)e5J;}Xj`j*uhKYH)vX4;TI1!mwFdYajo)cow*kLR<6CU21bB(Y@3O76 zz}Es#ulI%wrcWP-45m*XhYY4qABPO4PalU2rcWP-45m*XhYY4qABPOur_<}b+fjz} zdhd3WL2?K-_&J#y{hZ8Aeom(6=VZn_$;sS|cLvNgsPH5wbF0@RClh*ot!4ydC)M8b^O_0KP%v=+8TV-=T5z=SJWgHIDxL0`M;YH+{VGw20W&5 z^k+Hna*d-uHv`|SarEb%!0!ZZ`jqL{^eNM?=~Jd()2B?orcarEO`kITnm%RvHGN9^ zwdqssC4=cx?InZuDapy)=;vf^@^dmhKPNNhNlxZw5AXF=oRXZ(tzMIyO!!m<`nW>( zIr?)8@GTlge{Kc7RpaQ-yMW)NarEci!0*;L`tu&(_h=mbxefR>jiW#B1%5AZ)2B?o zrcarEO`kITnm%RvHGRtTYxNUy9gimcpA8*%vj{f`-@Gof`{dpho`!tUJ{4($_YaIQ# z1NaV&qd)Hle!s@ipAP_kK;!7o2Z28b-1I5aujx~!U(=^dzot)_eodb;{hB^y`ZayZ z^lSQ*_G{Cp+Diu0r`k&f?NgGIxzW$b+~ntEdVWr3%#)nV&ECs?oRXZ(tzMIyO!(A8 z=;McUpQAq?2L7DTlr)351M+OJKYYA+c~pK31|^jhc!yaP*& z+T260#|lR_ZE8eW)u_jNSiRLZlH?fVZr%J(8$(wDF8cYSA(rEt$AO5{*NsK_ieLUH zG*RVvy&gJ(_SK`F@4eF@)^sMjkF%hCQ2kj2u{hPNPw%4}gd&4a9 zpnm-}_t=w#Z}!5knSLy%ei5l}re7A`pHi+wzsD6H(zT>tRNB`Owxl2VP`;{u&nrHK zLf=e3mbauI+q18$`@PvC1$|=DzL`EOr#=a(Z>A65<*I(~x1a~#<&v%?eUj4tl>J3M zs^4K>^-fsmo9WXa{G0U`+pGSDUAecq1zG3|Wr2&8@88v_YslPhoqqr2oRSbUNzZ#NeEDsMeAm8v)M5KOKX{_XQ`P5uu+ujSwK!Rh5Ey_TQ!r<0%bTK<&Jzcu+k zfIn(~BK_&?uVrVywq|!N^XcT(vT7L^!??za?UPfR&pCBdG3QK?Z}#*>=EhSN=Ek!e zF@JYN>Z{1Jek$e|Dc?K&tD6Q!rLLL~w9nwI1W)sMAM}T(Vonj78rJa|bo+RHrFHWw zW00?`=a8DI{u}}t%psN3l{x#2*_11Wa>STj38_!$IfHtsIYa7qPrP@a|Cr)bA-@VlcquFl^(+&Uq0pY#}sv^J@;Tv(7d#4mhh+~q>PNSw0RBYdCnUV zsZUTgUV~5YC~G34o)_xy44~q=(4N+YS?~?^Q$6N> zwo3@_)7ntS{~FBuDsKC-oyvwf&L{cYgBWY(wvYM3#w^BR`feTco`ZTbhdqD!jal<_ zA3#5T_sp!X^5Z~~@*wUttM|u19q!M0qBm%mqi)U(jQ>kwPWjWlk^7Cj^(w?4%2W@T zxc~W5wn6^7-v6As{(dR*#B<83elO$xFyv!g?_z!SmH3MtdOz*Y5402GS!a|#@?zuV zjB{#CQXaD$dTKkQ%yK`j)RRBTGxOs=Fz#a^?wl%Lo^fe*hIGqx$cA^WNLkWR~!DT}tlsoLuMo9CTye4gse799r@^WlRlK3E!7WYSF{O}EJ 
zH?BWQKZ_&YtC%Y7o9Gi2UUh_-`smo2K-_2hLZNM<56jT5i9R~ERw4eY*edOs=@XN- zr|b_MYpXFQs#q)ZP2^7s?-u(*$J|QHlPcy4Z4>z$gl7}^b?i-W{zvRw0Y6&--z#*J z-YQ(L!gZ1RJiY<>&pLc20NonYtwG%a_kdS|>k|0pLib-3o$43ZgLI{+D+S#mx6WIS z>-D%UcK=na@23AHATML$2Gngp-D3Bkw-MJH!EcHCe-#~LKo#W!-6qs+0^L%#Uaf7F zG1eGYYsUTfRL!{?v@*sT<6J8)V+#I{HR9>#V8}>18CyriF2%q{*7_klXNOZ(8oY$%5KVQ+89<^m8r82Ix%LOKqPS?0ckJQh{9J1Vd$hPoK) zZgmfNYHnHfc#lG<+q)Tco8gbk-B-MrlvO7LK2w3Z3bb3{9#(O@e%bRql2TW*6?I!t zx6*yxYml;iccbiXl&x~#P;vb5vNu&6w@{Do8I>|d8si*u5h>dh6_}7Q(irEM)A74z z8GI9Qv`}bv%h)**=U9wMS^C^689R+}jzt~6X&<&v3T?I6s4>nl+8|}mh;16<9Gg0R z*TFt2ehW?8<@Vz??PSL78q6aq?h3y)>!IUzlJg1Tc3bt(ak~!lF5|6rhI;6DU5|N~ z?F-LPZXK^{F#j@Mw^eQ(uakUELA!16r)}`f)$Xso?YQ0!-&*4~q~moR>_WQxP_EB)Q1<}%t#$v?dkEJLfv(hTOvmdaQ&t_Xm3*KxWoNuL<<;?; zGBRG9@|*H9UQ<3(Zc|S6JPx@`c}@9L?1a5cIZe43r09QGs2vXzqf_`3JNyOrJ(W{NxtKGkao*~EQbPM%}?`pq;*-zPFj?sverO(%!jke4|;27l^ zf9lM0=hVIuLVI8iXqcD4u^W@J^f`RY^SI#$c9>(AV}?5OtU8XFq|nySMSJEma7;Hy z*`awTYwE!D{?wCa?5TYveECe7wX9KJ9#dwn^;i7_nN_}-(6rgk zT;tF7Jj+k*DiRQ z9G^FVj$@AFkn8u!kLMlgeI*tl*MQ@ZYw=BA=e`n(Cv2(rmC*8=@^a0-;tAVv&Aur= z{Y=Ztwff}8^AfpM-;|$g^T`AIN`&>k5-rKkHTh{CSl7Ny@Rs(K$Va)#3t=3`S#*3Z zUxfZ!@k`s;>YiW5v+YopK4XtQyb3;im;0GAeox_;N#}40?cQRvU-j>{bGO^EEGA{? 
zv-s%SYv9}WxSh+G!$rB6W1`}?(AF$Q`!)YzJKNj~%NnFi?JH3N-@ezqsEloR7Nv^g zsONJ!=NFDij#bKxvh;a;^ohmrVUAbEaq7(TD^(m9+TEOc^q8g0XrDg&kM>^*-{!bw z9H-7a^HRlep{?fJqsK30Mtild#I5jkj$_7g>4Wd7K4CsEah{RM*lbCgHtWGSO+9$7 zCgXIWv~RN>jMLPEXK|{1CEBP5_mxoghumski8GX&@tN&;)+gihnaa&LO}(+NL|E-B z(MGwsuLSkwS)wYwBPVMK`mE%?Y-hWhU1shZL?17OkAKPSqT;mbC&;V!l~{^)rGK!U z``oa^cPx<8drPc`Z-3eCuHrWD$FZgNmRN>%>;Kz!cDRu;b8m?a@ay~CUMimRUaF11 zx5TYzv*G{Q&I4}mvdq0DHsW3nx_!#Hw}iSE#*yAzVmaDv{2$wS$n9H}xwizLiS!)8 zc(3kh#QPN(E1W|(&*?b8c+Y$A3{@TPS7A&r4sgEHuc4W;loj^BusrOzVe z+@{Aa^MBc%=YDc7DHPfRrMU0H+I=QKTzIfqbRo-3;IeTAmYax>Rg`s(el&h;hE^GG?DoT(n1OQ;9WFjad?v{4Vv zA=HECovLU3Gt`66`pW*0TRrPb`!?I3a|zq?Tvhd~-$uDPhfrVaEzym8OT^G$TRR2A zUv-D8XYXx@tMvWt@O?htQ%9cB$~<2B^nK`C`sR+~c9?A+fRA&|cnEU_pX*g$@ElZs zZ;72gze@09`$y4!HT+@oIYG6z1o^A6gXjA-D6jZb(76q8>jlB4drP1lpZUpO(c_t) za&V04y(KpIxSAJyKU3qwkFlE(V;ScvFh1z#Ti?R(1HilgBzsFJ-u_%bUfVDZ7-zR* zPGJ1F&u_m2?Kvk<_5`1$AUpXz0DhcTIrng$WQ^dv$Fa`2f&6(kE$0UEP46w?%fWG{ z_m)t+IYuD6sTbui_2BOT#TPc`9FeJ4rXHLt$eZWia<0hKD^m~78RXA1aye&Y>XoSn z=Z-Xg#AVu7>%}?2)J5~n)G1R3&K=~b&f0~{nL6or_?fzJF5x|RE-&YjOx-edVIFV# zK9D_AH&YkQmvf2UTVguyrG97Zkaa5DTcQO0fA?pDj)k&IxJE_ls@9@>+r_?*V{C99 znT_^*{;{~XgwXFT1>N?pL7W${^40r#1f8g~uOcQV7YHtb9ag7W2mf-Uk=bsqL z@a=I|Lh2HH-r{&x_QKfU+(I5a)0lG$=|I1m^CrhP=Rc0`q_nRhMEzdQZ?qNT18u`O zhBo1O$ed#ugkJV6z?fycplw)=_RlbG&_=X5ZNj;Rw%{4cYOay?O?0uO?XHN_HPeOj zjr40?dM_j5Jbzj3Eg|ij=@u1!RfH|+#(9Uj@$6>x8%F4x=~gK1o9V{+hr02+XSKJ4 zv~QwYOxm8ZpEw7peu6G)ZwaAqrb|+IpR%7g7m*Lom{xmB2z@hM8iao{T{tJv7Ce`l zbJ7>ke~*p`o^o#q>dkYj+p@QW>MO|A*1aXjTa7`)^tS9RLH>$f#q?9}Ey3~5@yTa@ zbq|g&#P-weEur}PbCS77LFS$WoR7$t=Ww@mZwd0|S>0{jTSD=N-A}c*gw|2-Eur|s z_HEr;g8X?dcw6?CAa9-<&N)f%!*Hs77dR)Sc_XgteHb$LU(kCmaGpx@hi~e=7+7xV zXzndR{@7bW?R|8*y(P$-XO(kK%IwFM_LfNVhU}-(QR}36XX?gzO6#Zf`CINS5k{WJ zC+B^)-=8?6Gkv@JoSza^CmJGwgYmU|-lp zpJC6l1p5KkXV~*B!T!MY8TLF&Z~*WD$el~)t$ombXO5@y*4j}9$%Xvnyl;JuJ$)({ zKBCXDr%w$5uFtWjPvrsE=h)Myh63j~_NGq_MA<+;XOm}S4gx+%vQbsQ=@_FbL{C;R{-ZZ_NGteqby(h5PWJF@L?K< 
zPYnk?T;r+#y)(y6pK31|v`@+25}%y+O`nRQzx6rx^r>;c^*Q$Rsqw(|Irj9a3BY-d zz3Ee9P&P*U41DTJ;8$uKJ~bBjSdGJ{t^$6Q#y^4#rcbq(4BDrXew>oMB|bUtTc2Z3 zpPGvP*5}yMr=|hd=h)MyT;MMJ!Stz#D4VE#1U@wh_#}0E^*Q$Rsq2CB9DCEJrlV}S z_7V8hHNdaYIDBdb@EID1PhAWAT8)1M8BCvQFBzKbE%7YYWm`k&URF5b?=A7{<2|fr zB)U+1CTJRe-;T|A4tU!gm=#m~pp=Y>b5T|FWA9OC)k=K108 zj+VQF!PLXoflCBTar(M(AQHQ^Xzbzqx~Vu zVV)t*_S%+wZKBpvX8P&;h{ThRD?X%aNx!JHuOnk}naH_RaKRIrT|MeKUP@-bCe-k~cxRmh?$V`&0Ip z&Z9`czUrN@&^ObkLHIY*hwW8P66~t?mZ0yOXWg6m!}NRe-23$260oPwl1^=&4^d^#eV}Lu+T;^SdAY zw+8xXxVuEpsSkI~qF&d1>!K{3v%&r%J^6Q?^iqi9 zXh8An>a6rDw!9bOxOAWBtmt+BFel@5^6MOqYWla&zcu-FE(hgjj>hTa*Et=OU**=F zMt+^!Q3d-mU*mN0>--MNPx{l@U*+XN_S4C&^70__>Flj-tzsBtU<^y=;Qg%moKyER z%sGeUn>~HeJnx?6i21uCQZIW;{0wu9l<(c%_4vT3)KwFLR_%A7`7q8L{u$;Jp^?2M zgl-@6^1q0^8#bAHOZ*Hz$Ta|Cr)bA-^!-V#Eu^jHV^O3l3`pgZl!d^^o+oo_QscvKQnM#fq8Sqf zbHB=W7T%||p^pDGnDQA+|LwbJ+ z_DKtT_|2H{jXtsmvY(0%>DV{)jZ9gz9ZuC&+vAM)mv{|+cubzP!^n*cBX6=hV0S>4 zBbFVANPS%da zuVH@)p*h?ab$wCS-5ukx4D@gGM_GT|qlY_I_Lq3=P1#=p^?a_r5cLMRO zZ8hda6>o*UiTp|7-C}>}xLb+2QpH`NZ6bez@N6Q#j=u>$10a45gntZ#??v3GmxJpZ zT=#Uxr}LGo_*?+GT-4>Fu9rK}%fodZeDhLwaynnR8ulPvKI-y8*V~=q4afCxTwmr+ zOXn*mATML$NYsr)T_1P4HyYQY!S8Z+Mmk@)it>SO4C=;!uCF^&qxw-JKPif8{-_a>1SX*e;}@kJsRU2 zv!Z`voMSZo?91m3(1|^c#5q<)|He4&NjeoH=wqNWWjAF_KkGt1(wVZGvZkMLAs^$c zDZ43a`q`Fa19YbBrmX2_TFM7HQ+CE;Q{MFREd3s|ru?S7>NytjnR4s3siwT@ITrGn za+`8$yP5Ku@~Kz=S@;~L*Pg1m<1P5j-IB*P7QQnUzSGa0=5> z{t|Cte+jAEJ05l8;g18{xn4}lsuKdAnTWcHXgAQkLH3t;3;Rn*UCm_FO-9`y_eQTl z%Jxk`*%XxJxHrrG5^udJ`%9o6xeApsMjGQBa}g=q6&09}G13_4nB%jmnnT}$Z{oSL zP-u3`*f|pCSd2+oRYKri89R+}jzzZD_F?;^&{T_!8si+J4N~@u*rqYgv6+5GhJ950 z7Mixp?Z@qU*vX9BHJC?K+!cOp)_WO3sGEVhA?^Zi zCa!1VI?r8{j@R|D1LwmyiP)1#^JfBn~S<(?ycSpxV{0` zJg=VbJ?K|;ln-<_qV7iU<9YR5_bg-SNL6v8m%VZOS+-_GpZAY>NJkagIeDuW3Wli9H(Q9FwB|kvQ$C<27x_ zIBd#p%BtfvZAdy(c2iazuPGnnt0}uFtB%){k94N&rmQ+%EBQcY%FcLg%B$lwWn{cI z@N|pzP|q#zfSD8V_Eyp$8{hp z_H2MJ!LP2Ujayr5?U?fpu=5q{=);H)bFK5|r<_;{1kT;g7M$#T@50dI*fVC^{^w@I zYSnyT#eid+12Q*9?@M9sLt*YWVeT(s?jvFDAHkd;y&r(N2Y{LH&wJ?{|5N4kn>qZM 
zIs42X(0eMdD zj*X9ttm@kkej}SX`1C2Ab8qI>o4NF6&OCie=gONo&g7%>n#~+$^3i$AW{xuX=-gy8 z51I5j_t?xcX1q7^@M&|McW>s`)4y~+y_q*ppVj&DX5K9Mw3H`HJ}u_OqP(R%SkkwY zV@zNEILb}G{y565eVqQK^NY=V;!P;mdBbL&FmoMsKCqbw+*n0a?Pr`XIPHgko|9APs@kbZ0CJ7(qvX66Et-ptcHUCtlJ*QxR_ zIX3>@bmF-Ad(%meDKpUf@eK}75k@V&F zSqu50yRZinA-)5obEPAK?~O`*I`_Fy@EStU@8dIA4A=Z_N{IK@>DK zi3olmD)s60I)#GQ6OxCrhm@VQ9G3A7pX;O4JJM$#@1fp>KKp!+_wietYq!j`Sp5Co z60NzOiF~*oiF~-8h!o0-(#GP#$4l|-c9^W;5~FEt+o%p+p~_} z;i(V5tG^Fe4?Ao7@Vlns+yDXWYL%^Ay~?1ArOD#ie+7$bOHMDT-A z!PDPz3k5$+n2s^HRxt(;vZufMB?Lc^l=}3yyavJR2|+J0M#^d|DNBC~jR<}qD)s5_ znT3Ma6OxCrhm_UCr0k64XcA*y^T+#Z;?LEu`7s9X!1Wla#JfWk8(N^%V~F2q>GS;7 zs$z^k7WwT{k0E}?RD66|es?4-zboo7#BYbB;kQBZ<99#O@VlNKL#^@AW2iM6J%(DN zNynJCM2@<*L=JxAU~D*yZ#>#QHE;Q`p#@rPAASo_F~+wuzfq_d13PQ`@Ex*>F+MHd zxvCfgT5TV`t0xWL#j6buzRm3DZfyT%J+KZx)Z zwlm5t@w(x<`&G6x+Aa0?n>;ef4qxG}_qwAzGQoDnxEs74xbAhe?Of?@^!VpJpuD$hJ6E}$*Av%$uC<-08}s;krZ4tn8s~2IdZE1U^|mwKtx){?V^5|D?pE(o zl=q)!I}_cz70-d#hh~zyO?|uI?}G~m*GJJJZ~UluzZsr(AcZcR9-Qme|g8w^H%R2cK)) zN4>r%&tGmkGu&N@&v5X$*8Q5-59PyG+s;fkq4d^kop?zCOVCkK3J7mh(~7xKvkAKHauP}_y{d7!6VI0m&{=*u5Rx#`y*N4d3+ zGoFrtfAK7#f#{DhU$7mX88imvW6SL@&k7oZ^08&M!?Rt+qI~>4c9>_mxr$1%n+tjAbB%7@=3V~k^1kFnt>9}PYn zV;sYJjEzJ&`ErcWZ^y%L$$KJrlh@>Bi0yz=R$^WQoCe+;C)52s^Je(;GkyA5pl2-L zSmoHKtvS|dTeeHTi}m?ZR_=+%vGMn&6UWWpn@)O6@m*{l`gO{KwlfrP+9STt!ngB* zPyZ^O4*+MxZJf!u^3|E&z;glMtZ(^r7$YNr^9+JffIMelG$79lxB`%81O!7)7T-H| zc`g`!6VUldFx;N+VmUW47H}@&m{tA^zh_M0*kugi*k#P1T{yOB7e$Bh!!b^~aE#L~ zv>)w4`_V3~?G@JA-e8$~O8sxWi~UE@m*bT0V*gR}Rqq8X|2+wydQT#FRYdT;QNgPT zQU6S#;5CH6&awvP?Txa#31Q7bA&o_?2u2q1wTv(dU+R% zYxSN45OSo?tWF4iASw0fGpHK`uO|e(yeE;e8cWL3=Xgg1KMYc?X|2@g5lkWj|=DK>n(E_c055RNF)q9eD!iVRE^F091 z|JLsTD*O3-eA+6XmS-^Q_W(TKTD>RnX?PyBdQSox{T`q-KKeaCYc%>jKx;JV_aswA zFP`D6-dRlb-;+!|X&;^^tln?5K&$Ojn#UO$d64F;bR36FDMyf)F(6^N*u~?=6)2 z^!dhWOi5W~OlZ<)KPLn~tsKf8I1^LF7(f+c1h0z-elRL{`rOt+!4DIrV+^iUi~)q~ z=`%hPf*(jqefr$a2EpqIK`${z%4#erOP_lh5&S?@>eFYJ77AWZNFK@_QdSd_vNM*W zNsO8A$C&xz&(-t&7&HIm7^<8ve$6vr>DN5dmA+3}J%%d%@x^mhS;uoyRgA${)MJQe 
zmU4XX{71#dr>zF99z#4Qlr%gClr%i!ll*ExqsLHdeDoMd zU*^Y{Whd>!vl&%vXn|JShvyzrAD&-GK0J?5#TeLG+lS||DLy_e&wEq%1g*9Y&*dWx z&)p+Go~cI~o|&iZ(;6RbpVnx!eOjYYzeVsaXwvzP7n@X3ZqIW(mf_sxjwe!O*V-32 zUeFr&s)JwCL2J;f_Bpm?pW{r!4ri|xsCY`r))1>pVW{`({ zu49MJInOS4vQ&A1Wx#`gp->WhWBUtd7VbQEmb0_Nb)lV3$cZ^)#>8S*Ub)#$I6Ix0 z_$xeFws@+u^IQkCQ-wB|%Gw#j8ecmR+L;x~ie*(~?aXpOKXz;^cGXpzwQjc6^<}F^ zzjvx#GGNBfE^+>}*OYauDs!I2ftXu-)l3fnlN2S_~zZYlQF5&_uXub9MXg@b4Gh%P4dW5Zn?D!?H zW$Ma%Q>j#-N2&*`Y@N8WN003>D}i=j$KT`l`v(4Yw-yINNEwOL;FoDTRq&kNuz`rwZ{I93PdTi|#8bHQx?_kK2i+d4lB zW^)f{=Y?Q4f7?1Q2DANjSJ{6g{#r1b`y)HQ3TAVik@K5iw!iKwo9m36w}RQ+6WRGw zF#CPNzXY@WbywNPh<^~wK0$aqnB9Qi;tf9zTE78&^QS@UO~Bv&JZSv~;D7!iX#Ezj z@!x{h?*M=QYS8*mz(4*nX#E%9pZ`5*H3A;_kD&Dy;M@Ndw0;lx&L4u-9{}I`@1XTZ z!1w=O(E1bL(f% z>BAp4p2Hj`?oEB{yTNRK%~tjs#Gegj^F6__f2JOQz8#PKB&h6o%(vq)-;T$8J05#E zsO)&mx8pJ2j>mjE9{XKT+3}cf$78VLVaR*<`$6k9z}J5mv|a~%6$t-m zS??YZnuiYy&BJd9&BJIznupQm=)0$x+cP#1i5AVWr zfGM*TJ-N@8zV174(b9kp=ef`$3Rh87LWA3AD8 z@vtF77B4Cqv2Z~y;E1(hY2xTbLq;qdR#cQ*JhZqdZ`8s? z#re5;i-(O|oHwF)=%@w5bBE>^FJ4@fH=-Yna;kr-U+fN6412ko|A=)Co?~Id_9p+v z0PAGI3pz@1RM+OLT3uRfR*_@+xfYLSx*gv}gg~ zQk2V;KT#q?*_LBdH`C))!zn%?$1CsE!N>JD=5Ty3m0GRa>+z~#Z=YV*>-K)Jg?COE zwM|$* literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co new file mode 100755 index 0000000000000000000000000000000000000000..82dec0c14d94b33180709e029b9573541cc927f3 GIT binary patch literal 54176 zcmeHw3wTx4mH)}P=SF;t0<{|9LN17iJVFQ|JVd<8L-8OY@(^{J8%Ti2(-06Xy*VJF zRmP&lm|E<;wb%xXHd;E3Efa1noyIXvqk|fiX;Y_<*w;`;ts=wy|9*R&eK@&!g46kb zw!`;b);@c$z0O)|@3q!BXRqvCF@4qyGb1Bn8S*b+v=|v~nIPhp6F-&0%pmI~7`^a+ zZ{utu6V>b+k>++V=@Qehmy5@0eNVTl4T$uXOAT=|^^g6)f?@PgmxeK@Ep&b$*Bbi! 
zkyoeDZU22+xK=I=C*T&`` z25oyfr)Wv}`l6MEMN5{HtXsFYtfZ*8@bV5qLu4Q3Rje_E`CbT3m30ny0oM$ z{Wg2lZPu(_c}KT*xnAF;_h0fZw!TaFFL{>*`YyRAeU}~%%(pl&vu>EKZxreFMt94I zR(HH#U+J8&Lg$<6i7Ygkt$zyf~iL56i9vBBek4H;<44Z;}nS1dc+nNl`mQD zqn0=YYELY?t;k2B_7q5Hov%67!NjJ2mgPD{F`tr5PI z;hX|7E$NyxD|{(q$0<(MUY#DIt?PR!qvbaa{Woy>2g*7c(RkFIgaB1nrlHx)>to|$CI{r~T5#v$VLnvBVx@>i? z@3yOFmzT`mbvdhwHWn^jSyWzl`7**Q_hOwx}5c^OP8)GTcxBZJRwHL zcXHORD_K`qdV$SU5F>8>iL{Rs?#G* zZAL2N{!|KFk2c=J{~sEojQ23q9~wEvdtpEaFqaVR5x_{mcyA7`dmHaXdELi&Z!MtF z*LcriJ+5~ECIJ(?27WCe+LNpYYyvbsW4zZw2s%dqBY>D-A0`2N13DMret;3+&EYj* zl%xc&=eRKH!nH1RT)4x92^ZEH_^R-e1O0kA&&kws4j9I^198JBSZ~}_ zzb@5F>3m)4^e^{@0s_X?`d+BFjC<=(zsjoYH6vJQ1ue(Q&v%@0rX8?|T-Mrs0wi=mv0prY91A|a^nstqt zuqv(TR;6iN^s0F}?y=hV$RuHH=7Hs80%7&)%$p=;yUvjD;9vREz89Ck9yra%@jSO+)LGA z90tJGWS%i=v(@XGv(Kp9boLpUvq{4|-7c0pz8 znuV1Hj4^fXjS51L`%})cDkIl4Qp@@~MsLiy&DPykDp+X@22IRyV+g)|E?eeg_V4d( zHg-as6-#75vjo%aaeG|WL)elMyQg$~uH5fBImBRIQ z^vxLT4BmcW>O$29sOpBKhA5mNi6Pr*OV@x-FcnldLsLUl+tA?9?K}G>>cgop>MuVI zh`x~wxlR$1R_11-8u#CW-(K+hq~f=~x~@UpQ}{iN-?#Cr#qSyXp2hDw^`law)ctZ& zIqH76R=njnIon64MyobJRhOH}RXDkcT*dFTpriTCOXaDyykH*qt*IZG8magNRQ$?y zDnqXEVH@*@{>m}l0>6jrIaZGGbCkbSf9c-3%TvaV`rdi-Hl&OP>#M$h_;|{A2)^sA zvr@*F>#d-Xkur8dMtB~hs-7TW1}hV0X64l2S2j=0e01|v;|<97X8k#sPk(1xv2$P( zf%%!SlsUN~m8wWNW^RnW#WBM%`V_*XBT(~Z2Ag7L@Re8~_(HmRwJ7hyuMWTc_?;w8@aB9%8cHXhkcQTYhB^+=|33yFK;I5?gpZO~@YrdSY|8&I z{QhHT!vm!9``$Hd_=nIpuODVRu4&&Ee2n=+)W0~XVya0v+#8QJ_dX{>#h$w|Gjkbt zcDX!+LFz8U$|HYf063PFyDK=s-i7<@;(fs61-jf9RtO$<1%_2z5E!iH&`#Y3e)^z% zfy*0tW!p~onr(f2d}q3R?|gAWMV}Y*V&r}2i^EVJtmd47`^?AuzQD2W3*HcP;NQPU zdzla%=uAvyZ=r9pb`0LS066`KGw%e>q~LJp($okSCugg<**q}RcpSDj29HRF5ejeG zG=JdN?#Xttw+v1_z8Rzp_^SJsrL4`yH2T`d>VG)fsZXVL_N>pnZTno1b;0Hv7T&PA zC;VSzBzks_|7E^1j291_(1G|Z!#MWVV}_9k3)~$MSREBu;|Q!x2;7$xSl1$O|9p(8 zfil8dRaoGzh`>Zt;BH4?bwXfGQebV1zn?Oh7lTVG$dX3L|#+r+I8fzNc#*J>{lVff6$EK5sGedeB zXZ{y?GCPVf)7%&%aCbytbyQ%DBd|6ha9>hjU5h}8F(|7TBXCzlU?M7TwWsbv7+*-g7QT-DO3?%k{Ts`5 
zeNN!l_BnxH2l^Vn7C7{8q(h$Ruh^Gm`YToM`ZrbY`ZpyPe6q$*Ko*Uk0Dby5;wM0# z{*Cz6;FJD}_0`~${z})=U+H@KH`dpJj>adS8lUwVpY@!Rvw*MT%RH3npQ%skO2-%C z==j2N!=StYjPJxx4w|d@^VS#HY68^Cx?{DzFVO8>-f!dOtBiY9QV zf0mp3oWO7Da{|8^^fi7HaHxOMAywx>8b1kHG=38Fsej@p zL7)03egpWVK3U%YKB-S#Pkrio>Yw#Zpri50r^aW!#%F!{yUehWn>H;NzW1IH&WJ5J zsi)Pu48CLB*Prh)S2MosY4m-)i`9iC13 z%j5e220+Tnds9~OPFcx2WhL*Fm9!`;`=3ANl_2|PU-4Ry z{V%@$O(py2BH?}fZNmHIn}zqQ9u(fM`}%pA?0>;H=|AWy>3_+5=|5(f^q;t1`d@L6 z^q={#^uPXF(!c-oSJZt+l)R~Qed*mFC>`5R^v&cu&tJQYjF*4yGAg@-|Kq&e^zl(O z&RfsypSivNe~&y-cgt&=22I0v@m{Zf?PYuySGae2^Su0bZ>o99{#(!d!$|5(>TY?h z9Q}@>UnTlIqUvw*=FZ9X)8lcn{qiASeDvj`>hrf9cx3eGQJd;<9muITz6sZW24aC@ z=EwHJZXI(-Y(B=FFoiiz$aotMOs=5(amdln&c`~0lr3XeBXpA0FHjKcHzv0?ZT}H` zkEP6y0JpC_B=)geJ_cOqA**j-Xsqv`aBtdBPjM-GPjU5_hNfjVhFD9S@o9$-(34MTYt$^-4mvEe8W2X3}KB{mY}k*E*bkysAO zIVcaZuZ-oQoQv{cds-|H&n|2zrfCO;>t^ zRJvcQQhouk#p>w0*S+p{FM+!w0;{6}YaD^K34!~P0_$1?%6Bi6)psv}yCMP;QGvT1 zfz=6tHA#WBEdu4cmz1l*0#BL-=dy7n> zTk!78Z$bLoDc?K!ouL@@{H|i)yM(bF-ZT z!D$|d{^6;nHQC-6YmvIc*{Bq2PfH@l;td+sPRM1LQay8RJ> z$#c>7+!3Zl|DOT>N13s9GGIPrjtb7f&jRPX(Wb??;7DCVLSW-D(9RlXh8QQR(VsG7 zy=B0B$ea|Mrt{E0YpiK8ezZv4q4QDKZ@g(Sj_g4{8H2I`9FM*De&grakUn)`f&2YD zTbL{Ci?y5qd-1I&A~?yYz=Mv!2EPnJ^r4(s^BJ%g-{ca4(?*u0)HSsTJmi<52K^}~ z)`DLjv>aRSdNd`pT-{~qhva{FUFYCsAyYhL|4YQFt?4x%6B#@f{Y ze(y0LdnU$j$hBr@ioGf3|Naxk_zba4>vB65t8wQt*0~1oyO05y2cdoFb*2@uotXMg z)GXh8XdlCZX6OogOYA9k4r5JefB`uNqtCD#OzTQ}Ym9S!_~~Y7s=X~X*QKv)4d2r) z1m5t4rZvsJH+F-z1KJ_JuaV!8e`kiK+xMyGjN`bbJ@Ogks5}2?GxPC1EemvLk9;P% z0A<=8?e=4Lz9~F>YvWuCzaI$EZu(Wc$TjPB2#koHCqkyu{{=lQn(GG9W z^Otr99?8>?{|&pWg*8J*@c+ z*o$w4e*PD_G^%ME_;%PC|Hu6_@Gj}+pT5}7qn@{ZUj4lBTcolL$d>{?eqP(~=FVZx zjZ{~>`RS$crtBX5=`;9_yApGo-;~q+csc6%eOCGLMv0L_Fn%L%Hmw=<_E=czc1HwO za}MWV4qs(Ip!_&(hTo)>9~YdOi_kx((6p|$cf=g2t4#>p$GM%0xjoZ&?afCVX4~{5tyK!hhh%X zUg^gvGuB|mj| zI_;Q#T*ly8H75KH&ugsnD&K5KpB~FWKTSEXHmv+~RQmT=4*F@zf%W78d*QQ^J(S}q z^i%o=-}KKX%QyWq`(wRY`R9}6n|_*dV~u-23O>50eAl2K<;8lq_;<*BF~%%!nQ6_k 
ztJ?T?&hdQA@oVizm7iAQ1b&r&hs?v!FMow;&9-;N`l~hLJ@uT|1(?^@*!~k_K*dX(=4EW6v=r`$h)0$^j$NYI(eD2}%kd7bp_3ECC39hdn2|Ho@ zV2sl~fWDshKn{TC>vLca^Z|@@+ScjoNe}q}p0Cfv{HLumzG<6gEK>AnL*SS8l?QsX zZN@h}CYnFw6(~RN=bbjKd8MBxJ>(oHKkw(Av^B5v`Q#7z2+HUCd8e&vUg`5adhn}$ z-u=AN|9kW>zx=$j&Cf4=KWQSL!Snr^e?Pwqfk&Fib?|(@=D!WUetNF&*ZlkW)wKM4 zGbZ?XWqjC#d#dOBKgzQ_=jLS0&H47;SXk-~Mg%sD#28KfylE}4YhrvC*owaD`Gn8K zg3~w|{ikd;LksPvV~*4{B?KNCh5l12OzTFwR>ct7vwjyy3QqGB^uO|pW@wT9Y^+7< z4(Fg>e@qwR*JZhu5zl5tJjt${hn{#dtGaU?1@ z2lLUt;5M;iN9r090vj3Ev<)+kP;RVs59IqmQgE6I(0|e$V$&^BcZl&!+cDz@<;A-A zfW3GRFa)Q^eDA@$lCgSsz-KUyj3d0pdb)}uQK8pkIT%MM2iD*R@_E0faxi{S4y@P9 zbN)%n!RLIXfAFoI^G}j*#u4^MUVuF3OaE^5&-g)kkwY+$&-f9H*Ng|v(9QNgs^{)& z;8!{KXJPL1*`6{Y$6x@T>p7=qV{AD$=X}8ovF}{WamI^zh!=dOS7QO5_)Jefy}+e+ zBj~aJP3S)w^Wn;S;MZOH%61SVa#26+e!Oo2&iImvzoxjP(RBM~q9334Nnhb3K9CRE zSie9o^}Ij9y;gC-ooD)apZ0$xd@Su_8tjAfd`2nW4FIoRWrpZ$6>T>bkk&P@1Nzxn zhzaaJ+wDIG{TUOK-SRmKyp!Hs&|^$x>|uy0E}-Z_=X@7Pmn&Tk#tPC#{)UPb>2jsZ!I(k%$nj7yBVDd^ zIT$-U`tZxtua=83!7q!Zn=Vtj42&J5iChsCJJMy+-@DRfVJzW2kXNE&NxE$5vK;r{ z2fU}t=9fj&Wh_y7+~~U<_mX#vBX8|O9&`V|8L>Q!|J7eMEyFnS)&+*KI}C`n>anQ5 z<`H*}!!{U4cA`I@fA}s!JmA;l18>$>@ht>!b{y~2(!Z7v?fVK)Kj#~Gj|H6jEz{z+ zCEB<5P9peq%9dcW6M#4GNz;l-|NRqjz2Mt;rv<$6JEp~NOO%7}B=g~09pLT6`^9Gh zM=SU|7D4=BT_3~Pm5{aspSNhwN?))I#um~*PR>9+a}f{tyBRlW-;95>@1*pvCPaG; z;~RBF|3KX^#!x57=TR}HMeu9YePFZn7t{^w(SINP26aT8Qzwiy)CF>Y2C7&i{oBZ5 zNZ(yyX=^77t3c>I zw!aeyeL(n!KNnhxMIyeR$Xugq;HjwWu)Jb1g zCw-w4e#_D_QYOYFMIX9nTuhU+1`YB%_vzamu5Qxyf;-jI8&|sAXb&B7Nl24di#N^l{WOQYOYE(nhY`Kz`fe z{PvHdmQl;ZnB>t0@9E>HWz;fh+MLt=@nf9Q@@aYa-qQkJ5)-#%>+fQr@TN_zsH>j% zc(yS_#j50F_oX~Z>EA$z_D1qS`6w^tAswwt(rFR=Ci1iJ znO6|gDc5Tj>(T!Z`B?Vyn-=+4^NV*a_Sd@P_+^p?<-24{_TEgqI|;pZ^0A)sg{8fn ze3_WX_lZZzmt?JCcw6Io7q8yq%qbJdX5l zClBi>PeR(;$wQk`<9^BFY}yp@I+7o{B#r|r%p;!L> zp8;Mt7b-H&S9AZKKK+CKzX{~L|I(nbLC*U}#*7ax#Qas`4Sf#A-wU!o=R(nsW$u^0 z{(+_bhS2>R3eLmZekJq&jz9Me^4X7iNI(z#9?g4i4$8g1cz*vN@Au$Ea=)*PAFuE! 
zZ#Cp4KJT^U)psq@UHAS`i~aBV?D?wSm&c7&_>`}f@&TXraQmM=Q1}%YA1gb&r_X1C z?DwTX=Lh-xMS1EV5Aiwg7kyXeebG-|vpC;NUzPcO+i%{ro(8@i$N5hS{X5X?v7v0 ztLA||Ovjw$%=R&-VJ2eE)$;DdxyamM^1|F<*2CxT4oiDAX*SJ7jFI}9Szp~WG9qoY zguva0zvqoVl`)PVQMo@MYBLugWI4y#&4l?X=&aJp;1Lh9_eQ}W_5`3j2mHTPmnj3fwMazb=7{}beu@yS%CeBqxp{t zek~#D+VH63LIa)wlwTLzZgrT6`O0x>LhNV1gwXC*huZ(wA?_=`?ef-{4z-_8^0@~- z)}Q-3`SW*X(hqa)HbCzAX!qy%=C8OZbD*^@_Y5)=uy_+E@vKXPDS-^#qOKI zAN_hi<1??tU+y*JX*WJlPxNPfQUA!xtryeJaeh!A^rK!qC;4rwnIzNwiIoDZ+}0g{PCjav!jC3yc})I(e^oe ze~fj&KYT0dZpA$c?fMvPvTo7a*?f<6(9UOS$@^@LOU`F&ob~Vn`xDZhl>E=uxa58w ziPLX!45}6_7XK*t2Sw)AIM?cu(zk&Sv>HVZt#PiwX#YxmDL*N=O`;R32x< zb0p5S7wuoEC;Cc5`nBVq_2fS+?d|!;+|YXCcOdUs9R6PURO#PFo`}$DAWWA>`_=^f zKKn-nw~aikL%%liXx~~5|F3+j^lK-NBYis^5AADf5fhcK75p~*Cxv#04w zxq{n<{}!RyhJWpQ6O8}xohvcVR$}f&?PP2<%BxW>vY(G>A6(660^qGh+gh|OwhzY2 zP%gvVTw?#T!c*e{eGso4ZRNl#wi{v_P~L!YiTy8XosIJ^0ee* zQ$B)o40wLt{k(e5x!{j@e%}4Ode67ukABwAyPsF@xt6v8JU{P#UcKj8@&`OW@ASof ze!XW|&U@hc`S^EaAQn&wJ)ZL4^)%IcK$D2#vR({+-JM)Ib zMq1;vxv$j;Vzu&Gy98rE(==E3*?YEPRC-B=n zl|%dO2E<+ZTjM0<(Ehp!ahUz1C&{<=*L8@$^w&M*Tl?!IpHtB9KFp{4FgMrOKZ|Wg zc{}FTTD!&b*A37M@g6|i18BR=emk}UC5a_M5e;wP2@=oBD z+pV6zPJ&ugM?%tDkp2ui9UeKjQg$ z_w%a#wc-zWe%|S?{rqZwO&;m5{rvm+rN1VBe!l&Ds^@X=<>%MWpYom1m!D5RU-Z*z zt|8~2zFOz~e|#qPPK3`6F%EQ)7diy>D$f9Gu?F@ofJ78LO+V902sq^-3Vy;B=$7bmB_6I7r zhkP-2f%_LF1-JQg=pX%&X>GDUjI~JJ;X>3c{)uUQ!9E&eAMSI+=X3+@%m?CH>Aa`R z5N$Lpb>1FO^U#-T!L(7XwNhs8jimDX1^3{5;BbAIw(Cfpw^tO`+i(1?8KUjdW+*fF zQ=-iz1-EGd`g2{GHr*n1hZdra>wC0i=7q`_l!esy49M%}*^oYUVS)SoJTt$aeYsbY z%I_DPc6(d-W#GCi<>a1DD!*U)sQi8{i{@SP>z9G+u9TDeK&kwGmp?zxn%9WSkDq6* zxvFsj&noXnaC)p~uC=m1_nT7r{nEc%J#&qfawES#|<^6Kcg6*;ulS; z!ag2j4iM$#K2|!vzX7JGYV_3i_vHKznRt+ z+p6GPzZH2sciEX0TyrITWouZsTmrmXUpK9-cCQNN`;dle!x|;?l2#&D;%@tl3eIun z^3Wa`2e>||=y0tR>ytNv4(*X~U=hl+JK8Ptd=yQ0O_k#o#hNYc_9o!bzG$CZPbEF% z_lIgV6jFV{>-k9&G?&D767*GNeN`TZfC-`^4cTpRUhV11D~)%??^`*p18`FZv8 z#&uBAQhSEE`TZT~md_NV#r?$iEJ8ohA#Z+qX}szF^cnQyBSEjD{Qi8@tMz&MaqO4G 
z=kj8V-^#a4Yny#m1@~V2F--@|TJEk9(F?d$_apq<6n)|rXHyhHY$8yk5Qx5L^ zMn4^u{ymn1ewuP{zc`iO-$OZ=->>u!zEytzN%Bqq%>LYuj{f;%`KF(y+{o__sr>#P z^3D8y%FF%klz&HV%rcBw*>6m1yPZ|x|NAY@@p8=Zf3W*0Kdr_I{ObJv<>*)bJJWi= z4vBwv_w3=^-hjFNC3~Rq+q@rbOXv8PqTh!9G_4(WxWb>~zY+8LL3^_Pz0=>59`^v${(dcNfcDN9r|p{YNYSGm zAs*4j%0Q3y%^0W0Mf1nK1L^Diywk2Vuk`h#$9)Cq>;1fww&s<7pZsypLHd0^@AT!G zSNeUA9(=2xcR#O;0UkZfF+cBY^YcspPnv4aLf8Lm{{8&Y|C1*7Gt~ZH^WTPFKRws~ zYySQGYFd8285jJ#GDb|sJ=L>5_KD;3Jf9~vVQxNb53k_9bExz7zgvkh+Vrt$RoWvf zn438bebsXa;z(3*8#keUEF%zl#LlU3q|VzXZx#AuV{EI+&Q)=QHm%YDQ9ED75Aw|S0On#Ez=7XYx$@fMmex#TtX%wsG$SveR>Cs@*>B70CW5ujMuh4fzVg&0`=^DAN(rk z{&viLKHpPD?jy*2O3vvAFt(hVJO0}YvF}5eAg%jg2lTVs5fkV?9&r2bK!3&r z@}A(c6nH1Shd_^Um9d9$l0JfQkG9U(K>FN2k+Fevy&Qj+589p1@mI8IBjDXH7y0nZ z!S?}07dmH*NS7;J4#o=7<{piV73p%N%fXmI`rOBnF(X~BbU7G1Jo@m<)UTF{F~Kj3 zrkgHPx(tjRq^b6U1kdR*>2LVyvM`qL9^5;Uu_Rr#bXk~B>D>psr_1J-Mbl+0(K-IJ za4+?qu|x7ynB!lD@xS*o0n0$$19$>N9LhFpMO-2zu?!D18;l( z0QS>c_4)(Z0V^W?YY9Pb-v-q0xG-Sd4*1Z;0hQxV`}W>T1YdIeVY3^7w{t|mic0_e zci_5mbile3uxe~T<@i$$zL%&Rf8gmHe?E^f{y3<^+T*T-v?chwMSE8If^9IikOueN zWNaZG@OLwA(!Lr0Xx~ZcUrmVi8pb#3ivEGRVT_?pxMwG0OpD-4?l^3g{(`z;J^JsX z-=L1DbLxb#hPvQBpDNZ!f0YNXWih1huCTPVlZEj{#x>9Tn=)Q=k5HB4Fa6ue77=>Y zgdNGoxI@{vzo>eL5&U+tMWuf`*%*H)8}}qtIsVeWjckte?R1s=lCo7(0xyH{7IjC*Y+gGpR~EREn||-hwm)^oiWLy z4Zp7Q;nVZrbq+k^lt&+PQ|H99-Y=s+$Dj0(IA|L*}ZrNY!lH=#)745-V9M_43UOV|% zPx->q-cCNfPMz5ExS~V6j^v9-{|3U2V>)Ctp^MAdpmjfoTqX-_Q?7*@j8+xDg8SgFVa!DAJDI|O~G#`Pm9oRClC88+kswn zjz8zVe@|wA9w+C$e}85##~=FCIsTmc{ym%h=LB*7`}c45a{QqOt}XK3UG3*ge3j1x zd5O<^b+x}U@hM*|Z9PX3ACo&L2Ab;q~1k?!=ZZ9(}k@IW7?V@~pg_A#g71;m_Yd3WMm^zX~ediea^ zVQH5f{}&Krq`rpxHbmst-VvnwI(CvyBDmx>wEzMK0(Ck3aP5PejRzow^S1Z7t-LU1L=U+|S2 z>%kxQl-4n!39?h4%zM(b*1I}0ghmx1d8D7^nAIVkF>ZvVJwe`B2F~t?)TtbQO;5*( zB;ptQ5l8bM75rL4)V1MJ$Atzw1E@Sy!R=OuULF%-zsfrm+TH3<`~Nz`edV`Z-c*i% z2mEP2pX757p27S%{+mAb<@hshqurmQU;aB^jz8q%^JjXym*Y=<;P=|KyK$z0&lg?g z`12eEobT2y&mVmCIL{wE(zmzE^9OJAXxe*^RIm^^ETkfY|`yPWm#We3C3-VjE+e=l?8W8XlYzu+|vLA&1DJR*IX 
z2tnh}P_*g2&XrGo19|>}(>x4qdhc`A0sruD)agCY=|Ah{wnrBU&>bs zZjY!8(MGve9CH-MK1ygbbJ^!@m~XI{=R;Q4v? z^Xfh8fZ@+IJ}GaW163cfnit?~unh0kGn4M4??cQ9|T4>j|N zreIw3KK$I*TICZ7td5{f@5`@Z%R9*Pm$sTK(Wdw5ccjkSe|s9*^uGNn=DdSEf5EAn zfi}I5e~Z-ZzZ!LVKY#l1=6BwfJb$!f&A6%n?FE=)+;cxHb>80FVk5E#zxLmC?_h4? zxic!byCZWCg{oUx9bo`l;0J4J(ffJ?Ihy~{B}>}(0;oCahLwqI7vCQzivVtX8-6( z@~!=K9pW$jbx--${yNF$6!g0WBxgZrf87Kg>4(LZ+Mf-f{dF^VB%at%`?Dgn zzot&|H%>Xde@T$%b>G+F-v7*X z(0Ts;yncWFzCT~zpO^2?$7ha(&ad|8RQvO%c`uza-C2&bKPNgp2b$}#I=|naXYS8= z=bE|BZTIK0`*V|-Gtp5lvOm9<_v$F0m-IW#-$lL7C1x)4r-fI}@lOk{`Ya%SPBZ6} z&SCcFD*JPj{W-{-Q#$w9pEF82I#1M}6Y9?c_2+z&j?VS;=XDZa=W_bdoj&3XN4>CeyHgnFHa>Cd}l?wHQ6^ygJ}l@CW+UFE-VE_aph)?I!ZdGhD8aSrQTPJiwu z^O$swravdspL^-gx%B5;ao+m#$I^4H(sQeb@6UVdE{BTt)mdH}ZR4*^Cfdzkn@rlK z{P}C?`D*ETY3X@rj1Q(S-z+`Ptg9R!=0J3|Z)bP=bat1s)73tl+{aeug{9}TrRT1t z=c=iB@5@O`&qeDfZ=|a{k?!(By2}Ge&;Llz{h%%WEy+te{acck>#PRy?RMcC6d}In zd%34!xvq&wyO&EEmFqe};P2-%n1eFcKMC<2+RG(O$o1}|w0pUMEplB;2z)u$P3o!) zsq=Cz!*ablBJEzTWK^ze2}y%v=SbZt^P%*Cybb6hX#he#FMla4*NqWr_wtgWa@|bG z^$?{Gl)3id0D?C!CnX`*2b0q7<(Ra{brT`*57F)nsjDL-jj&uN32}WeBG+EtO;oO% z2uVZfL+VbM55u<>pLlLdw))oj+~e8mo91)RXY)HDbFlP!ICHAh_tFk<{dq~G!@MHW zVO|jFFu!LZzE|-*pScl=&K~!kE-rHvs_`v|I0+Z0+QnhM3F$G95k4v_#V<7 z4!=iqhvUyxP~R9{5I(r}{vr5bK7rN`^99s*iWfvKuIH=o4#3s=;rcu2a6O%ETpuT$ zCg@q~hwGDyj*H86S49W7T0cn_hwI0r*W}_f14rwpD>_;~UEyf`bcLgA3hV32#{eoH zBiGeoxvpW(0NTBr4CV{qx{lEEF(@k^1Bm(Us`vn04g6N*9~F0Zj8vawt;gF z$Dpiy3?O*-*6$K>eK0BQ-kMyCTsIK{Uwn+z)frOft!akk`d~!by>-c`TsIMthSG=B z)j3jk(tNb>F>ko`{)Wuw+BaMugKyc|hN|!lQ2B-qaJ3C_tyTFL*S@$0N*m%DrnVuj zM=CllZncYB3tVkO2^Xi@#o_v$@-e{CHq;dzZ9`q*XdCJZ$MZ4oh+JIbQ1tnE1 zqxHje7v&o|z}5QUnuhW*uAaHppnMGUto6fht%{C|%Wt{rp1{@m;Wu~U@SD2wF)j|j zhimJ}Z^)*qtB-tjCy^Z9qH@yC@)04K0k~!{pU^c{Xn0kdM@zv*{PpJ zS)Y;0cWr$p>UpTwXP=&rvOe>a@0$AD$}H4(bWSDj)zP_>q(2yc!*(goiS3WR`W#Bq z9D={w(dSNnT6m?8`n2$>->HW{@A~XG&MAFf2n~5qF$d3$2q0XdE{9_ z`Ybro$;aPc@?5WBsMqIskxl{r{*veJ@vI?zJ{Rdsz~7wlY`x*A*XM5$U!R>f0`>Zw zJU;i*zVsQA)VV%0lD4L8g!4+|p?qh6nT$8&!4Id@Y~ug|l~ 
zL4D*dImeD?h;?@U7-@BN-Wccd0?d8#awFzC@%`tE@qD1J&J!a~{`12)_w?C$v|(*y z`KZ@t;?ah+jTN9?pLItY);2c5rOR{3IB&0X|E72v<~3>g&mALfSLciozpHb_XkXs9 z*it#qOrN)Hp`qSjHy$0>&c$8DjJ){NZ#2H}eXMa(DJa4J1zrF10 zZ!OdRM(=;>TkPM8ytGsQnQ?#ZxO2~DB5ZAkta_ya&*CN-oglL!VBvMypNZqcmTqg)gBO=${*>q94t|cT5 zj-4ZQRfMQJX+D%ba88c;P67z|yz`>Ma@`n_cJCahs9ZM_0$;wxqO87?0D=$i44{Nu zA52QSccxE^TsIK{U%rz_U7aCy-nmF&xjqc`A#xX z-nV(i7T?KvRu@wLOMK;M13cLp1toRuAX`J1!d=%AL^dK)%xK%9_l-Z zi^DTGh{N+Ww0?NDgyN?II$A$n;b{GIg`;c=XPYP=0|*=O&cg}IbxlOty)$rlJ`3>c z2!Su_Jt!+51E_3Ut`iBl-kp?d@4T88xvnJyzW5la^UjzF%XNYfINo_O5xK63O1pPn zjIt@It8xU#J4+`a*WL1=^no)5l#c;aK1Qw^!gAdhk!$Z9jHp~U6M8-dW#wZ4!Mk^6 zLPD+&CZ*jw2cbo-n+Sm~K1S;545{({ci|1634$qxXJ_fd^ZHQ+;aPD(I zeMQH`tp%>OA)dQH9G2QB{3uv^ah-XN6$=<6U$?Mx zSHKwcx_O#un5S9Sm?%e8-N@I?`wb(SdB&{GRMvbonbs?ocXGm$}oCo z7{*q^$}@RAD8n!>z0^(_Tk8${ajAtZ0If-KP1+IY#+o!j+F`g(r0sRexVxTsLEJa# zNSnUz7GuJgHgx&7pM#N;JYk}}+1Q1CkKy;V`jipJy$$3m`*=XxWVBtDy6pZ`DwUC) z%7&JW5wK_pSOtsX^#t7=u%T(I=I{^>*~l!2Sc< z({*-b=C;k5iOerTd-oBa{Vq&hsQP8@u(n!*2X6HUcPviG({A%%g2ES+V`%e9+)F^fDoK%jwH?F~}WnizJrKcJxP!u(N73FJ?e{$(XXF0MeSm21^)K?b z&GG(*k#${WENgx*{LYAF;B2qJk9t`~FaA9x_AL5gAj>^NIg9VM)(e3w?x|zF6v*Pc zt@Uys%gwvW;+eA68-XnDH)H*KAd7iM)-M8CZr)WE^Ng%_0$JQ+#`;4b>qEjn1+v_{ ztE^+Z{wR=jobbN`SuJ=MZ~0Nc_yyqGKM5Fb1OD>Gfbk!Izy4{!_$6TLzXgn60si*& zfbna<-@g?wegpW&p9hRqz$5<=Fx~-t_csCKw}9{cE@1o)@Pq#h7{3Sn@V^4a9{`X3 zIbi$|@Yu%z<39n9rvkXCY;9;s*Qk$R>csb}i365rB0($hC9Pv~jyH~&vhzr*`+ zM|%4BcPI3ea?iZ}Tb@t;zppvMTs33UTaUFEMwsxIjHQ6Bcvc|%y`BQmCmHKFjP zO}ad6B+B@^A*8$D7yrNC-=GdMIVte3>1=^#3NW%<7!Vd*J%Hm%jBilSDI51-bFQzU!e?#L- z&AZ-vZup$C;`vRZSFJ7|y>`u#!WAnESCy14bIYSwmFDN>j$XQIc~Rlg+lvdAjwvv5 zMz335R9sTFZuFHi#uOCJxc2HR3TG^wRyb`;-h{$2^R6n)UsyOcZ`H>9!n|<>t2d4* zEZcZT&N^dO>FO0FWs|~JTstj1Yu2=I-q@VnoZRrR<>lpT*G(EddRb}t^7V^zmaJJd zdhT^oSB+k|a>J@oYs=Q$TC$`(2aVye6PHX_yd;0z*rIXyV<(O;88>Fk(&D1=OBUw> zjwu>DZv4^-1^E;5M$EZl-qfq7Pn%RcX8e+IMMb$KV@rziCN3#1$ zvNeTki;9a&S1&_-$>R0P3Rjk{CPlAYs3<^-t4da*GwxGfv>0$X>ZK&m==tCk+SBLG 
zo<6H^#{Ah+PqAChiejU1+M?N4Tsw2BR>rAA>K$4)Oie-;T-bP)ZV;&LN7u41ZBlpB zcBjsxj+U<>NQJ-E% zRNa57+fRdZ+(l?$U9LuO;nHCs{WsQ-|k3dZL(q6^de|3A=Mu_M$`xq0!BnGNeCfag#`i%gUB6GYZ6F+NJt-Nq5ZCM_RI@$VEHyN{#kM*7EAL|mP(_sF!k?))@leN^% z?1JR)<6IKtgd4K1n>o)gHawHNB5zq~Zp!lflA`>=();dxJSDeeY3Vyg-|A;lau=7a z&MnN&UA#EIv~*8Ner{g&9VG>2`8bo8SAw+c(v`W3^S`vTuy|2!;o~VqxhwJ?Us+a? zed7};#Y>iy=9d|dr<4|~&yRa5Wle73>iq2G1x0yJOZDtUtCuXvFKJuNlcJj9qQZ3@ z)-p@i(*5JC#n!bXeVnz-*R`abaxEu0Fi&t`rq7wID++X2(MCC<^`(n*3v)}dm*|`` zWlqaIsP&81Ws4!!iDm0n=6|tZSh_0Ysa4sf`Kz)E^NX@`bvZewQO+}^WhDi9`Ps6# zXi+Oqqo}8&MU^dWHR|QiX%zM4Xi>{JNup-GyzMj!Yls%MD7S3!GFNGJr%~GDOIPK( zifA~EB6OcG7em62C#O-E?y2S3ZF}n7({NNh)BQAxdU}=W=DdOxZnKh3qpZDa@H~A!1$ME1sWx{Jr#)?xmTzd95AwZim*N-k6Bm*ikXH9sZbV6T>oR>Wt`>+AF5f zVO7tiEG;Qsy%Mu&>5BZKvg{=VYxDE6d9nK6{MGSa;))oT!cm3X!h)qmabG(oompNn zyYEj~k-Ii~NnvhT_8r9~%T?h#XTca=8NWGh;*_D8*;8(~enR$?1zFiynHkwb=U$hc zu^@X``iiv~+3CYGi`EX!UR08oU9xr^Y$;lvZBL#(WAgOuDf4Db`ew??l7cn4W%*H) zT5UF-N-15svLt(P@#>&!x0>&dQQv{;nu4 z$zD;Mm+yI6{Sqldr{|Y_18C{uq9G9c;3vwEAxoF69hyEoP5)$BlV4I=P+YX#qksR% zpFPL?=kKj}FMj>#-;YWo0C^FA@u}Z`#QdYa{`3?n6Hr5Ukyg9rrNE&T@4f0pIzR1I z&Ai6H*YW>mc8nWsIOb3Mnl`#?qxJ9VWAqH;@P&pk-Z=k26?EeC56E`(&ZVTTflz}d z;@J|3!0J@vJ^cT`7-GDK;s3x$G2Tmp3_+$*;(P!y;5FWxN!;CdFNgRdK{a?^2?i!;k?;5bA>ikli6em!m$&0Q6=OL+0>2 zH}y6iZSwd-&j~(WSJfCcsvFIa z#de=BWOpT#luT1HpyW&?bCg`EWJt+vO4cdapyUAqcNu=r)H5#hoTOT)$uO#$!iJH# z+St%o8i^BQP#Woae|L0<*QjcYL%L;bYK*J2s)nL3U}Id@>#Ul%DZU!ZXN9bcj8JI! 
z@X8FMrg6Bjv(bBfPEC$V!<`pVzBl5p2_fS|e^rGQvF_pVWXnR@#>U~1;ixm>u^!B< z8lE|Pt95tgM&E-Ysxn7pZuQ+gV&jOW5nDz?Mr<_#oYTDy8ht#FgAbWo45P~Mr+ba_ z4to0{t&26ytg~vY$ySYNTy@awiaNGgH378m2yunS2nEzJ&V4&1kB=bT_;z(u$n-nm z3pX{DAMkxQZdoY6{^9ZYJWj%W8I%(;{}I$W2GWlx9gD}%shrTU&UY$1*3tcT!TFXG zx}d7UTwpm&i&$3oqmkf^=7pByMITs}i7^^-$n0Xu_TxBCq@jPVz(D!CO|P)xrd`mj zru>3#{uwO8>}r|5F5S!u%P-Q=D@`V_7t-h3gmOboU8h?$S+*MAkt0K)QKKrm-iWjr zDsA-WP-x7UN^=*q46e zXyfR>Wq(0eGOICC9kc&mz`2m|Q+$5bST}86P2COiYa-JY)EF4D$PG6MiH2;6gqGGC zX&xi8v{%T$q(|CDE8?p$;!!5JhdFBamM#tXd-V!c7`ss3BlvuwF=B)pQB7#6z;#G_ z6racN`652M@p&AdFX8h9HTs46ZM`ybr8oyE(vl;|LMOQ{ zc`G?S4dwVEKB3b;(qEkG@9V#HM|@plQX~oK29%@iO^>9DbLqZx*t@rJaAdHs7gE?Oam1tW1?Uqo=R?NZu=kxtj#bF`C5}Je zIA+iO@eyNNWB2sAYa+&djkVu+=V-*(4*zog`4Qv(M$2b-BE}B15gx&)ZKS9(eKmEa zzh;u}!HP-#hbks*x^3fan{KO`WW0%XzSVdkc=Z|6axTQ&@Xqr)5p!%X5(!2^W}3sf zA2O31&U?xOnDdm{M$`8b$Mn7Cczxe?%z2hb|ANozZu-U!Vcsuz2M3v#l6fh0-q7d+ z!O-aM;`2j%UdHDq`1}l?H}H8UDC!>Ffa87m?8oPMeA?te*`W?}gv}l3Xq&Xuev3G8WrhN3A01iwtiLfq0^H(b6a&r`v!)_L-~kh`-J*b_KQ4LfuiU$HZP4>6-F|B-Xo1K%m_6`B0Ekl z&-Jl+E>F0;V$Om&6(?i=RYu*()p3UH@)$2Qwf2GZABNHL_9KQ-mn7xxfRyz)Qtq7x zZcqnbBem3l+(jM8I_f~~-ZQUboyeY!b&Lpf;)Oj8NmA|$NVz{p%IAeWI7Yqo)PdYf z9modiK<*Rvbf6>b=|IOY4xsJdZ=zj^#-f+&E;4(1&>zV-uElY^r@E;J(>-R-C+nq7 zEsm`x<#D~IB++L{{W{O~#JxVLU+?CUj;$Ez?=%oY|@z93PpZ(oyC|M_S9D z|BqkD(3&1O+L})L2gA+=Wq$^a>y`a%OM|kXI`zta9`E;jMc7aM2G484 ze(LW>TWG&16FRh?=`54>i*l6xB3;?9%cuRa-tEGET@LNn>9k*`bFN=?vU9yKcHVaA zA3|1z+c%ubJWoE^d42|MGTWQu$!d;ExjP_beU6lJj^h|}r#4B-UDN@tqYk8;;~nUT zIn{xVnB$phj!U^OAm#oXDdil;G3u>Pl5#I~fE%a-Dd%_xI>Md~boBfl3;yc)orPmP zzsY?)zp10=H;?N*e;4zE`gNZ8D>%Q!+`?Sp{19bAhx3E!dVY&?)cg?XYJTYQIX`5* zuZj7g%i;Xc>6{-roqakTdW`VF4h!hY)SM_XvW zC=)ugpXqvji*l6xB3;?9%cuRa-uNr%pL992U#HW4o!<7le8AxH^7#Yz+&w5Xs4^w; zjQB3c_4MXm{4O8B`TgWE-{mLFGcz_%&xD>8b8nhkaWdB#?(gyw)^UdI!u1Wlo2r{` zxy@>7uy`FL@uXX-Dynepx5={N%?jTZ-$s9x$LN2^{0!HUTy@AYO(SB)@p>%6>%I?p zZi2^{5a97uhdzYfWcQ+rwx;Ve7>e%x?+|*juOFwKa>&Gu0t84tY zy2&yo{4CVXG!hNZL2FdJL0kcR1@Rb(D}j3xUn}tj;6B9TC9VQaBAy`eM&Q20lO(PN 
z?ngXX;=6&bB)%@*n2fdy8;*GKv(*`ZukHiBirAKT zJ8*yE8zjCT_-f)CCEfu%fOwX~4**|7JV)Xh;DN;RB;E-;hw^afzf=}lRxACTsrS7*|Om+`$o zAJ>`vaQ@5HP52&ja2WtD9dTv}+2YI=@~AUMNFz8`NHaK3NGmvBNPlnvq;Y;Lr!qLk z1#-;Eso})SB_^jbiC0KWPK_WgmYAFxNxVv8a%vQDsl?>eXyVlplT%}e?}+7846iZ1 zF}wzj;Wcm!uYqHD4IIO3;22&5YhJrK)mb*UIn`Me`!QD=jY zMzBgqGq_PmD_AY0KX^B!n^R*s#_QymlT+6c-zhOUHIDc$iOH$)#6gM4sQ__>#N^Zj z;tdj$Qxl0dN=!~oBECD8Q!%{8n8)xMIEL52F}wzj;Wcm!uYqHD4XkE=`x$M_yO=H%35;w=)B zQ&WgPCows79r5QSCa10^zE5IuYAW&l5|dNYh#!!coU(~`#&Rl#*BJ8{UIWMQ8aRg6 zz%jfAj^QMR>Hrw#~?9C2O{vc>tfkVl>G2x$brE2J6xo{(1X z`$GDIKY(;|YC6aGK{@8+)D6TBOH5A9Abvz*a_UCnu*BrlOyWl+CZ}c*e^Fv`YBurX z5|dMNh@Xh%R1B{%<}thmj^QcnuuGYv34O18ZKpIn`MWp&QFCjf@hijOA}x8zINu zdBm!Q%-Yh^w-s{w!=rtlgPi-+%|7PuGvV((=D&K@KYYxentQ~@{3BMy`H{+WtS4mtUXx!F^$yB^OZ4@ajsFmRY(F0F=QFrZdK>wC z;gjA*F)#6afxImL=!h8Ss`GmJxAywi;MTNTUoY>Qh4n^p2fu(XgRszDALV)3pWYHY zW&W!3{&z6@Os3uXdKvN^LB1N~+bPm-j&kQz^Xc(8)qELfUpRIAkj9Mark$y&L&_V0 zy(z(?<-m{zo>hd*3}+AKZOBY^=3(3^Bj7kCpTC5?V}ope813k3XJCDoJhqGhFQZQq zdU`XRo5s z%ZabX?dlEL8!`zp39=t#zgN5ON`_1ZPP9il{c+qM$5+^+odGx=faBiwSZ5%P2ST@x zJ6!9mna=ztW!KWZ*ah>B;u>P9}~saeS3M z%^8W~kvQ&ePj^P+cr^5{wr4nFaXc331MHa&`>_Xbk5`++mqK0&nE;sp*%PwotB1p~ zea+z>_5eq=u{nH+JIefXDuG))oF|M+`hr_+> zOx51zF#AWgw>ixIk?m~`vwviJ4~N-5vc1h=_7BJQT(+NlJD2U(b6^~}G7g-&#-8m2 za2&w#Kzpt;5yun3yFvDRCkw|}NFQw9I|ZFU?`jFF9YD!<=7oz8ns7e#!aL9OnFz?Q9Nne#v$= zhdIAwI}e9Bzhpa`!<=8Toy}qPi)?3enEfK#c{t2|k?m{_vtML8o5SoE+0N!L`$e|% zaG3o<&Yz2Rlk4ZA-EE(%f4WAln#;GFSMEpdOh0Cx<0ucAP(!-ZpGRSu5Hn4 zp}3BL>)7&;&NV3p*0C8k%QZQ8V)}C8wPxZ-d!>^k)Aj|V+@ENyIFvZXv_{z_PCfE- z-3Zsl7+Cw3BXyp?0y@3No7QN%%+YH&ER>lv*-RW`uW_D4KH4MJ(ZIHOSSxn~=X(q> z6X`E4_z0fI+PN#{VQmWSLci7bK)F2znHK#h?LyjK+6CEg3C>-bYFhNGKICV+u-@*9 zd03A_yP&hL1$870Hxub^-D^rNMc#yArbXYo7kS0n8LWlbgY_@EZnu4H>vr4qG{#Nz zHEiF5H8Z+yw|!y_%UZb}#ck7U4W2b*O}I}_Ig~KwvLy9bv=IcWAY7V;yAn9NrGOoZCc}P$C)4H z!|W>ztoiASbNz2Lt$-bJ$f*H7GjW1l=`2v?)uE3JtOx1`y#aGfYocA{kS7DXnu(L_ zYUilCJY0WY3B7>}Oe@RYB)*A>u}xg^U$uND2E*8_a^zD0A2Ey-<_A^ 
znEWKaIj%|XdlSj$o=9i=$y@eQzYo2Fuf34YzF@!b`^(K2w+-$(|5NqR51h&R_&cT$ z9OOE(w!Wf_b5i??Lg@#|g~6XO6DQl7og|sIJ0N9!GWu=sEv7Za-YR?r=Oovm311;~ z_L3JV;KghWR?x-tOr2`u)*&X^Ty3n!Q8#4n2N)t-tmm>EMlR*EsHV z{Z}Jz`cl)HZr2KbB001D3TsLT_ZLY0b8GI}!LET^{`jeWUhK^d}tS zf#4wh6n&-kOB}nw;Fg#VNZ0%)Z&JV;@|=EA^PA0osR&nI@2*+|X#&VARB_~Oy?F&e`e-Qd?^c|)(*RFSXJyV6e`g$fu>O9Xe z9*Z%aXYX+aLx1d@rZwN*D|`~?KCfTq;rhjZKIUpY+n$cGA~#td()O}G$Oe7~X?{M5 zF`})!j_Km^O&J(Z@{_)a=dngu_$JzeGWO9gYTjm`uFO@Er}Ra%AM1#F@4OA@bbaKDw#i*L{g3Et*cZXI%Bj{j8)L=#^z}+d_0jK${^EBCu206ou5lG+ z;sW~_X9)UaT+p;`vK#Q7tjgtg1$@jn=uN!GOk8Mx#Yur)V1sGhZ0{4kMfX2_N&tFO z?lTi_vHwe4M@$1}IQG*q_PjQcWBr=AHeuZ}*5Mika^_uTqQ2JWwTbQ%`Vp2n8#dDK z%!S_}7w4;TZ$i1eMv0aSf5LKaM!Bi5bK>2m#d}ozKGXh(*D1Pu`jt%RkA^<|5B&;b zljuLS9~z1C>?3lYyqpMLay+xZF>+^00j_Z%uU~=db;xPp5`EBg_#o=fQ2H~K{%q*e z$8yfl$GG|Dw$p9bnX-p|=WMcvzGy4>Ew6Z|j$<4B6x)axgWmL0 z)MMRl``mWWPl<7Wt@KlYN-}5H^hWJHq;_oP5@g~xb={}<$ zOYo)nn|bb4Z{i=B=b$(7&%|%ipSa3~5Bm$#f9_4B@8b6*`A+{#zSIAbr}SgGeEKo^ zJNAv>3VacL8RrW7hjWGfLyogQ*pKXwkKTX3%NwXG{#kG0TaYbZ^CrGS{G2!O7nI-f zCU$?nub*v=S8Z)kZH-rLjYnJQ$J%oBcy_n7^Gw-D-*&d!NPjmL&);tS=6c_?ka;gX zn0eTfWJVe1!YA|J zbD7M4-&HdIgCk`A#RvZ`>Mec$h(&uhTymMH=l}#0!Z0qY5^Dlk%EsOb!ewZ);(D_dwcd>J zUR-b1qSl)+-izzaTGV6CaeA@m|Y`54G}vaZco9i{d2Xy~xQH#Yx6{k&`WolZ^Kw zCtDOJ8Sh0-wkS?A-iw@UQJiGF7dhFYILUY~as7p0 zxr+C4RJ>Qnk?~%Yj*R!Ja%8+$wd0QW8jH1^h3H%BO*!7z0F~0IwwemBbT(R}ue@#1nx_h<_{bB;Zow-?y$M9gDS{Zcbkd?B?`1U^l171G_mL z0CsbF0Bpt=E!vrVV;6X=mCX*C(%4@m}jyyjQu3_i|LcSICj^UX^&B z#C6P7xaUk+jr+;1>yyc;GLG>d1U@p%%HQ`N+uk(ivioA^SB$*E1m7fDP`-9vn_#N^aw z;!7kZr?wDZ7R#v^USrH-cnuuGYv34O1IO?hIEL52F}w!WymoV{vutp4ss7p0xr+C4RJ>Qnk?~%Y&Ozm;WV~0kJ+lT!~850jXjdWd+q#N^bN!k6~k+c zc?_?CV|Wc5!)xFeUIWMQ8aRg6z?#=?PIZ$FaUT>6`R9@c9 z=QN`p>tpl~&(#`uZpU%Nc}oWoZHm@>Y=S@V;$?!d~QHJg3le)qxrm_ebU~$V?$QOGvGG) zLEHJB$P>LEQj9yw;`?mLecc}SyKN2d=2paV+={r4+aPa){80HW)?t97IY`ez`Xb0h zi22Ba%mcpF-tSOX*F)VUIJX4nK5Ku&S%%|fIKIt(UaVo$c9WC0R4=l 
z-w{51;bVQ0q|WnYI9G;zdG-&TH8@^_<9z!^QJ&W>T+~PSVd&Ej%b1wv@OB4`>^9?dKra3I*Vh)FA>G=p-#hM5BblK0%VXhC;{IA9QApdh@dHduTG>5r9O!hnK z#QHGJ^*YR1!S&od`YmvNE&6>e#%PKCva=q?>oKlN?f-V3QCwwv3~;<0=gOhC%>Id3mn}Ja zILvij{4NmuVV^@+^0+z7bzYLU&0((V`Ve};2cyh+;I!m1#un?m^x7=yM9YGXxAS=I za<}7n_R9HotbJ~K+;;H0@NBRn`aM?wzTiF3kH7~z;DQ4#6xgpJHbA89=DVVCu0Diw zA)LG2{+aL%`wJfH!}mwy+}=u@s|3H6+pjxSIIeFYT5F=MK@o=NiYaG3s%zCg!W zNp3cW>FZ?QG>6G`**DE$a$EMr;V`)@`=U8aF3YwzhuN31?ag8KrEKfrF#C=^#?3dk zoo<^xZae5>&IUWa`Wftbg7HY?H3-+?sdxSB__buD>-F9S-sKKo(!lk3w|>h^Tx%b8 z_8u>?$c%>InT9~JkNK(i}*G5j`_JLm+St>ecFY2AAN8AJe=d&Kh6cN zvt+yYzIYM8CUr&p+D%_G6FDc`F)8zrm+J*NKNyq3Hu3%PB7RNP&2@R&zP5F{?c!QX z(buqD#H6UY-S)ZP!OS(r+9t-Oux`B`+&IO$x!zLrE5=#GueDRRyFSLFi2Bre%iGWo zIo~m@a{B`(8^?=&U|M(CN5ndDJzhHYB<~e7(Xm^c9O&h}gcqpUM@77eYf@LluN8q8ciR^P zb*yVX@)iBkv^Lq@gN${hJ#FLH^01zo+@!DIdG50ye1&>VXk+ck)=gg_`c?aicIwtKZ0>sLGeo`c8SWUi zm6)^l*zrLf!?pzdxAK2XYqQ-W$aUI!4BN)Am4G{2>`R007`COzTk?C;+G;1@{jr)i z+Y{X}Y-Qlg=j@)rWza4AlWE;+Cknr!`%}lTt$|&iw|fT*ptt6)rnSvZ5`IN;aeJbU zVOtA3?z8&^Z-?I6|1+)ac5;w0Y`Q%96#7T)r|47I*SCX*^i}ki+Bb3h3c)YV3HnDp z2gscwaEDx{Z`2%TIV({P`;Wd*_ZxkUn{V#=KkmBecRH(X`k=Akwzyuw-V^XM4g6kM zkFmVp?jPiS7D$Wkd9fUQw*H@{wZk3|WV}cg^6G0E?sWlO5yMuFF@C@v7%YT-`A4Qz zV-FU7iSs`i!>0DSVB1SDR^%q@6W1D2Ht>5$^OJERw6$#vTN%ca{G@;4dG2!|;zVQ_ z`{)}rZ_7~En!ib&(jU=&?u8*@*iO+N#)b$#4SyzL*i?Pwi?+#KH+_)kYuG1Z*iNxN z#)Pmwea)ix7CByh^gE)z_&tIcwsP3zc)W=_?UdjO^a&QQS+#Z=-tDByRo5mC^eVf0 z6CbqGgGJB_dA-&{c82gRy8n4?5`td!Mc%}R?M!hku^F7(|8f3G2T1 zLtMu|Zu?hk44du~`Vp479X9eBW(WKZxmcshtwp)KPKlO_>lBvz5Xvovot0g@B8H9M zY1;qrT1A&nzp@7U>!DBoqwky0e`-Ip7U$VV30|^)tH3dGr}_`L&Vk(Yzqn?H z+zc+!2W^E9qW--~f1A?Z4t@Gq&Kdd`H~-vry6rks_R#N~P4>_iO$Wcl_0UZD++ij9 zuIfCmwFf(Fa6h z*r3m65d5yA-{N-}=ScwRd`^%f&&S5FVT|dUXuCUxjbln*qhr{3o_p8`-z3WuF>L%E zGbPTYGu&4$2jW?(M^VQspbZ zwFc?iuJl@WKyJTU<%6$T3%p~H*W$I29wYj%bx5yC^;+v8YlnHQJ0Tw$;f=3_*n`Y~NT{TTfn`$ljDzKFhzbA|oGxx)S- z$Jrn3NA|~mGKQ@!SC40RTRYE`jr472yN&dB<#@*Y>APb;eRpgbcyKnqA6H&0e6rsG 
ztmk9f_r{*HxkPc}-f#r|~9GSv@=Yq|veawhgo(=khoVe7#s8&HK9l=}>GiRlX@g!L+nF}VHP@f+Sr*-k51fqQHE;~Cfn#_LtaH&IoucWiKT zsJ@%7~OeXYEn zN_@Y>oLkd~ACQ=H%O>6#Z)i@*7`9LMETfN@$-a5CwJ&E8f3dYMXA?i(+Lv>PpNRES z)0y_5)S;icf%svG>8EB8KO!;x)Q!YpiT@oN+0_rdk*VI)%wK5Bo)K<++%%= zJQ+8^`_jA*Ml50`WV(o9y9=>X+&@Xj{M0^&b*qeFle)*mu$6!0ieW>&(Rd%|Gj@Xa zKHYmD>2=PvPs(-90qEhm(O&Mwlq2&rP$Eslu<87Iy>s1@YQ6Ju$~RPbNvbTCtJgZ$ zKB?j!+9_Yh+~{@Ab+AvwJRGNqA_eJv%MI$vOmcfwx~XyjfmI;l-XVk8})g=SL;V(*l07>F}ID8qy8~5 zY$&_K7&er5S}|-Wo6ozBiD7&3SWfSM5u9GzP7E8;#B*eME;fekMa5^S({T(N>Wy+5 zdHDP|CWZ~17cp!&2hPUEuu1*89C_~eF>IKdVhz8m{AdiDDmO`$6%)gTxhdj!j#Ivl z`O=(jfUSbl?bH)c_0U$dF*b$`^$0$9P)~HtKKrD-7`BL2fiF!X8pnomPb!WL<#iOt zmWXxr?)}4}acuNo;B^wtA8$XgXdD~rp>BWZwzt0+*RhG%0G5g8djp}@-hN}$6R`#? z4|*xk)BBH++hTsciE~?}KTgAWy(bydgin^|_NU{V-lI&!vAu~nHmUP`2F|s&XIa#D zzX_kgcw*?&7x#d^dyg`v)A#1c^9>0&@7}Xa$E?)BRvoh<`&sT|7WL(rBO+!+mbb4r z%IS?bxwiY8MSVEtt>F7{>@t*HNyo7(>d!Hk1^-pOZrj{8X#Us1PQm}<)Z0;iMEyJJ z74xBkdi8v$r|*JKABlcxZ!fuM92{Yxn%Tp z+kN7qacn4)acl#iHvoE!=RGEljb%Yc^7+{Psy??(?z*)v zXn=hpj_nld(>|a9^$8!)L4Dc>?5Dqn514|!YH!az?E}~j`haQBYi|!f>IsgpJm}4U zUVD4`MZf=89_IFJ=(V@UU-Y|=`$FaHN*4*)F?zpt$#XoWVaC`Ai$BS<|UOZFV_$98B(=lc47&3RP z7}xLW_^>k>2gG&3XS03e<=JeXxUT_!W>4a?E7GVb^o zmeXD=49jUR_J!rN7wf`!I<6_#aZBzvCHAw9M{>skk()Xe>P*I_kelao#sVyosX zxuauk+_5#}PJ1ylrzJ6njDawy_rErHXQTTPshFYmVmCU959lZ!;6&^HJ6i95qCINdd(yOx zBcXqf?tP?UiFAyJI|k!yU%Sq^^GR(R=hY{*ZTwxvc%famZ$RmerAm@m>@r9(*__G3WlP_|c&um{gTU{3>|%ebF4@%aq!^SoZ-vlQN+ zq(nJ;b0m&F2O=L0r9(-bB&9w6-6 z)+O#=;hZ1$MtIEURMg{hrv-Rdm$(o1q|(E=dc-v{jn90jUyn3>f1A&nM0rU25Wm&i z=H>I2wrSeFh8JbMe8!>etAA0J$8*}gdex2wv_sohuiC-;{o1|;m9`&g+CI0Qw$GiW z?Q^GzF~xHL!7)g|F^TK>i~?A4499|Fkm&ne`ndunu;iH1F_aGTBTpSAb;Ot|oq*EG zQ96{=5%%Cdtl$`=;F!ewdH*=dF&qnyK_XxDo_CJK`vk|7j-hmzA9)%mkw(Kc zj?$r|P7KH1R2;*-Ej@;{xPKxzfpdBcYt>lSVXXBS)~d1QJ!?INbt;Ybll2(7_4F9J z)ASg+)1n;vgW?$O8*2M_Uy%0k{+_mv_wqOgc#lro$NN-559hRfyeGyq-s{r#x%ISt z?lf(mJFTBpcfxxSdQbR7#J=f0;IF{3-s_#$-FlDr-bin6FL7Rjx3_mV_r%rvE%RDM 
z?-kw;>3ZK?maq2)zY^(s&t3X5UIXa;<9pzo-Y5PN9P54Id0nUXf4>yzde8UEaIE(c z=QXz8H#`C9|EWF0&u05L-_K_I^ffKHqxTw5#@Oh+mB|;qhxk=U*Ly3IFM4n4{z%t* zE3=$*#JzKWU+#sf_w!{r8HkVP-n|2muJ`C=IhkFL-AkI~j6^wY_m0;0B9alW&OORG zU-Vw&>}S3AIOmS$Jh`d&73bX1oF^YY?tP|N$GKoDd3vtcsyR#U=)J24VO;fI)#Ofl zds2@@dV6~!k3)KUdmsmpe!M-8S(bZ`W9pu0FJtPr-Mg5))O%sm=jyq_acFN(=?tXn z{j50`^}ffMNZ0!ub1t5#{f*CM`^oon*?v6-(lFjxTUz%_j=2}HPVT|o-d@PuC-y}9 z?PlWqiT2k$(SExBse2Lpo^bEWO#dFe7f~Sh=GJ>IyZ1DvFL3Xze75aZ%j@z_YTG!k zKB;XJzss<%wfLO~DSjtPT+e;BqrVezjNe$bZTHO9dt!^El)&I8K7a8$5$%ZnPE_sS`}4JZd?!Bh z@?H1ZKE7{Wl!r8JpF2(4=S~x2iudXZjzJ2JNnFo&{zo~6W5F>T8GmYEjpl`x<242! zvhXG&L~tH5yO@UA#hPZ~I7g%nK4fk&j2wTr=@nMow8`CS$|ra8&tMs5SIhKu>1I|~ zev$5B8n74A=R1IMLrpm|b85QIwrb*L`D!em6|%;T4TY|~w$iM#YOI;~Utyonh@4_n03Ayzt11?SNUuF7WBuB3x&py$991s+-Iq7GL2pbjXoaZ5#zjr zX2gU2gbeK8W~H0NeLaRbW{e#%su~R}`nK@8KWp?D`v7nNd9BfdeGt>4!X#2=G=E zZ}bcG+uF9TCi<%@{B{0&&}*Bi&wN)#t`zzF+pH?9U%#!88?7s^+{$!y9_4K_k|W7N zFS#yx>rB`lLO)or!x!-hz5bE@;(UK!|E)W^*ELoc_0V||pFL>vQ^&Sh_YZW&L1%nq zJj#i921N#;9OLNtL4(lty~zJGKF{FuWqca&c^03q;PchSA(0`X?vzN1s2dozT1HAJ z<%DggKHHWWNfr4-+f!0x+f!4wGF_b)<)uZ^gkD-*+SaOQ+n;KVaNSrr} zA2h}Ni0ecyJs5rwy%Ja56#mQwab2+IPnTB>qYKu(=AG}xnN9-s(n%OzWcj`0@LuJ( zgW+C=k#M8mNthRhPdCSd^WN{oS=R8l?!MvOy7OAydcm9UYs&9=6MjSaqBr4pl&^Rb znkirPCj61|pf};qly7cHGT{E$3tG@caVR6+iUzD@-M$++$8TiTzYUP+BbOu_6Pb`q-z9rN6@|q_MFLU zapV5ZyvgVrw4wV|GpwWDl%R+yKpVT?_>W-sc^}|&B={lDfApQ^E-hDmaM8c6I&#q) zu(jn`Z^B!YU-KrsL;0LH;TM$OVjCb;AGD}GXiDB|OaMl1BO4kGyLbNtC}ajQ0=Av=*xhf$@2lQVT~Sn)y0Un2_VU8)75ODg z)p6>If{e7Z)FmsHclBSGqb1MaQ%erDGRc)vobTX zht9n&J7Yoiu=EvcGqTf%XBMp;n!TtbFS}&zy6n<|qU9;2#&n4MlF>;MZpccSK0PZb zeOO9bN?Ou@Wo2b6OGl@sE-ffqwt7*@;^Gylvu~WVBDJt^&59u_ONwvLUtE@glS#uy zEgrdOamMgrxx+JtjT(_ZeCW_6dATDNFG_q@^zzK5|L=i2Pxr7G&8u%L(~Mvt>a3FyTY z`9;W#ddhMaK`ukOJd)Bl^+C+GC(oWSd3yGgc{3*co7q#A=NZ{q3ujEYVd^B^Ij1i< z4j2W))J5n6CEqz;pO8||E1k-`oU1yUp1)cmyiojc{<5w(38V9x)AJvZNU_eZ=dYH@ zN?)JX`Bk!k-|(2nt@Z2qu4R%^)cLjjTK2(tuPeXyFIrZr{93RL?!T)6n5N6uywx(` 
zR>pB#oQgE&*Zwa$A8^Bff1w{q8U5eJfZ;g7yleyi^gIi_E%8bzL?7``_n(?xcvpx_ z)A~o-L5@lzPpPN-Pk%41N1EuwZD^f0yK)&qy2~H z3EzI#UYBpJ?_J;7Yn`*c9h)?LvKtB&DaZT^JHK*5!8Juua9wnvNu3&D+Hj{m{_o&? z-pRs!mQCf8{DC|aR3@^0kj`XjGe}AkMI8(jiN^FlmV*k8bFPMt)1#FP2Mvm}%28LR zQ*gf^U9JtPan24df^sh42Pr4zrGK-2TUn<2_W|1rv~jLNIo+P<^8qE>((O@U{%a+F z6ZA`@jD0&~{U_YZh8%ar#LH*Sb(}3vmaZyWwyrdHMR~>Q@|ElEzVqSS(u$?)-gbIy zdos6lapn5bmBpot7niSF_gF=FX<6}270WBjai^@T0%^tT)|M_V|Ju@(YZjHRd^mS? z>8kRF*H%^(U-?Mxnk7rtl~+0s=dN46sl5H;xf@DXt}icMv3zye6DE7{qV-FblvlKF z=26|un$;_BZnKrEq?HbT%~m{VCHt@0$~URQo&@yhbm#ideD$!XN{ zH0qL(T2b6;q<(T5mB~o0C~iGczdDWD9$vGy z(i*8Fr&Gnsb>%HpycBXyqmBbBHk>$BUJ7+MjjEowF&O5u<*RJ9vQML~0~?IRT2+;I zI#u0hs#?81O{=2Qs7k15hAc(RmqIg7qcY)U&{M0nlGCV7$ho04IbRB`J&o$5r)$zD z;Y*?TX;daXUDIk7z7*PZ8nsDJ*PPhX+7DCR={*Y6@pJ8}+2uFN$I45gdeiF1{?wtH z^3s|*)Mit`2jEkISEq+acYQB~e)88JrN?EIW|o$<4AigM)c*8rqFduYrL}hCuhg10 z%j~H$Qcr3xxlWr+J(Ih%V$J%sm`zJpm9MTWUb1{+d08di18?#%22v; z`O?+xzj;DBi@fA^IGDStbYt<7m8F%%H?65y5me553d-=w_>J-7Cl4+xo_xiWamAD8 zPb{8TSWrB8&gI1g^NWY%ui98poIkX1^~S-)i%ZuPFRCajuGn}p^jW>4*qbzK#-!=R zljqKu@SWVX70Wl2R+gvKYSG(yJa^sFwH3vS*Q{S%dBY>Q>uz4PXwAwuCVli)b61od z%w4~F`I0pit90**Te=teM(+A`_1v3EHU%=64#N88WdFGC>KMi)vFl(=66Srm5u zG?Tc4^QRKxbDck}g>*VPe~L35co*a$$Xa6LucyTQLrjPK38ZtL^QT`?qMjp=MUWuP zyN4hISt2l<);`7U>4e$d#Zef$|i(RicdY->z7PT_iIOT)Tkdu_XQ$usWl zfPoA~Y$hM~^=4dE#z zdrE>d$1(MXlUaM>C^$Z=rdlP{?Yy3(6wsy%9w z+T%Kx9Cpt@8@tq=BJ}SladpUv7X{b2_uaU;K8kebyW1M$ZkC@oXM1DSp~&~zFN+s( ze0Y5}ud}hAgL>lbpJS58LHf}kkK#4*Y#!Hkp655Wokzy)^JgnR{`s0}cfRtOmQp&1FjUr=)2mhcX`erHPu1to%$mVE+%uFL zX?Lbut+I4FMy1gO4nsyF+fi@4@r>ze&qOb<@9^RA_=pjk&$tq4GlH~{BjfQ=qc)R! 
z7lM1DZ*oG-4hO!>g-slcYnC(hXS74yJ=5p-D%$N!ybpPw<8UmqoZ}56i!S^FhLS~{ z;lVY>|2f=?J3qzW&l+l{&D~Rb#k@VqY4i6uFj?}7tF=T&?nuU$?sf7)PI772xC3W_ zw5=)`+2eFXo!}nkwi8*pG@jM9YrNXohx#7G-ujRzk2*Vg}=YU-!~ctCI{;Fa+A5bJz%X{ZajDApyVKZ4^pS)CG&KiyxKgi?-i&= z^vzG^>wEc;eCT_iVL)<#))!LiYj83Y;Ll@Bzz1;Lc?uM&9uc^LHT~~db^TURa^Yd?AxBa?p_^X+cTu>dy;4PS- z8W&7huy;XHHEPZ*Sg@Cpe&R02Ev#`={&uB0hMY@Y3q@Sq>it>>d?lyU)<})2R$owC zvugDHvmCgfZH@0S53a`*@%ob2-uoQ-hk38l;fJ1!o9p**-7*$$HkBk?0&v26=PG9&mY=xRm`;XI&yI<&o>z*kt%*A}TD6V!yW=G=qJh+{F9v*HuiQN!dxBv6O?nJ*;Q3@yM6h#?(y!MdnO;MMpg7}JC-I@wUa~N z{9wa#GvW=&JOs-B~I-JYvU8vE9AV;*fIvj&pA%>TMs%; zZMKp7i;S!*G4jA%@Rxb;akrOwAonp3WG(YR?tg4<+j)|YwVlUFB2P!HPkpwLPZt?^ zu*Aq`wLZ8;yLHS1d4PE!>zN1gX{}Eid9*%lr9@#xK@vv>$*@yG@?xY+R#1G0}+$IE;K9p#DSnr*<6eWCukPUNBfOlO_cU)K}puhRqlrGDyf+Pz2XFZED= zNvHmj&bfZc2c7FH)8}oQ{vnP}Ny|5Un0cP_LFf4=sFQoLIi3^DaU=H^8Ch3iq?zNm z#@yMPZR9@Y0j^~pNHfRV$fM^}8+r5`FAV0mkxv&Hd9cJtGskg_cI&c@Jit7_^~?il z=6D-!g*L9&k>iNO^wV{p|bAIc&g}K7{q3c8* z&JU)`{MPjZ^FyZx^F!+A{4njlqUVRy!}%fUoF9_TF`bV3Wq!}0Zo&MfZZf}_N9H%L z4~FtDrhj7o`p_(`Kl2|%U#P#X6M3jV(`A0^dIJ4*dZ545PyJ219WSPTl6t7Wq*H%M zZ~a-`XFyfeync_})<52Vb8hlU{aKFZ{vBQUS>A{9`_U6V%l~4TndxP^bna3;=ju7t zALKd1{w)8CZG6Id;rRxiP1_o;xlT1+c?O?@44!gLO?4z#2PED0eC|q4LcU{2dk@c_ zN%!3t({@ZcG401YPE2!tysc3=<9-$ig`B7pIvhQtqoXmND=!AVSa2ldbOY`txLwHU z4%}Vv86hVdI9u>pA*Tm$55b=cIX!`U3dXZ-FW_Fl)Sqo0gASdL*6F?Tv<}Ch!}-AH z3x*CC0AC;&I$Q{RpZgpgAcRI%rM`h7OvOf}w-vByfrk8Pp-g zhYad)4*bI*_g(A<9QDUx9hP*P{qb5J_a|uS#3pL##wKa0Vw1JZid_!roD=w$_pqx5 z`wkF$zwct}J#=`W;A1Yf;6sOV1t(o>(T5HX5*!LUdBAys!(nGI@L<7_u#*p*FSuRU z83H^6n0upoUO@+&)5oBL&FN#%!RGWa=wNgD7<8~XeGEF-oIVB}Y)&784#H{fy=Gbm z?(Jq;hw$^;8u1zB<0BhfI_h7gWwSp^%j5oREuGjLE#25$EmdrumRYg+kWRRTQw6Yb zf!G}O911*CFzi_fTqqd!90oj0Fzh)Tc(`ELa|G}R!La8@;E{r1&r!gmfNf6Mye6k? 
zUXxQcugNK!*W{GVYjVowH92MTnw%0|+nmaj4mPJUrGs!Pt~qkl#{;GIQ`}5Sy2t%3 zS~{^BE#25wEmdrrmRYgeAZ<>KhK)yy&0){WfG-madyWAfBN+A^3p`dZ>{$d{BpCJ_ z2Ru$N>^UBIykOXK0`LT2n^QKg$tj!HOZGtv;SQ!kNeMS>BPRLr5pRc zmMZoST4u$50BLh-I&3^$Yz})~0eppE*mDN(48gGHmB3dDhCOEj&lC)MUIl!WVAyjO z@GQZw=WO8Fz&59BUXxQcugNK!*W{GVYjVowH92MTnw+wEO->1~ZBAuM2b)uw(m^=I zFG=`3^|F@D{wrD@_kXIT6Z@H#ZtSp@D)y?DS+Un3ZBEUBjpvBXVb8h1a|Oem^ML0G zhCSy4&le1PUJZP;VAyj3@B+cG=R)9xf?>~VfUg0zIc4*joU(aMPT9OBr)*x6Q#P;3 zDVx{il+9~$N_cH^DpNYxoXV6A*Icg}>s8C1(}Zhks^zol1M1SYp`!yHeEl2Pj zvvOT<&+J9<`%r$@?2ZeieFr@W>z`xH`^PZynmwu-?78T?sAqf4iT8iFy``=fe#mR{ zefE!EI5Ds6Tg1Ds6XPJF0wZPc>R$#98)innDz+CUE>kN(obG@&M z4UPkIy{}3Qt_J3MUoAFxGcects@&i$z+CUEr5&A#!9G^i`{6WB-wJGV+6T5d9Rs#G z9S62KT@7q=dNZ)i=`Fy*>FpEE-b*HRcoClq+{enK4rjmkG3;ZV9rOb><~mm`H#R4y zxZYPQ3?`?z-dC#(Ca1XGS8EI=r?}o%HyTV%alNnB8B9)by|30AOippVuWm}`)BvUl zuVM2vUIVA`8aR#Dz-hb&PUAIj8n1zc*EXjzrGw3>Oz9w;itA_d)7;0hIK_3Yy4l#A zoZ@<4-C{5~#r3|r)nIap>wOh7n4IEzUsW4SPI0}jwirxKalNm$8ca@cy{~Rd=TsW6 zVe>Rz1E=vCIE~l9X}ktb<27&^uYrZvHm5SBgUzW-=^&i?Nc&h8r?}2lw;P+2Q(W(> zI}9eLxZYP^FqoX;dSBgTFgeBbzPj6Ba*FGHb+5tX6xaLeK7+|AuJ_gb>6}XAHEf>7 zYv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&6}XAHEf>7 zYv43q1E=vCIE~l9X}ktb<2A7G+U8WIbg(&#%o~Vwauwa>0omzQ##;xQ2IAiwybjcy&4_0N!<#W<2``G3jJOk zaLu<>4P?QJH`P|i(YG8`+aM?2(Iv7Ia{8A>M!o z%1^!Dl)vg>Q+~m>O!;1O|ESv?vi@j(qatS`W#-Zh&N8 z**5!1-Pu>_&c0H2_LX(9uPlGz;8!9n-+kPx5thI7syB7t2bCJ#huvs&A6sp7zx-~a z`;}k0AdBTM8fwb-xZITQJJ*ySywsE*vEGy)cbh3c^*&Snsz*%uu9IHT?e;H!Q;+qi zt$)^I>^iwhI&NXLs&Dz=Z*npO_YhSJEF%f6- z+8_QhzT8OYyqi)yul)U+nx`yZbJl+j;Fy`TYhSHIxuYnz2j%Y9=?hcb`LN~09v`+` z0s5C1G0eW#lh?;LeQZjc@AdOdKT*yb;G4c4NnGgV`li3A z$IA=)+myJ-%MbdCdSO@7-y?~xUSZJRrUb{w^tUO&@iF~vN^pEke~%g&z&i*Q|p>wexGe>|?ogLnPCdHzIPPel3v?`nTCt|#Mq zptsPUg6k=`&h@VKr{Q`U@(=Q^_ow4}I@0sJ5`PA+XW)9Ux5%G~>zTOD_saZPxSoal zL%b#a99+*qdV#mhpNH#txE|`=;9rgFt8rcEt@Ib-dLi-;^Hyu_oB7g|;QTW4r76Mr zW#-F~1m~BTFHH%~FVoMa1m~COXH$an%k=X|g7eGtvnj#(W%}8a;JBE6HYGSNrk_U= z92e8irUb{u^s_0!aWVaDN^o3EKaV6hF68`YQEzhnv#59LGadhXKC|s}ui7u*vqhck 
z4A+$4440Ani;S!*G18pj!Zm(pwl~|zear(~%RG?g3|AX@$Tz2rJWlA%Z(VckHPs6j zsK!TfzH*W?rDk2#9%m=7#knD#tCDkgJVO&gAIp%=^H2`XP&y0Ec`$r)`eO7lH#*!~ z>t~y^r;Cg{7?79HiP5nhE~hw?nL#B*5=&Rv$6JkMT?Jl)2+YNS``%lRRNIj~IRk2Q3E4Cl3^ZCgKmerlsR3uWsx3uW0h?UBU!O(*F0 z2W@*co?2e_o0Yvo3aU-mC}i^}f9i z7Gvb#ELuoeO``=wP4d4p@{+gw?7RrqjPn&R!!zJmJb^OAwD z;5g?T3rERK`U>918ENAyP%q~CUivBWQghU#?W10}S4*4shRw-Y`U>_7=c_5=dg{h2->#wk?=Pv|ifKXHEbFtc$TSK2hz7&8jS9lNj zeUP_cg{x+G`?ODyv7}F-UWL%>O7F{lU*s!XHP1VseG=zBpI_$U`Nctg%+)&f zJs-9rH`yN24zNAQdVU58KOcpSs4Jgi+F8D-0QMw5>6>^T=hL-sqCTkOY5GOsZ2{UU zywUKKzKHtc412f7@Z9oY^*I2$X&((=#%C94k9-lG?6&ED^jJf`B%W11YzoB0>38P9?~sf0f_kq;y?jPV)eC>ZdKaSJLC|yjZLZ=T6+h3! z|L{3Q>Ze~RME;S;Pya)|!nio}pW=sx<37iT+$S%`gO{}DL~xAUnYKKd~=}QDOYk zDRgrJ-Lg-i8~qgfi1-%W=%<*EZQJ_UdeBd4J3v?ZsiIS~%k;lACiGWqSDu?n+Gv;l zN{?aSuZCberrqx%G9qk@xnBT0{YzMXK+Z^ToPzoTzg39znP0`419H|QK{@!E;lOhq zcNN!FVk7#m5lEl+b*w)iul|OsMnNw8maD=Ml|}z$c+s&NpM!pd?+Lm*7mofKQhh%h z{SEPt!qI=Fd?_5IACoboAL|^+%WC4im&4J2XPLv{=zkEuL4RV^4IlOgrvE$~rSIbB zCHYSOOup0qlBe`zQa}9|{T;_fa|OPLzKnB)S5KvPYdt?H9qHRX z?K;xmjmGzH*M4VHi?yQ3~23tgzo-*#@-2+QBqV@QPM?;kuY!t$lx zH|3YVX3F3ATT}k#|1ssOBc}ZA7n<^SUt-FCahNH;`0yWfyX)RLs;KXl^Dorx+&gqg zgq$uw8h;}&6q0={gk)dYHv3B5`TZevXJ4s1`^vi5SC*gu)GL_x?CYzF<(IzvregUu z|7>)>>A#KcvC#Xv@3)fyJuZx}{DNzoS*A~uNRCsKgGbsf)VGZ1h_;n;{22XmkLIl zpGCln1S8JRV&KKVsn{~1ki*}I&5_tI(1E`Zn=2SP@Hb-f1Vab@Mr^)d=)m8IT`d?o z@Hb)$1Vab@Mr@&A=)m8IT>~s*iVx61b5iUD9W*Be zLkG=C!O%f-QZRJToD>WlG$#c^2hB;r&_Qz&IK_ty>X70?26ZrJv^FBv(54{PP*o6X z$PZ!-#eEZNXtQr(4b}K2*3dRTE!NP@uCo&MT`Bgy#dTHzuM&K#>#PP|Ef~M9Sp&RA zaJB2K1zsz7i|gD7e52s4u2TVAA^0}eSqHohICYL0I@p{(1|4ipAA=4yr;kAgo72aj zgU#t<(81>PG3a1(`WSQ&PN&W>XHbXKIpz%NV9v&G3}Ov!3Stda1+j+wAl6XaH?f8` z{*mDQ) z4#BYJ9l&=0+nlm_O-|XoCZ}v(lT$XY$tj!H8jAbh)Au*~Ce~1mZ(N~+XL169~sJ{=+2?BG^Ks6f7SVP=1P|q36SVP=1P|q98SVP=1P~SJ0 zv4*&3pnlM@Z^0NlbCA~jsL8`vL)^z|4$e9<))4oxnuD{Bj5Wl4tmfdXBV!G5AFDYy z>&RF`+{bDT&N?#I5cjc~gR_o|HN<_aW}J1DeTz)$Ao~`X)Is*K7;C5uW21g#Y{6JV zT=T0J3}&n$uKCqV1~b+W*Zk_o1~b+W*Zk^bgBfdxYku`pgBfdxYkqauV8$BanqR%v 
z!Ux9KAt##yPBPXIIoTX=lCg%!$>xBQj5S0~HV2$!tRZr;Ip8E?4Uv=00Vf%2h@5N= zILTN;hN*xV~xhS;*}U%^@g$cWx!nTtG5gu1I+cl`nkblfw|sS zzcjcAnCpG@&jybJ=6YZKi^1c8x!za5HFyFr*Zb<&c>5dT<5Ca8Jm++T<@#j z8%$1dy|4b$U~-DOz9w;iU;e%YArwHK9MK9Rz`>L10Jo#=DX#a`rRkhX z<27ua#%tg-UIVA`8aR#Dz-hb&PUAJO@Y?27rgX45l_?#BQy*y`%i`2-+PJT=IXT6B zEY;s&a*F#{YM{a76!)>zAcM&%?qjLJ29s0V$5KNKCa1WMrG^?zPH`Vg4NK=#8n0pV zG+qOz@ftXd*T8AK22SHOa2l_Hh1WKxGNpsfsZ8l0XM08d3cm-g9kIE)oOO$4S5-Cf zH^L(x>h5&Xzv-^$@A~)~EMok2`{`_xeh}}>^)GX-eRXt?iHP~Mt?_k~+dE=Wcju~Y zjm(3*&q_QL=4DKj(YI{MVqR%i@!SaV7a>2sH>%@r0~zy&zb#~&S@>-$^X~1tx%=EK zlMlafJ}~i)l>LLltS>vLi}lXOZ&h_)`Y!7J z7=AmHN(g)9a_6$mxSq!RbEc9D^OJhH3f?gSl777$?H#x~T!5O`Zd% zifJR9X1gg)qYOB`moaIC$90%%n#Z^YKJ2q&;FOsBwUoHGzjaI;;dDLbrslL&ztqFN zu-@#TF2>doKG$JxYCfN$eu=pwoUVtin$ss~rzmKLx}uM@Y)AN9k9IVl+h|Ale2`;u zvUg_3RrQnLHu*u_`F`BL^j=(TchtrAc$53mAN!r&4tR4d;)q;}m?GChUJvy^k?;vLfYQuQI!;9*V?e#wPw0JuUc&von8%v?LA$nYwhqGmTIi{{ ze~Nb78V{KZwP;t*g*Muixll*H1q9=n-6ILk4><>DINOxq zJmGKpZGPDOJXOE!ew<2QGv7|s&(_D*gP(<;20c=rbIZXO$HXG=!37^&Snj=oxD+~V zf02=OaomgJ-VNT*v~M`L{Gsk8=H7wLxVIVnTH(Fw*WkJa`Br+br+h=*a^oA=4t~?K z*Z7a71br0SLE1jH16gZ$+mxVx5}!~H9mOXYE+0wIM~P3UgU`@D!PK$eaJnf$UuF8# zl%TH?pHL6KrhUSP)JJ?l9ef>q0s9Y~_uD$zZHq6chko=0AGZJE1M1P9_5p3QCqCdH z{XKlZHrQetY_ZCF)8CHk?YLg;y`Ay_><4|o4&2)T`>pYQ;qS!tPUKtb{YvN4pDnBp z`R>HMJ8|zu@7Mk=T<^klh4&l%nWjIhSRdy0J-Bxd>RaditG^r9yOFQbYijxIV|~cC z2lw{i-g@uf{k^!}i|Y;Ef3$q|u|CZ02XOBJ)OVBj2koN_7n>6FZSow)F=jj;o~1K< zJd&Wlqc4ycKZcu43HmxSHcbg~-Hc6Bg4{M^aU?-5o3Ut0kjtj;O$m;r>3dUxV`=(& zB*C$xkFoh?>uKxs*VcnR=F_0Ztd3u4r2V3N2H_b{XE3)S9EqKjgLH{C;B+*x3V1g3 z+P`yYRAMptd}{*;P` z!ZYV3-*=<8cz@RMP^eGq*hzDqaHHfr&-L;?Vkbp)>?G8SIbJsh_43Rqxlg^2c7S?8 z*3ZQ~o;&4S;2B@`3$c@;I(CxDqhlvs{Y^K@Ibp{`nTN7GtIGMocqr^sU1(NR$4&~` z=9y5@uXWqDUOeZk#~Qlpcql>Jwtn`z=Xt(ebYe^twk@#;oDbPH&-&_dg`IWmq?5F5 zx5rp0y1n46@AVjmlILAj<-O|{<9g8#Ty?8=RL3X~dr91rvX|Vb?=|}+$XE6PUPkF1 z*Rf6vA7({)zPA+jmi*XNanHf8nfkuC46z5QJs0oQZ5bn;>s^F=%YN#r&7O*pCpX~i z`4%rL#xuRC`f#RqG4kE;nyYHO_IOwCw#NCuJX=iPMIR*NPXELB0^~4#8U4@I$V=YR 
ze=Wc@`AL4$t|f>wKt3--I{Q!Fa-0_7EIRpm4bnLl92fc*n=iHwcAFnpd-NlpWP9`# z1Hi$P#ZD^Vya-|dI63Bdg|R7C;vD%_@61@XN!wp!WL+7?ZRK08+UA`dV@v?f$yDql zlSju+S`A*@=6yaUan{OFZuKu+wcYCwW1KbW(>iui8P4*PoAedDkJw4ZSD;?_gT3@q zwlU!liUe8oxHmiS3_ zyYv~lUHA+;e$raZ+1tI2F^QkF1mm~%UtG1r>l9;*0I^}~_(>Ju&K=$bF*|K?)0MCugG{x{G<)g>n^WbY&r66_%B!O z^0KvGFp0OK{Z{z=U)jeKLjPaACKKc~;NAXkiDYWwq;30h#{iXOO z+HWQJ#W_L$D06_^Sq<)x>-3Gnan`dI^>F;?3uWBsYiz#R?fk)ulu*~!@slcHPx6!g ziT4pdN&6@2gF2q3Zxr5EqHi1i+wheBi25Ub(#1M{(udTCaY?kFhCkEslY;ih7tzUX zn?6X7HT2W*lRjj7j7P%ufq{+;H?uUZH-L*a6P)GioPn zPhVne{fd4*VcU1Uhvyi`UB}YnC&`%5kFd^rpd+7QcEj(Gi+h54_o7}tr=;q|a|-Ky z0QIhco}1f+b^IiLriuUIvx?MDzp?@OHz7a$kL;V!e~KU4i2EEPa-VZ@GkD4ItpUf# zoo&Cva}MP8|HLypR=5cA&|xH&V{HD}dfIw@Qu@&Ee46y3 zFPaX1>*t}F=6i>=7`vLY!s75z_fRL5aDa9zO{-rf;I|cKjsTl)gsd zC-FYwCz;r8ram1%iJya zg8ch!{TM$<^q`;8c7U!re$px0ZEH;Euh_0UH*xI#QoHn5dJF@9wGQL4RW9xSoeUkB$qWBG16lWRv9%RN(x)}Ik!H8Yd4Y-?N z#4f^@0EfE^M(m<&;B3K&UDN}(hhW4m>IvLaFk%<=0`3KzikA}#>37_Bl6W@I;e6yd zUodpI0QdsI(BVSh3k5@m&cK}oLx+oiFA@wLx&U_(3>~@xcLkPnMvPt532B+oLCzUv zQU^JA#`sA&7>69OSFezMANVDL5xb~2?)Mgq*hQBDUn&@}i~0cf5scVHeS!N5M(m<~ z!2JXxc2R%e{=g|dKnKl9u@`jEoD>WlG$#c^2hB;r&_Q!jFm%wI6bv0SCj~!N1aX(0K$h-?YPjJ}03w*HP zhnDYLNm zi+>V73H>O5{R+h9u;)xN*W?s_G39%7$%$dV4t~jvVXt5NDHA^_E0z{N34UrKY&=nH4tq`l zo+KFdoD4i!Fzk6b@a2MG&nduD1jC+Ffu{h;_VkUl)8#D2f zRLsOr%8I4MPXec=!^YFa=CJ1#z*h){J!b&V5Da@>34Ens*mEZEOu?|{RlrvXhCOEi z&k_uK&IXiQX|YhRyE?;aL+)EGI%pE_YBl!25$l8o`D)` zFykk2&p?ecnDLXiXP_n+%=k&%Gf^37CUuZ~EXGgD!Puy&W~>-LiEDo48O-=eT=T0d3}*Z!uKCrK1~YyV*Zk@#gBd@G zYkoD`V8&14nqSQ|nDLXi=2!Du_`tY9y zRN<_&&FNc#ZBF~ZHm762HmBpjHm9qBZBB0nwmH28SU4?bvoooKoXyUp4(7c7N8HB> z&ij*7T<5Cg#^&S{*ZXRP!Q>Rz`)ZZJwUGwU~-DMn!H zDX#a`-3F6WT<@!U4JN0!-dFb-OippVukKIhR2r{g^E6%qr|}v%jn}|wyarC=HERz`|3f1$tkY)Rl;C$ zitBy#6@$qsuJ_ef4JN0!-d7JBOippVuO3O~R2r{g^E6%qr|}v%jn}|wyarC=HE_-bBP}}n4IE1mU_Zqa*F#{>gxuRQ{2Z= zPZ>;3aUV;4!(eiX`&jCm29s0V$5P))=TsW6Ve>Rz1E=vCIE~l9X}ktb<27&^uYrZv zHm5SBgUzW-=^$~FMCzCb&*1EGa5grY9mG$1=ArIRnTgrK-$e2^Sj0MBheh 
zvwWcDtp6NPV#?H0BJFAFBjx3+eeI*cS^HDe?*#Q_2X(PtIcLB3(IDo=N$Qt)FLK7d z7W(OU8>eWeC}@YeqM!9_N6y#Rq8)v{{v_?3G=37=((#jWcsB0A2F6cX4?9%a@sm*J z$>Jwf{@%n-O6AAzay17~=gHzHF+YE6DEU+Ilc+P!(6^4y#QZ13PeR>o#!o_hrxia5 zb@R8@C&W+s(TSWs_#<$7<4NKtAx)oQF!$2qC;cekv&qwT{3NuS;xx*@*6Hz+zpr;d=8bNN zBf&cH+wy+6*AI1d_r~}Ga6JI|vb~~~7)q=U`EqeD7x#L2<6B}Vy@7i>jX%!A{XE?7 z=}q*RrhT%xcQ7CK^3g^wZ?cJ@^af%mnLN)H;9ddB<#<#4LR=T(`VwziDo)DYH{dfE z%MSVJi#s7tC&W&=0P+IHP(nI=Z;82I-x=whk=_Ne%ga^_CFt4?bCPwLac)X<^7^FW zrR>EV(eYADeNP)3G$qdW`uS$u(WW;b6)&Y0^H%fy6nZ&BBLu!(pS|-gJK?u1CVImwGc&@l*EF zHz6*{Xxtl(e0{u`IzEZv?2*KUUT#YaC5}1r8Xh+#I(vD(;cZjmA}_xsh7#&z3?;*3 z*cP$mPKcqzx{$~4`ADLxR~YcQDZ%-{voSh`66;FwxhcWxyu%2>1GVb4(1S-yw#Qxp!bZ?u`e(`g`;IiMXDKd;`3z zQ@)|@cd#YqGuy%MdiEOs(UhQ%VmnCN$95oV4R4zg^iQc6O3+b!g5mO!1bvh|-_^lq zXrEx}*l#%9l%TINeQHY3SIKi-J^Y&X2_I4)@db78bvlMp(4MW6-M09Gdg!NPD1FHG z#0S))J?#V9Xit2=LHc|6fXT4MWY}V$x6q%0>nXU-^{!3%0QQ4EU>fdCgZ&11*Zb3P zJstV-ypom}N~{n0X5iio+#BpI@@L|DCa&|nvX&T1tPgX07VgbLeM7t@{v2G-LB0ZS zS<7c1>qEYIxHk{?hI%*nSL6C>To-yPTR!_(A9%VD_ZFhQVcu%(qYM|D67+5K7n;Yc zGl-#N_;@5ie@9;+F_a89n-cVOW^9@g94H^eaxppkC5|~#_QgyZFJ*Vjc4LIlofv_ggq|j@|uQ( zv#r66t3Lsqt!0NmW;eTa$MMIQWbsCx zCzY5>c8n!EmXaMq=~S_Lc-H-7v3X7vi|16acUs5V;h9c}e`Loyvf~=@thvN8`Xpo4 zkguO+{mA1_vwoU~@K1IuA3JsrxhXMv?AScyqr`o&gWE>T8^RuEWdHPw=RX9uTNL&~@4h*?- zvUo35NIzM;5yoOTS$vVd6i8t>c1_ml9{jjw?euoGf0;rztiDx&2wz zpM3u;>o0SlbvzRKSvQDJlNH3bIa!>MwqjYd6}#du#j0p4Hbv)1URIOS2YV*r=)Xg% z!{O+E5Wf+Q{(zF6>??;1pHphdwHxkdpju-N2kF&tJ^S9bJ&a1!GzHuGL z*f#sH#;3I7{$v|`fV?fjHTS(KxyM~%a4jY9e%jv&@=!8Qc94gXc{q+mK_1GJ)uZ*n zcP7xMp5Nox-%auR9N=fQKDg$7X6x@@_+1R@eOl`i@ z`EaieF}Rqze{=9s|;x!;`P7_L!w zUF$vJ5~QOZ!?7TbW6}<=Jj&ElGLP0L$Ws*LDGBmWGEW-E-Uv8`y)dyM_wY3*a8GQ= z{dUbU*jjAJy8nB=w)tnxVP4iB2H6ul&eN~ z75>Gz?l#s{BfU!OhxdIkdy*R+2G6;|jgIlE z{A{G>c&-}j`F;;v_rB6qMPA(Jvq~St)Enn*_Io0|&umwX_iA+g{qPJk!Q1BdLVCaX zuA1m=*ZXFC2Iw@(xJk*VH@9qMmcOK#@?%iF8 z>n_7w#l1Q{W3!*!tLuz(_LF;a7vY-yWdGS#Hsa{9|6P#IezO0pzbBqU*#E9bXP?=B z`q59bew<&QX8q)umfRWeIX61V+u`S6d$9?&6iy8<#O*%{}QC<7Q1SSw@dRS 
z4}6*G-Q)L0dfsAJP4jkZ4(6jC&)ef)iuC-YuA1)c)%6sho-4cu{60u8SmCM}-acJV zA?mr(`?B8`>4j@tHPfrr^$bTnS9uTl{g6I_k<@nKuz(H85Th`L$VL&N6gZs=sBgj89$Uh7DIj1?!x1`S>UAQdUMCuSFxz0W7lCWNPJ~1~srPBhyEFA(M3Bb`^02&KCmL+li9w#C zAP>tk4<+h-I&GaeB3LJmXjzBxJ*Rs8hkLRP<2y?AI%^cReAlDs$M-j~ zEZ@s0`tjX}x;~_des-GZXQyeK;vJBBF9K3?%-}k{pAy(`4A&U@y{+FV$#+YVKiZ~2 z9>=7m-fzizm`8Ig$aAWCv_5zbpXL~(=9s|;`Tjs)!!cY#pSsrX_$xs=xuNw5@;D|f z_5MND!#rA_AkV4lN#odpfMW|iv=qmd2OL}8q95N2r#XRpq95Pora6Xwh<mq*mb~naWHK4*p6rg% zb&g-wz5C9%Q;Tw6!QVsp`zrqShbp!{{9Ys-)I<^9H`sQP3G#hfuXB%a^tywvG2^! zz70wa(&cpDb8}7K2MyZE^x(d(FE5#=^X1j%?W{@l{i#&n^OO1detsk$eLsM{n%@~I zaH(^Tyet>zBs$}qgrk7*JsdgKIh^g^32t@hk)3rE_dDSJjyr+h>5=u1J0Nl2aemO) z{vY`}kqZtdeuPnJzpXLxxzD$6hxb#rd)aZ?;dioSXWO_C`)cRC+Ii?|l@-PlGx8<6 zI!@;+v;59;+vD#{KZN_?=i4haw0(!j&@(&mSzSFB?)+=Y?}t18hVn<@&i_jJQn)j} zGgL2!I|ttxcIJ16>Wy$`erKqD9`5`rJRAS&`LOfPkiY&x*!eZ&Z(ayH{{s21KMp&; zfo%F|*!eBwzrPlC{uS~+-U>Va2Kk3y^2{gq9WFS0Kl(Rm9NuMpEO|x}dMDApB;Jo) z)@0l%hq>?K?`Z5j+<*W1rgqJjynF7kOOBrV zI&^J*D%|-^%5R1{zfJi}xbrV4|DJt-491{27=z|u44N?puS3_@zY%ucfPC{?VdqWA zx4#p1-h%u^W7v5cl4J05NRGiTAUOuVgyb0P(fdHAj}Po=yN{1O@Y(C*@A14UcKom3 zx829&e|`4)X#bY*2)~zfs@{6=SB{fS`5VW1=ZHzW?{$-Q-`kFJ^qmval1EQW8-x1Z zyVvy-=WK7x8^ikrukX`8dbEDOSY+NM8O3=yC%EQ&C8-x4!||8+-@pG)#~=$I5B%5k zc_Ys<0`aTNzepe}&oLKg26#sx-#MOK!^^zjz6#Rk1UP^xF&0-5r#<3C;A1TmdjIpE zb3*tBgg;#1(F^~=q3-TEk#3>>1Wy*tc>eie{a%0m&u6}P-QkLHgI2As9JF@L;^Gx6 zi&vFbEDf#)ty*4?mp5q1s%53cOKvJFUNX4Q$sM$AS!r2$#kxV`Cl4+xo_xiWamAD8 zPb{8TSWrB8&gI1g^NWY%ui98poIkX1^~S-)i%ZuPFRCajuGn~U@w(-!SLCj9rbCoh zjLaT)#l-CC(?Lx{-qhEnQx@Z2h9##cNg#nsw!bRfATp+^}lk z+KM$dlrOH##m(#?BNh)|w76jCkkX+ALq-fMA3AvOlCsiaix=fV4lW%sbl8&Ng$2X& z`_CLVXTp?86GxT}9=3RBX=z^hkn+;}5sS;p3-a=p3?05Ce^~jD5sM1*h7^=9SyGxm ztS<-WRR6Sn-1cw>dO%azh&qPbIS4?N_76`P&bbcQ(7_}Z$=%%5Ybwi~+_IZjue*7b zle={F`rKuu>y|me|4gr}aB?fwl$BPNI=PkQ8!L70(pAeBBe{HK#hT)^rDbKyS1(0+ z`J(kpi&rjR%^FkJ#kvNJ;HvV~D2#S0OBX>dL%O*#(m6dN7JHLs&6qU3c=FsC6F!D2 za#xf&#S<6I7GA3fxWE&dPZxC|?q!k*CZj<0t&xgftnu iAgdPH7nl~X5KH?f?Mh+|E>2edKv4c($r#Ai<^LZb`pOvq literal 0 
HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co new file mode 100755 index 0000000000000000000000000000000000000000..a70c893e207d0dfc23a4b19b51aa26a254ec4de0 GIT binary patch literal 57912 zcmeHw3w%|@wf>xa_BnvHij-=E00AN*j}Sr#4-vvsWg{Z;5Fbqf2@eSYlYoHfJ|`GZ zkt!m_)M8I+v5gdMZ0#+!-h{qyTfMEe_-^a%ZF|$l?dA5by@J5`f8Werhe-$qX*KH4 z{C+2EubDM7Yi513_G{Lj*z{R5oKPs=Rpu|OUQ?mqm?S?q9{X!^aCwAjV^vrD@2<{L zQ6#f$zR5m;?hh&x+BJ}4va}f_wIcc54J#Cl=|7f(3QC=$S*7~7p+5f?-lup`30{^f^V!wsd<`n15~Pt7n>I zp)&Seko-gBWq?k;V%nv1<|}pUV@0cqmsb>Jtt=@oD=DqG=dOpdiprN&ysG+dc`U1F zN#)w2(t@HTOG+v#_LP?t6&KuGzM`@OImN~0NGqsVQ?#VyOUp`EFD@#5IIFB^RmsC^ zD$5J5{BqXnrAsSHD%HbT6)QHDbp1-!x}wsxB?T*2lodZ}#0wU$UAnZSylpd&=w?=z zm2T*;m8+zc?(b$Rp0twjZnm;OTFHLLt(>A^zD&c+x_Y`al;2@Pn~g;qDwY(L7L^w) zm6CHzN!K=N!{QAE+QgDrxnWJo7gvnR2`c%@O$8MtHx-nYlob?7J%y)H&tnypO~ARXEjXVEtvQYAq^GM}Pr_$H@zbbG zdb+yJEPN)k{WNNmo~}Nzr}gioy3@N9XyAG6s;f(GlGDmFp+?i{KmDphH|4czYJZ1K z1t-AA1FudG(c1Mr6Z-kPKT2PbQJPa!e0-o@>rngCvx#nv1J$avgYQyn+ALRJK09@# z_Kc%CZ0gCZW#y~auEA_twyLD8vS8_o^(DmxyjcBr-WdFgz9Pn@a7v)4bj7l=uHQHz zokgB;y6?+cRkXffX=zbq!Og47R|b_c&xJC)GJa{|lo=!Q3T9k!+2n#53#S!K%gZeo zIq%Yf+=T_Ba#pR+Eyx+2SGIm+!IGkig7Wnnpi9}x0&n`<+0$ng%$PrW>bJAjl&@G< zR9TWz>A247D_IrG)|3}4S-rNb^2RS`Rcu(bcy;MZlmD?6vsM=G%UWBuV(IGgRk~LN z$Ga8!de+*Cl8S=j4d~m7B?T3gMN3wG7gv8NHPG>|X5C!0u4HNXY78*PDdWkkKjyaQg#WzX>Ji4>kK8{768WI#@QyFI|0B;o%FC~pPEplL73#FK?_N^%M%A{L z!bsp`|1lSybjLcNavZ>r(yb(sH6m8D+K0F8rYlOjJKG(W6fpF_U8dcBbRIqLN_ zpsI&@JPBPsItGadBjpz0Iq_18#I&q2_9Pyp)9{h-}J~?Q=3`khjG<8L~t8Q=Vy2-5>iLro=UAtZC)^?o{sdXc6 z+|A96$48Iel&fl+Myvaq!j~1+76xfbnfk-YXl)z?r$lS2-K2X5kEgpX(l$4ZPL4*K z$&mZmyqeK@qqn)Y&(RG08F8R6gf(?*pn|2z1yB&Yeot zsAx`Dee6KEKhnCmGo5<3)}8LwI_iQ0PB*l%-L1_>{|=L{4yky4aE!d~#?A3zq^s|4 zX^uNlKXLZ$%~ks&-|xCSp3m{&@tHi%z;hYY6Lx|l}v(AXlW*ts9*NJpF!>M+oIvu0Z?0^r0Mk2SP-gtAjS#Iq#FR<^}vGMr0 zahtkbiL}{4+W7JD_{A4*B9_ksmPOy>)S9gdKFNVi6vj2GF8?L%5O>b-IleCK_BrN* 
z-lr7DGOCU>jn6;tuNX=eRbzu=j{kR%7gxW)@0U&WGw0XVU$LMzIdfsHg2|FsT%##E za%VEWtWITzRB~DGxWc4I+GaNysZ~8tC$NV(sv^sl#iPA@$E(#Y)b}8MpKnU4L=&2c zFVlQI(!PM-L->6WzuovfjNg~=`*KtNWPd$31|$andC4UBZ78Y+#0P8}m>j6{Ky_MX zGE>WB)@N=b#%H3QNHU^j1|+J6MU zJ<#_nTHoH9-;1WBa-{9U*eS`Y9rjYv8H*dK8hAsHjT$Wr^9mk+8 zn(8($n!2cNQPOSJSXs2Fj+Fl4Zsp|FC^zSJ*X9)r=;`j;t_Tl#wer?L2 z?sx|>TIUXAL?@x*$LYTxgs*UtAx>NPGULl1>@wb?e)qv|KZra<9|B!cJmxyboBVyu z2d`}hU*voT=|3GGo9d7bNsULVw$BdfbH}aGXg1Fqy8<2Rp~J1f2K95oka6AYU6HZg zF0`|Y?Lf)%<@gMPi(TQtvGc@3_qP z$dLHO$)SNvR*h5bG!Ie_VUCXcIm)T@jH;^neLsI&zj(h*1CkF_qbmBet;>>bwaTPV zez55~v*S(4#?xGB6J!L?5@0s7ROma`hGAap~9$KHq3`3vDH*{a2 zp-*XjaEx{vC`UG*WtJ! zw57Qx(?d?LM;eSw9gf{c%yC1gJU!wX`TEd~^t}-y-w>Lc{^;JIPRPuC)TFb{tLFyw z;P~*>K|1Pu>+tdV7rZ6y&P`vq;CMa979LMe9y^{+{f9x%#z6mE95)2|voDQ-{*-A5 z^yl%u(ATv7ly3|@r}d}&KJpCGr{h7`>slTo#&|jwq`b+)P-?Y0!>o4_Ce@UnQ zlFqq)!Ku#m($@2~L;n!RS<>w4P zKT*ChG*|0S`F-dM_1ASmhWayI=C`gV&|jwq`b+)P-?ZDK5B-zWL;WS4`b&D-bNS$5 zRaFay?73}deCVdE+EixJtZoSU{ZUw4Ds{5hA`L59EVx|-@pa37F#y7Ia!IURCGk#-2zpGoJ<7}K^+ zKQZlRTTe_=Kiksms>#2MghDD^g$|^5>!CD{>&iah`v@NislMR*3g0E9`ho8!e7BIw z0G}cJ$3m(<`2NCwJfsGIA0RxgZ3luM2%h@0&7;tvC(?Qz`h?cuD0KKF_)iKC9nJ-R zuJF*|Jn-iU4;|9LrwI=o&Ifvsg1=Dsi$a*np#y`39~x2@fxk%j;UP5y{1D+sgw#;* zL&2x;039?Y#a_@sV^VnNpfM>tbkLX-9y(}D3J)DLCWVI%8k5392aQSaDLizd4k6*H+8Jb38mx8LZ1OIXePt#!E zVPfyM96UM?9T+bBQ3sFELkF^iPda$S9y%~W_)u77gU=Q|99AR2j}$%&u5}~UO@*N(?_9$jp?J%!N&Aa=wM^|D0HwfeH1#_m_7;}Y)l`84uWYubM0&$ z_)NF6bqIfNOEb<ub2&%%zF_jA& z=Zei?&(YvV3lDqdfzJ~j_8bF#jPS7MSny+ohdsxEA16HQIUf9Y;bG5xfF=gYLn6hzAOxd_5rfggjQ#P)NDZ#aksm{{D##CqNAef44j2!lHL8<)|A0{Q8 zWB#q0s#uMtPHeNLZfuLD(b#REHl`-P#uLQmu;(Si76Y`#FXIL z##CqNU}LJYbP!D8QL6S+_h@>=-=XO-|6Wa1Y^SD94EL;%b7QrdMq~Gb+L)RK8&4CP z!=BT@PZu8coB@7@@UZ8l;4c*(_Ph-IWx~UrmxI4tc-V6$_?g1Po*sA)yp1Uv*Tj^K zYhudAH8Ex5nwYY2O-$LiCZ=p$6H|g~8&jR7gN>=q(m^n_Ut{F3{~b+__}|s^nEyRZ zRqXqkIc)PkX*BjDP#aUTVB=Y0bJ+6=@K*>Ad(H+wTX@*>O7K?-4|~o5KSy}j z^D6LH2@iYD1wU7K*z;=eSA(}PW#gKdvT;pJ*|;XAY+MslHm->&8`s2?jcZ~`aBX9% 
zvvjaA)mb_SrudSC*Hh1Gdc=QD(_{WGG*z)*YU;!eXzIqE*EAY?0o2CSJlJ@i*c|qp z4}QMzu;&8s3xtO~7lL0XJnVT5_-llRJr{vrBs}bSE%T{zkf9XBFqAY*q7jmL3zwq(AD9iVsZpvS@(3BrpV#<%Z z*_5BW*_6NhepCLchfVoK-!$b1&i|WkchuU$5wx#lEuqc_W{TuzbJC&qrAP!mD1^eIHR|bRTn*(S1_2 z(f!hUjP6%{;oK<8pFi4^?|-Q&f6;tXe&jM!e%xA9e)4Um{N?wV@>hM?ltQ9lIt{VI_#w4v*IM44jGP^tH{{2{s!am;**&be||5k8(1hN>1 zv>xF>evkec-8qL&RhE4}RavoV`m}yer$5_uS6|S+pc$YUpaVb$JllO&CTJ%3bZ@Lb z2*-nP+{YX5561Ce9QXAm_(O0!1hW0SiT*Gg4?}u}m+xocI19)9y(xY+jxG5u{xaC}UE4<bG_yM0vs>E@o4Wx{~8=$gX27})V~(T*Ft`bSEjLV=1WV0^UKVa zmIUXQnJ)(uoL^?Xv?MscOg~!^oL{D&EeXyq)6atm&M(u?mIUXQ>1RuV<6`>RlHjblq+ArYT z;;MmpDzD6BbC*P8V+j=JdS^h=!d zvECX#!=yctZ|J^swd#fRiye2ISME2UJlA+|9gD&`wL&BFR3FIno#eRVy-Hu!Tezq* zW4e=mvA52D1m&oYUh@Lo=3`ykVdVE5?xfRSy6_Rq$2zw@^RbSEdSToedZON*LmiiX zlzJg;FZBX#{3P4*@Lwz(zdN%+qP}J9);cXSVQ+cSl=RT+xqGCChN`G5nHFZD9g5Kk0jQ+ zz0bzxLX;+dB+--5%|Sb%1>_4jEG&cY?Rx?}y_7mpbkx-bP%T9=Dgm z8k{J`G4l#1eWF+8XFx8~bKFUu?=MKo&G+IyF*P{iq)+xX`3r;k z>M=$NYj*}fZt&HPJH@NLG@C-X#9hAWPix+<88ZiBIC2cFlMroK8IVLOT0T+;W@-913f^_C`9# zg5$#TmyH)&2fNLGR(tdV?`3_7>rjoaFfzO8 z!vwpwuOKc40~f?GeFgi3b*si#1Z~^;aZJ!=s%={@`U*YP&|UkAplw^f-f^xYeWc0O zhkkEwa&Lz(YwHJL?6Q|Q z?o4l|_8nqBS(h(9BnPsqD-U{I>3!b62y%I=9e0jbuYHQt$$NU@cP#X{%6rHk0=cmj zjyu=e?I+=Tq(1r+`bP0l^e43O5MYpgioR0(5^XmO*wXU>>4JaaCJVSB&gmBg->fGa z^>F;?2W8yoXKbw5?Z4Y?)8}+n+w?zGz_z|t@y+!JZ8;vcyxRMcpJCFT$TxJ~P>k95 zn;mza*WmMdrUqr@dZy6GJVhH%fQ{#Sd;DRLpKy!gF7WnhpTxP(>zDbseo>IeTy0?A zb6_iClkFjGFWUoc?OSC0=~MC{H{%{B{W|Yo^mW8cV1~Az1>5u5#Mt^deQmITt$8@63bWAr=<|^|V#^Jr#WbuaoGL@*$749(6p|(Qolw#(Bc@b8uZzX!7+n&Ede-$6#anChE@X zCh<+QDSb_k@S8FECgx+UeqY{Wu^!a7`xD{AF`frG$LL#$VdCj9@I(K^zFkR72B2Tros-|rN7c+ z82GDE7>}9vJBT2MjWPFg!O!{}?ms|hCpb<){ej=gL;9RA;+_L^?w5md@HJz>&-;qw z^1e!JME^Ao=?lJ!`w!4-zV5gegI@bh#|=l^DE*hgMUTF`4tfpm33`1eoc>!-_Xpwh z-;w`uIQF%(H7=C8&3Zd%Nz)&|AqWZ^e0x`@L_*t`me(2^j$n( z67TfS#5?^jaY{cX_0x~h-*Id-R^W^1%Q#m!KAbBYA7Y&2!Exkxy!FN#U0y_6JsuCI zzYKch8{za<$v+uR|265qhSR&h(Lc(*_6YiVBgCb%ks;f{kzNZtN-J$%knq>Zx3BQ7CKLtzwMmf5thHZ|ELJd-#>CpgyoBV 
zV9Kv}!IZ!0_on=Y|1;&QBc}Wv=b7^NTwuz7c8n>%P8kBv_17%;?Hv3B5`Tmf)v#->hePvzjE6Xo@{5i~f_Vsy}<(EDCvdi+T z|HJ5h^Zy#%W1+Wn-)}qT6T0tr_dhSf@{7J<$`}2}lwWbcl)ve}O!*COnDW(KO!+%L zY0BSop(%ge51!NQuKfMWdaQ4W{GV>;&I>*fG3WIUcOT8Q!iT$$G9S72gP7#nk6E9q z8unhsW?heUQrFA6Da1wH0R9Hy5f}9-@ShSMaZv@}3xr2pR3Z36;Sm>A1inak#6>Lz zzgT$0MJ)lp1biwMClpe8ZJ5N$KnJ}xOnB&^*ML! zzg&34MXdn8LU_bQ-3b0h;Sm?L68uW=DLnKKDUC_77j)2=6dpQgObQPjG$w_I4jPlf zLkEpX;h}@Zr0~!|V-kD{51pt(3J;yAgNe;rkJzk@L2Oo45S!%(u~~87#Aa>sO>9<; zZ(_5y_^o2IHaMyj_AM2A-{PoM;8zL%X-AcTFB2ZCYgU6_Eqt}3)_`9l{H>0<3H(jM zZ+28U_;TTIb5sTR3h=3QpU}a^^ik+wWBMp`urYlUI@p*#3LR`rAB7G!rjJ4g8`DRj zgJ3$f?z0nhNUi(qL>9QIrbey#AZ=Q{A~goizE27j~gu;+U4>xG9sH-O(DJnXp<{6^tn&s)IX z0^Y`yjca1c#x*fzVVaZOCwxF)7-ToY3^u8AqZwT-FH(!s`5XXzlAiffE)3}UmY zd=s1H2eDak-^6BZ@=a`3jc;PJw)m}Lvw*28*tkk;4tstY{HKM7J$>-L@UUkLd`x)Q zGY&p3JnUHwzFK(La})SY!o!}og1;5KjVT+~#FUL|V#>xfF=gYLn6hzAOxd_5rfggj zQ-W(7Q=O%Qjj7JkK`>=vvo`woXlzvlu~~i)n-%v>Y}O_p&-%5WGO<}({8q79z*G%v zTq8DzJvW2jEIjPF1^gD_Vb9yZ-zGflc{}*qg@--w0Dp(@u;*6rTZM-`?*xA*cpFnT zu8Aod*Tj^KYhudAH8Ex5nwYY2O-$LiCZ+_}Hl{jD2OCqJrGsEmJWg`6PvX$h|Q`BVzc}pHY@I%*sM+dfxu6h*sLvntJo}HYA0;GQ)~`< z-Ut3Z;bG5O@U_Cjp7(>lUwGKF4t$;Pu;*vNe^z+d^8xS=2oHOH4*ci9+nBO(O-$Li zCZ=p$6H_*>i76Y`#FUL|V#>xfF(tURG1XZ**qG`p9b`@9Yj}TBKWZf|@cVLZt4$5a%Hl#euu^;spJ=AXx%Bhpo&ve$85!A(c7vkBz?#o4s`|ZKI7Us3B zU%nwpS=|@tm*o59leEKhwo_=e(}3smY^NUWB)Q(Qop$VRR8pR8HJ~m1y~^NwaeS{h z8g|Hq9Ueo!MfdrSaUG_K!{OL47AJS5iN#6DTl()gi9Dapiu|EK=Q`?qsQvmS_T~^` zZ)9xPeiP&wi^F4y$4Md{NAx|0x+ONJm-@u>iD$cJGCrpVj_-fAd&e!BMFeXIs+;Dk-XCt%sWHCU3X||ifG|B+e zb&TN=95-ODX&fUDc-Upf3>6yrdQ#-=ZW}`+m~O<})R?yFmwMP2)|(O3#aJZ4a|7n4 z#`8()ml!3%bR%@tm~N+?{Gc7`iayq}9l>)W+R=FKpdG>UK8{Iy-(-!u)sF$�Pcf z_ZQ#pyNlZHsEgl;CHAF1_BUe{aC1H4F0Mxm#-~6(1$rp(U2H=EqlHK>MEYXT#fZ-+ z1}z4Ey|>S&th7VfrN~=~yc@i4`O9&<9LJyXp3>{hL~mm9M&#Xyx(d7|zZA!%kSp}| zYq?Z?kSjx88S;v}@9Om<%&T7nEKe}Lcn$K`Ab+vai~DIm40-xt6Ti}uSOD1th`-85y7pft zztP08v?NRn%fZA|G9RI9m%yi+ac)U)EtlZG4)cTfFEsT%VQkQn;94#-?r0NhxdiL= 
zn6nz|C((;zK)u*c=zW4-g8e$oV~zcwU0XL>2f=b;oq4Gk-dZ(uuk22yAIM@xb}itQjR^{r=v z+m;0VllX*2=qNtHVEJHzK1zH-1AK<|38t>y2GcDG`YO|>mIQs3_=HCIHSH7Lp+4dZ z8sO{b3)p|?zT4KxZd-goBlM#$c&GgrAJB;Qv=8W@J@Eni=+ zz8%M9-m57ez<$sNY(?Hy*l)G>YkwP#w?S@=_nMZ|=N8rnxx0{e7xHfMe(P_?@pc@S zd%x4?G<~jOeVE%jkhcT%Rd|2!cj9;_Za;v$2T~QOM=)oV{tG+ESs@tNf67X?=18}4*Cw;wl(C2s8k(T;4`}J5S zcKx>*d-yV4dG&hwM`~K;~1;j=Rx&JrzH~HM)gAaMEw_{;uOks88Ei zvw4p?>BKzOK{B7;RM)X)!MEGzpY2Zbl*GT7ecQ1r})^JcTC5I7(C2P=X%H@Z>C5PUu7NCZOaHY9$HXV`O}iFiT{iK2Ez;S4;+EsI z7;ATl*XxkZvEaDSzu0)Ob+Fs~XSGK^@?N$_Uoi|AY%kU`odmd`mdn9OI5wuMlh<!xWB^s(+#+orG3<0`(Qowg<3%x;%HL$?c`VaJ=T!JNIr>k*T9v!xin zHUH_jTfLq!uBjFqwv9I{2X^lC&W+jeX3J2v{J$J`o0o=fy$Wu2q}%akmB7qryk4>8 zkgfc)5z~f??tY9;jVrhr=b48Z>>Z6_JLvdX3!lM2Ib&u z)`QOol3k}s_r)N zW;|aK@AS{aJN++lNy`P~_8YJ7WxQEB)_#4o z7g*=*N0y2&LcCc|*rGG`M6=_~GNHpqdx3S{eq<>;Al^*RO^J&`++82orjPK5yXy!1$KOYJMJ?u7~>-Q z+i{|Dtb(Ri-skmNC|IuDx#MD^WSN6CgrpAGnJ?@C9@!(~TJ7Vf$@Z96h##AoSWS>1^ zYBYG+XOEc5126mR5mRHp%RYPmhz>TUI!gz^l!-Sx9K@S_v=^A{aYsx|hP`EvJ7Q`I zc-iBQn3@Wnd)(QWnt(LfXOEb=1ib9CM@&rwFZ=8fQCh_P>`AQk4Av5+X9V$PPd?O76`L3bK11d+EAl$N!KCYW zvri+=iTe^sd`{hySYyg~Gb4LKyjj&-R=gS7O~vLwo^cL*PHFEgBx{oE9x-c@^C5?K zJbSsvQK2c*NQyKaZzknsZF2o1!P?}L)USg2GJ?8TudGY1dnAZaXs3ROkC8RW_0Uhp zC!D06{Gc7`%D4!&BkPgt(T-k^+)g{~#+#un9dDM&wL}j#G2Uz~?7)4VQt@V}v%Pq; z%0HWUvy?pEW9T>n)Y)FV8RhwGR^(IhX4DyLj@!nwQT~K@Gt}K-ycy~{t#~uk&398L z#GC#2L`?7dF)+Qpop>{(>36;+uXViHj{`g#nU3Sl&~6ITD8qNXt>Vpqc^z+tJYcqU zyqS@&FEn{4k2k~I)a&i7`cv^{LA@D4U9IBHFgJC~%}MH)_$w{bt~ec?aM))?ER%<&R#;ynUQ%a7kTaN1(x#NFTrOp_80Q>#XTW!@BPJe z`rbm5-K%n==5V(NRMFY4)wn6|e2hNXNs=B>v2 zN%T^IUKuCRE9K8Imo@%_c5U5k9R&aN&{O08B<*%I9x4BhcJ+Mdpk0{{4fI{`>0>c2 z?d{Q)iaDciLLA8i$k}_g4FEp?d$V!BwN%U*#~iYm$jii7x7~j&6?29<8FMxma)Tkq z*xM6g&R7>@44zNir;Td~bj%s+O5yp$ecLFfvCaA*XRvGbag()E2D8Wab2Ia@bN7|w z`SAUq2YrS18-D~|u$K|na8HEYWUsXhla|^;DIa;V_Zoe}zCR-7%*gDWg1q+jViVuc z@JHAZ^I7&x%7c6!@WH*&*p6P4ZD_s0t=TJ0d_p616rV62Wyb@b+&@ixLIZq;_6eqr z-3HSq?yDv~p%H#f`vfCjcdGh`FKB?T(=lg3d$vw?+u{ovp`VU9dx!0b4`@Vt+6Q#d 
zp7?-$^!M-qGcZ=|?S&^kfc>Bkm^Wet=^vF=qykX3sqO0*N`BhI*y}ckkx@ zd1elE?mm9>G4BUGLh1|6KlojJ^P;31PvV)Dd%L?CJLcRu(0vH;ms{K26U~k}vt!Kc zSTZ|?>}0V@TAj32AvQlKx)P@!6kP?g#E!(p*l{q#PJ8h#RY-3y-hyk< z+l#MwyLbxLWyeo+6emIXws8@}rNr6TaW%9<@oa?0d{0F=zIR%P z?`rAiV2=cH$ZJ3hBGdTJhw=?bljqxf-=yn9+9BMlw=K)}Ep5|8zs4V%cKMD&^lSLB zsgHT0UqjH3M)X7UYY6(m=l!BzW01BFX`-JkC;HiGqMx0nZHo5*8e^auV}@_wI|}dy zV>s3r1I5_ylJ^Rv;0?wC85PK|Jj&FQQbyY}kjW2Z3IiEZ%4mJ?99CltRAbEW`}q7g zg)tm!jDezD>X~<;;h)eL3uIIv!}2K8NQyEBV}VS5AX6B~kW!`<#$F0AhG$!1!#X@a z(U?G<*sw0JbvE=c|O zd{6Y_vpmiLKBE);_?$}1Ay4$P1m+=}v z_GRyhJlTK!lQ@?B)_Gkg`>LOdblFS&JRHmZ+`Pt?{kYSR{?FQr`~9pR=llCvKe?tQ zc4QClOxQ;DEGAxLZ|)0_E_)UeFS6(JAf(Hl#jGa>G3wkmmwSxLKDn$X7qRTz6L&Du zW$#r{h;V7m?d^(Pw1hrtLyBZ0uz_A$25Je%oW<9y}g=qkuLj0b1uq$!g)xS{ew9d->ZFtKgjwM?;m9S zWe#M+-u7O^t)4|}Fne3Kx5qH|UwylMZ1a%+cKg=8-9EMdtY;BCPuTk%)4!*lMXWG; zUdvv__FlpC1@@lB@3(zmd0qZt?HlLShqZ6|z6|?C>-$7deV=If2JT;-x=+M0?y>6H z?nNzoP3!xzKt_S$9%~oNqf9+1cypf^$m9nyg@FtyWwbu{{=dFY1l9M6hToTAc=Id* z$EdrZ?S2BXcYxL>kWrwhJGHj~>j7`>6FaD<)qUc);68EO@%u3D^Puk&ktg?I+@nE1 zi@50ceHix%&~nI=`!Mczz%=e-AopRmoZN@mX>uQCr={)_FAwe$FF&pyzuV6?_&t8n zkKf+c_lf97>OL{(2fuYM`tkeqEX(iGi+=nzysi&vqMw~6`q^pPruYWF#u%u^nBg1v zef|{2aI7%~3j6GmZ|aj08``FUj0$8}USlkf;atrRWJueqN9%*{kZX*AYK$3vAHSat z-e3&J8e^cSXK$h58%e>NXAyyn3S?LwWg1B-qxA`7@&lQ|K!%hutuVGIz}TYWHsm+y zIp+NSz1Wc7kJlK3t;L4?7P^*0p4gDzU1u7{p>W+&rZ|V4!EzMex~ckr`J?F z3tgXS_*P=~W69W+&e^UXR>NL!@ztTQ8vcUQ#ZgWdcP64aUntaR!(MRiRH`s~#;j_$ z>&)qA)K*PDBRZRPINe+)(&Y@N+KuY;5YxbiK_ijs66jH{!Fu>(#rguQkKF#Nj zzu4OkJ|AV>@k32sH`JEjn%}QBH$~9i{->uUhSDien9Y zTz$SNsS;>YVV#QAHu5GQ?~>#tcP5j`P`_k9jIKI%Nxy#E;;J6yzJT9D_ z-RyQXQ?2-holuN_mzWm>j6fMYp>( z?tlT?KsUPs2X13}kdOMdtITAkmdmWq+%^Zg$1x5r^oS%QT5eEskj@_z8MJL@_xh%4 z)c~1C@Y{nvf91qJ%lJTMB4j2dC!wBXXlQaM>QTog4IPTU??w4X@p}xvuj1E;-{bgw z4Zp894Nnf&ZD%F3blc#etE;l&S#R5S%Cm1Hk|T6E-S@04)Atc0wlO`(*Y#y5v$b4y zefGARRNo&@^*twIFEu7)s1#ETi zpAkKb{O-u#dKdUN`bU4V6%_eO{iwO?Pqi3|3dLgX-0Q#3#!s@?3|N0xoZt}Uqxd-k? 
z|E5pIUOq>Y-ICBdiT)+AKTPovn;+!QpN_FXAG%*K+dUS}is_gF^s)Pue~)#a|0aHi zV~3Fc*7sVv9J%1lbBygL9X)lw0Bb@ds>678KUz7eT`v4k@!I5ALjs#<%l(_mlUilEm&1jzAQK%v1&zbcJ_#+tCklPEWNq7VCl#_l{KPbc~Nmm zdBun+Ge+hW%(&vR$pteOPAiy}ms>D$-lYY(3kyc&tXiL2kTW{3Z2icBB}EkljZR5v8T;Rt;ZMzWT)hBi*m+X#DRIcKW!hUt5R>Zmg4aC2zOEVKvJu}d~oL|prN}-Ca?dp%2rpF zsI1}*WfdD%sjOvXYqORYRV-J*f2LQKtE}?X#YL4xDyy<&eWjKzTD4*cl1oa7i$Rwo-5eRJ-nkJAyyQ&a&)- z{*!t3iec9T$>Kj{{DSF)?|>MaNclBE`87fL?J`h7J#Cs-S0KG@8bzgiTZp!mT@;kh zW+ChE>p}ZBT4_9#@H5*!LD z_%~IYCH$yn->_;v1|D>XX_(^Rv0C$Vs=9!1s=Cz>%;X>YK?ETN%UdBvb%oKp)xC!9 zev~!(kkkHbS6HcTMf3|!!Qp4%_C)&W{(5}NeqB?@@tr6?M@`}?^drBM{Ua}_PYTSx zZt#^eRkem%_XAMfj?`J8zI25Uw>(j>zHn`IL1cm`6AA8>S?+swPp3@h@4_wS+lva=$oaJvmGLj-4v-Vx+zjtR30hNbn^Q_=ZWf?s?x%u zh$5FHRoM?xk0(jhlpGc6g=jxWeLG2NEf+~rtQU6lgIH^l*y@6sHEUhe;{BlZaLG*t zE)uQ%Afa`>PEO&`!5!=eEv>0_k#3rLu^+^=rq)HeYwF#8P{1shbq3@(o?23ipGQ zHch3nq-h@VEbRv|ZJW+WUDWdXK}{Ry#_q;>$Wz%5a(bi{T^HdYPqZJz^hhhZt-?c| z9sQuDM_O_0NaH6%u79rrEqtzBb#>8A`mu7z)2g`s+21-OE2|5ugMG4c9)M3eUY#DI ztLr=DdGXWN(qmdnOA88*D(c-nrSIRINN!YA7jB)O5;r+5S1(+Wd{aAQh(1|86)LH! 
z*iebpRIi3JOXmDAR9RKJv7n|XDb7)O#bcrBlFF*cnu-nO zH8(yIs@}YQbw$~mm-e4qXkB4jXhV5vaYfa7Ihx4P;ds6q+E86o9Vy(5F_o@~RM!-& zS@$g7`jR4Gz~2ttT(GgIxT*q*rGA1>g^ntvvZ{jL6BSjF^%aFhp2wXpkQykvsOAT_ zD_K)M5r7vyO(sq(Dc&?GJ2y*zs%$K(sxGZ4-|5lcJ?eAkn1A`5HN=ar9{t_uV9Wp< z%r8CXyD#k26JKA$Q8qmy&L-A9o7)kLELgL_i!v|Gnr&VF`mc1+|E`#?E<*HnBfq9a zsuo^AbY?&ovH?gqphBV=gvd|QfG?y@b8CWqIeHx@SzBa z_kw^?z$`+vhXKQ0@!nG2r;7LTc|TaZR|zPFi1(tb$Ndh#gMe|~1HY9J?FU&8_#&V< zN4)ngA?S1hh5=z%A07lu1&j^@Kfo~Xmhv7jpY4E^fMO{4u^n&++X3UeCw<;e1wBK2 zSd%dB83zAlMEwWO72i1E@kgJMHhOtOdrWL=H=-uTeWrw|4h%Xl%Yk7BE_GnO11lXE z71%}i>4D)X(WjItMGpwE?LbV3yba=(_Uc56)NyrU;614ruvav+r=Z>xx3{NkH5(>j ztdJpP;N@mh%6wmw=`*8dPEItMo4YkfG_~i7``Wz=@|*IVI_#`S`rd@UDTQ|_FSn9?Z1 zT)QJ)73m(p39lP>3eh0^*HlO*=+}+hTo2zefxphgTB8{SsM*gKP;cgauEA2pc6GdsM9zC^@klC zlV#v+o!vLikGJ-XqxE*ynWh~*tD)XlW!kJum}cscMBSCf*{1D<4ouU)98G-P7+|RJ zqnsk^pr7+GQT{=T>&=u!XANqqJ!_DE327JuO~W@}kWp{?Wj(ZVz`(s1(C51ybfX6b zE;gIyTF(4Vof?f!o3?e}m8e_d)J>lrjb3ueR@&-$uvIN@Vot+$0iR=FP6YJo7YkqE zJVcE_HuYRZMlt)c-SRi;926^-6nu(Tb+{6)T$( zi&ixWOj+WJYb3-#?o32WnnjjJBuYj^1r{~xZZ#9WCNTswVS8AE!dFrf^^X`4trxpN z?*Uv7wkJfa9c-c{@;;8bZ{T_e*EezP!Syh%Z{d2ReNloYzA=rvhQ!;0i6H7{9RWmN;ZgSq zLf-r9MGN>pifb?AeN4(bLf-E~-Q&2P!1Zlht+<}V^&MQ_ZJ(H!DEWmFA;}N-QnpYu z)R>vblx={rE-R5GakAoBQr;^-N6VX?$d+x{zHG?5uYE#df|M6f%B$`Z9_Rin=tQph zsCXOlzSGXRii%&L{MGhL_O{JRh#l>z*~>R3#NF-9KX~UzLhOWpIrGefxToFp2~R@o z!WiKZL~}bq-0(HU4S&-d-`DEr`0uZuBi_XL-fBM^_VfkAw9m$x@UHON31dcGB2kx! 
z8d)~ieAEcqTlIvY;CiP{@}Ay`@_t-xxSqzN zy>5;{I4-G=F1*k7NS}O*-=9T)yxWl>4jFC{W;W$ByudNdtlhq;)^70GO+FBEm@W?z zaP`{`?DmeW8|ED?=Wv&9gFJ _GkKS=qMBxo6v8m)w3w?p@DMtsDG&woTc0JwF!Z z(Mela0s9&jHMje&_C?`e@1VU-^^J^9OQdgw*im!G=!TWR8BUz#M{%b6#zikljCXKC z4Mx3jV2pSOwm<1b^b>(#ZS9JY58jp@P2V~?@lZWT(N}CQNtpE_gTC&8_Ggwv+Y^ah zC)Q^{*FMA2hSgua>gxIvk^e#wKQSJkv0NVU{DGr7P<}&*Pu_k&hpE-ppOicn^A7@Y&$TN^&VsRqb&-j z8D;ZPRc`TA1$?H$k9&3o_W2Zki)UHj@qG?W;4FJw)syDc%N#l=AHLeD2hEQUAEm$Y zW6gJ2VAaZ_bdIb#TAw&_w4U-$fSj$4{5dGMIP!BWt&aS}X>sIdxy|z(DL?UBJugf7 ziQk5?P<}}hIFz6Dq)GWD9Y=my@5ry|Q+~yFr<7mQq5Qg@^6Pr8^$SmQt(SFOw|)AD zs9Ep!4JWhCGfs4!e}*y{J+1K!XN@bkC#+ygzJh9vql~rF98_>Oad00e4xn1&ec;G7 z)d!AT<9W^+S8#t=!M1z_)fz_`d|QGF?jsKFTZsdx)_5N{Ql36=^!lCw`_=1vF3Ng+ z)AsfHCXQa;EVp?6DAxz^x1=reM>sd`jGX``q1>bJ`~@V z<@(TcxIT0}*N3jBP8Wl|Uf;_po3p+tn_l0<(d(P#Hc$5X^iRZZ^(>R}6Tb~(q5P62 za40|P_4<}{9QkFvBfq9k`4!(G=hHuFI+S16Q+{3F{aHSCLT&BJaeHqYA059nlz2iu z%kdt)eFUH7W4XQ`J?2?{+&)XY_8FcwyngvL%j-|%J;VJhKQ52YST4NZ;Mv4yYV<&@ z824J>k}c*}05h!n@q#_>wF%e%z-$1_S@)KCE8vVTA2zoE&b@QEuMu$Z*Qfjb7I67v z*ZJ5#efA%H?0?Z!ANbfmv*56g{iobC#Lxb->d*DF|K)cN_p|?%-^li}|EeeR{Omt! zp6Y+`D%F2dk?KF~X4QZ8t*ZaR`&9p{9#;LY{l4md!HPdhzLPf`_L1M5d(V}8uKr%O zpU?an)D0bfow)&!W8DbIv645(O4&J9%FeM;c8-;_I9B#QZ_>*?_D`SvnveZ2y6P=C z_RIn$`;?oM?6c~X?3drIWWVwo=la=ySgz_n>T=cp;uWg@q!QJC+6L8s_HC;F!h2Q! 
zs~%DPN6dR!@*Q9Fmelnnw|*dXY&|^0&v%y3`xxnuKks9d>k|H$c3J!JVL8tYXN>SS zj`-7rqjfjDRy%4g_GMCDeefmh%ShZUNjtCk-CNR5*}vh8|DHgdsk$3pt3kiR=+}gP z_sRO}lD2cQ{q%gCY`+|gFP1rLVtdZE1NUWSPONRmy*E^Mq!#yp0=_+JJN7KY(Mt54c0D z4EtlJ{0VTOhqNKyG4_yA!BnoH6UF7&PZU?rX<%;p3xStXc8>%c2^a(n0*(e8{Zi`g z48RQB2dt^~7?j7Le7-f^9*go=lt)@K>~ScM18%xC)1H9x1k?wuupL4$w z>c?73ZR&9-?uWkA5jz*~T);HIG{E72!(Zx*squBhhFW87HO7wE7p-x&8YlW$6Kpls z&e(ZY$X4S89V^QjZ%1sHmFD>GqJ7hB8hFv?++$63qm%TT@yb~oNyY0pJ@F6t*( z*Vyw>o{#cG>pFV@$_r2qSvS~=P+kQ5OzTVbVw4x7KFi9tm!P}^ps z?PVx01O8;I*j|qEa@6NoYweXNuS7Z5y3xJ{=1n`)a*(#JIlH zdg+L9eW~@*8RPm=>!l;c^`*wy5##z&GOdza+YCEw`y$t&1-^YaGnvk z#M)>-ihh(w?mIxX71&!mjP{`ujR5_n2_M0B>@}Xh0^jnK3wmoA3c5qb8z%iI^Yt+@2l|+ zD)q|#*k|pDx0~N#;7fj-KkUVhgdT!(40DFH$xcUk^yP+mskH^~rbp*XU?0Q}J!V{C z1ZG;bb`W?OmSN7aYu$s3?; z(pRt@`})dPfG*a0GyN3pQrf7h+fBL97U!Ig!JN}(=_@!coCPRfp>X!lhiTh&`wH5{ zSl9*an7)Ez!r6iH6%KE=eAEeeCV9K%qOXv;hV0T;IK18RjgWi!kG8wzp`RdM&V}?7 zC&`z7Lh4lei5~ds{X{ok`U}YybI5&0`V6kuEUeds)=pcWZ;pZPvep>pB5RlQ9eV!s z*{Jp**{~bSYO>wupckQUc8Oswwwk3sQ8u&ld~-*rfj))yk^_6W!n)rc3*4M_hPlMr zEq#jCC4CCz%7a{2S`XS6123<_Fqc|!=~FaKe$&%_r$UaatcUDzz?)ian9Hm^b^^Xf z)1yD3Z`3}D{)BTp4mL ze(77Z{^?V~z?*-!5xCy^JNX{52sXpHUyQluyNR0Xm*u+&c`wUoisuU7znJ zS|{`)q`3?-((f#X-=Qt8bm(3Kx_n1T(uF@E-RnR%6LMa5n_+T~iqA9cfB2rF>C>;| z0e?F1>3`@~7|TKbsr}GYv{Og4ecI(^uuIP8T-X?GXMQQ(aR3*r$NM_qBG?jr&|>%? 
z;xBRVmpb^%fKMOGHA5fcwm-L=Zn-`ydFXdeC3)zJ8e!k+U8wWz-FSD%g6`(tiEj|V z`Cm0mAv)h4hHttcAlh4KCx&Ame#|R<(k}V{=e>z|z~4u^{rr0CAgY0b3TX2tI z@|%i2fbU84Nnzmc!ut%Lb@W?&mT{f1elXrE@>RQhr#bA*^%2Z5eG_Hpdz1D}oKyOm zA>I$`=$qJ%^U{(0#v&ci+jEZha2=n6Tx0aDv|-xQVb~A-6UTycRpp<0kxe+V1$&W= zeu`tnne0gVDdLg0TRyiO^iy&kAS?Y;xEH>xKb}tLugF)wH|6)im;OrX(D7H3p^ruP z8HkR>9AoY0;C}Jf@%;g~Bt|_s^c}yIhx(=8#5V`vvPYbL@HJC$zx*-7xBl@pt zs9*VQe18C5^IgNd1n|1=8>ZK1`su%vT?`q?_n>!iPB8o#Z{U}J=1;tVU-AAqZ{RnC zhr9v$F|9NDu{2+nzk_Wrc?18+KCgNM|Hb<^=}%m=;lqB<`k#3N^j&;j(%$KxY47yE zv{U*qO`m>@{*JnlwgO*7U&ggU{cx>NKeTb`gLT}Jx5890Bt z;fGs%mjV_(f4^@gVA|f_OPxilU@CYCKe|6MkAN$`oX^M~i3x1;dm%gt0-}GzM zfAjyU{`Ef9|Bmxi|GO_#{l7Lv^ zzUzMdmelo^eg7-@+~v_EC=2UUF=Vw3Gl#SO2`z?u71SPO5UU-Pup>EbFp zhZXMA`V61AJjb8$q#%@&?4Pc@UL<#7q3p=i{~BNy3^OO z)WnrzfqV(RMEhaAkq z6BxIaeZ>>p3sxTT> zfzNmdmUT=-0x=O<-Xox`<0FQPb6$(Rl#;<1i6JQ8_fp42jFZr1zB6drd_U0jv~$+C zElg$SQ5R2R>nk4`sy|&!+jARr($jAfuwVHEY@7B$*%^m?y!c}| z@1VusOVIZ9c-+685U`sYaQ1xz&cVL~_$9!P9N$GA0ydhD`h3){23(CZ@nlJ&IQ^V4$ECT~RBji42=+U+uw%Yc_}9h7)UdcZ43 zTRGYatY_snJlo>e!j@+!UtEdyO0=)G{@!Mt^v9~LtqN^b;IYQ~2V45^YadDvDx9Zl z&{l(fh1NgX8&Tefa*_2Rxtz}v6aAGi5QqL>ZSiu?X4=7pd+T@ z2s&d|>GcR%+o&KTE+-n&vZ+Y#eB;d`jtKHTH%ZCvhg^d_%bZ^z2#md7mz zpM|G_9LeWgDeMKmGkO3%*oH0Gu!U0VWt^qTy5#SbqG*ev?MCYr=^NTgA4=!%lhAM9 zRxT6>gDcf{zc)R;PA^i|p?w8F1RpKy}$XkX9*Uq@fS@k91K zZkgP?wJ&IeeDnn;JAUm0TES2HfIj$XAJ9gB4l+V=>hL9wB3cao2*~jJ5b(%a+UQf`An0~D$>K+ z-ifxIpjU1E#@>bUF5uNz9Y>#iqzAkvv^Ak^gY}aRD0}RT(cjS*=(rSRn;kLwI;ESA7;RnYrXxn%R=Vhn z(Uz4iI%2eCHTI4eb*aYQ5u+~ESUY3X9es@3-rRD!W%{(`ppQ8vB0ef{f{LnBrTYr@2WkX4q z`YzI4Daw^;8b&+vIplK&GU@MY@*d4^AUy2tx0f7}Sr=D8QmcK)_d z#``$GVO$Qn+$W{&Q!cFgcd+aAmyDHQzG=Todjy{FUg- z{Z+0Hp5b#${QaVg_i=c0A5+WMowr*q?oCTwLw0$F@9=iZ=l+ck&t$YrJjW+*eSRiR zk~jCKrCu@5GTx^L-fn(8znA=+{pl}35BWbf%v$S1JA(4+pBm;@ti$qLP0yD;t1moc z1Z?XQJ0Ez3&l_f)bwr-eD|=WL;GT2=+KT_hFr$`09Q?&Eti^d*y=ByKFIww}d(o?b zxAtERbE{?6(N1o}dC4u7zmEIRNqX3aUIV-vUpLGKE2WP6cB^pD{c!p&`XEh{{)cB` zv|;)(`k!lnOS`52x)x>HC+(Z_nvb(F+Vgd&=lE&2)YEF5pU_^fM?G~xz0kk7?ZqvF 
zo9Ca4AN|N@$&bEb0&K9Sc%K}u3+LQQ;5U$1>v5i^mf`HQxH8_S9CmS=byl5@4J<;x@?RL{?N(|XV*@Eq_jsQ|oIlYv=_}aI z-|L+G{YF7~Hdc%|r_It=a9sTDuk;lPXAgatwq3Wcpk1tmUC@r{ zD>x?pR#^H9hd1p&%h#Q^TQ2$vscXnCeTBoDHmT*4zQXx!pIavS3i8%xSDX)wvH3)K z(^p8nYG2U>M%I-AZ{zorqTod$2K?HO4YSFbApH{8e=_FB`7JfaUWK`$ZIYjS*HEyP&mnD} zJXfcz-D7@gFrTzf`X{#YchvG+UD4Q2->B`j2D~=@LD?z&5#{G^t!2#5Ny@{sbm^zz z&t%Mx!;ki&WpeYT50biud@|C|So&5AWi~j9;Z~W+Yq`vqZLCjAr07k^`EC*g-nPNsz}Kxj z`7W^?Hp6FBBj%pIM9uZf^8JLo@A?SuF@QV%)HUWu>x6!UG-;pMGT{@V5Y;{zvbd(0^(_v`w+;Y0* z`mE%k-#L}!p)Xnt`R-Em%Z%uf~Q-F}W&#{AF+Bx8Pn&oc-<>*%-mEaN%}qn_sk`KrBZ%n#<6zKOEC zV}3ZN^ffx>hwaRfAbpdfCu4s29ON3KZ>0^>o^;F)$HH6+(m(Yg8+9VjFnW=Veu`sc z9tQd;>IC?E-10HzN6SG!CFcRM%9x*C`1Yj}`YZC)?@iR*@%YkTNgX==sv7#(ex5fO z^HYQS#t~lpW`F(bccoKL4t>XOZAAT!3%usdfIBa8`oY(1!u_uCUK#VF=ZOAmGwPc% zz2+9c=E+|3%YgS!@g`$_lwAywF^eY>^P_ckA~8Qn+TCJ)_`IaO(?8SR>3?ab^kbSn z{TTfnbt7#BzKFhzYlZsZTA_Yu#=*D4SA0D6!zO)xdG*Q@ly|_i$ZnI%yrCuyq9Op zs$RzYdRc!c zJUnOjG1o`F>eEVyIvMk$`|ICKN~O)%y*KE`2o$IVt#6Vr(%AR z_;{8`8vxCoVt$Cv^`Y^TF+Y?U=RVzIiim$q%nxYy8S?{r{fhYkZN48I6Z7-ju{PcI z9Bg`14>3QelkeTCt!vEBbB;YLoW5gzz&B~r=)?EzE-^o_c^UJAHrQ;}m>-27&sS}| z$NXSz$}>+F{bbCKLpSKq>JsyVwJGBsdZDjlWVB7ULRM+hJ@5%Td?+i%*fr({e55`1 z!6$jfNuBf*^TQlw$Ss@@j$FZBN{ z(P-R{Mvfxp9ZJUiQ0KtSKwAcM-8~OcGVTX78TT_5cw>Rb_{U@7en<;A%ASwSSH$l* z8TUh4Nqasve-ZJdZId4GlXNxc!f4Yui0B*Iev7ytg|qK6wDpveNc)DC-(oJ|8+7iFJmBZSKA0nj zeB`;9f^lWHDrb=P39XP(`-JJ}J014PJVM$hw7_RbpP*>$Q8s;Sej)7>TH)8EPf+;H zCn}Hj1ugJ(GVaIW=a$LMTl<1m$S31|PLiMY0j=OCeLx@lv=3;bzlRT)4_)%YfHY4yoj`kMw}I5_mo3luADPNDp>; z9q`;arj#wJJW<*YP{%s%N7!dM?8bmaUSlt4(eIQZ@6P8Xq!5A zr){1tveHgZ7g@E<(sp!gf;$$0w$oG0K`rWgI`igvU{7b<$2-#|E%zCAUuV_C?|wE- zyVS7`?$`#-Lr*aSr_%YXj*&TC<)^)$&idDDp!=CP{cJMtg%e}Z(^+?4=c#?2pC0eL zw6F8gse^2ZxY2)MMDeA?%5clm}-Zkf)XBGVZe` zp3mT3{(mPtOX2<`A?WPOSNF+tAljpFa0rPLbZ`i9zhBDZ;DjBVd0X!I1A0#Jv*J=6Ob#hK|4j^p|P}-QfZ{ZmQ?v;(9ENu)By5FtO6$o*!Y|OzC4i5XHPn?iA za!wtbu!EED;1Ci=%7cAaX=8xW#?*Zq_m7h{hO)FVK=e!QdFQKp^?&L(IKshUfAnc3 zL?2~i4o=v?$#-xFiPOc#-gImXds})Ao3Vc)Z31n24x631j$^L%95y?1%{^;9hjFKl 
z`^kC^-FSKq-F12n-E~PD`>kVR*f-SjabJ+~aeq(C$GtqR0q)Uh`M6Id@zAE_m0%(QQuRpTE2t#l$(}0 z7j-^QzN_e5w9L7v^E#5g&OLhp>UB;>e*5tqK<6hOiZ-2x_=_m(yu*C2)A@zZMZL}` zd>+a=Pc7fEbza&u)c?71(w@rlalN0)^67V4+K$d;n}NB}xjShuI!En=sMonWX)ikW z=@``O+?}M8jW}WEk7TYroiCDfau83<+>m2YuX8|>PTs&{bCr_LRM6?3n^f;bWFWqm zIdHjNbgo<^pJzZqgHcQ*lxlzYsUUe>1 z+D=b7PiLUMr<|KJQQuRJ%`obF%c)6P?i`xLJzlO%;&;!DNxRg!R_Sx~TH!qOl(RGk z^*UcF*P_m=nTL9vN0V#uv&x@&I?GRcKb_^*Yak2r?as;BWiMj0%JJG$u1)44JYGJ> zJhUG#f8+7;HU7EwBKVwe=l!IAPwqvOs@$(Sr>8q-CVhcBcju{=50T$~pVzT*U4351 zCcn#&H&uQo0?O}1b>G4~uF3C2l<|$#+&w3&&bcbT{T&N^oR*yML2p!`l$_iaISul6EP2JM#aIo)*rHYtyTBLG1=nO}}{_zlQ; zW&h*R>GGX8&G}B8cJw=px%T9DBHHwK81v-Gy@-pCeuptvo5Vw#{tjc#Io2_koc<1T zo59758O@*o$Kv@t+wW9q(*IjnH6Yz$>-8-VDyFJIla65?L%ML0OZ z!C`;&X(c3%l*hpdJ2?3c4k2;6*x0p>ja_?m4w)B>I%f_tJ%`NUC2b6It>=*Ws3acR z^c*tx73-MOO3$GiPtT#dPS2scE@@+>j*XQbm5;fJq)nhr%g1~|(#9|jEgy5`NIbM@ z`Iwi7bK=Q{|x(F6HQ^P2`PGn-Pb@->-0GiuJ55shAY=~g3dHknItGzc@>;Qc6%FfO^oN{EJbfuL^_KaV!2Ut%4^eHeN%r;k@{13@j^_5Pr^wcQ7P z2cxQ9^V=$>PU=FBalu0ni!ybZRWEk8N0AFKDjsZ4h!}VZ#HqM=qiqJ-E=^o|XCjgC zq$kp$U2){n^z_E4h@;;(a6N?Uo4EGidKlNYa6QsKDlrOugczL|jXsG4?i)c~jE;_O z?5?ZJ{M+jNasM6A+IHfz-vx;aWIz87v%wrax)JbJ^MVT+S?{!i-VTwG$dGs$@r=f$ zkUa`Ln2^Jl@JYNei7~Q$jBiZiuGDyYy=Vc>qqz2B%#R&AX00FK%mmJ?#4ONBc*ZBj zgN`^dYy5bOeINQij_V0r-^SI7>q%VS!S&ttiHV7lcPJ5(ym1d%O@aSg`nY2!KF5}s z$dvu$*h3*T_RP#i);sNzUREMY;$_9N8XJ;he=<4t>_oO~&-P_w?E5fQ^?#3(yWIj?ztoMM`}DXG5PL8*LKH=psQIl0u+m`mM!&YQ+u>gFMD8uO-`FL~3P zyy|YyLi`r_s3rGZ-Br2saO0G@K?`!#lHjo=3l(xSAZS=>J`5R z{Lk0D;x~Z*^|n|12jK62;T5x?iz67{eHh=o*~q8*r^LVnlJSTzV&^tcnk2IA9}^x zfImOr74HC22R{R(4t@?u9sB~2I%twHDQb-OH}yTnz4xDfWBeW75A{6$&+qykW8!~K zzcIRF2|IZ%DQe$-;9VhtgufEv{Z3VP?;EP_-gksJ{Qj|Z3H-k?sH(&%XRa^J z$;!$sUca^=Qhak^qjLX7HKKLfXe z1*{-d)rWO+sJx=4NQ4SEmsf9IFG3~d8$xRfs@DqVU)I-DiBMHVVL?rS2-OsAs*%_Q z>r2<5x~QzGB2rmUSXf$Kg8HJ>8%iQ&rRAiUEJq{-XmEW|IXZ(+O~GowwWwDm1;xot zUc{QWY{|UEk@+i@%=!Q58d_HUl^K%E;#U&Gj)T4dL8Ln z_T}2t-Sm3Z5I^&ie_XHRm7;Ew>zK>&gG8c2_t)!H!wd&sx9k2+wZOMD*6mULdVOmc 
zbP#oaEx(58X!pAMYk#7l_9qS*IR;%Oya*S^rRi&X)i8`U_g~n3gK{P=_SgO{xeoB* z&cEp03MT(0vg7V)qb+7;J z#$H^3hVFI5)&0AhXm{Leo&H(u)!usPXFC~^shpDa_(HX zQ}?Kwe5a0fN<6JU{T!lwXx%xe5;Qyg@iElpPs^<<8&K$}f5yWK;}(tR!0!G34|^5} ARR910 literal 0 HcmV?d00001 diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv b/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv new file mode 100644 index 0000000000..514854aa66 --- /dev/null +++ b/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv @@ -0,0 +1,9 @@ +dtype,hdim_q,hdim_v,mask,border,knl_name,co_name +bf16,64,64,0,0,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co +bf16,64,64,0,1,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co +bf16,64,64,1,0,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co +bf16,64,64,1,1,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co +bf16,128,128,0,0,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co +bf16,128,128,0,1,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co +bf16,128,128,1,0,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co +bf16,128,128,1,1,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py new file mode 100644 index 0000000000..be4d280fa2 --- /dev/null +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -0,0 +1,387 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +"""Correctness + performance tests for aiter.fmha_fwd_f16 (ASM path). 
+ +Layout convention used in tests +-------------------------------- +* i_perm=2 (sbhd): input q/k/v shape [s, b, h, d] ← kernel default +* o_perm=0 (bshd): output shape [b, s, h, d] ← kernel default + +Sink convention +--------------- +D64 (_rxy_sink) kernels compile ENABLE_SINK=1. An explicit sink tensor +[q_head_num] fp32 (AITER post-scale) is required. + +Sink mechanism (from common_fmha.h::fmha_merge_sink_rowwise): + After computing standard softmax numerators/denominators, the sink acts as + an additional "virtual KV token" with zero value vector. It only adds to + the softmax denominator: + new_max = max(max_attn_raw, sink_raw) + sink_term = exp2((sink_raw - new_max) * scale * log2e) + denom = denom * rescale + sink_term + numer = numer * rescale # sink contributes 0 to output + In AITER/post-scale convention: sink_raw = sink_user * sqrt(head_dim). +""" + +from __future__ import annotations + +import math +from typing import Optional + +import pytest +import torch + +pytestmark = pytest.mark.skipif( + not torch.cuda.is_available(), + reason="ROCm/HIP GPU not available", +) + +import aiter +from aiter.test_common import checkAllclose, run_perftest + + +# --------------------------------------------------------------------------- +# Layout helpers +# --------------------------------------------------------------------------- + +def make_sbhd(*shape, **kw) -> torch.Tensor: + """Create contiguous sbhd [s, b, h, d] tensor.""" + return torch.randn(*shape, **kw) + + +def to_bhsd(t: torch.Tensor, perm: int) -> torch.Tensor: + """Permute a 4-D tensor in `perm` layout to bhsd [b, h, s, d]. 
+ + perm code: + 0 = bshd [b, s, h, d] + 1 = bhsd [b, h, s, d] (no-op) + 2 = sbhd [s, b, h, d] + """ + if perm == 0: # bshd → bhsd + return t.permute(0, 2, 1, 3).contiguous() + elif perm == 1: # bhsd → bhsd + return t.contiguous() + elif perm == 2: # sbhd → bhsd + return t.permute(1, 2, 0, 3).contiguous() + raise ValueError(f"unsupported perm={perm}") + + +# --------------------------------------------------------------------------- +# Reference implementations (inputs/outputs in bhsd) +# --------------------------------------------------------------------------- + +def ref_standard(q, k, v, scale, is_causal): + """Standard attention, no sink. All tensors bhsd.""" + b, hq, sq, d = q.shape + _, hk, sk, _ = k.shape + if hq != hk: + k = k.repeat_interleave(hq // hk, dim=1) + v = v.repeat_interleave(hq // hk, dim=1) + qf, kf, vf = q.float(), k.float(), v.float() + attn = qf @ kf.transpose(-1, -2) * scale # [b, hq, sq, sk] + if is_causal: + # Bottom-right causal (matches kernel / poc_kl fmha_causal_mask): + # mask out k > q + (sk - sq) for row q in [0, sq), col k in [0, sk). + # When sq == sk this reduces to the standard lower-triangular causal. + m = torch.triu( + torch.ones(sq, sk, dtype=torch.bool, device=q.device), + sk - sq + 1, + ) + attn = attn.masked_fill(m, float("-inf")) + lse = torch.logsumexp(attn, dim=-1) + out = (torch.softmax(attn, dim=-1) @ vf).to(q.dtype) + return out, lse # bhsd, [b, hq, sq] + + +def ref_with_sink(q, k, v, scale, is_causal, sink_post_scale: torch.Tensor): + """Attention with sink mechanism matching fmha_merge_sink_rowwise. + + sink_post_scale: [hq] fp32, AITER post-scale convention. + Internally converted to pre-scale: sink_raw = sink_post_scale * sqrt(d). + The sink adds to the softmax denominator with zero value contribution. 
+ """ + b, hq, sq, d = q.shape + _, hk, sk, _ = k.shape + if hq != hk: + k = k.repeat_interleave(hq // hk, dim=1) + v = v.repeat_interleave(hq // hk, dim=1) + + qf, kf, vf = q.float(), k.float(), v.float() + attn = qf @ kf.transpose(-1, -2) # [b, hq, sq, sk] (pre-scale raw) + + if is_causal: + # Bottom-right causal (matches kernel / poc_kl fmha_causal_mask). + m = torch.triu( + torch.ones(sq, sk, dtype=torch.bool, device=q.device), + sk - sq + 1, + ) + attn = attn.masked_fill(m, float("-inf")) + + # Convert sink from AITER post-scale to pre-scale raw + sink_raw = (sink_post_scale * math.sqrt(d)).float() # [hq] + # Broadcast to [b, hq, sq] to match per-row max + sink_raw_bhs = sink_raw[None, :, None].expand(b, hq, sq) # [b, hq, sq] + + # Compute softmax max over real tokens + max_attn, _ = attn.max(dim=-1) # [b, hq, sq] + # Effective max including sink (pre-scale domain) + max_total = torch.maximum(max_attn, sink_raw_bhs) # [b, hq, sq] + + # Rescale numerator (O) and denominator (sum): + # row_scale = exp2((old_max - new_max) * scale * log2e) + # = exp(( max_attn - max_total) * scale) + row_scale = torch.exp((max_attn - max_total) * scale) # [b, hq, sq] + + # Standard softmax numerators (rescaled) + probs_unnorm = torch.exp((attn - max_total.unsqueeze(-1)) * scale) # [b,hq,sq,sk] + probs_sum = probs_unnorm.sum(dim=-1) * row_scale # wait, already accounted for + + # Re-derive carefully using max_total directly: + # exp((x - max_total) * scale) for each attn score x + # sum of these = denom_real + denom_real = torch.exp((attn - max_total.unsqueeze(-1)) * scale).sum(dim=-1) # [b,hq,sq] + + # Sink term: exp((sink_raw - max_total) * scale) + sink_term = torch.exp((sink_raw_bhs - max_total) * scale) # [b, hq, sq] + + denom_total = denom_real + sink_term # [b, hq, sq] + + # Final probabilities for real tokens only (sink value=0, so no contribution to out) + probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze(-1) + + out = (probs @ 
vf).to(q.dtype) # [b, hq, sq, d] + + # LSE including sink: log(denom_total) + max_total * scale + lse = torch.log(denom_total) + max_total * scale # [b, hq, sq] + + return out, lse + + +# --------------------------------------------------------------------------- +# Correctness tests (sbhd input → bshd output, compare against bhsd reference) +# --------------------------------------------------------------------------- + +def _d64_sink(hq: int, device: str) -> torch.Tensor: + """Non-zero sink for D64: fixed per-head values in AITER post-scale domain.""" + # Use values in [0.5, 2.0] post-scale; vary across heads for thorough test + return torch.linspace(0.5, 2.0, hq, dtype=torch.float32, device=device) + + +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("is_causal", [False, True]) +@pytest.mark.parametrize( + "batch,hq,hk,sq,sk", + [ + # Shapes from run.sh aligned tests: batch=1, kv_head_num=4, gqa=16 + # → q_head_num = 4 * 16 = 64 + (1, 8, 1, 128, 2048), # aligned (test_d64 / test_d128) + (1, 8, 1, 130, 2048), # q unaligned: sq not mult of 128 + (1, 8, 1, 128, 2300), # kv unaligned: sk not mult of 256 + ], +) +def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): + device = "cuda" + torch.manual_seed(0) + + q_s = make_sbhd(sq, batch, hq, head_dim, dtype=torch.bfloat16, device=device) + k_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + v_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(head_dim) + + # D64 → non-zero sink (exercises ENABLE_SINK code path) + # D128 → no sink (kernel ignores it) + sink = _d64_sink(hq, device) if head_dim == 64 else None + + # ASM forward: sbhd in → bshd out + i_perm, o_perm = 2, 0 + out_kernel, lse_asm = aiter.fmha_fwd_f16( + q_s, k_s, v_s, + softmax_scale=scale, is_causal=is_causal, + return_lse=True, i_perm=i_perm, o_perm=o_perm, sink=sink, + ) + + # Reference is always bhsd-in / bhsd-out. 
Convert kernel I/O accordingly. + q_b = to_bhsd(q_s, i_perm) + k_b = to_bhsd(k_s, i_perm) + v_b = to_bhsd(v_s, i_perm) + + if head_dim == 64: + out_ref_bhsd, lse_ref = ref_with_sink(q_b, k_b, v_b, scale, is_causal, sink) + else: + out_ref_bhsd, lse_ref = ref_standard(q_b, k_b, v_b, scale, is_causal) + + out_asm_bhsd = to_bhsd(out_kernel, o_perm) + + checkAllclose(out_asm_bhsd, out_ref_bhsd, rtol=1e-2, atol=1e-2, + msg=f"out mismatch (d={head_dim}, causal={is_causal})") + checkAllclose(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, + msg=f"lse mismatch (d={head_dim}, causal={is_causal})") + + +def test_fmha_fwd_f16_ops_layer(): + """Direct ops-layer call (sbhd in, bshd out, D64 with non-zero sink).""" + device = "cuda" + torch.manual_seed(0) + + sq, batch, hq, hk, sk, d = 128, 1, 8, 2, 2048, 64 + q_s = make_sbhd(sq, batch, hq, d, dtype=torch.bfloat16, device=device) + k_s = make_sbhd(sk, batch, hk, d, dtype=torch.bfloat16, device=device) + v_s = make_sbhd(sk, batch, hk, d, dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(d) + sink = _d64_sink(hq, device) + + i_perm, o_perm = 2, 0 + out_kernel, lse_asm = aiter.fmha_fwd_f16_asm( + q_s, k_s, v_s, scale, False, True, + i_perm=i_perm, o_perm=o_perm, sink=sink, + ) + + out_ref, lse_ref = ref_with_sink( + to_bhsd(q_s, i_perm), to_bhsd(k_s, i_perm), to_bhsd(v_s, i_perm), + scale, False, sink, + ) + checkAllclose(to_bhsd(out_kernel, o_perm), out_ref, rtol=1e-2, atol=1e-2) + checkAllclose(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) + + +def test_fmha_fwd_f16_d64_requires_sink(): + """Calling D64 without a sink tensor must raise an error.""" + device = "cuda" + q = make_sbhd(128, 1, 4, 64, dtype=torch.bfloat16, device=device) + k = make_sbhd(2048, 1, 4, 64, dtype=torch.bfloat16, device=device) + v = make_sbhd(2048, 1, 4, 64, dtype=torch.bfloat16, device=device) + with pytest.raises(RuntimeError, match="D64.*sink"): + aiter.fmha_fwd_f16(q, k, v, sink=None) + + +# 
--------------------------------------------------------------------------- +# Performance tests +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("is_causal", [False, True]) +def test_fmha_fwd_f16_perf(head_dim, is_causal): + device = "cuda" + torch.manual_seed(0) + + # perf_d64 / perf_d128 in run.sh: batch=2 kv_head_num=8 gqa=8 → hq=64 + sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 + q_s = make_sbhd(sq, batch, hq, head_dim, dtype=torch.bfloat16, device=device) + k_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + v_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(head_dim) + sink = _d64_sink(hq, device) if head_dim == 64 else None + + _, us = run_perftest( + aiter.fmha_fwd_f16, + q_s, k_s, v_s, + scale, is_causal, False, + num_iters=10, num_warmup=2, + sink=sink, + ) + flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) + if is_causal: + flops /= 2.0 + tflops = flops / (us * 1e-6) / 1e12 + print(f"[perf] d={head_dim} causal={is_causal}: {us:.1f}us, {tflops:.2f} TFLOPS") + + +# --------------------------------------------------------------------------- +# __main__: CLI single-shape runner +# --------------------------------------------------------------------------- +import argparse + +parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description="Run aiter.fmha_fwd_f16 on a single shape and dump kernel args.", +) +parser.add_argument("-b", "--batch", type=int, default=1, + help="batch size (default 1)") +parser.add_argument("-n", "--q_head_num", type=int, default=8, + help="q_head_num (default 8)") +parser.add_argument("-kn", "--kv_head_num", type=int, default=1, + help="kv_head_num (default 1, must divide q_head_num)") +parser.add_argument("-q", "--seqlen_q", type=int, default=128, + help="q seq length (default 128)") +parser.add_argument("-k", 
"--seqlen_k", type=int, default=2048, + help="kv seq length (default 2048)") +parser.add_argument("-d", "--head_dim", type=int, choices=[64, 128], default=128, + help="head dim, 64 or 128 (default 128)") +parser.add_argument("-c", "--causal", action="store_true", + help="enable causal mask") +parser.add_argument("-i", "--i_perm", type=int, choices=[0, 1, 2], default=2, + help="input layout: 0=bshd 1=bhsd 2=sbhd (default 2)") +parser.add_argument("-o", "--o_perm", type=int, choices=[0, 1, 2], default=0, + help="output layout: 0=bshd 1=bhsd 2=sbhd (default 0)") +parser.add_argument("--ref", action="store_true", + help="also run PyTorch reference and print max diff") +parser.add_argument("--perf", action="store_true", + help="run perf benchmark for this shape (10 iters, 2 warmup)") + +if __name__ == "__main__": + args = parser.parse_args() + + device = "cuda" + torch.manual_seed(0) + + b, hq, hk = args.batch, args.q_head_num, args.kv_head_num + sq, sk, d = args.seqlen_q, args.seqlen_k, args.head_dim + causal = args.causal + assert hq % hk == 0, "q_head_num must be a multiple of kv_head_num" + print(f"Shape: b={b} hq={hq} hk={hk} sq={sq} sk={sk} d={d} causal={causal} " + f"i_perm={args.i_perm} o_perm={args.o_perm}", flush=True) + + q_s = make_sbhd(sq, b, hq, d, dtype=torch.bfloat16, device=device) + k_s = make_sbhd(sk, b, hk, d, dtype=torch.bfloat16, device=device) + v_s = make_sbhd(sk, b, hk, d, dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(d) + sink = _d64_sink(hq, device) if d == 64 else None + torch.cuda.synchronize() + + import time as _t + t0 = _t.time() + out_kernel, lse_asm = aiter.fmha_fwd_f16( + q_s, k_s, v_s, scale, causal, True, + i_perm=args.i_perm, o_perm=args.o_perm, sink=sink, + ) + torch.cuda.synchronize() + print(f"asm time: {(_t.time()-t0)*1000:.2f} ms", flush=True) + print(f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", flush=True) + + if args.ref: + # Convert kernel I/O to bhsd; ref is always bhsd-in / 
bhsd-out. + q_b = to_bhsd(q_s, args.i_perm) + k_b = to_bhsd(k_s, args.i_perm) + v_b = to_bhsd(v_s, args.i_perm) + if d == 64: + out_ref, lse_ref = ref_with_sink(q_b, k_b, v_b, scale, causal, sink) + else: + out_ref, lse_ref = ref_standard(q_b, k_b, v_b, scale, causal) + # cast asm output to fp32 BEFORE permute to avoid bf16 contiguous hang + out_asm_bhsd = to_bhsd(out_kernel.float(), args.o_perm) + out_ref_f = out_ref.float() + diff_o = (out_asm_bhsd - out_ref_f).abs().max().item() + diff_l = (lse_asm - lse_ref).abs().max().item() + # Pass criterion (bf16 attention conventional thresholds): + # |dO| <= 2e-2 |dLSE| <= 2e-2 + ok_o = diff_o <= 2e-2 + ok_l = diff_l <= 2e-2 + print(f"ref: max|dO|={diff_o:.4f} {'OK' if ok_o else 'FAIL'} " + f"max|dLSE|={diff_l:.4f} {'OK' if ok_l else 'FAIL'}", + flush=True) + if not (ok_o and ok_l): + import sys + sys.exit(1) + + if args.perf: + _, us = run_perftest( + aiter.fmha_fwd_f16, + q_s, k_s, v_s, scale, causal, False, + num_iters=10, num_warmup=2, + i_perm=args.i_perm, o_perm=args.o_perm, sink=sink, + ) + flops = 2.0 * b * hq * sq * sk * (2 * d) + if causal: + flops /= 2.0 + tflops = flops / (us * 1e-6) / 1e12 + print(f"perf: {us:.1f} us ({tflops:.2f} TFLOPS)", flush=True) From f1d631bc0f2d511d06b8ecf484a84e583e771c48 Mon Sep 17 00:00:00 2001 From: tingchen Date: Fri, 1 May 2026 16:16:05 +0000 Subject: [PATCH 02/43] update fmha fwd f16 integration Co-authored-by: Cursor --- 3rdparty/composable_kernel | 2 +- aiter/__init__.py | 9 - aiter/fused_fmha_fwd_f16.py | 124 ----- aiter/jit/optCompilerConfig.json | 2 +- aiter/jit/utils/build_targets.py | 1 + aiter/jit/utils/cpp_extension.py | 28 +- aiter/ops/fmha_fwd_f16_asm.py | 82 ---- aiter/ops/mha.py | 94 +++- csrc/include/rocm_ops.hpp | 2 - csrc/include/torch/fmha_fwd_f16.h | 26 +- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 174 +++---- ...HA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co | Bin 68776 -> 0 bytes ...6_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co | Bin 54176 -> 0 bytes 
...WD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co | Bin 57912 -> 0 bytes ...HA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co | Bin 43432 -> 0 bytes hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv | 14 +- op_tests/test_fmha_fwd_f16_asm.py | 425 ++++++++++-------- 17 files changed, 450 insertions(+), 533 deletions(-) delete mode 100644 aiter/fused_fmha_fwd_f16.py delete mode 100644 aiter/ops/fmha_fwd_f16_asm.py delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co diff --git a/3rdparty/composable_kernel b/3rdparty/composable_kernel index fdf4bb7fcc..6b1d184e66 160000 --- a/3rdparty/composable_kernel +++ b/3rdparty/composable_kernel @@ -1 +1 @@ -Subproject commit fdf4bb7fcc984811cef48ce817d89aac064b984a +Subproject commit 6b1d184e66e143a6fa0c1b7d049c373a082ecc4c diff --git a/aiter/__init__.py b/aiter/__init__.py index 9d5e6443f7..b6dcb34a2f 100644 --- a/aiter/__init__.py +++ b/aiter/__init__.py @@ -116,15 +116,6 @@ def getLogger(): e, ) - # FMHA fwd f16 ASM (independent of CK) — kept in a separate try block so - # it stays importable even when `module_aiter_core` fails to build on - # e.g. gfx1250. 
- try: - from .ops.fmha_fwd_f16_asm import fmha_fwd_f16_asm # noqa: F401,E402 - from .fused_fmha_fwd_f16 import fmha_fwd_f16 # noqa: F401,E402 - except (ImportError, RuntimeError, OSError, KeyError) as e: - logger.warning("aiter.fmha_fwd_f16 unavailable: %s", e) - # Import Triton-based communication primitives from ops.triton.comms (optional, only if Iris is available) try: from .ops.triton.comms import ( diff --git a/aiter/fused_fmha_fwd_f16.py b/aiter/fused_fmha_fwd_f16.py deleted file mode 100644 index cf9d734f24..0000000000 --- a/aiter/fused_fmha_fwd_f16.py +++ /dev/null @@ -1,124 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. - -""" -fused_fmha_fwd_f16 -================== - -Customer-facing API for the ASM-based FMHA forward kernel (BF16, gfx1250). - -Layout convention ------------------ -Tensor shapes and their physical memory ordering are controlled by ``i_perm`` -(input) and ``o_perm`` (output): - - 0 = bshd — [batch, seq, head, dim] - 1 = bhsd — [batch, head, seq, dim] - 2 = sbhd — [seq, batch, head, dim] ← default input (i_perm=2) - -Default output is ``o_perm=0`` (bshd → [batch, seq_q, head_q, dim_v]). - -Each tensor **must be contiguous** and its physical layout must match the -declared perm (e.g. for ``i_perm=2`` the tensor shape must be ``[s,b,h,d]`` -with natural strides ``[b*h*d, h*d, d, 1]``). - -Sink convention ---------------- -``sink`` is an optional per-Q-head f32 tensor of shape ``[q_head_num]``. -Values are in the **AITER / CK-Tile post-scale domain** (same domain as the -softmax logit ``Q·Kᵀ / sqrt(d)``). The kernel uses pre-scale internally; -this module performs the conversion: ``sink_raw = sink_user * sqrt(d)``. 
- -Supported shapes ----------------- -- ``q.shape`` determined by ``i_perm`` and ``(batch, q_head_num, q_seq_len, d)`` -- ``d ∈ {64, 128}`` -- dtype: bf16 -- GQA: ``q_head_num % kv_head_num == 0`` - -The border variant (_brd) is selected automatically when ``q_seq_len`` is not -a multiple of 128 or ``kv_seq_len`` is not a multiple of 256. - -Environment ------------ -Set ``AITER_ASM_DIR`` to ``{AITER_ROOT}/hsa`` and ``AITER_GPU_ARCHS=gfx1250`` -so the compiled kernel objects (``*.co``) can be located at runtime. -""" - -from __future__ import annotations - -import math -from typing import Optional, Tuple, Union - -import torch - -from .ops.fmha_fwd_f16_asm import fmha_fwd_f16_asm - - -def fmha_fwd_f16( - q: torch.Tensor, - k: torch.Tensor, - v: torch.Tensor, - softmax_scale: Optional[float] = None, - is_causal: bool = False, - return_lse: bool = False, - i_perm: int = 2, - o_perm: int = 0, - sink: Optional[torch.Tensor] = None, - out: Optional[torch.Tensor] = None, -) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - """BF16 fused multi-head attention forward (ASM path, gfx1250). - - Parameters - ---------- - q, k, v : torch.Tensor - BF16 tensors. Physical shape determined by ``i_perm`` - (default 2 = sbhd → ``[seq, batch, head, dim]``). - All must be **contiguous**. - softmax_scale : float, optional - Defaults to ``1 / sqrt(head_dim)``. - is_causal : bool - Apply causal (lower-triangular) masking. - return_lse : bool - If True, also return LSE with shape ``[batch, q_head_num, q_seq_len]`` - in fp32. - i_perm : int - Input layout code: 0=bshd, 1=bhsd, 2=sbhd (default). - o_perm : int - Output layout code: 0=bshd (default), 1=bhsd, 2=sbhd. - sink : torch.Tensor, optional - Per-Q-head sink logits, shape ``[q_head_num]``, fp32, **post-scale** - (AITER convention). Converted to pre-scale internally. - **Required for D64 (head_dim=64)** — D64 `_rxy_sink` kernels always - run the sink code path. 
Pass ``torch.zeros(q_head_num)`` for a - neutral zero-logit sink. - Optional for D128 (head_dim=128) — D128 kernels ignore this field. - out : torch.Tensor, optional - Pre-allocated output buffer matching ``o_perm`` shape. - - Returns - ------- - torch.Tensor or (torch.Tensor, torch.Tensor) - ``out`` alone if ``return_lse=False``, otherwise ``(out, lse)``. - """ - if softmax_scale is None: - # head_dim is always the last dimension regardless of perm - softmax_scale = 1.0 / math.sqrt(q.size(-1)) - - results = fmha_fwd_f16_asm( - q, - k, - v, - float(softmax_scale), - bool(is_causal), - bool(return_lse), - int(i_perm), - int(o_perm), - sink, - out, - ) - - if return_lse: - assert len(results) == 2 - return results[0], results[1] - return results[0] diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 81d48cc213..42fe11a69e 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -5,7 +5,7 @@ ], "flags_extra_cc": [], "flags_extra_hip": [], - "extra_ldflags": "None", + "extra_ldflags": "['-L/opt/rocm/lib', '-Wl,-rpath,/opt/rocm/lib']", "extra_include": [], "verbose": "False", "torch_exclude": "True", diff --git a/aiter/jit/utils/build_targets.py b/aiter/jit/utils/build_targets.py index 86580f1f63..c20ace6290 100644 --- a/aiter/jit/utils/build_targets.py +++ b/aiter/jit/utils/build_targets.py @@ -39,6 +39,7 @@ GFX_CU_NUM_MAP = { "gfx942": 304, # MI300X (SPX, full GPU); MI308X shares gfx942 — use CU_NUM override "gfx950": 256, # MI350 + "gfx1250": 256, # MI450 } diff --git a/aiter/jit/utils/cpp_extension.py b/aiter/jit/utils/cpp_extension.py index 531ce8755b..d784cdcb7d 100644 --- a/aiter/jit/utils/cpp_extension.py +++ b/aiter/jit/utils/cpp_extension.py @@ -91,7 +91,33 @@ def get_hip_version(): output = subprocess.check_output([hipconfig, "--version"], text=True) return output except Exception: - raise RuntimeError("ROCm version file not found") + pass + # Fallback: try /opt/rocm/bin/hipconfig directly + 
rocm_hipconfig = "/opt/rocm/bin/hipconfig" + if os.path.isfile(rocm_hipconfig): + try: + output = subprocess.check_output([rocm_hipconfig, "--version"], text=True) + return output + except Exception: + pass + # Fallback: read HIP version from header file + for ver_path in [ + "/opt/rocm/include/hip/hip_version.h", + "/opt/rocm/.info/version", + ]: + if os.path.isfile(ver_path): + with open(ver_path) as f: + content = f.read() + if "HIP_VERSION_MAJOR" in content: + import re + major = re.search(r"HIP_VERSION_MAJOR\s+(\d+)", content) + minor = re.search(r"HIP_VERSION_MINOR\s+(\d+)", content) + patch = re.search(r"HIP_VERSION_PATCH\s+(\d+)", content) + if major and minor and patch: + return f"{major.group(1)}.{minor.group(1)}.{patch.group(1)}" + else: + return content.strip() + raise RuntimeError("ROCm version file not found") def _find_rocm_home() -> Optional[str]: diff --git a/aiter/ops/fmha_fwd_f16_asm.py b/aiter/ops/fmha_fwd_f16_asm.py deleted file mode 100644 index 908957fe67..0000000000 --- a/aiter/ops/fmha_fwd_f16_asm.py +++ /dev/null @@ -1,82 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. - -"""Python stub for the ASM FMHA-forward (BF16) op. - -The real implementation lives in C++ (`csrc/py_itfs_cu/asm_fmha_fwd_f16.cu`) -and is exposed through the pybind module ``module_fmha_fwd_f16_asm``. 
-""" - -from typing import List, Optional - -import torch -from torch import Tensor - -from ..jit.core import compile_ops - - -def _shape_from_perm(perm: int, batch: int, heads: int, seqlen: int, dim: int): - """Return the expected tensor shape for the given perm code.""" - if perm == 0: # bshd - return (batch, seqlen, heads, dim) - elif perm == 1: # bhsd - return (batch, heads, seqlen, dim) - else: # sbhd - return (seqlen, batch, heads, dim) - - -def _dims_from_perm(t: Tensor, perm: int): - """Extract (batch, heads, seqlen, dim) from tensor shape given perm.""" - if perm == 0: # bshd [b,s,h,d] - return t.size(0), t.size(2), t.size(1), t.size(3) - elif perm == 1: # bhsd [b,h,s,d] - return t.size(0), t.size(1), t.size(2), t.size(3) - else: # sbhd [s,b,h,d] - return t.size(1), t.size(2), t.size(0), t.size(3) - - -def gen_fmha_fwd_f16_asm_fake_tensors( - q: Tensor, - k: Tensor, - v: Tensor, - softmax_scale: float, - is_causal: bool, - return_lse: bool, - i_perm: int = 2, - o_perm: int = 0, - sink: Optional[Tensor] = None, - out: Optional[Tensor] = None, -) -> List[Tensor]: - batch, q_head_num, q_seq_len, _ = _dims_from_perm(q, i_perm) - _, _, _, d_v = _dims_from_perm(v, i_perm) - - fake_out_shape = _shape_from_perm(o_perm, batch, q_head_num, q_seq_len, d_v) - fake_out = ( - out if out is not None - else torch.empty(fake_out_shape, dtype=q.dtype, device=q.device) - ) - if return_lse: - fake_lse = torch.empty( - (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device - ) - return [fake_out, fake_lse] - return [fake_out] - - -@compile_ops( - "module_fmha_fwd_f16_asm", - fc_name="fmha_fwd_f16_asm", - gen_fake=gen_fmha_fwd_f16_asm_fake_tensors, -) -def fmha_fwd_f16_asm( - q: Tensor, - k: Tensor, - v: Tensor, - softmax_scale: float, - is_causal: bool, - return_lse: bool, - i_perm: int = 2, - o_perm: int = 0, - sink: Optional[Tensor] = None, - out: Optional[Tensor] = None, -) -> List[Tensor]: ... 
diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 7e0379e285..10a31ccca5 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. -from typing import Any, Optional, Tuple +from typing import Any, List, Optional, Tuple import torch from torch import Generator, Tensor @@ -15,7 +15,6 @@ ) from ..utility import dtypes - def cmdGenFunc_mha_fwd( q: Tensor, k: Tensor, @@ -270,6 +269,57 @@ def fmha_v3_fwd( ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +# --------------------------------------------------------------------------- +# fmha_fwd_f16 (BF16 ASM, gfx1250) — single-shot batched FMHA forward. +# +# API contract: q/k/v are **bshd shape** ([batch, seq, head, dim]); strides are +# read directly from the tensor so non-contiguous bshd-shaped views (e.g. of +# sbhd / bhsd allocations) are accepted. Only `tensor.stride(-1) == 1` is +# required. The .cu host driver multiplies softmax_scale by sqrt(head_dim) +# before kernel launch (kernel uses pre-scale convention). 
+# --------------------------------------------------------------------------- +def gen_fmha_fwd_f16_asm_fake_tensors( + q: Tensor, + k: Tensor, + v: Tensor, + softmax_scale: float, + is_causal: bool, + return_lse: bool, + sink: Optional[Tensor] = None, + out: Optional[Tensor] = None, +) -> List[Tensor]: + batch, q_seq_len, q_head_num, _ = q.shape + d_v = v.size(3) + fake_out = ( + out if out is not None + else torch.empty((batch, q_seq_len, q_head_num, d_v), + dtype=q.dtype, device=q.device) + ) + if return_lse: + fake_lse = torch.empty( + (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device + ) + return [fake_out, fake_lse] + return [fake_out] + + +@compile_ops( + "module_fmha_fwd_f16_asm", + fc_name="fmha_fwd_f16_asm", + gen_fake=gen_fmha_fwd_f16_asm_fake_tensors, +) +def fmha_fwd_f16_asm( + q: Tensor, + k: Tensor, + v: Tensor, + softmax_scale: float, + is_causal: bool, + return_lse: bool, + sink: Optional[Tensor] = None, + out: Optional[Tensor] = None, +) -> List[Tensor]: ... + + def cmdGenFunc_mha_varlen_fwd( q: torch.Tensor, k: torch.Tensor, @@ -1326,6 +1376,26 @@ def can_impl_fmha_v3_fwd(): ret = ret and ((gqa_ratio & (gqa_ratio - 1)) == 0) return ret + def can_impl_fmha_fwd_f16(): + # gfx1250 ASM bf16 forward (fmha_fwd_f16_asm). Single-shot batched + # (no varlen / dropout / swa / quant / alibi / bias). Sink logits + # (per-Q-head fp32) supported; sink-token (sink_size) not supported. 
+ ret = (get_gfx() == "gfx1250") + ret = ret and (q.dtype == dtypes.bf16) + ret = ret and (hdim_q in (64, 128)) + ret = ret and (hdim_v == hdim_q) + ret = ret and (nhead_q % nhead_k == 0) + ret = ret and (not swa) + ret = ret and (sink_size == 0) + ret = ret and (alibi_slopes is None and bias is None) + ret = ret and (dropout_p == 0.0) + ret = ret and (cu_seqlens_q is None and cu_seqlens_kv is None) + ret = ret and (q_descale is None and k_descale is None and v_descale is None) + # D128 kernel ignores sink; if user passed sink_ptr, fall back to CK + # (which honors it) so semantics are preserved. + ret = ret and (sink_ptr is None or hdim_q == 64) + return ret + q, k, v = [maybe_contiguous(x) for x in (q, k, v)] # Validate newly added optional cumulative length / padded arrays if provided. @@ -1340,7 +1410,25 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): _validate_cu("cu_seqlens_q", cu_seqlens_q) _validate_cu("cu_seqlens_kv", cu_seqlens_kv) - if can_impl_fmha_v3_fwd() and seqlen_q > 128: # Prefer CK for decode cases + if can_impl_fmha_fwd_f16(): + # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, + # no API-side permute. sink_ptr forwarded as-is (post-scale); the .cu + # multiplies by sqrt(qk_head_dim) before kernel launch. + sink_for_kernel = sink_ptr + if hdim_q == 64 and sink_for_kernel is None: + # D64 kernels always read SINK; auto-fill zero-logit so callers + # who don't care about sink still hit this fast path. 
+ sink_for_kernel = torch.zeros(nhead_q, dtype=torch.float32, device=q.device) + _r = fmha_fwd_f16_asm( + q, k, v, + float(softmax_scale), bool(causal), True, + sink_for_kernel, out, + ) + out_ = _r[0] + softmax_lse = _r[1] + S_dmask = torch.empty((0,), dtype=torch.float32, device=q.device) + rng_state = torch.empty((2,), dtype=torch.int64, device=q.device) + elif can_impl_fmha_v3_fwd() and seqlen_q > 128: # Prefer CK for decode cases out_, softmax_lse, S_dmask, rng_state = fmha_v3_fwd( q, k, diff --git a/csrc/include/rocm_ops.hpp b/csrc/include/rocm_ops.hpp index 88cf9bffe9..ba564544b8 100644 --- a/csrc/include/rocm_ops.hpp +++ b/csrc/include/rocm_ops.hpp @@ -831,8 +831,6 @@ namespace py = pybind11; py::arg("softmax_scale"), \ py::arg("is_causal"), \ py::arg("return_lse"), \ - py::arg("i_perm") = 2, \ - py::arg("o_perm") = 0, \ py::arg("sink") = std::nullopt, \ py::arg("out") = std::nullopt); diff --git a/csrc/include/torch/fmha_fwd_f16.h b/csrc/include/torch/fmha_fwd_f16.h index 582b05b648..2c04fb9963 100644 --- a/csrc/include/torch/fmha_fwd_f16.h +++ b/csrc/include/torch/fmha_fwd_f16.h @@ -8,20 +8,20 @@ namespace torch_itfs { // ASM FMHA forward (BF16, gfx1250). 
// -// Layout conventions (i_perm / o_perm): -// 0 = bshd [batch, seq, head, dim] -// 1 = bhsd [batch, head, seq, dim] -// 2 = sbhd [seq, batch,head, dim] (defaults) +// API contract: q/k/v have **bshd shape**: +// q : [batch, seq_q, q_head_num, qk_head_dim] +// k : [batch, seq_k, kv_head_num, qk_head_dim] +// v : [batch, seq_k, kv_head_num, v_head_dim] +// out (returned): [batch, seq_q, q_head_num, v_head_dim] // -// q/k/v shapes are fully determined by i_perm: -// i_perm=2: q [s,b,hq,d], k [s,b,hk,d], v [s,b,hk,d_v] -// i_perm=1: q [b,hq,s,d], k [b,hk,s,d], v [b,hk,s,d_v] -// i_perm=0: q [b,s,hq,d], k [b,s,hk,d], v [b,s,hk,d_v] +// The kernel reads strides directly from `tensor.stride(...)`, so callers may +// pass a non-contiguous bshd-shaped view of an sbhd / bhsd allocation — +// strides will correctly reflect the underlying memory layout. Only +// `tensor.stride(-1) == 1` (last-dim contiguous) is required. // -// out shape is determined by o_perm (default 0 → bshd [b,s,hq,d_v]). -// -// sink: optional per-head f32 tensor [q_head_num], post-scale AITER convention. -// Internally converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim). +// sink: optional per-Q-head fp32 tensor [q_head_num], AITER post-scale +// convention (same domain as Q·K^T * softmax_scale). Internally +// converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim). std::vector fmha_fwd_f16( at::Tensor& q, const at::Tensor& k, @@ -29,8 +29,6 @@ std::vector fmha_fwd_f16( float softmax_scale, bool is_causal, bool return_lse, - int i_perm = 2, - int o_perm = 0, std::optional sink_ = std::nullopt, std::optional out_ = std::nullopt); diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index 9daf019cb6..80f98df3d8 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -3,11 +3,11 @@ // // ASM FMHA forward (BF16, gfx1250 / MI4xx) — ported from poc_kl/mi400/fmha_fwd_f16. 
// -// Layout convention (i_perm / o_perm): -// 0 = bshd [batch, seq, head, dim] -// 1 = bhsd [batch, head, seq, dim]sm_ -// 2 = sbhd [seq, batch,head, dim] ← default input (i_perm=2) -// ← default output (o_perm=0 → bshd) +// Layout: q/k/v expected in **bshd shape** ([batch, seq, head, dim]). The +// kernel reads per-dim strides directly from the input tensor, so callers may +// pass a non-contiguous bshd-shaped view backed by sbhd / bhsd memory and the +// kernel will follow the strides correctly. Only `tensor.stride(-1) == 1` +// (last-dim contiguous) is required, matching flash_attn_func semantics. // // sink convention (AITER / CK-Tile post-scale): // The user passes sink in the same domain as Q*K^T * softmax_scale (post-scale). @@ -70,11 +70,14 @@ struct __attribute__((packed)) KernelArgs // ---- helpers --------------------------------------------------------------- +// Kernel selection: only (dtype, hdim_q, hdim_v, mask) — we always use the +// _brd (border) kernel variants which are a strict superset (handle aligned +// + unaligned q_seq_len/kv_seq_len uniformly). The csv schema therefore has +// no `border` column. static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, int hdim_q, int hdim_v, int mask_flag, - int border_flag, const std::string& arch_id, CFG* cfgs) { @@ -86,75 +89,25 @@ static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, if (cfg.hdim_q != hdim_q) continue; if (cfg.hdim_v != hdim_v) continue; if (cfg.mask != mask_flag) continue; - if (cfg.border != border_flag) continue; return el.first; } TORCH_CHECK(false, "fmha_fwd_f16_asm: no kernel for dtype=", dtype, " hdim_q=", hdim_q, " hdim_v=", hdim_v, - " mask=", mask_flag, " border=", border_flag, + " mask=", mask_flag, " arch=", arch_id); return ""; } -// Extract logical dimensions from tensor shape given the perm code. 
-// perm: 0=bshd [b,s,h,d], 1=bhsd [b,h,s,d], 2=sbhd [s,b,h,d] -static void dims_from_perm(const at::Tensor& t, int perm, - int& batch, int& heads, int& seqlen, int& dim) -{ - switch (perm) { - case 0: // bshd - batch = t.size(0); seqlen = t.size(1); heads = t.size(2); dim = t.size(3); - break; - case 1: // bhsd - batch = t.size(0); heads = t.size(1); seqlen = t.size(2); dim = t.size(3); - break; - default: // sbhd - seqlen = t.size(0); batch = t.size(1); heads = t.size(2); dim = t.size(3); - break; - } -} - -// Stride (in bytes) of tensor t along its [batch, head, seq] logical dimensions -// given perm (the physical dimension ordering stored in t.shape). -static void strides_from_perm(const at::Tensor& t, int perm, int elem_size, - int& s_batch, int& s_head, int& s_seq) -{ - switch (perm) { - case 0: // bshd: dim0=b, dim1=s, dim2=h, dim3=d - s_batch = (int)t.stride(0) * elem_size; - s_seq = (int)t.stride(1) * elem_size; - s_head = (int)t.stride(2) * elem_size; - break; - case 1: // bhsd: dim0=b, dim1=h, dim2=s, dim3=d - s_batch = (int)t.stride(0) * elem_size; - s_head = (int)t.stride(1) * elem_size; - s_seq = (int)t.stride(2) * elem_size; - break; - default: // sbhd: dim0=s, dim1=b, dim2=h, dim3=d - s_seq = (int)t.stride(0) * elem_size; - s_batch = (int)t.stride(1) * elem_size; - s_head = (int)t.stride(2) * elem_size; - break; - } -} - -// Build the expected shape vector for a tensor given logical dims and perm. -static std::vector shape_from_perm(int perm, - int batch, int heads, - int seqlen, int dim) -{ - switch (perm) { - case 0: return {batch, seqlen, heads, dim}; // bshd - case 1: return {batch, heads, seqlen, dim}; // bhsd - default:return {seqlen, batch, heads, dim}; // sbhd - } -} - // ---- main entry ------------------------------------------------------------ -// q/k/v layouts are determined by i_perm (default sbhd=2). -// Output layout is determined by o_perm (default bshd=0). +// API contract: q/k/v have **bshd shape**, i.e. 
q.shape = [batch, seq_q, hq, d], +// k/v.shape = [batch, seq_k, hk, d]. The kernel reads strides directly from +// `tensor.stride(...)`, so the underlying memory layout is whatever the user +// arranged — they may pass a non-contiguous bshd-shaped view of an sbhd / bhsd +// allocation, and the kernel will follow strides correctly. Only `stride(-1) +// == 1` (last dim contiguous) is required, matching flash_attn_func. +// // sink: optional [q_head_num] fp32 tensor in AITER post-scale convention. // Internally converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim). std::vector fmha_fwd_f16(at::Tensor& q, @@ -163,40 +116,35 @@ std::vector fmha_fwd_f16(at::Tensor& q, float softmax_scale, bool is_causal, bool return_lse, - int i_perm, - int o_perm, std::optional sink_, std::optional out_) { // ---- basic validation -------------------------------------------------- TORCH_CHECK(q.dim() == 4 && k.dim() == 4 && v.dim() == 4, - "fmha_fwd_f16_asm: q/k/v must be 4-D tensors"); - TORCH_CHECK(q.is_contiguous() && k.is_contiguous() && v.is_contiguous(), - "fmha_fwd_f16_asm: q/k/v must be contiguous " - "(physical layout must match i_perm=", i_perm, ")"); - TORCH_CHECK(i_perm >= 0 && i_perm <= 2, "i_perm must be 0, 1, or 2"); - TORCH_CHECK(o_perm >= 0 && o_perm <= 2, "o_perm must be 0, 1, or 2"); + "fmha_fwd_f16_asm: q/k/v must be 4-D tensors (bshd shape)"); + TORCH_CHECK(q.stride(-1) == 1 && k.stride(-1) == 1 && v.stride(-1) == 1, + "fmha_fwd_f16_asm: q/k/v must have contiguous last dim"); TORCH_CHECK(q.scalar_type() == at::kBFloat16, "fmha_fwd_f16_asm: only bf16 is supported"); TORCH_CHECK(k.scalar_type() == at::kBFloat16 && v.scalar_type() == at::kBFloat16, "fmha_fwd_f16_asm: k/v must also be bf16"); - // ---- dimension extraction ---------------------------------------------- - int batch, q_head_num, q_seq_len, qk_head_dim; - dims_from_perm(q, i_perm, batch, q_head_num, q_seq_len, qk_head_dim); - - int kv_batch, kv_head_num, kv_seq_len, kv_head_dim_check; - 
dims_from_perm(k, i_perm, kv_batch, kv_head_num, kv_seq_len, kv_head_dim_check); - - int v_batch, v_heads_check, v_seq_check, v_head_dim; - dims_from_perm(v, i_perm, v_batch, v_heads_check, v_seq_check, v_head_dim); - - TORCH_CHECK(kv_batch == batch, "k batch mismatch"); - TORCH_CHECK(v_batch == batch, "v batch mismatch"); - TORCH_CHECK(kv_head_dim_check == qk_head_dim, "k head_dim mismatch"); - TORCH_CHECK(v_heads_check == kv_head_num, "v head_num mismatch with k"); - TORCH_CHECK(v_seq_check == kv_seq_len, "v seq_len mismatch with k"); - TORCH_CHECK(q_head_num % kv_head_num == 0, "q_head_num must be a multiple of kv_head_num"); + // ---- dimension extraction (bshd) --------------------------------------- + const int batch = (int)q.size(0); + const int q_seq_len = (int)q.size(1); + const int q_head_num = (int)q.size(2); + const int qk_head_dim = (int)q.size(3); + + const int kv_seq_len = (int)k.size(1); + const int kv_head_num = (int)k.size(2); + const int v_head_dim = (int)v.size(3); + + TORCH_CHECK((int)k.size(0) == batch, "k batch mismatch"); + TORCH_CHECK((int)v.size(0) == batch, "v batch mismatch"); + TORCH_CHECK((int)k.size(3) == qk_head_dim, "k head_dim mismatch"); + TORCH_CHECK((int)v.size(1) == kv_seq_len, "v seq_len mismatch with k"); + TORCH_CHECK((int)v.size(2) == kv_head_num, "v head_num mismatch with k"); + TORCH_CHECK(q_head_num % kv_head_num == 0, "q_head_num must be a multiple of kv_head_num"); TORCH_CHECK(qk_head_dim == 64 || qk_head_dim == 128, "fmha_fwd_f16_asm: only head_dim 64 or 128 supported, got ", qk_head_dim); TORCH_CHECK(v_head_dim == qk_head_dim, @@ -205,41 +153,47 @@ std::vector fmha_fwd_f16(at::Tensor& q, const int gqa = q_head_num / kv_head_num; const int mask_flag = is_causal ? 
1 : 0; - // ---- stride extraction (in bytes) from tensor's actual strides -------- + // ---- stride extraction (in bytes), bshd dim layout -------------------- + // bshd: dim0=b, dim1=s, dim2=h, dim3=d const int elem_size = q.element_size(); // 2 for bf16 - int stride_q_batch, stride_q_head, stride_q_seq; - strides_from_perm(q, i_perm, elem_size, stride_q_batch, stride_q_head, stride_q_seq); + const int stride_q_batch = (int)q.stride(0) * elem_size; + const int stride_q_seq = (int)q.stride(1) * elem_size; + const int stride_q_head = (int)q.stride(2) * elem_size; - int stride_k_batch, stride_k_head, stride_k_seq; - strides_from_perm(k, i_perm, elem_size, stride_k_batch, stride_k_head, stride_k_seq); + const int stride_k_batch = (int)k.stride(0) * elem_size; + const int stride_k_seq = (int)k.stride(1) * elem_size; + const int stride_k_head = (int)k.stride(2) * elem_size; - int stride_v_batch, stride_v_head, stride_v_seq; - strides_from_perm(v, i_perm, elem_size, stride_v_batch, stride_v_head, stride_v_seq); + const int stride_v_batch = (int)v.stride(0) * elem_size; + const int stride_v_seq = (int)v.stride(1) * elem_size; + const int stride_v_head = (int)v.stride(2) * elem_size; const int sub_Q = 128; // ts_qo: Q-tile size used by all kernels const int stride_q_tg = sub_Q * stride_q_seq; const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout - // ---- output allocation ------------------------------------------------- + // ---- output allocation (bshd) ----------------------------------------- at::Tensor out; if (out_.has_value()) { out = out_.value(); - auto expected = shape_from_perm(o_perm, batch, q_head_num, q_seq_len, v_head_dim); - TORCH_CHECK(out.sizes() == at::IntArrayRef(expected), - "fmha_fwd_f16_asm: pre-allocated out shape mismatch"); - TORCH_CHECK(out.is_contiguous() && out.scalar_type() == q.scalar_type(), - "fmha_fwd_f16_asm: out must be contiguous bf16"); + TORCH_CHECK(out.dim() == 4 && + (int)out.size(0) == batch && 
(int)out.size(1) == q_seq_len && + (int)out.size(2) == q_head_num && (int)out.size(3) == v_head_dim, + "fmha_fwd_f16_asm: pre-allocated out shape must be " + "[batch, q_seq_len, q_head_num, v_head_dim]"); + TORCH_CHECK(out.stride(-1) == 1 && out.scalar_type() == q.scalar_type(), + "fmha_fwd_f16_asm: out must have contiguous last dim and same dtype as q"); } else { - auto shape = shape_from_perm(o_perm, batch, q_head_num, q_seq_len, v_head_dim); - out = at::empty(at::IntArrayRef(shape), q.options()); + out = at::empty({batch, q_seq_len, q_head_num, v_head_dim}, q.options()); } - int stride_o_batch, stride_o_head, stride_o_seq; - strides_from_perm(out, o_perm, elem_size, stride_o_batch, stride_o_head, stride_o_seq); + const int stride_o_batch = (int)out.stride(0) * elem_size; + const int stride_o_seq = (int)out.stride(1) * elem_size; + const int stride_o_head = (int)out.stride(2) * elem_size; // ---- LSE allocation (fixed layout [batch, q_head_num, q_seq_len] fp32) - // Always allocate even when not returned: the kernel may access ptr_LSE. @@ -325,18 +279,16 @@ std::vector fmha_fwd_f16(at::Tensor& q, size_t arg_size = sizeof(args); // ---- kernel selection -------------------------------------------------- - // border_flag: automatically detected from seq-len alignment. - // q_seq_len must be a multiple of sub_Q (128) and - // kv_seq_len a multiple of 256 for the non-border variants. - const int border_flag = ((q_seq_len % 128) != 0 || (kv_seq_len % 256) != 0) ? 1 : 0; - + // Always use the _brd (border) kernel variant: it handles both aligned + // and unaligned q_seq_len/kv_seq_len uniformly (border path is a no-op + // when sequences are aligned), so there's no runtime branch on alignment. 
const std::string dtype = "bf16"; const std::string arch_id = get_gpu_arch(); CFG* cfg_map = &cfg_fmha_fwd_f16; static SynchronizedCache impl_ptr_map; const std::string kernel_key = get_heuristic_kernel_fmha_fwd_f16( - dtype, qk_head_dim, v_head_dim, mask_flag, border_flag, arch_id, cfg_map); + dtype, qk_head_dim, v_head_dim, mask_flag, arch_id, cfg_map); auto it = cfg_map->find(kernel_key); TORCH_CHECK(it != cfg_map->end(), "fmha_fwd_f16_asm: kernel not found in CFG: ", kernel_key); diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co deleted file mode 100755 index a166a10b007ffb7064ac560b79cf889d851e48bd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 68776 zcmeHw3w%`7wfC7bXChi#fm)3)z<`K6LkQt1;_wtWh=@GIN0R{IAx}dry}UDgyKU|9hQ1>`X!kc-#DJ z<@XEgti9J>XRWo@Ui-{Bd+l8|Y5HV45QsXK{0mwQR=~d|i2B!69i((>h-D+K4*0*L z^%*M*^=zx@GGWqZreO!r@*j)!Jyk)*f6udcie1(euJPH#&hsz)``JL-(oVP6`px=X=_2aa*87PxmUSK4@qTCekM~mcq`>@_ z0S}!o#TshcpM&CB)I|WtT{H3OnR6`b&Zmo3E?T;_C}(+b$*STNYwy4BiJYR6C2QZX zdTxC>r)Xj6x}p^YMGF@euU)&Rq_}8N!R;l>N{dmmXi*8u3f8VET3GzeB`a1hC|dDE z&Z?r7#ZRm$Eh)J6TRE#2FJ4<*YCVy&cG>a;CI&%ucOZa7TgaVqI8z$C~1AEE}5VGx_#y1#63MD_Bvys-Q^o zDLe~4Pp>U4S+=OSK-i^OtvL%;Po-IvE;%XG!Pr@_`c9hFQVx=|SO<5U1+(fjvjs(^ z3zr()5@*5fi6yrc87!*Lf`zvEa@B<=l}nxlFKwyi1sRsAKMQ8sQp*c6E!A)q+@4sy zrqr<1k+Wg3Vr}tB7B2*>v*59J`TFKV<%K}UvtaeqZN8ZoEn8`d6*&uDd)G^kWw6RU z8&uo&`7Uoa-~4^M$~gvtXw+UEOpL zz7U9=1v9Pb>WoqNLSV;PaMPNuZmwzKOxT^>qd*nswd<}gzD-XnF9fRPuK(*#9qLwY zlUpTEdn86ld6z83=Ze|;-`TU%*n(V~-vYB;U*XV)j{ZZuSryB+x}-A#4N z^;2i0pVVHkt<&D>*_ARq3tY%2|8I$_1-eymsZ;-z{hPqMDp_tClTZ zU9wVXQ*ct9!1r_3tu0<#u;>oxv}|F)+R~zh%b&+nUs?nx{GFWJi`Ew}E?Eu3(ms)A zb50s%P04Eho>*N{uyXaH;=oh>FOX6gySVg6z?LjrH3)!z_-Qg|(2~U)hU5*))jw6% z7niJEwtCeA0sXs2e(p5;pTDy@2l1Q|Fq|8-#*e&9J)Z&mir zeZLy?{r7v`8$>bx^gmMswzbaQ=pUge`b!JcDpbFwt&Un~{kyt_Hb*Y7tgEc^lNF%p 
zpnpKNK=*MfIyV}t4x|G2rc#i4u*J6r?^}bcci;x^TRGM{5x^K=E+OipfYG4!&P?JR zt#=BEce37D188-&-ifgs_zu7%V1hX4s|it`WI13xp!Henod!biIRY33gk!y*1ndYH zy9D1FiwIF2wy<54eN%fC=K{4_KPAAG`!{%Z_F34_jZ~AIOS5tNiim zirTofwbqU~)c1S^JNhuf8rA!f-*WqP7Fd9qaDe zbFX%Gc94cn^lb;`p4I8#O-`ZI_%zC6YIHhoBpBMA?|*S!QJ~eB|`@~ER98J8(yE!>xf);_xjoPE zSe9~}jwe!O*V-32UJ$l+92@;V=vDh1TkMSM4yp|O&%8gZPKRlqIdA8t&zzSv zgFNhW9XoW+d3L#zrOILb{WkC*U?_Aq_{R31JKfni(e?dk9Be%a{J zvDg(?Y^Hx-4FA^hrY2Nuvk(_;^u1Lcz-%)05c@l3pXagv9QwysSr7QIWwB4PtP{1P zqZj`fUBaf;NdKDs`yA?G*1zHJP;Fw`oSlhl=I%^Qo43=#AWmI#gMv`x-c)Q!rIl-2 zsU=-wR!8_;xwFMdg?3uKz!N@>>B-^AayYI~-7s&UsV)e9b;N&(-Fwt-%;UfVBCU8?E;Rax&;Z$;BP z(R(|`(lqc1r9z6PPpXfq>l5m;{h`i@+DIyb@~ch&qHQ!Fc#4p`vdXP0-2X}Z?SZ`C zR`PaL_+FGfg}ses^kArAjxyoy=BguHLmvae#+ zzvKGLwO8z^xhiGtsO^|Hdwt5fzqay6Z=6V34qW5 zg0p}Ag!z^8Q_gzVDo31qE+*F>;QFKuUa!3Xe*bCP@h-sF z4bI8(Qudg#RH`f$vvWQ2joA^8v5znb9)!BR9jf>2&`Vx0^t@-!aa8%&LQY3JG-eRn zel0k#jBN?omQc5ijZT)uMjyam9sUmDuO5Gg@OK!0Z{V*1f4{|FBmVw~za#j22Y>J5 z?^v0VZFDuRpTS=Z{`TST40%Gf*hl1{Z1NF#Xq#xL=PS-rk0DlZ4no*KER{Lyv2#Yd z)c;}3PmiGv55{VKO5=z%>~G=^vmUte9mGQWIh6l=blC)(uz$Kgn%w(>fSM!k%*x8; z{J6`PAp%x+!Z#_O9R!WzNkq z{(d zmsyFn-Q#a8SHSxD{*yM4_-a|le)*VXB_aZMM+H_D3fvnLSe+2~Oj2M?gTQ@r;NKRm zF%4Hn1n!CoOcV;-9TQlU5V$ufu)0CuGkfNIbRMZaADxGl0*_wG9?6Kn15tr>g#r)8 z1lA`69!d&4+#v7`We;4#9yJkx`=SDqg#r)61lA=49!v_XZxDD$*+Z_+nuoH-N918y zN%ZZ%?5ADA)`EkHPIi|7`nxx-D{);F*t-8hmIv%EPgV&{C9bU}<+`e8Nm$b-dOi@= z^i`c^g*APmQ%TtI>8k@J;gBQzs{#*%_lAUjbzoNbsl7f=&^S-Ybq(5T`sz+6>1)7W z(^rFL7U(G_WxjrvFDI^#UhkJf<`0kh<+z^pp|4lZSz%3|fG(Oo0sfkv^3V0@t03pR zxhLg3F;DogJayt^`CQnO@>GM5rl*{mp5>aJ<+T4m&?kNS=i|D@w?F$L>D!+)HNO3M zUDxw_%KoHJ_Iyd%pY(OG740uPzo+ca>-xa=ls!pb7kEk8pY-+U3))lh1P$%a>%*N+ z(${x7Nq-ppHGMs3Xn*pd%(N%l@|yNk<-Yw@xo>}#C*d=iJ_%hkeG>d>f6^zxpY|tx z9pt1vSzZS@X-{2Fd+Ku9pXK%7qvD|_O=5HvI-O?O0(Vt@k?v4tqDipXkCa^jo@R_8*ng)R~$KYDcF#>l*1ttmw?v4qp zN(kJW6jj#=!_F#-=n1=bY`JQx#LpAdK`De!QEK$&B3 zt>zek`=SDqg#r)61lA=49!v_XZxASRj9k}51fDSuWsi@@L(eZ`Fdp^%G7;B|)toOF 
zi}d_L8a==8x~k{fD!!3E(X(?mO<#pr#P}vW-&XOB*VTcyReU3TRiJY>&M)L&jabKc zrFepd@r~CtoleqMcRER51OA%68Z?Y=x ze5O5VD?PuEM$a$2uIriCjq^L{lRalC`;)#7wxa!oXI?kX@4T)L*MpCyr<|Ie<(i)5nZITF4&1b9 zZvQ=Z4TufcoRfN5{g%P|mTg`6TV@L9mnWP3mT9%kj;3w8WOpf_eZ%bXw*1aAf6KIb zkH4{8_;t@;N!jn0v25F< z=4HRw*1XL6#n%0fHSSO-5U|2l;N|eSoh^mqcXc=5-87E%kllfI*Z4UBs|WBN8b3E+ zMSw>%{;7b~6L?RJe>#BIjDeSXX&k@pF9m)naLUhn9)}DUqU^#CKC5In4jDcN{Bs(I z3>N{vNaK*)^o4y>)+Ginn~gFVi^I#r6T-N8^_VtjmF4uJL{Wt1s}r8Xpj_`T_5!@j(HrKk)t< z9~`g-03QH6?GKPa`K0bI$e?^uC ztqc74@&UePpnnJI{{GO$=hMK;gEW5J#%I*P%Q+fP+4#&FczLkK13@bnc&^5SL2C%` zAsP<_tvuj)8b2p!4Fx_F_&6P3AcN`C$0391)5jr$>C?v{gXz=9A%p4D$0391)5jr$ z>C?v{gZ62DBU59$9cAFRw)U1G_~WhnF-Lhbm5&_tu2b-sH%q}2-t`JvWwRBu%jPKP zl+9Hzt85;i6+G!v`RL<(-RJ1fVZeuJ9Q`>Q_;8J*KSux`p>g!*NZ=zij{Y14e3Zt~ zpQC|~);Rj}3gA}&H+{c;MqTj{ckgd;)OOr%b=5PnmvA zpECWLK4tnfeaiG}`jqL{^eNM?=~LRTO`mEn8BCvQFB!B?;im3 z_!Nz!Kc@nps&Vw^G~m-Tj{bCkyTDDKGX0u9W%@OJ%JggclDbug%Q>I_jr%b=5 zPien4eX6}=Fny}MWY9j9R6cUldrrY)-t!8c@P4eIRdzr@yX+?lI%WT&U{={r0ZpHp zjy|5Q`yBmw4e)C;j{ckhe1^u+pVtDvR^#Z;nZRdi9Q}D6@ar^={+tDTmd4Sa*8{&E zxam`-U(=^dzot)_eodb;{hB^y`ZayZ^lSQ*>DTlr?boJHwU-R0Pqmi}+Nb!Dm~rYw z1&?_zDR{#BHwCS-LkikuFDvMjy`o@N*{gu2Pt8Uj&(?j8{+t7Rj>gfSbAiv*IQnxQ z@Oc_Xf8GH6292XX=L4UwarEbnz;Dzz`tv5>Hvu<&%JggclDbug%Q>I_jr%b=5 zPnmvApECWLKBfKI^r`le!St#2k^%3J)Vn9bO;-O`!dGl`z6jXceH4pp`oA*prXM*K zfceW`ceVhI`NC0WE8xU?yM(p_PXFrY(3b#bfBVJ|+xHmvc8KjSzwW&d+Yc@}8e;nq zw{^~9`>V_Ct!L{ixfe{kSdCe(EFA z{<j0_idO+&RdsA1+ zPF*QGb*1domAt4c+h087r4ZZq823tu?JvLXb*1~@B9VQ>Z6f zw!dVUwC{PfwC^`Z+7DSG?MJPX_T%o7_ER60_Sbz&+IOAwlDhAJ;@6d}uh{aQva$O_ z=PZ72|9jVw-)jEebyVXL@80Q`n?5|M`nlp$U9+}#JwEVc+0CzP>Nyc>M?1XyHB1I^ zMY}QW=cRvmUHK{7SA6PE18FlUyZMz;v^$D+JJIeDReodIch0n(?vFEVmk<5ogRdG? 
zo4u&C7uP?6q z;<~###_Nyk{-EvQj`arOdLYUpZq&=cbq=n3y5qfET<7AtmpjqR!*w3$FLft-`MAzU zd2e@$Hyqc)aebLP%^Qj9k+|;TPWMLRdNk-ScV~EGa6Ja)echQJ?RX*Z3tw!EUj%p& zU^ZYjU>CqHFCK}DevR=9-M*ga*cktu+usvC(as&{iLOWD7rQy0=nFn>uCH%n{1P|M z*B5-zucGgfcvpA0uWw_V_7Qy>dnRVTwD)xZ}o1#^$oZl?ym4|#PyA!AK|W2zAxjYG0yQNab!iJpyd+C}tijMFZn=aD$=B6>E)X&2G6F;2UP zo{e$ZMf5xpr(NjtA4j?A>mNtCGrxg!ej&z}(`rwC3t=f6bM^-BE#6g#!1+ z1Xd>mK9dw!(;!gRT;Uq;?J6SzcSQvz3I*uW^PoRlACU+1 zi?TEF=l&Xbdsg7}AKrBHP31SvclHnK`SljoTFdXoj#yKBGuDT2y^~%a$h8##^yMOy zbM26YwKvutStn*&{SJn&w8JCaHC{x@4nzgkg{_sZhOe-lQErJ>h4x%q!}U@Y*54Eg z&B1P<>3)^%jCM=Cn3UBg1Riq0J2J@*U*WFzo_F#>YDYGSQY9a#nnKC=@Nw&p$=%kQw06f+qIpmT+iF<`!d#YcjMY83p)2ieV=P>C+fyLwHE5ItZU-<=o_-bxyMT`4i{o>a&o6=XbaNmVeZAKQK{Y#>Cn!T5yeNKVxOmTO3F)6D~2z-XVoeSTd>OP?2IOWo7HIqV9(+BNy7uwD=_aU!A z%J$K>^WfXAyVGME&3?z;V&2G*; zdd$Y8tSTXJFMXpge4FEzahy6MH$mdK&{T8o(c`y4%ATQb^oOr=95aqfAAC>s3D=qu zM=pbk&6c!jvmT7o)C2hsDoz(l`!?&rI88l}Gtq4iVpg(^dOU@8%Kng>@%c>UW_)IQ zadb-|Mz>Y&y=X^$k@F$(9Xel*KFeETJJa3DCh?s{{E!2aj`g|le~xv|Z+c8~E>iqBh9ED;S04CrY;%6oeWK+{HDC~Ku4a)=SjzYEq@bvO@4mt*Ycb4YF?(?oD)o0IX`T|J=Jsm zpXFJezBvZIImg}OMWpONRAAje^wF3v*v?#cugBj46=Bv>$(u9iH!g&ufsfH*(M}dZ+E&=su(7 z4$7>aQxHon(C~Sl{g@A*hudU1@H@Z~nl{V* zB>YPH>g{fyML%*LADHIEbuzc%Z^d4zf(&!jt__uHxm=MUBi@J`t%I+Eq!z57ws_H&Vr9~zL<^qg3t7-FCY`2=^3Zz`uuJHKeoRS?FYjj z#@~gw?(LN6a(qn0_@JLpUWVTWfKyi5Va8g;+n)=_YZ}G@*L9?p@B5uEop);TwjKk|pv+(5oZeSRl=IXLb*2j4H_{GfPq zj9`8+^~%(PzXcRu*qpxwGWE*TgL4IWBS%Ti6`6Wv>cKgK{E^3`=8R0eGWFoxk>-!M zO#5oRI4798Xug>`W$M7WgFKNBrRI)Io%DOPOkFsa@E*vWQgcbBZkf8AFz*A|Gj%g{ z(R?|VsJ#l%c02ARzcG&dau;&-y9Uqm^3eZN9<&|HI`Yd)ENgcJ5Oq~UQ9kVv-^VdF zIFCGp_I&=~ZxPages4bLrhgUhLI7vP@ta!OR}-TCnc*m(`3?Mz1)TLQ+u^$qu z5}~hAV+mt+BDRpOdcGl!9J%g%r^I$G=@u1!RfH|+#(9UjA-_(|JB32uOgEOdq#NfS z>V}-W?tIS_llDz?V;S0>vY$8yseXbkoP!cVf5!d9xrls_2iTqOoEk)yX1cJvC0#ft z(H6)ZRCCf5=)W6(Xgk4>lhuIl*Ik|qhTjBqeiBr367@!IVRz0+zh(W;g5m!pd?6Ul z4&`Pw^7_S~nvYapK`zcmzbB8EgW*52{P%J_&vfu1;g(X{w1j9Bl1`Dn2(NWI@9q`x>g5fuaKO0nYlH%{rNxx?KKL^7d-|eaF(%HAmG1!Ik5&0rlvODJ^ 
z+NHB^m(H*YZPFPwp$(3~2Aq$`8~K#o`Cf`P>FnF2vu~5munBE&3^vemBj>U^f3wgg zoqd~hhE4b`OY2CTI4>#wus!FcQ|YL6;=Dxpk-OQQ@2XnTQR~DxiM)~9*`4pM*!L~z zsCD9;l;(}N&VFx6N39d*sWgB1Ci|T_vfR{B>%{qq{E;u(o$s|W`%&vioj50vH}Xll z^W7Hx+w4cJqt=OYQkpkp&+JF7qt;3Drcayw$9YQYr}g<;=49TC^^rI0^^(Ykyao6z z8b?0l=YfA-r~RR4z*0V``wKECpVT;HP(G<~$e?^u0H)e5J;}Xj`j*uhKYH)vX4;TI1!mwFdYajo)cow*kLR<6CU21bB(Y@3O76 zz}Es#ulI%wrcWP-45m*XhYY4qABPO4PalU2rcWP-45m*XhYY4qABPOur_<}b+fjz} zdhd3WL2?K-_&J#y{hZ8Aeom(6=VZn_$;sS|cLvNgsPH5wbF0@RClh*ot!4ydC)M8b^O_0KP%v=+8TV-=T5z=SJWgHIDxL0`M;YH+{VGw20W&5 z^k+Hna*d-uHv`|SarEb%!0!ZZ`jqL{^eNM?=~Jd()2B?orcarEO`kITnm%RvHGN9^ zwdqssC4=cx?InZuDapy)=;vf^@^dmhKPNNhNlxZw5AXF=oRXZ(tzMIyO!!m<`nW>( zIr?)8@GTlge{Kc7RpaQ-yMW)NarEci!0*;L`tu&(_h=mbxefR>jiW#B1%5AZ)2B?o zrcarEO`kITnm%RvHGRtTYxNUy9gimcpA8*%vj{f`-@Gof`{dpho`!tUJ{4($_YaIQ# z1NaV&qd)Hle!s@ipAP_kK;!7o2Z28b-1I5aujx~!U(=^dzot)_eodb;{hB^y`ZayZ z^lSQ*_G{Cp+Diu0r`k&f?NgGIxzW$b+~ntEdVWr3%#)nV&ECs?oRXZ(tzMIyO!(A8 z=;McUpQAq?2L7DTlr)351M+OJKYYA+c~pK31|^jhc!yaP*& z+T260#|lR_ZE8eW)u_jNSiRLZlH?fVZr%J(8$(wDF8cYSA(rEt$AO5{*NsK_ieLUH zG*RVvy&gJ(_SK`F@4eF@)^sMjkF%hCQ2kj2u{hPNPw%4}gd&4a9 zpnm-}_t=w#Z}!5knSLy%ei5l}re7A`pHi+wzsD6H(zT>tRNB`Owxl2VP`;{u&nrHK zLf=e3mbauI+q18$`@PvC1$|=DzL`EOr#=a(Z>A65<*I(~x1a~#<&v%?eUj4tl>J3M zs^4K>^-fsmo9WXa{G0U`+pGSDUAecq1zG3|Wr2&8@88v_YslPhoqqr2oRSbUNzZ#NeEDsMeAm8v)M5KOKX{_XQ`P5uu+ujSwK!Rh5Ey_TQ!r<0%bTK<&Jzcu+k zfIn(~BK_&?uVrVywq|!N^XcT(vT7L^!??za?UPfR&pCBdG3QK?Z}#*>=EhSN=Ek!e zF@JYN>Z{1Jek$e|Dc?K&tD6Q!rLLL~w9nwI1W)sMAM}T(Vonj78rJa|bo+RHrFHWw zW00?`=a8DI{u}}t%psN3l{x#2*_11Wa>STj38_!$IfHtsIYa7qPrP@a|Cr)bA-@VlcquFl^(+&Uq0pY#}sv^J@;Tv(7d#4mhh+~q>PNSw0RBYdCnUV zsZUTgUV~5YC~G34o)_xy44~q=(4N+YS?~?^Q$6N> zwo3@_)7ntS{~FBuDsKC-oyvwf&L{cYgBWY(wvYM3#w^BR`feTco`ZTbhdqD!jal<_ zA3#5T_sp!X^5Z~~@*wUttM|u19q!M0qBm%mqi)U(jQ>kwPWjWlk^7Cj^(w?4%2W@T zxc~W5wn6^7-v6As{(dR*#B<83elO$xFyv!g?_z!SmH3MtdOz*Y5402GS!a|#@?zuV zjB{#CQXaD$dTKkQ%yK`j)RRBTGxOs=Fz#a^?wl%Lo^fe*hIGqx$cA^WNLkWR~!DT}tlsoLuMo9CTye4gse799r@^WlRlK3E!7WYSF{O}EJ 
zH?BWQKZ_&YtC%Y7o9Gi2UUh_-`smo2K-_2hLZNM<56jT5i9R~ERw4eY*edOs=@XN- zr|b_MYpXFQs#q)ZP2^7s?-u(*$J|QHlPcy4Z4>z$gl7}^b?i-W{zvRw0Y6&--z#*J z-YQ(L!gZ1RJiY<>&pLc20NonYtwG%a_kdS|>k|0pLib-3o$43ZgLI{+D+S#mx6WIS z>-D%UcK=na@23AHATML$2Gngp-D3Bkw-MJH!EcHCe-#~LKo#W!-6qs+0^L%#Uaf7F zG1eGYYsUTfRL!{?v@*sT<6J8)V+#I{HR9>#V8}>18CyriF2%q{*7_klXNOZ(8oY$%5KVQ+89<^m8r82Ix%LOKqPS?0ckJQh{9J1Vd$hPoK) zZgmfNYHnHfc#lG<+q)Tco8gbk-B-MrlvO7LK2w3Z3bb3{9#(O@e%bRql2TW*6?I!t zx6*yxYml;iccbiXl&x~#P;vb5vNu&6w@{Do8I>|d8si*u5h>dh6_}7Q(irEM)A74z z8GI9Qv`}bv%h)**=U9wMS^C^689R+}jzt~6X&<&v3T?I6s4>nl+8|}mh;16<9Gg0R z*TFt2ehW?8<@Vz??PSL78q6aq?h3y)>!IUzlJg1Tc3bt(ak~!lF5|6rhI;6DU5|N~ z?F-LPZXK^{F#j@Mw^eQ(uakUELA!16r)}`f)$Xso?YQ0!-&*4~q~moR>_WQxP_EB)Q1<}%t#$v?dkEJLfv(hTOvmdaQ&t_Xm3*KxWoNuL<<;?; zGBRG9@|*H9UQ<3(Zc|S6JPx@`c}@9L?1a5cIZe43r09QGs2vXzqf_`3JNyOrJ(W{NxtKGkao*~EQbPM%}?`pq;*-zPFj?sverO(%!jke4|;27l^ zf9lM0=hVIuLVI8iXqcD4u^W@J^f`RY^SI#$c9>(AV}?5OtU8XFq|nySMSJEma7;Hy z*`awTYwE!D{?wCa?5TYveECe7wX9KJ9#dwn^;i7_nN_}-(6rgk zT;tF7Jj+k*DiRQ z9G^FVj$@AFkn8u!kLMlgeI*tl*MQ@ZYw=BA=e`n(Cv2(rmC*8=@^a0-;tAVv&Aur= z{Y=Ztwff}8^AfpM-;|$g^T`AIN`&>k5-rKkHTh{CSl7Ny@Rs(K$Va)#3t=3`S#*3Z zUxfZ!@k`s;>YiW5v+YopK4XtQyb3;im;0GAeox_;N#}40?cQRvU-j>{bGO^EEGA{? 
zv-s%SYv9}WxSh+G!$rB6W1`}?(AF$Q`!)YzJKNj~%NnFi?JH3N-@ezqsEloR7Nv^g zsONJ!=NFDij#bKxvh;a;^ohmrVUAbEaq7(TD^(m9+TEOc^q8g0XrDg&kM>^*-{!bw z9H-7a^HRlep{?fJqsK30Mtild#I5jkj$_7g>4Wd7K4CsEah{RM*lbCgHtWGSO+9$7 zCgXIWv~RN>jMLPEXK|{1CEBP5_mxoghumski8GX&@tN&;)+gihnaa&LO}(+NL|E-B z(MGwsuLSkwS)wYwBPVMK`mE%?Y-hWhU1shZL?17OkAKPSqT;mbC&;V!l~{^)rGK!U z``oa^cPx<8drPc`Z-3eCuHrWD$FZgNmRN>%>;Kz!cDRu;b8m?a@ay~CUMimRUaF11 zx5TYzv*G{Q&I4}mvdq0DHsW3nx_!#Hw}iSE#*yAzVmaDv{2$wS$n9H}xwizLiS!)8 zc(3kh#QPN(E1W|(&*?b8c+Y$A3{@TPS7A&r4sgEHuc4W;loj^BusrOzVe z+@{Aa^MBc%=YDc7DHPfRrMU0H+I=QKTzIfqbRo-3;IeTAmYax>Rg`s(el&h;hE^GG?DoT(n1OQ;9WFjad?v{4Vv zA=HECovLU3Gt`66`pW*0TRrPb`!?I3a|zq?Tvhd~-$uDPhfrVaEzym8OT^G$TRR2A zUv-D8XYXx@tMvWt@O?htQ%9cB$~<2B^nK`C`sR+~c9?A+fRA&|cnEU_pX*g$@ElZs zZ;72gze@09`$y4!HT+@oIYG6z1o^A6gXjA-D6jZb(76q8>jlB4drP1lpZUpO(c_t) za&V04y(KpIxSAJyKU3qwkFlE(V;ScvFh1z#Ti?R(1HilgBzsFJ-u_%bUfVDZ7-zR* zPGJ1F&u_m2?Kvk<_5`1$AUpXz0DhcTIrng$WQ^dv$Fa`2f&6(kE$0UEP46w?%fWG{ z_m)t+IYuD6sTbui_2BOT#TPc`9FeJ4rXHLt$eZWia<0hKD^m~78RXA1aye&Y>XoSn z=Z-Xg#AVu7>%}?2)J5~n)G1R3&K=~b&f0~{nL6or_?fzJF5x|RE-&YjOx-edVIFV# zK9D_AH&YkQmvf2UTVguyrG97Zkaa5DTcQO0fA?pDj)k&IxJE_ls@9@>+r_?*V{C99 znT_^*{;{~XgwXFT1>N?pL7W${^40r#1f8g~uOcQV7YHtb9ag7W2mf-Uk=bsqL z@a=I|Lh2HH-r{&x_QKfU+(I5a)0lG$=|I1m^CrhP=Rc0`q_nRhMEzdQZ?qNT18u`O zhBo1O$ed#ugkJV6z?fycplw)=_RlbG&_=X5ZNj;Rw%{4cYOay?O?0uO?XHN_HPeOj zjr40?dM_j5Jbzj3Eg|ij=@u1!RfH|+#(9Uj@$6>x8%F4x=~gK1o9V{+hr02+XSKJ4 zv~QwYOxm8ZpEw7peu6G)ZwaAqrb|+IpR%7g7m*Lom{xmB2z@hM8iao{T{tJv7Ce`l zbJ7>ke~*p`o^o#q>dkYj+p@QW>MO|A*1aXjTa7`)^tS9RLH>$f#q?9}Ey3~5@yTa@ zbq|g&#P-weEur}PbCS77LFS$WoR7$t=Ww@mZwd0|S>0{jTSD=N-A}c*gw|2-Eur|s z_HEr;g8X?dcw6?CAa9-<&N)f%!*Hs77dR)Sc_XgteHb$LU(kCmaGpx@hi~e=7+7xV zXzndR{@7bW?R|8*y(P$-XO(kK%IwFM_LfNVhU}-(QR}36XX?gzO6#Zf`CINS5k{WJ zC+B^)-=8?6Gkv@JoSza^CmJGwgYmU|-lp zpJC6l1p5KkXV~*B!T!MY8TLF&Z~*WD$el~)t$ombXO5@y*4j}9$%Xvnyl;JuJ$)({ zKBCXDr%w$5uFtWjPvrsE=h)Myh63j~_NGq_MA<+;XOm}S4gx+%vQbsQ=@_FbL{C;R{-ZZ_NGteqby(h5PWJF@L?K< 
zPYnk?T;r+#y)(y6pK31|v`@+25}%y+O`nRQzx6rx^r>;c^*Q$Rsqw(|Irj9a3BY-d zz3Ee9P&P*U41DTJ;8$uKJ~bBjSdGJ{t^$6Q#y^4#rcbq(4BDrXew>oMB|bUtTc2Z3 zpPGvP*5}yMr=|hd=h)MyT;MMJ!Stz#D4VE#1U@wh_#}0E^*Q$Rsq2CB9DCEJrlV}S z_7V8hHNdaYIDBdb@EID1PhAWAT8)1M8BCvQFBzKbE%7YYWm`k&URF5b?=A7{<2|fr zB)U+1CTJRe-;T|A4tU!gm=#m~pp=Y>b5T|FWA9OC)k=K108 zj+VQF!PLXoflCBTar(M(AQHQ^Xzbzqx~Vu zVV)t*_S%+wZKBpvX8P&;h{ThRD?X%aNx!JHuOnk}naH_RaKRIrT|MeKUP@-bCe-k~cxRmh?$V`&0Ip z&Z9`czUrN@&^ObkLHIY*hwW8P66~t?mZ0yOXWg6m!}NRe-23$260oPwl1^=&4^d^#eV}Lu+T;^SdAY zw+8xXxVuEpsSkI~qF&d1>!K{3v%&r%J^6Q?^iqi9 zXh8An>a6rDw!9bOxOAWBtmt+BFel@5^6MOqYWla&zcu-FE(hgjj>hTa*Et=OU**=F zMt+^!Q3d-mU*mN0>--MNPx{l@U*+XN_S4C&^70__>Flj-tzsBtU<^y=;Qg%moKyER z%sGeUn>~HeJnx?6i21uCQZIW;{0wu9l<(c%_4vT3)KwFLR_%A7`7q8L{u$;Jp^?2M zgl-@6^1q0^8#bAHOZ*Hz$Ta|Cr)bA-^!-V#Eu^jHV^O3l3`pgZl!d^^o+oo_QscvKQnM#fq8Sqf zbHB=W7T%||p^pDGnDQA+|LwbJ+ z_DKtT_|2H{jXtsmvY(0%>DV{)jZ9gz9ZuC&+vAM)mv{|+cubzP!^n*cBX6=hV0S>4 zBbFVANPS%da zuVH@)p*h?ab$wCS-5ukx4D@gGM_GT|qlY_I_Lq3=P1#=p^?a_r5cLMRO zZ8hda6>o*UiTp|7-C}>}xLb+2QpH`NZ6bez@N6Q#j=u>$10a45gntZ#??v3GmxJpZ zT=#Uxr}LGo_*?+GT-4>Fu9rK}%fodZeDhLwaynnR8ulPvKI-y8*V~=q4afCxTwmr+ zOXn*mATML$NYsr)T_1P4HyYQY!S8Z+Mmk@)it>SO4C=;!uCF^&qxw-JKPif8{-_a>1SX*e;}@kJsRU2 zv!Z`voMSZo?91m3(1|^c#5q<)|He4&NjeoH=wqNWWjAF_KkGt1(wVZGvZkMLAs^$c zDZ43a`q`Fa19YbBrmX2_TFM7HQ+CE;Q{MFREd3s|ru?S7>NytjnR4s3siwT@ITrGn za+`8$yP5Ku@~Kz=S@;~L*Pg1m<1P5j-IB*P7QQnUzSGa0=5> z{t|Cte+jAEJ05l8;g18{xn4}lsuKdAnTWcHXgAQkLH3t;3;Rn*UCm_FO-9`y_eQTl z%Jxk`*%XxJxHrrG5^udJ`%9o6xeApsMjGQBa}g=q6&09}G13_4nB%jmnnT}$Z{oSL zP-u3`*f|pCSd2+oRYKri89R+}jzzZD_F?;^&{T_!8si+J4N~@u*rqYgv6+5GhJ950 z7Mixp?Z@qU*vX9BHJC?K+!cOp)_WO3sGEVhA?^Zi zCa!1VI?r8{j@R|D1LwmyiP)1#^JfBn~S<(?ycSpxV{0` zJg=VbJ?K|;ln-<_qV7iU<9YR5_bg-SNL6v8m%VZOS+-_GpZAY>NJkagIeDuW3Wli9H(Q9FwB|kvQ$C<27x_ zIBd#p%BtfvZAdy(c2iazuPGnnt0}uFtB%){k94N&rmQ+%EBQcY%FcLg%B$lwWn{cI z@N|pzP|q#zfSD8V_Eyp$8{hp z_H2MJ!LP2Ujayr5?U?fpu=5q{=);H)bFK5|r<_;{1kT;g7M$#T@50dI*fVC^{^w@I zYSnyT#eid+12Q*9?@M9sLt*YWVeT(s?jvFDAHkd;y&r(N2Y{LH&wJ?{|5N4kn>qZM 
zIs42X(0eMdD zj*X9ttm@kkej}SX`1C2Ab8qI>o4NF6&OCie=gONo&g7%>n#~+$^3i$AW{xuX=-gy8 z51I5j_t?xcX1q7^@M&|McW>s`)4y~+y_q*ppVj&DX5K9Mw3H`HJ}u_OqP(R%SkkwY zV@zNEILb}G{y565eVqQK^NY=V;!P;mdBbL&FmoMsKCqbw+*n0a?Pr`XIPHgko|9APs@kbZ0CJ7(qvX66Et-ptcHUCtlJ*QxR_ zIX3>@bmF-Ad(%meDKpUf@eK}75k@V&F zSqu50yRZinA-)5obEPAK?~O`*I`_Fy@EStU@8dIA4A=Z_N{IK@>DK zi3olmD)s60I)#GQ6OxCrhm@VQ9G3A7pX;O4JJM$#@1fp>KKp!+_wietYq!j`Sp5Co z60NzOiF~*oiF~-8h!o0-(#GP#$4l|-c9^W;5~FEt+o%p+p~_} z;i(V5tG^Fe4?Ao7@Vlns+yDXWYL%^Ay~?1ArOD#ie+7$bOHMDT-A z!PDPz3k5$+n2s^HRxt(;vZufMB?Lc^l=}3yyavJR2|+J0M#^d|DNBC~jR<}qD)s5_ znT3Ma6OxCrhm_UCr0k64XcA*y^T+#Z;?LEu`7s9X!1Wla#JfWk8(N^%V~F2q>GS;7 zs$z^k7WwT{k0E}?RD66|es?4-zboo7#BYbB;kQBZ<99#O@VlNKL#^@AW2iM6J%(DN zNynJCM2@<*L=JxAU~D*yZ#>#QHE;Q`p#@rPAASo_F~+wuzfq_d13PQ`@Ex*>F+MHd zxvCfgT5TV`t0xWL#j6buzRm3DZfyT%J+KZx)Z zwlm5t@w(x<`&G6x+Aa0?n>;ef4qxG}_qwAzGQoDnxEs74xbAhe?Of?@^!VpJpuD$hJ6E}$*Av%$uC<-08}s;krZ4tn8s~2IdZE1U^|mwKtx){?V^5|D?pE(o zl=q)!I}_cz70-d#hh~zyO?|uI?}G~m*GJJJZ~UluzZsr(AcZcR9-Qme|g8w^H%R2cK)) zN4>r%&tGmkGu&N@&v5X$*8Q5-59PyG+s;fkq4d^kop?zCOVCkK3J7mh(~7xKvkAKHauP}_y{d7!6VI0m&{=*u5Rx#`y*N4d3+ zGoFrtfAK7#f#{DhU$7mX88imvW6SL@&k7oZ^08&M!?Rt+qI~>4c9>_mxr$1%n+tjAbB%7@=3V~k^1kFnt>9}PYn zV;sYJjEzJ&`ErcWZ^y%L$$KJrlh@>Bi0yz=R$^WQoCe+;C)52s^Je(;GkyA5pl2-L zSmoHKtvS|dTeeHTi}m?ZR_=+%vGMn&6UWWpn@)O6@m*{l`gO{KwlfrP+9STt!ngB* zPyZ^O4*+MxZJf!u^3|E&z;glMtZ(^r7$YNr^9+JffIMelG$79lxB`%81O!7)7T-H| zc`g`!6VUldFx;N+VmUW47H}@&m{tA^zh_M0*kugi*k#P1T{yOB7e$Bh!!b^~aE#L~ zv>)w4`_V3~?G@JA-e8$~O8sxWi~UE@m*bT0V*gR}Rqq8X|2+wydQT#FRYdT;QNgPT zQU6S#;5CH6&awvP?Txa#31Q7bA&o_?2u2q1wTv(dU+R% zYxSN45OSo?tWF4iASw0fGpHK`uO|e(yeE;e8cWL3=Xgg1KMYc?X|2@g5lkWj|=DK>n(E_c055RNF)q9eD!iVRE^F091 z|JLsTD*O3-eA+6XmS-^Q_W(TKTD>RnX?PyBdQSox{T`q-KKeaCYc%>jKx;JV_aswA zFP`D6-dRlb-;+!|X&;^^tln?5K&$Ojn#UO$d64F;bR36FDMyf)F(6^N*u~?=6)2 z^!dhWOi5W~OlZ<)KPLn~tsKf8I1^LF7(f+c1h0z-elRL{`rOt+!4DIrV+^iUi~)q~ z=`%hPf*(jqefr$a2EpqIK`${z%4#erOP_lh5&S?@>eFYJ77AWZNFK@_QdSd_vNM*W zNsO8A$C&xz&(-t&7&HIm7^<8ve$6vr>DN5dmA+3}J%%d%@x^mhS;uoyRgA${)MJQe 
zmU4XX{71#dr>zF99z#4Qlr%gClr%i!ll*ExqsLHdeDoMd zU*^Y{Whd>!vl&%vXn|JShvyzrAD&-GK0J?5#TeLG+lS||DLy_e&wEq%1g*9Y&*dWx z&)p+Go~cI~o|&iZ(;6RbpVnx!eOjYYzeVsaXwvzP7n@X3ZqIW(mf_sxjwe!O*V-32 zUeFr&s)JwCL2J;f_Bpm?pW{r!4ri|xsCY`r))1>pVW{`({ zu49MJInOS4vQ&A1Wx#`gp->WhWBUtd7VbQEmb0_Nb)lV3$cZ^)#>8S*Ub)#$I6Ix0 z_$xeFws@+u^IQkCQ-wB|%Gw#j8ecmR+L;x~ie*(~?aXpOKXz;^cGXpzwQjc6^<}F^ zzjvx#GGNBfE^+>}*OYauDs!I2ftXu-)l3fnlN2S_~zZYlQF5&_uXub9MXg@b4Gh%P4dW5Zn?D!?H zW$Ma%Q>j#-N2&*`Y@N8WN003>D}i=j$KT`l`v(4Yw-yINNEwOL;FoDTRq&kNuz`rwZ{I93PdTi|#8bHQx?_kK2i+d4lB zW^)f{=Y?Q4f7?1Q2DANjSJ{6g{#r1b`y)HQ3TAVik@K5iw!iKwo9m36w}RQ+6WRGw zF#CPNzXY@WbywNPh<^~wK0$aqnB9Qi;tf9zTE78&^QS@UO~Bv&JZSv~;D7!iX#Ezj z@!x{h?*M=QYS8*mz(4*nX#E%9pZ`5*H3A;_kD&Dy;M@Ndw0;lx&L4u-9{}I`@1XTZ z!1w=O(E1bL(f% z>BAp4p2Hj`?oEB{yTNRK%~tjs#Gegj^F6__f2JOQz8#PKB&h6o%(vq)-;T$8J05#E zsO)&mx8pJ2j>mjE9{XKT+3}cf$78VLVaR*<`$6k9z}J5mv|a~%6$t-m zS??YZnuiYy&BJd9&BJIznupQm=)0$x+cP#1i5AVWr zfGM*TJ-N@8zV174(b9kp=ef`$3Rh87LWA3AD8 z@vtF77B4Cqv2Z~y;E1(hY2xTbLq;qdR#cQ*JhZqdZ`8s? z#re5;i-(O|oHwF)=%@w5bBE>^FJ4@fH=-Yna;kr-U+fN6412ko|A=)Co?~Id_9p+v z0PAGI3pz@1RM+OLT3uRfR*_@+xfYLSx*gv}gg~ zQk2V;KT#q?*_LBdH`C))!zn%?$1CsE!N>JD=5Ty3m0GRa>+z~#Z=YV*>-K)Jg?COE zwM|$* diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co deleted file mode 100755 index 82dec0c14d94b33180709e029b9573541cc927f3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 54176 zcmeHw3wTx4mH)}P=SF;t0<{|9LN17iJVFQ|JVd<8L-8OY@(^{J8%Ti2(-06Xy*VJF zRmP&lm|E<;wb%xXHd;E3Efa1noyIXvqk|fiX;Y_<*w;`;ts=wy|9*R&eK@&!g46kb zw!`;b);@c$z0O)|@3q!BXRqvCF@4qyGb1Bn8S*b+v=|v~nIPhp6F-&0%pmI~7`^a+ zZ{utu6V>b+k>++V=@Qehmy5@0eNVTl4T$uXOAT=|^^g6)f?@PgmxeK@Ep&b$*Bbi! 
zkyoeDZU22+xK=I=C*T&`` z25oyfr)Wv}`l6MEMN5{HtXsFYtfZ*8@bV5qLu4Q3Rje_E`CbT3m30ny0oM$ z{Wg2lZPu(_c}KT*xnAF;_h0fZw!TaFFL{>*`YyRAeU}~%%(pl&vu>EKZxreFMt94I zR(HH#U+J8&Lg$<6i7Ygkt$zyf~iL56i9vBBek4H;<44Z;}nS1dc+nNl`mQD zqn0=YYELY?t;k2B_7q5Hov%67!NjJ2mgPD{F`tr5PI z;hX|7E$NyxD|{(q$0<(MUY#DIt?PR!qvbaa{Woy>2g*7c(RkFIgaB1nrlHx)>to|$CI{r~T5#v$VLnvBVx@>i? z@3yOFmzT`mbvdhwHWn^jSyWzl`7**Q_hOwx}5c^OP8)GTcxBZJRwHL zcXHORD_K`qdV$SU5F>8>iL{Rs?#G* zZAL2N{!|KFk2c=J{~sEojQ23q9~wEvdtpEaFqaVR5x_{mcyA7`dmHaXdELi&Z!MtF z*LcriJ+5~ECIJ(?27WCe+LNpYYyvbsW4zZw2s%dqBY>D-A0`2N13DMret;3+&EYj* zl%xc&=eRKH!nH1RT)4x92^ZEH_^R-e1O0kA&&kws4j9I^198JBSZ~}_ zzb@5F>3m)4^e^{@0s_X?`d+BFjC<=(zsjoYH6vJQ1ue(Q&v%@0rX8?|T-Mrs0wi=mv0prY91A|a^nstqt zuqv(TR;6iN^s0F}?y=hV$RuHH=7Hs80%7&)%$p=;yUvjD;9vREz89Ck9yra%@jSO+)LGA z90tJGWS%i=v(@XGv(Kp9boLpUvq{4|-7c0pz8 znuV1Hj4^fXjS51L`%})cDkIl4Qp@@~MsLiy&DPykDp+X@22IRyV+g)|E?eeg_V4d( zHg-as6-#75vjo%aaeG|WL)elMyQg$~uH5fBImBRIQ z^vxLT4BmcW>O$29sOpBKhA5mNi6Pr*OV@x-FcnldLsLUl+tA?9?K}G>>cgop>MuVI zh`x~wxlR$1R_11-8u#CW-(K+hq~f=~x~@UpQ}{iN-?#Cr#qSyXp2hDw^`law)ctZ& zIqH76R=njnIon64MyobJRhOH}RXDkcT*dFTpriTCOXaDyykH*qt*IZG8magNRQ$?y zDnqXEVH@*@{>m}l0>6jrIaZGGbCkbSf9c-3%TvaV`rdi-Hl&OP>#M$h_;|{A2)^sA zvr@*F>#d-Xkur8dMtB~hs-7TW1}hV0X64l2S2j=0e01|v;|<97X8k#sPk(1xv2$P( zf%%!SlsUN~m8wWNW^RnW#WBM%`V_*XBT(~Z2Ag7L@Re8~_(HmRwJ7hyuMWTc_?;w8@aB9%8cHXhkcQTYhB^+=|33yFK;I5?gpZO~@YrdSY|8&I z{QhHT!vm!9``$Hd_=nIpuODVRu4&&Ee2n=+)W0~XVya0v+#8QJ_dX{>#h$w|Gjkbt zcDX!+LFz8U$|HYf063PFyDK=s-i7<@;(fs61-jf9RtO$<1%_2z5E!iH&`#Y3e)^z% zfy*0tW!p~onr(f2d}q3R?|gAWMV}Y*V&r}2i^EVJtmd47`^?AuzQD2W3*HcP;NQPU zdzla%=uAvyZ=r9pb`0LS066`KGw%e>q~LJp($okSCugg<**q}RcpSDj29HRF5ejeG zG=JdN?#Xttw+v1_z8Rzp_^SJsrL4`yH2T`d>VG)fsZXVL_N>pnZTno1b;0Hv7T&PA zC;VSzBzks_|7E^1j291_(1G|Z!#MWVV}_9k3)~$MSREBu;|Q!x2;7$xSl1$O|9p(8 zfil8dRaoGzh`>Zt;BH4?bwXfGQebV1zn?Oh7lTVG$dX3L|#+r+I8fzNc#*J>{lVff6$EK5sGedeB zXZ{y?GCPVf)7%&%aCbytbyQ%DBd|6ha9>hjU5h}8F(|7TBXCzlU?M7TwWsbv7+*-g7QT-DO3?%k{Ts`5 
zeNN!l_BnxH2l^Vn7C7{8q(h$Ruh^Gm`YToM`ZrbY`ZpyPe6q$*Ko*Uk0Dby5;wM0# z{*Cz6;FJD}_0`~${z})=U+H@KH`dpJj>adS8lUwVpY@!Rvw*MT%RH3npQ%skO2-%C z==j2N!=StYjPJxx4w|d@^VS#HY68^Cx?{DzFVO8>-f!dOtBiY9QV zf0mp3oWO7Da{|8^^fi7HaHxOMAywx>8b1kHG=38Fsej@p zL7)03egpWVK3U%YKB-S#Pkrio>Yw#Zpri50r^aW!#%F!{yUehWn>H;NzW1IH&WJ5J zsi)Pu48CLB*Prh)S2MosY4m-)i`9iC13 z%j5e220+Tnds9~OPFcx2WhL*Fm9!`;`=3ANl_2|PU-4Ry z{V%@$O(py2BH?}fZNmHIn}zqQ9u(fM`}%pA?0>;H=|AWy>3_+5=|5(f^q;t1`d@L6 z^q={#^uPXF(!c-oSJZt+l)R~Qed*mFC>`5R^v&cu&tJQYjF*4yGAg@-|Kq&e^zl(O z&RfsypSivNe~&y-cgt&=22I0v@m{Zf?PYuySGae2^Su0bZ>o99{#(!d!$|5(>TY?h z9Q}@>UnTlIqUvw*=FZ9X)8lcn{qiASeDvj`>hrf9cx3eGQJd;<9muITz6sZW24aC@ z=EwHJZXI(-Y(B=FFoiiz$aotMOs=5(amdln&c`~0lr3XeBXpA0FHjKcHzv0?ZT}H` zkEP6y0JpC_B=)geJ_cOqA**j-Xsqv`aBtdBPjM-GPjU5_hNfjVhFD9S@o9$-(34MTYt$^-4mvEe8W2X3}KB{mY}k*E*bkysAO zIVcaZuZ-oQoQv{cds-|H&n|2zrfCO;>t^ zRJvcQQhouk#p>w0*S+p{FM+!w0;{6}YaD^K34!~P0_$1?%6Bi6)psv}yCMP;QGvT1 zfz=6tHA#WBEdu4cmz1l*0#BL-=dy7n> zTk!78Z$bLoDc?K!ouL@@{H|i)yM(bF-ZT z!D$|d{^6;nHQC-6YmvIc*{Bq2PfH@l;td+sPRM1LQay8RJ> z$#c>7+!3Zl|DOT>N13s9GGIPrjtb7f&jRPX(Wb??;7DCVLSW-D(9RlXh8QQR(VsG7 zy=B0B$ea|Mrt{E0YpiK8ezZv4q4QDKZ@g(Sj_g4{8H2I`9FM*De&grakUn)`f&2YD zTbL{Ci?y5qd-1I&A~?yYz=Mv!2EPnJ^r4(s^BJ%g-{ca4(?*u0)HSsTJmi<52K^}~ z)`DLjv>aRSdNd`pT-{~qhva{FUFYCsAyYhL|4YQFt?4x%6B#@f{Y ze(y0LdnU$j$hBr@ioGf3|Naxk_zba4>vB65t8wQt*0~1oyO05y2cdoFb*2@uotXMg z)GXh8XdlCZX6OogOYA9k4r5JefB`uNqtCD#OzTQ}Ym9S!_~~Y7s=X~X*QKv)4d2r) z1m5t4rZvsJH+F-z1KJ_JuaV!8e`kiK+xMyGjN`bbJ@Ogks5}2?GxPC1EemvLk9;P% z0A<=8?e=4Lz9~F>YvWuCzaI$EZu(Wc$TjPB2#koHCqkyu{{=lQn(GG9W z^Otr99?8>?{|&pWg*8J*@c+ z*o$w4e*PD_G^%ME_;%PC|Hu6_@Gj}+pT5}7qn@{ZUj4lBTcolL$d>{?eqP(~=FVZx zjZ{~>`RS$crtBX5=`;9_yApGo-;~q+csc6%eOCGLMv0L_Fn%L%Hmw=<_E=czc1HwO za}MWV4qs(Ip!_&(hTo)>9~YdOi_kx((6p|$cf=g2t4#>p$GM%0xjoZ&?afCVX4~{5tyK!hhh%X zUg^gvGuB|mj| zI_;Q#T*ly8H75KH&ugsnD&K5KpB~FWKTSEXHmv+~RQmT=4*F@zf%W78d*QQ^J(S}q z^i%o=-}KKX%QyWq`(wRY`R9}6n|_*dV~u-23O>50eAl2K<;8lq_;<*BF~%%!nQ6_k 
ztJ?T?&hdQA@oVizm7iAQ1b&r&hs?v!FMow;&9-;N`l~hLJ@uT|1(?^@*!~k_K*dX(=4EW6v=r`$h)0$^j$NYI(eD2}%kd7bp_3ECC39hdn2|Ho@ zV2sl~fWDshKn{TC>vLca^Z|@@+ScjoNe}q}p0Cfv{HLumzG<6gEK>AnL*SS8l?QsX zZN@h}CYnFw6(~RN=bbjKd8MBxJ>(oHKkw(Av^B5v`Q#7z2+HUCd8e&vUg`5adhn}$ z-u=AN|9kW>zx=$j&Cf4=KWQSL!Snr^e?Pwqfk&Fib?|(@=D!WUetNF&*ZlkW)wKM4 zGbZ?XWqjC#d#dOBKgzQ_=jLS0&H47;SXk-~Mg%sD#28KfylE}4YhrvC*owaD`Gn8K zg3~w|{ikd;LksPvV~*4{B?KNCh5l12OzTFwR>ct7vwjyy3QqGB^uO|pW@wT9Y^+7< z4(Fg>e@qwR*JZhu5zl5tJjt${hn{#dtGaU?1@ z2lLUt;5M;iN9r090vj3Ev<)+kP;RVs59IqmQgE6I(0|e$V$&^BcZl&!+cDz@<;A-A zfW3GRFa)Q^eDA@$lCgSsz-KUyj3d0pdb)}uQK8pkIT%MM2iD*R@_E0faxi{S4y@P9 zbN)%n!RLIXfAFoI^G}j*#u4^MUVuF3OaE^5&-g)kkwY+$&-f9H*Ng|v(9QNgs^{)& z;8!{KXJPL1*`6{Y$6x@T>p7=qV{AD$=X}8ovF}{WamI^zh!=dOS7QO5_)Jefy}+e+ zBj~aJP3S)w^Wn;S;MZOH%61SVa#26+e!Oo2&iImvzoxjP(RBM~q9334Nnhb3K9CRE zSie9o^}Ij9y;gC-ooD)apZ0$xd@Su_8tjAfd`2nW4FIoRWrpZ$6>T>bkk&P@1Nzxn zhzaaJ+wDIG{TUOK-SRmKyp!Hs&|^$x>|uy0E}-Z_=X@7Pmn&Tk#tPC#{)UPb>2jsZ!I(k%$nj7yBVDd^ zIT$-U`tZxtua=83!7q!Zn=Vtj42&J5iChsCJJMy+-@DRfVJzW2kXNE&NxE$5vK;r{ z2fU}t=9fj&Wh_y7+~~U<_mX#vBX8|O9&`V|8L>Q!|J7eMEyFnS)&+*KI}C`n>anQ5 z<`H*}!!{U4cA`I@fA}s!JmA;l18>$>@ht>!b{y~2(!Z7v?fVK)Kj#~Gj|H6jEz{z+ zCEB<5P9peq%9dcW6M#4GNz;l-|NRqjz2Mt;rv<$6JEp~NOO%7}B=g~09pLT6`^9Gh zM=SU|7D4=BT_3~Pm5{aspSNhwN?))I#um~*PR>9+a}f{tyBRlW-;95>@1*pvCPaG; z;~RBF|3KX^#!x57=TR}HMeu9YePFZn7t{^w(SINP26aT8Qzwiy)CF>Y2C7&i{oBZ5 zNZ(yyX=^77t3c>I zw!aeyeL(n!KNnhxMIyeR$Xugq;HjwWu)Jb1g zCw-w4e#_D_QYOYFMIX9nTuhU+1`YB%_vzamu5Qxyf;-jI8&|sAXb&B7Nl24di#N^l{WOQYOYE(nhY`Kz`fe z{PvHdmQl;ZnB>t0@9E>HWz;fh+MLt=@nf9Q@@aYa-qQkJ5)-#%>+fQr@TN_zsH>j% zc(yS_#j50F_oX~Z>EA$z_D1qS`6w^tAswwt(rFR=Ci1iJ znO6|gDc5Tj>(T!Z`B?Vyn-=+4^NV*a_Sd@P_+^p?<-24{_TEgqI|;pZ^0A)sg{8fn ze3_WX_lZZzmt?JCcw6Io7q8yq%qbJdX5l zClBi>PeR(;$wQk`<9^BFY}yp@I+7o{B#r|r%p;!L> zp8;Mt7b-H&S9AZKKK+CKzX{~L|I(nbLC*U}#*7ax#Qas`4Sf#A-wU!o=R(nsW$u^0 z{(+_bhS2>R3eLmZekJq&jz9Me^4X7iNI(z#9?g4i4$8g1cz*vN@Au$Ea=)*PAFuE! 
zZ#Cp4KJT^U)psq@UHAS`i~aBV?D?wSm&c7&_>`}f@&TXraQmM=Q1}%YA1gb&r_X1C z?DwTX=Lh-xMS1EV5Aiwg7kyXeebG-|vpC;NUzPcO+i%{ro(8@i$N5hS{X5X?v7v0 ztLA||Ovjw$%=R&-VJ2eE)$;DdxyamM^1|F<*2CxT4oiDAX*SJ7jFI}9Szp~WG9qoY zguva0zvqoVl`)PVQMo@MYBLugWI4y#&4l?X=&aJp;1Lh9_eQ}W_5`3j2mHTPmnj3fwMazb=7{}beu@yS%CeBqxp{t zek~#D+VH63LIa)wlwTLzZgrT6`O0x>LhNV1gwXC*huZ(wA?_=`?ef-{4z-_8^0@~- z)}Q-3`SW*X(hqa)HbCzAX!qy%=C8OZbD*^@_Y5)=uy_+E@vKXPDS-^#qOKI zAN_hi<1??tU+y*JX*WJlPxNPfQUA!xtryeJaeh!A^rK!qC;4rwnIzNwiIoDZ+}0g{PCjav!jC3yc})I(e^oe ze~fj&KYT0dZpA$c?fMvPvTo7a*?f<6(9UOS$@^@LOU`F&ob~Vn`xDZhl>E=uxa58w ziPLX!45}6_7XK*t2Sw)AIM?cu(zk&Sv>HVZt#PiwX#YxmDL*N=O`;R32x< zb0p5S7wuoEC;Cc5`nBVq_2fS+?d|!;+|YXCcOdUs9R6PURO#PFo`}$DAWWA>`_=^f zKKn-nw~aikL%%liXx~~5|F3+j^lK-NBYis^5AADf5fhcK75p~*Cxv#04w zxq{n<{}!RyhJWpQ6O8}xohvcVR$}f&?PP2<%BxW>vY(G>A6(660^qGh+gh|OwhzY2 zP%gvVTw?#T!c*e{eGso4ZRNl#wi{v_P~L!YiTy8XosIJ^0ee* zQ$B)o40wLt{k(e5x!{j@e%}4Ode67ukABwAyPsF@xt6v8JU{P#UcKj8@&`OW@ASof ze!XW|&U@hc`S^EaAQn&wJ)ZL4^)%IcK$D2#vR({+-JM)Ib zMq1;vxv$j;Vzu&Gy98rE(==E3*?YEPRC-B=n zl|%dO2E<+ZTjM0<(Ehp!ahUz1C&{<=*L8@$^w&M*Tl?!IpHtB9KFp{4FgMrOKZ|Wg zc{}FTTD!&b*A37M@g6|i18BR=emk}UC5a_M5e;wP2@=oBD z+pV6zPJ&ugM?%tDkp2ui9UeKjQg$ z_w%a#wc-zWe%|S?{rqZwO&;m5{rvm+rN1VBe!l&Ds^@X=<>%MWpYom1m!D5RU-Z*z zt|8~2zFOz~e|#qPPK3`6F%EQ)7diy>D$f9Gu?F@ofJ78LO+V902sq^-3Vy;B=$7bmB_6I7r zhkP-2f%_LF1-JQg=pX%&X>GDUjI~JJ;X>3c{)uUQ!9E&eAMSI+=X3+@%m?CH>Aa`R z5N$Lpb>1FO^U#-T!L(7XwNhs8jimDX1^3{5;BbAIw(Cfpw^tO`+i(1?8KUjdW+*fF zQ=-iz1-EGd`g2{GHr*n1hZdra>wC0i=7q`_l!esy49M%}*^oYUVS)SoJTt$aeYsbY z%I_DPc6(d-W#GCi<>a1DD!*U)sQi8{i{@SP>z9G+u9TDeK&kwGmp?zxn%9WSkDq6* zxvFsj&noXnaC)p~uC=m1_nT7r{nEc%J#&qfawES#|<^6Kcg6*;ulS; z!ag2j4iM$#K2|!vzX7JGYV_3i_vHKznRt+ z+p6GPzZH2sciEX0TyrITWouZsTmrmXUpK9-cCQNN`;dle!x|;?l2#&D;%@tl3eIun z^3Wa`2e>||=y0tR>ytNv4(*X~U=hl+JK8Ptd=yQ0O_k#o#hNYc_9o!bzG$CZPbEF% z_lIgV6jFV{>-k9&G?&D767*GNeN`TZfC-`^4cTpRUhV11D~)%??^`*p18`FZv8 z#&uBAQhSEE`TZT~md_NV#r?$iEJ8ohA#Z+qX}szF^cnQyBSEjD{Qi8@tMz&MaqO4G 
z=kj8V-^#a4Yny#m1@~V2F--@|TJEk9(F?d$_apq<6n)|rXHyhHY$8yk5Qx5L^ zMn4^u{ymn1ewuP{zc`iO-$OZ=->>u!zEytzN%Bqq%>LYuj{f;%`KF(y+{o__sr>#P z^3D8y%FF%klz&HV%rcBw*>6m1yPZ|x|NAY@@p8=Zf3W*0Kdr_I{ObJv<>*)bJJWi= z4vBwv_w3=^-hjFNC3~Rq+q@rbOXv8PqTh!9G_4(WxWb>~zY+8LL3^_Pz0=>59`^v${(dcNfcDN9r|p{YNYSGm zAs*4j%0Q3y%^0W0Mf1nK1L^Diywk2Vuk`h#$9)Cq>;1fww&s<7pZsypLHd0^@AT!G zSNeUA9(=2xcR#O;0UkZfF+cBY^YcspPnv4aLf8Lm{{8&Y|C1*7Gt~ZH^WTPFKRws~ zYySQGYFd8285jJ#GDb|sJ=L>5_KD;3Jf9~vVQxNb53k_9bExz7zgvkh+Vrt$RoWvf zn438bebsXa;z(3*8#keUEF%zl#LlU3q|VzXZx#AuV{EI+&Q)=QHm%YDQ9ED75Aw|S0On#Ez=7XYx$@fMmex#TtX%wsG$SveR>Cs@*>B70CW5ujMuh4fzVg&0`=^DAN(rk z{&viLKHpPD?jy*2O3vvAFt(hVJO0}YvF}5eAg%jg2lTVs5fkV?9&r2bK!3&r z@}A(c6nH1Shd_^Um9d9$l0JfQkG9U(K>FN2k+Fevy&Qj+589p1@mI8IBjDXH7y0nZ z!S?}07dmH*NS7;J4#o=7<{piV73p%N%fXmI`rOBnF(X~BbU7G1Jo@m<)UTF{F~Kj3 zrkgHPx(tjRq^b6U1kdR*>2LVyvM`qL9^5;Uu_Rr#bXk~B>D>psr_1J-Mbl+0(K-IJ za4+?qu|x7ynB!lD@xS*o0n0$$19$>N9LhFpMO-2zu?!D18;l( z0QS>c_4)(Z0V^W?YY9Pb-v-q0xG-Sd4*1Z;0hQxV`}W>T1YdIeVY3^7w{t|mic0_e zci_5mbile3uxe~T<@i$$zL%&Rf8gmHe?E^f{y3<^+T*T-v?chwMSE8If^9IikOueN zWNaZG@OLwA(!Lr0Xx~ZcUrmVi8pb#3ivEGRVT_?pxMwG0OpD-4?l^3g{(`z;J^JsX z-=L1DbLxb#hPvQBpDNZ!f0YNXWih1huCTPVlZEj{#x>9Tn=)Q=k5HB4Fa6ue77=>Y zgdNGoxI@{vzo>eL5&U+tMWuf`*%*H)8}}qtIsVeWjckte?R1s=lCo7(0xyH{7IjC*Y+gGpR~EREn||-hwm)^oiWLy z4Zp7Q;nVZrbq+k^lt&+PQ|H99-Y=s+$Dj0(IA|L*}ZrNY!lH=#)745-V9M_43UOV|% zPx->q-cCNfPMz5ExS~V6j^v9-{|3U2V>)Ctp^MAdpmjfoTqX-_Q?7*@j8+xDg8SgFVa!DAJDI|O~G#`Pm9oRClC88+kswn zjz8zVe@|wA9w+C$e}85##~=FCIsTmc{ym%h=LB*7`}c45a{QqOt}XK3UG3*ge3j1x zd5O<^b+x}U@hM*|Z9PX3ACo&L2Ab;q~1k?!=ZZ9(}k@IW7?V@~pg_A#g71;m_Yd3WMm^zX~ediea^ zVQH5f{}&Krq`rpxHbmst-VvnwI(CvyBDmx>wEzMK0(Ck3aP5PejRzow^S1Z7t-LU1L=U+|S2 z>%kxQl-4n!39?h4%zM(b*1I}0ghmx1d8D7^nAIVkF>ZvVJwe`B2F~t?)TtbQO;5*( zB;ptQ5l8bM75rL4)V1MJ$Atzw1E@Sy!R=OuULF%-zsfrm+TH3<`~Nz`edV`Z-c*i% z2mEP2pX757p27S%{+mAb<@hshqurmQU;aB^jz8q%^JjXym*Y=<;P=|KyK$z0&lg?g z`12eEobT2y&mVmCIL{wE(zmzE^9OJAXxe*^RIm^^ETkfY|`yPWm#We3C3-VjE+e=l?8W8XlYzu+|vLA&1DJR*IX 
z2tnh}P_*g2&XrGo19|>}(>x4qdhc`A0sruD)agCY=|Ah{wnrBU&>bs zZjY!8(MGve9CH-MK1ygbbJ^!@m~XI{=R;Q4v? z^Xfh8fZ@+IJ}GaW163cfnit?~unh0kGn4M4??cQ9|T4>j|N zreIw3KK$I*TICZ7td5{f@5`@Z%R9*Pm$sTK(Wdw5ccjkSe|s9*^uGNn=DdSEf5EAn zfi}I5e~Z-ZzZ!LVKY#l1=6BwfJb$!f&A6%n?FE=)+;cxHb>80FVk5E#zxLmC?_h4? zxic!byCZWCg{oUx9bo`l;0J4J(ffJ?Ihy~{B}>}(0;oCahLwqI7vCQzivVtX8-6( z@~!=K9pW$jbx--${yNF$6!g0WBxgZrf87Kg>4(LZ+Mf-f{dF^VB%at%`?Dgn zzot&|H%>Xde@T$%b>G+F-v7*X z(0Ts;yncWFzCT~zpO^2?$7ha(&ad|8RQvO%c`uza-C2&bKPNgp2b$}#I=|naXYS8= z=bE|BZTIK0`*V|-Gtp5lvOm9<_v$F0m-IW#-$lL7C1x)4r-fI}@lOk{`Ya%SPBZ6} z&SCcFD*JPj{W-{-Q#$w9pEF82I#1M}6Y9?c_2+z&j?VS;=XDZa=W_bdoj&3XN4>CeyHgnFHa>Cd}l?wHQ6^ygJ}l@CW+UFE-VE_aph)?I!ZdGhD8aSrQTPJiwu z^O$swravdspL^-gx%B5;ao+m#$I^4H(sQeb@6UVdE{BTt)mdH}ZR4*^Cfdzkn@rlK z{P}C?`D*ETY3X@rj1Q(S-z+`Ptg9R!=0J3|Z)bP=bat1s)73tl+{aeug{9}TrRT1t z=c=iB@5@O`&qeDfZ=|a{k?!(By2}Ge&;Llz{h%%WEy+te{acck>#PRy?RMcC6d}In zd%34!xvq&wyO&EEmFqe};P2-%n1eFcKMC<2+RG(O$o1}|w0pUMEplB;2z)u$P3o!) zsq=Cz!*ablBJEzTWK^ze2}y%v=SbZt^P%*Cybb6hX#he#FMla4*NqWr_wtgWa@|bG z^$?{Gl)3id0D?C!CnX`*2b0q7<(Ra{brT`*57F)nsjDL-jj&uN32}WeBG+EtO;oO% z2uVZfL+VbM55u<>pLlLdw))oj+~e8mo91)RXY)HDbFlP!ICHAh_tFk<{dq~G!@MHW zVO|jFFu!LZzE|-*pScl=&K~!kE-rHvs_`v|I0+Z0+QnhM3F$G95k4v_#V<7 z4!=iqhvUyxP~R9{5I(r}{vr5bK7rN`^99s*iWfvKuIH=o4#3s=;rcu2a6O%ETpuT$ zCg@q~hwGDyj*H86S49W7T0cn_hwI0r*W}_f14rwpD>_;~UEyf`bcLgA3hV32#{eoH zBiGeoxvpW(0NTBr4CV{qx{lEEF(@k^1Bm(Us`vn04g6N*9~F0Zj8vawt;gF z$Dpiy3?O*-*6$K>eK0BQ-kMyCTsIK{Uwn+z)frOft!akk`d~!by>-c`TsIMthSG=B z)j3jk(tNb>F>ko`{)Wuw+BaMugKyc|hN|!lQ2B-qaJ3C_tyTFL*S@$0N*m%DrnVuj zM=CllZncYB3tVkO2^Xi@#o_v$@-e{CHq;dzZ9`q*XdCJZ$MZ4oh+JIbQ1tnE1 zqxHje7v&o|z}5QUnuhW*uAaHppnMGUto6fht%{C|%Wt{rp1{@m;Wu~U@SD2wF)j|j zhimJ}Z^)*qtB-tjCy^Z9qH@yC@)04K0k~!{pU^c{Xn0kdM@zv*{PpJ zS)Y;0cWr$p>UpTwXP=&rvOe>a@0$AD$}H4(bWSDj)zP_>q(2yc!*(goiS3WR`W#Bq z9D={w(dSNnT6m?8`n2$>->HW{@A~XG&MAFf2n~5qF$d3$2q0XdE{9_ z`Ybro$;aPc@?5WBsMqIskxl{r{*veJ@vI?zJ{Rdsz~7wlY`x*A*XM5$U!R>f0`>Zw zJU;i*zVsQA)VV%0lD4L8g!4+|p?qh6nT$8&!4Id@Y~ug|l~ 
zL4D*dImeD?h;?@U7-@BN-Wccd0?d8#awFzC@%`tE@qD1J&J!a~{`12)_w?C$v|(*y z`KZ@t;?ah+jTN9?pLItY);2c5rOR{3IB&0X|E72v<~3>g&mALfSLciozpHb_XkXs9 z*it#qOrN)Hp`qSjHy$0>&c$8DjJ){NZ#2H}eXMa(DJa4J1zrF10 zZ!OdRM(=;>TkPM8ytGsQnQ?#ZxO2~DB5ZAkta_ya&*CN-oglL!VBvMypNZqcmTqg)gBO=${*>q94t|cT5 zj-4ZQRfMQJX+D%ba88c;P67z|yz`>Ma@`n_cJCahs9ZM_0$;wxqO87?0D=$i44{Nu zA52QSccxE^TsIK{U%rz_U7aCy-nmF&xjqc`A#xX z-nV(i7T?KvRu@wLOMK;M13cLp1toRuAX`J1!d=%AL^dK)%xK%9_l-Z zi^DTGh{N+Ww0?NDgyN?II$A$n;b{GIg`;c=XPYP=0|*=O&cg}IbxlOty)$rlJ`3>c z2!Su_Jt!+51E_3Ut`iBl-kp?d@4T88xvnJyzW5la^UjzF%XNYfINo_O5xK63O1pPn zjIt@It8xU#J4+`a*WL1=^no)5l#c;aK1Qw^!gAdhk!$Z9jHp~U6M8-dW#wZ4!Mk^6 zLPD+&CZ*jw2cbo-n+Sm~K1S;545{({ci|1634$qxXJ_fd^ZHQ+;aPD(I zeMQH`tp%>OA)dQH9G2QB{3uv^ah-XN6$=<6U$?Mx zSHKwcx_O#un5S9Sm?%e8-N@I?`wb(SdB&{GRMvbonbs?ocXGm$}oCo z7{*q^$}@RAD8n!>z0^(_Tk8${ajAtZ0If-KP1+IY#+o!j+F`g(r0sRexVxTsLEJa# zNSnUz7GuJgHgx&7pM#N;JYk}}+1Q1CkKy;V`jipJy$$3m`*=XxWVBtDy6pZ`DwUC) z%7&JW5wK_pSOtsX^#t7=u%T(I=I{^>*~l!2Sc< z({*-b=C;k5iOerTd-oBa{Vq&hsQP8@u(n!*2X6HUcPviG({A%%g2ES+V`%e9+)F^fDoK%jwH?F~}WnizJrKcJxP!u(N73FJ?e{$(XXF0MeSm21^)K?b z&GG(*k#${WENgx*{LYAF;B2qJk9t`~FaA9x_AL5gAj>^NIg9VM)(e3w?x|zF6v*Pc zt@Uys%gwvW;+eA68-XnDH)H*KAd7iM)-M8CZr)WE^Ng%_0$JQ+#`;4b>qEjn1+v_{ ztE^+Z{wR=jobbN`SuJ=MZ~0Nc_yyqGKM5Fb1OD>Gfbk!Izy4{!_$6TLzXgn60si*& zfbna<-@g?wegpW&p9hRqz$5<=Fx~-t_csCKw}9{cE@1o)@Pq#h7{3Sn@V^4a9{`X3 zIbi$|@Yu%z<39n9rvkXCY;9;s*Qk$R>csb}i365rB0($hC9Pv~jyH~&vhzr*`+ zM|%4BcPI3ea?iZ}Tb@t;zppvMTs33UTaUFEMwsxIjHQ6Bcvc|%y`BQmCmHKFjP zO}ad6B+B@^A*8$D7yrNC-=GdMIVte3>1=^#3NW%<7!Vd*J%Hm%jBilSDI51-bFQzU!e?#L- z&AZ-vZup$C;`vRZSFJ7|y>`u#!WAnESCy14bIYSwmFDN>j$XQIc~Rlg+lvdAjwvv5 zMz335R9sTFZuFHi#uOCJxc2HR3TG^wRyb`;-h{$2^R6n)UsyOcZ`H>9!n|<>t2d4* zEZcZT&N^dO>FO0FWs|~JTstj1Yu2=I-q@VnoZRrR<>lpT*G(EddRb}t^7V^zmaJJd zdhT^oSB+k|a>J@oYs=Q$TC$`(2aVye6PHX_yd;0z*rIXyV<(O;88>Fk(&D1=OBUw> zjwu>DZv4^-1^E;5M$EZl-qfq7Pn%RcX8e+IMMb$KV@rziCN3#1$ zvNeTki;9a&S1&_-$>R0P3Rjk{CPlAYs3<^-t4da*GwxGfv>0$X>ZK&m==tCk+SBLG 
zo<6H^#{Ah+PqAChiejU1+M?N4Tsw2BR>rAA>K$4)Oie-;T-bP)ZV;&LN7u41ZBlpB zcBjsxj+U<>NQJ-E% zRNa57+fRdZ+(l?$U9LuO;nHCs{WsQ-|k3dZL(q6^de|3Axa_BnvHij-=E00AN*j}Sr#4-vvsWg{Z;5Fbqf2@eSYlYoHfJ|`GZ zkt!m_)M8I+v5gdMZ0#+!-h{qyTfMEe_-^a%ZF|$l?dA5by@J5`f8Werhe-$qX*KH4 z{C+2EubDM7Yi513_G{Lj*z{R5oKPs=Rpu|OUQ?mqm?S?q9{X!^aCwAjV^vrD@2<{L zQ6#f$zR5m;?hh&x+BJ}4va}f_wIcc54J#Cl=|7f(3QC=$S*7~7p+5f?-lup`30{^f^V!wsd<`n15~Pt7n>I zp)&Seko-gBWq?k;V%nv1<|}pUV@0cqmsb>Jtt=@oD=DqG=dOpdiprN&ysG+dc`U1F zN#)w2(t@HTOG+v#_LP?t6&KuGzM`@OImN~0NGqsVQ?#VyOUp`EFD@#5IIFB^RmsC^ zD$5J5{BqXnrAsSHD%HbT6)QHDbp1-!x}wsxB?T*2lodZ}#0wU$UAnZSylpd&=w?=z zm2T*;m8+zc?(b$Rp0twjZnm;OTFHLLt(>A^zD&c+x_Y`al;2@Pn~g;qDwY(L7L^w) zm6CHzN!K=N!{QAE+QgDrxnWJo7gvnR2`c%@O$8MtHx-nYlob?7J%y)H&tnypO~ARXEjXVEtvQYAq^GM}Pr_$H@zbbG zdb+yJEPN)k{WNNmo~}Nzr}gioy3@N9XyAG6s;f(GlGDmFp+?i{KmDphH|4czYJZ1K z1t-AA1FudG(c1Mr6Z-kPKT2PbQJPa!e0-o@>rngCvx#nv1J$avgYQyn+ALRJK09@# z_Kc%CZ0gCZW#y~auEA_twyLD8vS8_o^(DmxyjcBr-WdFgz9Pn@a7v)4bj7l=uHQHz zokgB;y6?+cRkXffX=zbq!Og47R|b_c&xJC)GJa{|lo=!Q3T9k!+2n#53#S!K%gZeo zIq%Yf+=T_Ba#pR+Eyx+2SGIm+!IGkig7Wnnpi9}x0&n`<+0$ng%$PrW>bJAjl&@G< zR9TWz>A247D_IrG)|3}4S-rNb^2RS`Rcu(bcy;MZlmD?6vsM=G%UWBuV(IGgRk~LN z$Ga8!de+*Cl8S=j4d~m7B?T3gMN3wG7gv8NHPG>|X5C!0u4HNXY78*PDdWkkKjyaQg#WzX>Ji4>kK8{768WI#@QyFI|0B;o%FC~pPEplL73#FK?_N^%M%A{L z!bsp`|1lSybjLcNavZ>r(yb(sH6m8D+K0F8rYlOjJKG(W6fpF_U8dcBbRIqLN_ zpsI&@JPBPsItGadBjpz0Iq_18#I&q2_9Pyp)9{h-}J~?Q=3`khjG<8L~t8Q=Vy2-5>iLro=UAtZC)^?o{sdXc6 z+|A96$48Iel&fl+Myvaq!j~1+76xfbnfk-YXl)z?r$lS2-K2X5kEgpX(l$4ZPL4*K z$&mZmyqeK@qqn)Y&(RG08F8R6gf(?*pn|2z1yB&Yeot zsAx`Dee6KEKhnCmGo5<3)}8LwI_iQ0PB*l%-L1_>{|=L{4yky4aE!d~#?A3zq^s|4 zX^uNlKXLZ$%~ks&-|xCSp3m{&@tHi%z;hYY6Lx|l}v(AXlW*ts9*NJpF!>M+oIvu0Z?0^r0Mk2SP-gtAjS#Iq#FR<^}vGMr0 zahtkbiL}{4+W7JD_{A4*B9_ksmPOy>)S9gdKFNVi6vj2GF8?L%5O>b-IleCK_BrN* z-lr7DGOCU>jn6;tuNX=eRbzu=j{kR%7gxW)@0U&WGw0XVU$LMzIdfsHg2|FsT%##E za%VEWtWITzRB~DGxWc4I+GaNysZ~8tC$NV(sv^sl#iPA@$E(#Y)b}8MpKnU4L=&2c zFVlQI(!PM-L->6WzuovfjNg~=`*KtNWPd$31|$andC4UBZ78Y+#0P8}m>j6{Ky_MX 
zGE>WB)@N=b#%H3QNHU^j1|+J6MU zJ<#_nTHoH9-;1WBa-{9U*eS`Y9rjYv8H*dK8hAsHjT$Wr^9mk+8 zn(8($n!2cNQPOSJSXs2Fj+Fl4Zsp|FC^zSJ*X9)r=;`j;t_Tl#wer?L2 z?sx|>TIUXAL?@x*$LYTxgs*UtAx>NPGULl1>@wb?e)qv|KZra<9|B!cJmxyboBVyu z2d`}hU*voT=|3GGo9d7bNsULVw$BdfbH}aGXg1Fqy8<2Rp~J1f2K95oka6AYU6HZg zF0`|Y?Lf)%<@gMPi(TQtvGc@3_qP z$dLHO$)SNvR*h5bG!Ie_VUCXcIm)T@jH;^neLsI&zj(h*1CkF_qbmBet;>>bwaTPV zez55~v*S(4#?xGB6J!L?5@0s7ROma`hGAap~9$KHq3`3vDH*{a2 zp-*XjaEx{vC`UG*WtJ! zw57Qx(?d?LM;eSw9gf{c%yC1gJU!wX`TEd~^t}-y-w>Lc{^;JIPRPuC)TFb{tLFyw z;P~*>K|1Pu>+tdV7rZ6y&P`vq;CMa979LMe9y^{+{f9x%#z6mE95)2|voDQ-{*-A5 z^yl%u(ATv7ly3|@r}d}&KJpCGr{h7`>slTo#&|jwq`b+)P-?Y0!>o4_Ce@UnQ zlFqq)!Ku#m($@2~L;n!RS<>w4P zKT*ChG*|0S`F-dM_1ASmhWayI=C`gV&|jwq`b+)P-?ZDK5B-zWL;WS4`b&D-bNS$5 zRaFay?73}deCVdE+EixJtZoSU{ZUw4Ds{5hA`L59EVx|-@pa37F#y7Ia!IURCGk#-2zpGoJ<7}K^+ zKQZlRTTe_=Kiksms>#2MghDD^g$|^5>!CD{>&iah`v@NislMR*3g0E9`ho8!e7BIw z0G}cJ$3m(<`2NCwJfsGIA0RxgZ3luM2%h@0&7;tvC(?Qz`h?cuD0KKF_)iKC9nJ-R zuJF*|Jn-iU4;|9LrwI=o&Ifvsg1=Dsi$a*np#y`39~x2@fxk%j;UP5y{1D+sgw#;* zL&2x;039?Y#a_@sV^VnNpfM>tbkLX-9y(}D3J)DLCWVI%8k5392aQSaDLizd4k6*H+8Jb38mx8LZ1OIXePt#!E zVPfyM96UM?9T+bBQ3sFELkF^iPda$S9y%~W_)u77gU=Q|99AR2j}$%&u5}~UO@*N(?_9$jp?J%!N&Aa=wM^|D0HwfeH1#_m_7;}Y)l`84uWYubM0&$ z_)NF6bqIfNOEb<ub2&%%zF_jA& z=Zei?&(YvV3lDqdfzJ~j_8bF#jPS7MSny+ohdsxEA16HQIUf9Y;bG5xfF=gYLn6hzAOxd_5rfggjQ#P)NDZ#aksm{{D##CqNAef44j2!lHL8<)|A0{Q8 zWB#q0s#uMtPHeNLZfuLD(b#REHl`-P#uLQmu;(Si76Y`#FXIL z##CqNU}LJYbP!D8QL6S+_h@>=-=XO-|6Wa1Y^SD94EL;%b7QrdMq~Gb+L)RK8&4CP z!=BT@PZu8coB@7@@UZ8l;4c*(_Ph-IWx~UrmxI4tc-V6$_?g1Po*sA)yp1Uv*Tj^K zYhudAH8Ex5nwYY2O-$LiCZ=p$6H|g~8&jR7gN>=q(m^n_Ut{F3{~b+__}|s^nEyRZ zRqXqkIc)PkX*BjDP#aUTVB=Y0bJ+6=@K*>Ad(H+wTX@*>O7K?-4|~o5KSy}j z^D6LH2@iYD1wU7K*z;=eSA(}PW#gKdvT;pJ*|;XAY+MslHm->&8`s2?jcZ~`aBX9% zvvjaA)mb_SrudSC*Hh1Gdc=QD(_{WGG*z)*YU;!eXzIqE*EAY?0o2CSJlJ@i*c|qp z4}QMzu;&8s3xtO~7lL0XJnVT5_-llRJr{vrBs}bSE%T{zkf9XBFqAY*q7jmL3zwq(AD9iVsZpvS@(3BrpV#<%Z z*_5BW*_6NhepCLchfVoK-!$b1&i|WkchuU$5wx#lEuqc_W{TuzbJC&qrAP!mD1^eIHR|bRTn*(S1_2 
z(f!hUjP6%{;oK<8pFi4^?|-Q&f6;tXe&jM!e%xA9e)4Um{N?wV@>hM?ltQ9lIt{VI_#w4v*IM44jGP^tH{{2{s!am;**&be||5k8(1hN>1 zv>xF>evkec-8qL&RhE4}RavoV`m}yer$5_uS6|S+pc$YUpaVb$JllO&CTJ%3bZ@Lb z2*-nP+{YX5561Ce9QXAm_(O0!1hW0SiT*Gg4?}u}m+xocI19)9y(xY+jxG5u{xaC}UE4<bG_yM0vs>E@o4Wx{~8=$gX27})V~(T*Ft`bSEjLV=1WV0^UKVa zmIUXQnJ)(uoL^?Xv?MscOg~!^oL{D&EeXyq)6atm&M(u?mIUXQ>1RuV<6`>RlHjblq+ArYT z;;MmpDzD6BbC*P8V+j=JdS^h=!d zvECX#!=yctZ|J^swd#fRiye2ISME2UJlA+|9gD&`wL&BFR3FIno#eRVy-Hu!Tezq* zW4e=mvA52D1m&oYUh@Lo=3`ykVdVE5?xfRSy6_Rq$2zw@^RbSEdSToedZON*LmiiX zlzJg;FZBX#{3P4*@Lwz(zdN%+qP}J9);cXSVQ+cSl=RT+xqGCChN`G5nHFZD9g5Kk0jQ+ zz0bzxLX;+dB+--5%|Sb%1>_4jEG&cY?Rx?}y_7mpbkx-bP%T9=Dgm z8k{J`G4l#1eWF+8XFx8~bKFUu?=MKo&G+IyF*P{iq)+xX`3r;k z>M=$NYj*}fZt&HPJH@NLG@C-X#9hAWPix+<88ZiBIC2cFlMroK8IVLOT0T+;W@-913f^_C`9# zg5$#TmyH)&2fNLGR(tdV?`3_7>rjoaFfzO8 z!vwpwuOKc40~f?GeFgi3b*si#1Z~^;aZJ!=s%={@`U*YP&|UkAplw^f-f^xYeWc0O zhkkEwa&Lz(YwHJL?6Q|Q z?o4l|_8nqBS(h(9BnPsqD-U{I>3!b62y%I=9e0jbuYHQt$$NU@cP#X{%6rHk0=cmj zjyu=e?I+=Tq(1r+`bP0l^e43O5MYpgioR0(5^XmO*wXU>>4JaaCJVSB&gmBg->fGa z^>F;?2W8yoXKbw5?Z4Y?)8}+n+w?zGz_z|t@y+!JZ8;vcyxRMcpJCFT$TxJ~P>k95 zn;mza*WmMdrUqr@dZy6GJVhH%fQ{#Sd;DRLpKy!gF7WnhpTxP(>zDbseo>IeTy0?A zb6_iClkFjGFWUoc?OSC0=~MC{H{%{B{W|Yo^mW8cV1~Az1>5u5#Mt^deQmITt$8@63bWAr=<|^|V#^Jr#WbuaoGL@*$749(6p|(Qolw#(Bc@b8uZzX!7+n&Ede-$6#anChE@X zCh<+QDSb_k@S8FECgx+UeqY{Wu^!a7`xD{AF`frG$LL#$VdCj9@I(K^zFkR72B2Tros-|rN7c+ z82GDE7>}9vJBT2MjWPFg!O!{}?ms|hCpb<){ej=gL;9RA;+_L^?w5md@HJz>&-;qw z^1e!JME^Ao=?lJ!`w!4-zV5gegI@bh#|=l^DE*hgMUTF`4tfpm33`1eoc>!-_Xpwh z-;w`uIQF%(H7=C8&3Zd%Nz)&|AqWZ^e0x`@L_*t`me(2^j$n( z67TfS#5?^jaY{cX_0x~h-*Id-R^W^1%Q#m!KAbBYA7Y&2!Exkxy!FN#U0y_6JsuCI zzYKch8{za<$v+uR|265qhSR&h(Lc(*_6YiVBgCb%ks;f{kzNZtN-J$%knq>Zx3BQ7CKLtzwMmf5thHZ|ELJd-#>CpgyoBV zV9Kv}!IZ!0_on=Y|1;&QBc}Wv=b7^NTwuz7c8n>%P8kBv_17%;?Hv3B5`Tmf)v#->hePvzjE6Xo@{5i~f_Vsy}<(EDCvdi+T z|HJ5h^Zy#%W1+Wn-)}qT6T0tr_dhSf@{7J<$`}2}lwWbcl)ve}O!*COnDW(KO!+%L zY0BSop(%ge51!NQuKfMWdaQ4W{GV>;&I>*fG3WIUcOT8Q!iT$$G9S72gP7#nk6E9q 
z8unhsW?heUQrFA6Da1wH0R9Hy5f}9-@ShSMaZv@}3xr2pR3Z36;Sm>A1inak#6>Lz zzgT$0MJ)lp1biwMClpe8ZJ5N$KnJ}xOnB&^*ML! zzg&34MXdn8LU_bQ-3b0h;Sm?L68uW=DLnKKDUC_77j)2=6dpQgObQPjG$w_I4jPlf zLkEpX;h}@Zr0~!|V-kD{51pt(3J;yAgNe;rkJzk@L2Oo45S!%(u~~87#Aa>sO>9<; zZ(_5y_^o2IHaMyj_AM2A-{PoM;8zL%X-AcTFB2ZCYgU6_Eqt}3)_`9l{H>0<3H(jM zZ+28U_;TTIb5sTR3h=3QpU}a^^ik+wWBMp`urYlUI@p*#3LR`rAB7G!rjJ4g8`DRj zgJ3$f?z0nhNUi(qL>9QIrbey#AZ=Q{A~goizE27j~gu;+U4>xG9sH-O(DJnXp<{6^tn&s)IX z0^Y`yjca1c#x*fzVVaZOCwxF)7-ToY3^u8AqZwT-FH(!s`5XXzlAiffE)3}UmY zd=s1H2eDak-^6BZ@=a`3jc;PJw)m}Lvw*28*tkk;4tstY{HKM7J$>-L@UUkLd`x)Q zGY&p3JnUHwzFK(La})SY!o!}og1;5KjVT+~#FUL|V#>xfF=gYLn6hzAOxd_5rfggj zQ-W(7Q=O%Qjj7JkK`>=vvo`woXlzvlu~~i)n-%v>Y}O_p&-%5WGO<}({8q79z*G%v zTq8DzJvW2jEIjPF1^gD_Vb9yZ-zGflc{}*qg@--w0Dp(@u;*6rTZM-`?*xA*cpFnT zu8Aod*Tj^KYhudAH8Ex5nwYY2O-$LiCZ+_}Hl{jD2OCqJrGsEmJWg`6PvX$h|Q`BVzc}pHY@I%*sM+dfxu6h*sLvntJo}HYA0;GQ)~`< z-Ut3Z;bG5O@U_Cjp7(>lUwGKF4t$;Pu;*vNe^z+d^8xS=2oHOH4*ci9+nBO(O-$Li zCZ=p$6H_*>i76Y`#FUL|V#>xfF(tURG1XZ**qG`p9b`@9Yj}TBKWZf|@cVLZt4$5a%Hl#euu^;spJ=AXx%Bhpo&ve$85!A(c7vkBz?#o4s`|ZKI7Us3B zU%nwpS=|@tm*o59leEKhwo_=e(}3smY^NUWB)Q(Qop$VRR8pR8HJ~m1y~^NwaeS{h z8g|Hq9Ueo!MfdrSaUG_K!{OL47AJS5iN#6DTl()gi9Dapiu|EK=Q`?qsQvmS_T~^` zZ)9xPeiP&wi^F4y$4Md{NAx|0x+ONJm-@u>iD$cJGCrpVj_-fAd&e!BMFeXIs+;Dk-XCt%sWHCU3X||ifG|B+e zb&TN=95-ODX&fUDc-Upf3>6yrdQ#-=ZW}`+m~O<})R?yFmwMP2)|(O3#aJZ4a|7n4 z#`8()ml!3%bR%@tm~N+?{Gc7`iayq}9l>)W+R=FKpdG>UK8{Iy-(-!u)sF$�Pcf z_ZQ#pyNlZHsEgl;CHAF1_BUe{aC1H4F0Mxm#-~6(1$rp(U2H=EqlHK>MEYXT#fZ-+ z1}z4Ey|>S&th7VfrN~=~yc@i4`O9&<9LJyXp3>{hL~mm9M&#Xyx(d7|zZA!%kSp}| zYq?Z?kSjx88S;v}@9Om<%&T7nEKe}Lcn$K`Ab+vai~DIm40-xt6Ti}uSOD1th`-85y7pft zztP08v?NRn%fZA|G9RI9m%yi+ac)U)EtlZG4)cTfFEsT%VQkQn;94#-?r0NhxdiL= zn6nz|C((;zK)u*c=zW4-g8e$oV~zcwU0XL>2f=b;oq4Gk-dZ(uuk22yAIM@xb}itQjR^{r=v z+m;0VllX*2=qNtHVEJHzK1zH-1AK<|38t>y2GcDG`YO|>mIQs3_=HCIHSH7Lp+4dZ z8sO{b3)p|?zT4KxZd-goBlM#$c&GgrAJB;Qv=8W@J@Eni=+ zz8%M9-m57ez<$sNY(?Hy*l)G>YkwP#w?S@=_nMZ|=N8rnxx0{e7xHfMe(P_?@pc@S 
zd%x4?G<~jOeVE%jkhcT%Rd|2!cj9;_Za;v$2T~QOM=)oV{tG+ESs@tNf67X?=18}4*Cw;wl(C2s8k(T;4`}J5S zcKx>*d-yV4dG&hwM`~K;~1;j=Rx&JrzH~HM)gAaMEw_{;uOks88Ei zvw4p?>BKzOK{B7;RM)X)!MEGzpY2Zbl*GT7ecQ1r})^JcTC5I7(C2P=X%H@Z>C5PUu7NCZOaHY9$HXV`O}iFiT{iK2Ez;S4;+EsI z7;ATl*XxkZvEaDSzu0)Ob+Fs~XSGK^@?N$_Uoi|AY%kU`odmd`mdn9OI5wuMlh<!xWB^s(+#+orG3<0`(Qowg<3%x;%HL$?c`VaJ=T!JNIr>k*T9v!xin zHUH_jTfLq!uBjFqwv9I{2X^lC&W+jeX3J2v{J$J`o0o=fy$Wu2q}%akmB7qryk4>8 zkgfc)5z~f??tY9;jVrhr=b48Z>>Z6_JLvdX3!lM2Ib&u z)`QOol3k}s_r)N zW;|aK@AS{aJN++lNy`P~_8YJ7WxQEB)_#4o z7g*=*N0y2&LcCc|*rGG`M6=_~GNHpqdx3S{eq<>;Al^*RO^J&`++82orjPK5yXy!1$KOYJMJ?u7~>-Q z+i{|Dtb(Ri-skmNC|IuDx#MD^WSN6CgrpAGnJ?@C9@!(~TJ7Vf$@Z96h##AoSWS>1^ zYBYG+XOEc5126mR5mRHp%RYPmhz>TUI!gz^l!-Sx9K@S_v=^A{aYsx|hP`EvJ7Q`I zc-iBQn3@Wnd)(QWnt(LfXOEb=1ib9CM@&rwFZ=8fQCh_P>`AQk4Av5+X9V$PPd?O76`L3bK11d+EAl$N!KCYW zvri+=iTe^sd`{hySYyg~Gb4LKyjj&-R=gS7O~vLwo^cL*PHFEgBx{oE9x-c@^C5?K zJbSsvQK2c*NQyKaZzknsZF2o1!P?}L)USg2GJ?8TudGY1dnAZaXs3ROkC8RW_0Uhp zC!D06{Gc7`%D4!&BkPgt(T-k^+)g{~#+#un9dDM&wL}j#G2Uz~?7)4VQt@V}v%Pq; z%0HWUvy?pEW9T>n)Y)FV8RhwGR^(IhX4DyLj@!nwQT~K@Gt}K-ycy~{t#~uk&398L z#GC#2L`?7dF)+Qpop>{(>36;+uXViHj{`g#nU3Sl&~6ITD8qNXt>Vpqc^z+tJYcqU zyqS@&FEn{4k2k~I)a&i7`cv^{LA@D4U9IBHFgJC~%}MH)_$w{bt~ec?aM))?ER%<&R#;ynUQ%a7kTaN1(x#NFTrOp_80Q>#XTW!@BPJe z`rbm5-K%n==5V(NRMFY4)wn6|e2hNXNs=B>v2 zN%T^IUKuCRE9K8Imo@%_c5U5k9R&aN&{O08B<*%I9x4BhcJ+Mdpk0{{4fI{`>0>c2 z?d{Q)iaDciLLA8i$k}_g4FEp?d$V!BwN%U*#~iYm$jii7x7~j&6?29<8FMxma)Tkq z*xM6g&R7>@44zNir;Td~bj%s+O5yp$ecLFfvCaA*XRvGbag()E2D8Wab2Ia@bN7|w z`SAUq2YrS18-D~|u$K|na8HEYWUsXhla|^;DIa;V_Zoe}zCR-7%*gDWg1q+jViVuc z@JHAZ^I7&x%7c6!@WH*&*p6P4ZD_s0t=TJ0d_p616rV62Wyb@b+&@ixLIZq;_6eqr z-3HSq?yDv~p%H#f`vfCjcdGh`FKB?T(=lg3d$vw?+u{ovp`VU9dx!0b4`@Vt+6Q#d zp7?-$^!M-qGcZ=|?S&^kfc>Bkm^Wet=^vF=qykX3sqO0*N`BhI*y}ckkx@ zd1elE?mm9>G4BUGLh1|6KlojJ^P;31PvV)Dd%L?CJLcRu(0vH;ms{K26U~k}vt!Kc zSTZ|?>}0V@TAj32AvQlKx)P@!6kP?g#E!(p*l{q#PJ8h#RY-3y-hyk< z+l#MwyLbxLWyeo+6emIXws8@}rNr6TaW%9<@oa?0d{0F=zIR%P 
z?`rAiV2=cH$ZJ3hBGdTJhw=?bljqxf-=yn9+9BMlw=K)}Ep5|8zs4V%cKMD&^lSLB zsgHT0UqjH3M)X7UYY6(m=l!BzW01BFX`-JkC;HiGqMx0nZHo5*8e^auV}@_wI|}dy zV>s3r1I5_ylJ^Rv;0?wC85PK|Jj&FQQbyY}kjW2Z3IiEZ%4mJ?99CltRAbEW`}q7g zg)tm!jDezD>X~<;;h)eL3uIIv!}2K8NQyEBV}VS5AX6B~kW!`<#$F0AhG$!1!#X@a z(U?G<*sw0JbvE=c|O zd{6Y_vpmiLKBE);_?$}1Ay4$P1m+=}v z_GRyhJlTK!lQ@?B)_Gkg`>LOdblFS&JRHmZ+`Pt?{kYSR{?FQr`~9pR=llCvKe?tQ zc4QClOxQ;DEGAxLZ|)0_E_)UeFS6(JAf(Hl#jGa>G3wkmmwSxLKDn$X7qRTz6L&Du zW$#r{h;V7m?d^(Pw1hrtLyBZ0uz_A$25Je%oW<9y}g=qkuLj0b1uq$!g)xS{ew9d->ZFtKgjwM?;m9S zWe#M+-u7O^t)4|}Fne3Kx5qH|UwylMZ1a%+cKg=8-9EMdtY;BCPuTk%)4!*lMXWG; zUdvv__FlpC1@@lB@3(zmd0qZt?HlLShqZ6|z6|?C>-$7deV=If2JT;-x=+M0?y>6H z?nNzoP3!xzKt_S$9%~oNqf9+1cypf^$m9nyg@FtyWwbu{{=dFY1l9M6hToTAc=Id* z$EdrZ?S2BXcYxL>kWrwhJGHj~>j7`>6FaD<)qUc);68EO@%u3D^Puk&ktg?I+@nE1 zi@50ceHix%&~nI=`!Mczz%=e-AopRmoZN@mX>uQCr={)_FAwe$FF&pyzuV6?_&t8n zkKf+c_lf97>OL{(2fuYM`tkeqEX(iGi+=nzysi&vqMw~6`q^pPruYWF#u%u^nBg1v zef|{2aI7%~3j6GmZ|aj08``FUj0$8}USlkf;atrRWJueqN9%*{kZX*AYK$3vAHSat z-e3&J8e^cSXK$h58%e>NXAyyn3S?LwWg1B-qxA`7@&lQ|K!%hutuVGIz}TYWHsm+y zIp+NSz1Wc7kJlK3t;L4?7P^*0p4gDzU1u7{p>W+&rZ|V4!EzMex~ckr`J?F z3tgXS_*P=~W69W+&e^UXR>NL!@ztTQ8vcUQ#ZgWdcP64aUntaR!(MRiRH`s~#;j_$ z>&)qA)K*PDBRZRPINe+)(&Y@N+KuY;5YxbiK_ijs66jH{!Fu>(#rguQkKF#Nj zzu4OkJ|AV>@k32sH`JEjn%}QBH$~9i{->uUhSDien9Y zTz$SNsS;>YVV#QAHu5GQ?~>#tcP5j`P`_k9jIKI%Nxy#E;;J6yzJT9D_ z-RyQXQ?2-holuN_mzWm>j6fMYp>( z?tlT?KsUPs2X13}kdOMdtITAkmdmWq+%^Zg$1x5r^oS%QT5eEskj@_z8MJL@_xh%4 z)c~1C@Y{nvf91qJ%lJTMB4j2dC!wBXXlQaM>QTog4IPTU??w4X@p}xvuj1E;-{bgw z4Zp894Nnf&ZD%F3blc#etE;l&S#R5S%Cm1Hk|T6E-S@04)Atc0wlO`(*Y#y5v$b4y zefGARRNo&@^*twIFEu7)s1#ETi zpAkKb{O-u#dKdUN`bU4V6%_eO{iwO?Pqi3|3dLgX-0Q#3#!s@?3|N0xoZt}Uqxd-k? 
z|E5pIUOq>Y-ICBdiT)+AKTPovn;+!QpN_FXAG%*K+dUS}is_gF^s)Pue~)#a|0aHi zV~3Fc*7sVv9J%1lbBygL9X)lw0Bb@ds>678KUz7eT`v4k@!I5ALjs#<%l(_mlUilEm&1jzAQK%v1&zbcJ_#+tCklPEWNq7VCl#_l{KPbc~Nmm zdBun+Ge+hW%(&vR$pteOPAiy}ms>D$-lYY(3kyc&tXiL2kTW{3Z2icBB}EkljZR5v8T;Rt;ZMzWT)hBi*m+X#DRIcKW!hUt5R>Zmg4aC2zOEVKvJu}d~oL|prN}-Ca?dp%2rpF zsI1}*WfdD%sjOvXYqORYRV-J*f2LQKtE}?X#YL4xDyy<&eWjKzTD4*cl1oa7i$Rwo-5eRJ-nkJAyyQ&a&)- z{*!t3iec9T$>Kj{{DSF)?|>MaNclBE`87fL?J`h7J#Cs-S0KG@8bzgiTZp!mT@;kh zW+ChE>p}ZBT4_9#@H5*!LD z_%~IYCH$yn->_;v1|D>XX_(^Rv0C$Vs=9!1s=Cz>%;X>YK?ETN%UdBvb%oKp)xC!9 zev~!(kkkHbS6HcTMf3|!!Qp4%_C)&W{(5}NeqB?@@tr6?M@`}?^drBM{Ua}_PYTSx zZt#^eRkem%_XAMfj?`J8zI25Uw>(j>zHn`IL1cm`6AA8>S?+swPp3@h@4_wS+lva=$oaJvmGLj-4v-Vx+zjtR30hNbn^Q_=ZWf?s?x%u zh$5FHRoM?xk0(jhlpGc6g=jxWeLG2NEf+~rtQU6lgIH^l*y@6sHEUhe;{BlZaLG*t zE)uQ%Afa`>PEO&`!5!=eEv>0_k#3rLu^+^=rq)HeYwF#8P{1shbq3@(o?23ipGQ zHch3nq-h@VEbRv|ZJW+WUDWdXK}{Ry#_q;>$Wz%5a(bi{T^HdYPqZJz^hhhZt-?c| z9sQuDM_O_0NaH6%u79rrEqtzBb#>8A`mu7z)2g`s+21-OE2|5ugMG4c9)M3eUY#DI ztLr=DdGXWN(qmdnOA88*D(c-nrSIRINN!YA7jB)O5;r+5S1(+Wd{aAQh(1|86)LH! 
z*iebpRIi3JOXmDAR9RKJv7n|XDb7)O#bcrBlFF*cnu-nO zH8(yIs@}YQbw$~mm-e4qXkB4jXhV5vaYfa7Ihx4P;ds6q+E86o9Vy(5F_o@~RM!-& zS@$g7`jR4Gz~2ttT(GgIxT*q*rGA1>g^ntvvZ{jL6BSjF^%aFhp2wXpkQykvsOAT_ zD_K)M5r7vyO(sq(Dc&?GJ2y*zs%$K(sxGZ4-|5lcJ?eAkn1A`5HN=ar9{t_uV9Wp< z%r8CXyD#k26JKA$Q8qmy&L-A9o7)kLELgL_i!v|Gnr&VF`mc1+|E`#?E<*HnBfq9a zsuo^AbY?&ovH?gqphBV=gvd|QfG?y@b8CWqIeHx@SzBa z_kw^?z$`+vhXKQ0@!nG2r;7LTc|TaZR|zPFi1(tb$Ndh#gMe|~1HY9J?FU&8_#&V< zN4)ngA?S1hh5=z%A07lu1&j^@Kfo~Xmhv7jpY4E^fMO{4u^n&++X3UeCw<;e1wBK2 zSd%dB83zAlMEwWO72i1E@kgJMHhOtOdrWL=H=-uTeWrw|4h%Xl%Yk7BE_GnO11lXE z71%}i>4D)X(WjItMGpwE?LbV3yba=(_Uc56)NyrU;614ruvav+r=Z>xx3{NkH5(>j ztdJpP;N@mh%6wmw=`*8dPEItMo4YkfG_~i7``Wz=@|*IVI_#`S`rd@UDTQ|_FSn9?Z1 zT)QJ)73m(p39lP>3eh0^*HlO*=+}+hTo2zefxphgTB8{SsM*gKP;cgauEA2pc6GdsM9zC^@klC zlV#v+o!vLikGJ-XqxE*ynWh~*tD)XlW!kJum}cscMBSCf*{1D<4ouU)98G-P7+|RJ zqnsk^pr7+GQT{=T>&=u!XANqqJ!_DE327JuO~W@}kWp{?Wj(ZVz`(s1(C51ybfX6b zE;gIyTF(4Vof?f!o3?e}m8e_d)J>lrjb3ueR@&-$uvIN@Vot+$0iR=FP6YJo7YkqE zJVcE_HuYRZMlt)c-SRi;926^-6nu(Tb+{6)T$( zi&ixWOj+WJYb3-#?o32WnnjjJBuYj^1r{~xZZ#9WCNTswVS8AE!dFrf^^X`4trxpN z?*Uv7wkJfa9c-c{@;;8bZ{T_e*EezP!Syh%Z{d2ReNloYzA=rvhQ!;0i6H7{9RWmN;ZgSq zLf-r9MGN>pifb?AeN4(bLf-E~-Q&2P!1Zlht+<}V^&MQ_ZJ(H!DEWmFA;}N-QnpYu z)R>vblx={rE-R5GakAoBQr;^-N6VX?$d+x{zHG?5uYE#df|M6f%B$`Z9_Rin=tQph zsCXOlzSGXRii%&L{MGhL_O{JRh#l>z*~>R3#NF-9KX~UzLhOWpIrGefxToFp2~R@o z!WiKZL~}bq-0(HU4S&-d-`DEr`0uZuBi_XL-fBM^_VfkAw9m$x@UHON31dcGB2kx! 
z8d)~ieAEcqTlIvY;CiP{@}Ay`@_t-xxSqzN zy>5;{I4-G=F1*k7NS}O*-=9T)yxWl>4jFC{W;W$ByudNdtlhq;)^70GO+FBEm@W?z zaP`{`?DmeW8|ED?=Wv&9gFJ _GkKS=qMBxo6v8m)w3w?p@DMtsDG&woTc0JwF!Z z(Mela0s9&jHMje&_C?`e@1VU-^^J^9OQdgw*im!G=!TWR8BUz#M{%b6#zikljCXKC z4Mx3jV2pSOwm<1b^b>(#ZS9JY58jp@P2V~?@lZWT(N}CQNtpE_gTC&8_Ggwv+Y^ah zC)Q^{*FMA2hSgua>gxIvk^e#wKQSJkv0NVU{DGr7P<}&*Pu_k&hpE-ppOicn^A7@Y&$TN^&VsRqb&-j z8D;ZPRc`TA1$?H$k9&3o_W2Zki)UHj@qG?W;4FJw)syDc%N#l=AHLeD2hEQUAEm$Y zW6gJ2VAaZ_bdIb#TAw&_w4U-$fSj$4{5dGMIP!BWt&aS}X>sIdxy|z(DL?UBJugf7 ziQk5?P<}}hIFz6Dq)GWD9Y=my@5ry|Q+~yFr<7mQq5Qg@^6Pr8^$SmQt(SFOw|)AD zs9Ep!4JWhCGfs4!e}*y{J+1K!XN@bkC#+ygzJh9vql~rF98_>Oad00e4xn1&ec;G7 z)d!AT<9W^+S8#t=!M1z_)fz_`d|QGF?jsKFTZsdx)_5N{Ql36=^!lCw`_=1vF3Ng+ z)AsfHCXQa;EVp?6DAxz^x1=reM>sd`jGX``q1>bJ`~@V z<@(TcxIT0}*N3jBP8Wl|Uf;_po3p+tn_l0<(d(P#Hc$5X^iRZZ^(>R}6Tb~(q5P62 za40|P_4<}{9QkFvBfq9k`4!(G=hHuFI+S16Q+{3F{aHSCLT&BJaeHqYA059nlz2iu z%kdt)eFUH7W4XQ`J?2?{+&)XY_8FcwyngvL%j-|%J;VJhKQ52YST4NZ;Mv4yYV<&@ z824J>k}c*}05h!n@q#_>wF%e%z-$1_S@)KCE8vVTA2zoE&b@QEuMu$Z*Qfjb7I67v z*ZJ5#efA%H?0?Z!ANbfmv*56g{iobC#Lxb->d*DF|K)cN_p|?%-^li}|EeeR{Omt! zp6Y+`D%F2dk?KF~X4QZ8t*ZaR`&9p{9#;LY{l4md!HPdhzLPf`_L1M5d(V}8uKr%O zpU?an)D0bfow)&!W8DbIv645(O4&J9%FeM;c8-;_I9B#QZ_>*?_D`SvnveZ2y6P=C z_RIn$`;?oM?6c~X?3drIWWVwo=la=ySgz_n>T=cp;uWg@q!QJC+6L8s_HC;F!h2Q! 
zs~%DPN6dR!@*Q9Fmelnnw|*dXY&|^0&v%y3`xxnuKks9d>k|H$c3J!JVL8tYXN>SS zj`-7rqjfjDRy%4g_GMCDeefmh%ShZUNjtCk-CNR5*}vh8|DHgdsk$3pt3kiR=+}gP z_sRO}lD2cQ{q%gCY`+|gFP1rLVtdZE1NUWSPONRmy*E^Mq!#yp0=_+JJN7KY(Mt54c0D z4EtlJ{0VTOhqNKyG4_yA!BnoH6UF7&PZU?rX<%;p3xStXc8>%c2^a(n0*(e8{Zi`g z48RQB2dt^~7?j7Le7-f^9*go=lt)@K>~ScM18%xC)1H9x1k?wuupL4$w z>c?73ZR&9-?uWkA5jz*~T);HIG{E72!(Zx*squBhhFW87HO7wE7p-x&8YlW$6Kpls z&e(ZY$X4S89V^QjZ%1sHmFD>GqJ7hB8hFv?++$63qm%TT@yb~oNyY0pJ@F6t*( z*Vyw>o{#cG>pFV@$_r2qSvS~=P+kQ5OzTVbVw4x7KFi9tm!P}^ps z?PVx01O8;I*j|qEa@6NoYweXNuS7Z5y3xJ{=1n`)a*(#JIlH zdg+L9eW~@*8RPm=>!l;c^`*wy5##z&GOdza+YCEw`y$t&1-^YaGnvk z#M)>-ihh(w?mIxX71&!mjP{`ujR5_n2_M0B>@}Xh0^jnK3wmoA3c5qb8z%iI^Yt+@2l|+ zD)q|#*k|pDx0~N#;7fj-KkUVhgdT!(40DFH$xcUk^yP+mskH^~rbp*XU?0Q}J!V{C z1ZG;bb`W?OmSN7aYu$s3?; z(pRt@`})dPfG*a0GyN3pQrf7h+fBL97U!Ig!JN}(=_@!coCPRfp>X!lhiTh&`wH5{ zSl9*an7)Ez!r6iH6%KE=eAEeeCV9K%qOXv;hV0T;IK18RjgWi!kG8wzp`RdM&V}?7 zC&`z7Lh4lei5~ds{X{ok`U}YybI5&0`V6kuEUeds)=pcWZ;pZPvep>pB5RlQ9eV!s z*{Jp**{~bSYO>wupckQUc8Oswwwk3sQ8u&ld~-*rfj))yk^_6W!n)rc3*4M_hPlMr zEq#jCC4CCz%7a{2S`XS6123<_Fqc|!=~FaKe$&%_r$UaatcUDzz?)ian9Hm^b^^Xf z)1yD3Z`3}D{)BTp4mL ze(77Z{^?V~z?*-!5xCy^JNX{52sXpHUyQluyNR0Xm*u+&c`wUoisuU7znJ zS|{`)q`3?-((f#X-=Qt8bm(3Kx_n1T(uF@E-RnR%6LMa5n_+T~iqA9cfB2rF>C>;| z0e?F1>3`@~7|TKbsr}GYv{Og4ecI(^uuIP8T-X?GXMQQ(aR3*r$NM_qBG?jr&|>%? 
z;xBRVmpb^%fKMOGHA5fcwm-L=Zn-`ydFXdeC3)zJ8e!k+U8wWz-FSD%g6`(tiEj|V z`Cm0mAv)h4hHttcAlh4KCx&Ame#|R<(k}V{=e>z|z~4u^{rr0CAgY0b3TX2tI z@|%i2fbU84Nnzmc!ut%Lb@W?&mT{f1elXrE@>RQhr#bA*^%2Z5eG_Hpdz1D}oKyOm zA>I$`=$qJ%^U{(0#v&ci+jEZha2=n6Tx0aDv|-xQVb~A-6UTycRpp<0kxe+V1$&W= zeu`tnne0gVDdLg0TRyiO^iy&kAS?Y;xEH>xKb}tLugF)wH|6)im;OrX(D7H3p^ruP z8HkR>9AoY0;C}Jf@%;g~Bt|_s^c}yIhx(=8#5V`vvPYbL@HJC$zx*-7xBl@pt zs9*VQe18C5^IgNd1n|1=8>ZK1`su%vT?`q?_n>!iPB8o#Z{U}J=1;tVU-AAqZ{RnC zhr9v$F|9NDu{2+nzk_Wrc?18+KCgNM|Hb<^=}%m=;lqB<`k#3N^j&;j(%$KxY47yE zv{U*qO`m>@{*JnlwgO*7U&ggU{cx>NKeTb`gLT}Jx5890Bt z;fGs%mjV_(f4^@gVA|f_OPxilU@CYCKe|6MkAN$`oX^M~i3x1;dm%gt0-}GzM zfAjyU{`Ef9|Bmxi|GO_#{l7Lv^ zzUzMdmelo^eg7-@+~v_EC=2UUF=Vw3Gl#SO2`z?u71SPO5UU-Pup>EbFp zhZXMA`V61AJjb8$q#%@&?4Pc@UL<#7q3p=i{~BNy3^OO z)WnrzfqV(RMEhaAkq z6BxIaeZ>>p3sxTT> zfzNmdmUT=-0x=O<-Xox`<0FQPb6$(Rl#;<1i6JQ8_fp42jFZr1zB6drd_U0jv~$+C zElg$SQ5R2R>nk4`sy|&!+jARr($jAfuwVHEY@7B$*%^m?y!c}| z@1VusOVIZ9c-+685U`sYaQ1xz&cVL~_$9!P9N$GA0ydhD`h3){23(CZ@nlJ&IQ^V4$ECT~RBji42=+U+uw%Yc_}9h7)UdcZ43 zTRGYatY_snJlo>e!j@+!UtEdyO0=)G{@!Mt^v9~LtqN^b;IYQ~2V45^YadDvDx9Zl z&{l(fh1NgX8&Tefa*_2Rxtz}v6aAGi5QqL>ZSiu?X4=7pd+T@ z2s&d|>GcR%+o&KTE+-n&vZ+Y#eB;d`jtKHTH%ZCvhg^d_%bZ^z2#md7mz zpM|G_9LeWgDeMKmGkO3%*oH0Gu!U0VWt^qTy5#SbqG*ev?MCYr=^NTgA4=!%lhAM9 zRxT6>gDcf{zc)R;PA^i|p?w8F1RpKy}$XkX9*Uq@fS@k91K zZkgP?wJ&IeeDnn;JAUm0TES2HfIj$XAJ9gB4l+V=>hL9wB3cao2*~jJ5b(%a+UQf`An0~D$>K+ z-ifxIpjU1E#@>bUF5uNz9Y>#iqzAkvv^Ak^gY}aRD0}RT(cjS*=(rSRn;kLwI;ESA7;RnYrXxn%R=Vhn z(Uz4iI%2eCHTI4eb*aYQ5u+~ESUY3X9es@3-rRD!W%{(`ppQ8vB0ef{f{LnBrTYr@2WkX4q z`YzI4Daw^;8b&+vIplK&GU@MY@*d4^AUy2tx0f7}Sr=D8QmcK)_d z#``$GVO$Qn+$W{&Q!cFgcd+aAmyDHQzG=Todjy{FUg- z{Z+0Hp5b#${QaVg_i=c0A5+WMowr*q?oCTwLw0$F@9=iZ=l+ck&t$YrJjW+*eSRiR zk~jCKrCu@5GTx^L-fn(8znA=+{pl}35BWbf%v$S1JA(4+pBm;@ti$qLP0yD;t1moc z1Z?XQJ0Ez3&l_f)bwr-eD|=WL;GT2=+KT_hFr$`09Q?&Eti^d*y=ByKFIww}d(o?b zxAtERbE{?6(N1o}dC4u7zmEIRNqX3aUIV-vUpLGKE2WP6cB^pD{c!p&`XEh{{)cB` zv|;)(`k!lnOS`52x)x>HC+(Z_nvb(F+Vgd&=lE&2)YEF5pU_^fM?G~xz0kk7?ZqvF 
zo9Ca4AN|N@$&bEb0&K9Sc%K}u3+LQQ;5U$1>v5i^mf`HQxH8_S9CmS=byl5@4J<;x@?RL{?N(|XV*@Eq_jsQ|oIlYv=_}aI z-|L+G{YF7~Hdc%|r_It=a9sTDuk;lPXAgatwq3Wcpk1tmUC@r{ zD>x?pR#^H9hd1p&%h#Q^TQ2$vscXnCeTBoDHmT*4zQXx!pIavS3i8%xSDX)wvH3)K z(^p8nYG2U>M%I-AZ{zorqTod$2K?HO4YSFbApH{8e=_FB`7JfaUWK`$ZIYjS*HEyP&mnD} zJXfcz-D7@gFrTzf`X{#YchvG+UD4Q2->B`j2D~=@LD?z&5#{G^t!2#5Ny@{sbm^zz z&t%Mx!;ki&WpeYT50biud@|C|So&5AWi~j9;Z~W+Yq`vqZLCjAr07k^`EC*g-nPNsz}Kxj z`7W^?Hp6FBBj%pIM9uZf^8JLo@A?SuF@QV%)HUWu>x6!UG-;pMGT{@V5Y;{zvbd(0^(_v`w+;Y0* z`mE%k-#L}!p)Xnt`R-Em%Z%uf~Q-F}W&#{AF+Bx8Pn&oc-<>*%-mEaN%}qn_sk`KrBZ%n#<6zKOEC zV}3ZN^ffx>hwaRfAbpdfCu4s29ON3KZ>0^>o^;F)$HH6+(m(Yg8+9VjFnW=Veu`sc z9tQd;>IC?E-10HzN6SG!CFcRM%9x*C`1Yj}`YZC)?@iR*@%YkTNgX==sv7#(ex5fO z^HYQS#t~lpW`F(bccoKL4t>XOZAAT!3%usdfIBa8`oY(1!u_uCUK#VF=ZOAmGwPc% zz2+9c=E+|3%YgS!@g`$_lwAywF^eY>^P_ckA~8Qn+TCJ)_`IaO(?8SR>3?ab^kbSn z{TTfnbt7#BzKFhzYlZsZTA_Yu#=*D4SA0D6!zO)xdG*Q@ly|_i$ZnI%yrCuyq9Op zs$RzYdRc!c zJUnOjG1o`F>eEVyIvMk$`|ICKN~O)%y*KE`2o$IVt#6Vr(%AR z_;{8`8vxCoVt$Cv^`Y^TF+Y?U=RVzIiim$q%nxYy8S?{r{fhYkZN48I6Z7-ju{PcI z9Bg`14>3QelkeTCt!vEBbB;YLoW5gzz&B~r=)?EzE-^o_c^UJAHrQ;}m>-27&sS}| z$NXSz$}>+F{bbCKLpSKq>JsyVwJGBsdZDjlWVB7ULRM+hJ@5%Td?+i%*fr({e55`1 z!6$jfNuBf*^TQlw$Ss@@j$FZBN{ z(P-R{Mvfxp9ZJUiQ0KtSKwAcM-8~OcGVTX78TT_5cw>Rb_{U@7en<;A%ASwSSH$l* z8TUh4Nqasve-ZJdZId4GlXNxc!f4Yui0B*Iev7ytg|qK6wDpveNc)DC-(oJ|8+7iFJmBZSKA0nj zeB`;9f^lWHDrb=P39XP(`-JJ}J014PJVM$hw7_RbpP*>$Q8s;Sej)7>TH)8EPf+;H zCn}Hj1ugJ(GVaIW=a$LMTl<1m$S31|PLiMY0j=OCeLx@lv=3;bzlRT)4_)%YfHY4yoj`kMw}I5_mo3luADPNDp>; z9q`;arj#wJJW<*YP{%s%N7!dM?8bmaUSlt4(eIQZ@6P8Xq!5A zr){1tveHgZ7g@E<(sp!gf;$$0w$oG0K`rWgI`igvU{7b<$2-#|E%zCAUuV_C?|wE- zyVS7`?$`#-Lr*aSr_%YXj*&TC<)^)$&idDDp!=CP{cJMtg%e}Z(^+?4=c#?2pC0eL zw6F8gse^2ZxY2)MMDeA?%5clm}-Zkf)XBGVZe` zp3mT3{(mPtOX2<`A?WPOSNF+tAljpFa0rPLbZ`i9zhBDZ;DjBVd0X!I1A0#Jv*J=6Ob#hK|4j^p|P}-QfZ{ZmQ?v;(9ENu)By5FtO6$o*!Y|OzC4i5XHPn?iA za!wtbu!EED;1Ci=%7cAaX=8xW#?*Zq_m7h{hO)FVK=e!QdFQKp^?&L(IKshUfAnc3 zL?2~i4o=v?$#-xFiPOc#-gImXds})Ao3Vc)Z31n24x631j$^L%95y?1%{^;9hjFKl 
z`^kC^-FSKq-F12n-E~PD`>kVR*f-SjabJ+~aeq(C$GtqR0q)Uh`M6Id@zAE_m0%(QQuRpTE2t#l$(}0 z7j-^QzN_e5w9L7v^E#5g&OLhp>UB;>e*5tqK<6hOiZ-2x_=_m(yu*C2)A@zZMZL}` zd>+a=Pc7fEbza&u)c?71(w@rlalN0)^67V4+K$d;n}NB}xjShuI!En=sMonWX)ikW z=@``O+?}M8jW}WEk7TYroiCDfau83<+>m2YuX8|>PTs&{bCr_LRM6?3n^f;bWFWqm zIdHjNbgo<^pJzZqgHcQ*lxlzYsUUe>1 z+D=b7PiLUMr<|KJQQuRJ%`obF%c)6P?i`xLJzlO%;&;!DNxRg!R_Sx~TH!qOl(RGk z^*UcF*P_m=nTL9vN0V#uv&x@&I?GRcKb_^*Yak2r?as;BWiMj0%JJG$u1)44JYGJ> zJhUG#f8+7;HU7EwBKVwe=l!IAPwqvOs@$(Sr>8q-CVhcBcju{=50T$~pVzT*U4351 zCcn#&H&uQo0?O}1b>G4~uF3C2l<|$#+&w3&&bcbT{T&N^oR*yML2p!`l$_iaISul6EP2JM#aIo)*rHYtyTBLG1=nO}}{_zlQ; zW&h*R>GGX8&G}B8cJw=px%T9DBHHwK81v-Gy@-pCeuptvo5Vw#{tjc#Io2_koc<1T zo59758O@*o$Kv@t+wW9q(*IjnH6Yz$>-8-VDyFJIla65?L%ML0OZ z!C`;&X(c3%l*hpdJ2?3c4k2;6*x0p>ja_?m4w)B>I%f_tJ%`NUC2b6It>=*Ws3acR z^c*tx73-MOO3$GiPtT#dPS2scE@@+>j*XQbm5;fJq)nhr%g1~|(#9|jEgy5`NIbM@ z`Iwi7bK=Q{|x(F6HQ^P2`PGn-Pb@->-0GiuJ55shAY=~g3dHknItGzc@>;Qc6%FfO^oN{EJbfuL^_KaV!2Ut%4^eHeN%r;k@{13@j^_5Pr^wcQ7P z2cxQ9^V=$>PU=FBalu0ni!ybZRWEk8N0AFKDjsZ4h!}VZ#HqM=qiqJ-E=^o|XCjgC zq$kp$U2){n^z_E4h@;;(a6N?Uo4EGidKlNYa6QsKDlrOugczL|jXsG4?i)c~jE;_O z?5?ZJ{M+jNasM6A+IHfz-vx;aWIz87v%wrax)JbJ^MVT+S?{!i-VTwG$dGs$@r=f$ zkUa`Ln2^Jl@JYNei7~Q$jBiZiuGDyYy=Vc>qqz2B%#R&AX00FK%mmJ?#4ONBc*ZBj zgN`^dYy5bOeINQij_V0r-^SI7>q%VS!S&ttiHV7lcPJ5(ym1d%O@aSg`nY2!KF5}s z$dvu$*h3*T_RP#i);sNzUREMY;$_9N8XJ;he=<4t>_oO~&-P_w?E5fQ^?#3(yWIj?ztoMM`}DXG5PL8*LKH=psQIl0u+m`mM!&YQ+u>gFMD8uO-`FL~3P zyy|YyLi`r_s3rGZ-Br2saO0G@K?`!#lHjo=3l(xSAZS=>J`5R z{Lk0D;x~Z*^|n|12jK62;T5x?iz67{eHh=o*~q8*r^LVnlJSTzV&^tcnk2IA9}^x zfImOr74HC22R{R(4t@?u9sB~2I%twHDQb-OH}yTnz4xDfWBeW75A{6$&+qykW8!~K zzcIRF2|IZ%DQe$-;9VhtgufEv{Z3VP?;EP_-gksJ{Qj|Z3H-k?sH(&%XRa^J z$;!$sUca^=Qhak^qjLX7HKKLfXe z1*{-d)rWO+sJx=4NQ4SEmsf9IFG3~d8$xRfs@DqVU)I-DiBMHVVL?rS2-OsAs*%_Q z>r2<5x~QzGB2rmUSXf$Kg8HJ>8%iQ&rRAiUEJq{-XmEW|IXZ(+O~GowwWwDm1;xot zUc{QWY{|UEk@+i@%=!Q58d_HUl^K%E;#U&Gj)T4dL8Ln z_T}2t-Sm3Z5I^&ie_XHRm7;Ew>zK>&gG8c2_t)!H!wd&sx9k2+wZOMD*6mULdVOmc 
zbP#oaEx(58X!pAMYk#7l_9qS*IR;%Oya*S^rRi&X)i8`U_g~n3gK{P=_SgO{xeoB* z&cEp03MT(0vg7V)qb+7;J z#$H^3hVFI5)&0AhXm{Leo&H(u)!usPXFC~^shpDa_(HX zQ}?Kwe5a0fN<6JU{T!lwXx%xe5;Qyg@iElpPs^<<8&K$}f5yWK;}(tR!0!G34|^5} ARR910 diff --git a/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv b/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv index 514854aa66..19b465cfab 100644 --- a/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv +++ b/hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv @@ -1,9 +1,5 @@ -dtype,hdim_q,hdim_v,mask,border,knl_name,co_name -bf16,64,64,0,0,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink.co -bf16,64,64,0,1,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co -bf16,64,64,1,0,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_rxy_sink.co -bf16,64,64,1,1,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co -bf16,128,128,0,0,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy.co -bf16,128,128,0,1,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co -bf16,128,128,1,0,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_rxy.co -bf16,128,128,1,1,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co +dtype,hdim_q,hdim_v,mask,knl_name,co_name +bf16,64,64,0,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co +bf16,64,64,1,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink_KERNEL_FUNC,BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co +bf16,128,128,0,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co 
+bf16,128,128,1,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy_KERNEL_FUNC,BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index be4d280fa2..26d737d2dd 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -1,11 +1,18 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. -"""Correctness + performance tests for aiter.fmha_fwd_f16 (ASM path). +"""Correctness + performance tests for fmha_fwd_f16 (BF16 ASM, gfx1250). + +Public API: aiter.flash_attn_func (preferred) +Ops layer: aiter.fmha_fwd_f16_asm (low-level, ~v3 style) + Layout convention used in tests -------------------------------- -* i_perm=2 (sbhd): input q/k/v shape [s, b, h, d] ← kernel default -* o_perm=0 (bshd): output shape [b, s, h, d] ← kernel default +The aiter API only accepts bshd shape ([b, s, h, d]). To exercise the +kernel's ability to follow strides for sbhd / bhsd memory layouts, the +test allocates qkv in the chosen `layout` and `permute()`s to bshd shape +WITHOUT calling `.contiguous()` — the resulting tensors are bshd-shaped +non-contiguous views whose `.stride()` reflects the underlying memory. Sink convention --------------- @@ -41,120 +48,108 @@ # --------------------------------------------------------------------------- -# Layout helpers +# Reference implementations. Inputs accepted as bshd (matches kernel API); +# output `out` is bshd; `lse` is [b, hq, sq] (matches kernel layout). +# +# NB: we intentionally *do not* use `aiter.test_mha_common.attention_ref` +# here — although it's mathematically equivalent (sink as virtual KV with +# zero V, see test_mha_common.py:584), running it on gfx1250 + ROCm 7.13 +# triggers a downstream driver wedge that causes the *next* ASM kernel +# launch to hang. 
The pure-einsum impl below is hand-derived from +# fmha_merge_sink_rowwise, runs reliably on gfx1250, and produces +# bit-identical numerics to attention_ref(... .float() ...). # --------------------------------------------------------------------------- -def make_sbhd(*shape, **kw) -> torch.Tensor: - """Create contiguous sbhd [s, b, h, d] tensor.""" - return torch.randn(*shape, **kw) - - -def to_bhsd(t: torch.Tensor, perm: int) -> torch.Tensor: - """Permute a 4-D tensor in `perm` layout to bhsd [b, h, s, d]. +def _ref_attn(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): + """bshd-in / bshd-out attention reference, sink optional. - perm code: - 0 = bshd [b, s, h, d] - 1 = bhsd [b, h, s, d] (no-op) - 2 = sbhd [s, b, h, d] + Math: attn = Q @ K^T, scale = 1/sqrt(d), + denom = sum(exp((attn - max) * scale)) [+ exp((sink_raw - max) * scale)], + out = (exp((attn - max) * scale) / denom) @ V, + lse = max * scale + log(denom). + sink (optional): [hq] fp32, AITER post-scale; converted internally to + pre-scale raw via × sqrt(d) to match kernel ABI. """ - if perm == 0: # bshd → bhsd - return t.permute(0, 2, 1, 3).contiguous() - elif perm == 1: # bhsd → bhsd - return t.contiguous() - elif perm == 2: # sbhd → bhsd - return t.permute(1, 2, 0, 3).contiguous() - raise ValueError(f"unsupported perm={perm}") - - -# --------------------------------------------------------------------------- -# Reference implementations (inputs/outputs in bhsd) -# --------------------------------------------------------------------------- - -def ref_standard(q, k, v, scale, is_causal): - """Standard attention, no sink. 
All tensors bhsd.""" - b, hq, sq, d = q.shape - _, hk, sk, _ = k.shape + b, sq, hq, d = q.shape + _, sk, hk, _ = k.shape if hq != hk: - k = k.repeat_interleave(hq // hk, dim=1) - v = v.repeat_interleave(hq // hk, dim=1) + k = k.repeat_interleave(hq // hk, dim=2) + v = v.repeat_interleave(hq // hk, dim=2) qf, kf, vf = q.float(), k.float(), v.float() - attn = qf @ kf.transpose(-1, -2) * scale # [b, hq, sq, sk] + scale = 1.0 / math.sqrt(d) + attn = torch.einsum("bshd,bkhd->bhsk", qf, kf) # raw, no scale if is_causal: - # Bottom-right causal (matches kernel / poc_kl fmha_causal_mask): - # mask out k > q + (sk - sq) for row q in [0, sq), col k in [0, sk). - # When sq == sk this reduces to the standard lower-triangular causal. - m = torch.triu( - torch.ones(sq, sk, dtype=torch.bool, device=q.device), - sk - sq + 1, - ) + m = torch.triu(torch.ones(sq, sk, dtype=torch.bool, device=q.device), + sk - sq + 1) attn = attn.masked_fill(m, float("-inf")) - lse = torch.logsumexp(attn, dim=-1) - out = (torch.softmax(attn, dim=-1) @ vf).to(q.dtype) - return out, lse # bhsd, [b, hq, sq] + max_attn, _ = attn.max(dim=-1) # [b, hq, sq] + if sink is not None: + sink_raw = sink.float() * math.sqrt(d) # [hq] + sink_raw_bhs = sink_raw[None, :, None].expand(b, hq, sq) + max_total = torch.maximum(max_attn, sink_raw_bhs) + else: + max_total = max_attn + denom_real = torch.exp((attn - max_total.unsqueeze(-1)) * scale).sum(dim=-1) + if sink is not None: + sink_term = torch.exp((sink_raw_bhs - max_total) * scale) + denom_total = denom_real + sink_term + else: + denom_total = denom_real + probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) \ + / denom_total.unsqueeze(-1) + out = torch.einsum("bhsk,bkhd->bshd", probs, vf).to(q.dtype) + lse = torch.log(denom_total) + max_total * scale # [b, hq, sq] + return out, lse -def ref_with_sink(q, k, v, scale, is_causal, sink_post_scale: torch.Tensor): - """Attention with sink mechanism matching fmha_merge_sink_rowwise. 
+def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = ""): + """bf16-safe wrapper around checkAllclose. - sink_post_scale: [hq] fp32, AITER post-scale convention. - Internally converted to pre-scale: sink_raw = sink_post_scale * sqrt(d). - The sink adds to the softmax denominator with zero value contribution. + On gfx1250 + ROCm 7.13 some bf16 element-wise GPU ops (isnan / isclose / + contiguous) deadlock when invoked right after a custom ASM kernel. The + deadlock is unrelated to fmha_fwd_f16 itself (it has been reproduced with + pure-PyTorch programs). As a workaround we cast both tensors to fp32 on + CPU before comparing — this avoids triggering the buggy GPU bf16 path. """ - b, hq, sq, d = q.shape - _, hk, sk, _ = k.shape - if hq != hk: - k = k.repeat_interleave(hq // hk, dim=1) - v = v.repeat_interleave(hq // hk, dim=1) - - qf, kf, vf = q.float(), k.float(), v.float() - attn = qf @ kf.transpose(-1, -2) # [b, hq, sq, sk] (pre-scale raw) - - if is_causal: - # Bottom-right causal (matches kernel / poc_kl fmha_causal_mask). 
- m = torch.triu( - torch.ones(sq, sk, dtype=torch.bool, device=q.device), - sk - sq + 1, - ) - attn = attn.masked_fill(m, float("-inf")) - - # Convert sink from AITER post-scale to pre-scale raw - sink_raw = (sink_post_scale * math.sqrt(d)).float() # [hq] - # Broadcast to [b, hq, sq] to match per-row max - sink_raw_bhs = sink_raw[None, :, None].expand(b, hq, sq) # [b, hq, sq] - - # Compute softmax max over real tokens - max_attn, _ = attn.max(dim=-1) # [b, hq, sq] - # Effective max including sink (pre-scale domain) - max_total = torch.maximum(max_attn, sink_raw_bhs) # [b, hq, sq] + a32 = a.detach().float().cpu() + b32 = b.detach().float().cpu() + checkAllclose(a32, b32, rtol=rtol, atol=atol, msg=msg) - # Rescale numerator (O) and denominator (sum): - # row_scale = exp2((old_max - new_max) * scale * log2e) - # = exp(( max_attn - max_total) * scale) - row_scale = torch.exp((max_attn - max_total) * scale) # [b, hq, sq] - # Standard softmax numerators (rescaled) - probs_unnorm = torch.exp((attn - max_total.unsqueeze(-1)) * scale) # [b,hq,sq,sk] - probs_sum = probs_unnorm.sum(dim=-1) * row_scale # wait, already accounted for - - # Re-derive carefully using max_total directly: - # exp((x - max_total) * scale) for each attn score x - # sum of these = denom_real - denom_real = torch.exp((attn - max_total.unsqueeze(-1)) * scale).sum(dim=-1) # [b,hq,sq] - - # Sink term: exp((sink_raw - max_total) * scale) - sink_term = torch.exp((sink_raw_bhs - max_total) * scale) # [b, hq, sq] - - denom_total = denom_real + sink_term # [b, hq, sq] - - # Final probabilities for real tokens only (sink value=0, so no contribution to out) - probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze(-1) +# --------------------------------------------------------------------------- +# Layout helpers +# --------------------------------------------------------------------------- - out = (probs @ vf).to(q.dtype) # [b, hq, sq, d] +def make_qkv_bshd(layout: int, sq: int, sk: 
int, batch: int, hq: int, hk: int, d: int, + dtype=torch.bfloat16, device: str = "cuda"): + """Allocate (q, k, v) in `layout` memory, return **bshd-shaped views**. - # LSE including sink: log(denom_total) + max_total * scale - lse = torch.log(denom_total) + max_total * scale # [b, hq, sq] + The API only accepts bshd shape ([b, s, h, d]). But the kernel reads + strides directly via `tensor.stride(...)`, so the underlying memory may + be laid out differently. This helper allocates contiguous tensors in + the requested layout and returns a `permute()` view (no `.contiguous()`) + so .shape == bshd while .stride() reflects the underlying memory. - return out, lse + layout code: + 0 = bshd → contiguous bshd, strides = (s*h*d, h*d, d, 1) + 1 = bhsd → underlying [b,h,s,d], permute(0,2,1,3) → bshd view + 2 = sbhd → underlying [s,b,h,d], permute(1,0,2,3) → bshd view + """ + if layout == 0: # bshd allocation, naturally contiguous + q = torch.randn(batch, sq, hq, d, dtype=dtype, device=device) + k = torch.randn(batch, sk, hk, d, dtype=dtype, device=device) + v = torch.randn(batch, sk, hk, d, dtype=dtype, device=device) + elif layout == 1: # bhsd allocation, view as bshd + q = torch.randn(batch, hq, sq, d, dtype=dtype, device=device).permute(0, 2, 1, 3) + k = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute(0, 2, 1, 3) + v = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute(0, 2, 1, 3) + elif layout == 2: # sbhd allocation, view as bshd + q = torch.randn(sq, batch, hq, d, dtype=dtype, device=device).permute(1, 0, 2, 3) + k = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute(1, 0, 2, 3) + v = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute(1, 0, 2, 3) + else: + raise ValueError(f"unsupported layout={layout}") + return q, k, v # --------------------------------------------------------------------------- @@ -183,75 +178,160 @@ def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): device = 
"cuda" torch.manual_seed(0) - q_s = make_sbhd(sq, batch, hq, head_dim, dtype=torch.bfloat16, device=device) - k_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) - v_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + # Allocate in sbhd memory but return bshd-shaped views (kernel reads + # strides directly so non-contiguous bshd views work). + q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=device) scale = 1.0 / math.sqrt(head_dim) - # D64 → non-zero sink (exercises ENABLE_SINK code path) - # D128 → no sink (kernel ignores it) + # D64 -> non-zero sink (exercises ENABLE_SINK code path) + # D128 -> no sink (kernel ignores it) sink = _d64_sink(hq, device) if head_dim == 64 else None - # ASM forward: sbhd in → bshd out - i_perm, o_perm = 2, 0 - out_kernel, lse_asm = aiter.fmha_fwd_f16( - q_s, k_s, v_s, - softmax_scale=scale, is_causal=is_causal, - return_lse=True, i_perm=i_perm, o_perm=o_perm, sink=sink, + _r = aiter.flash_attn_func( + q, k, v, + softmax_scale=scale, causal=is_causal, + return_lse=True, sink_ptr=sink, ) + out_kernel, lse_asm = _r[0], _r[1] - # Reference is always bhsd-in / bhsd-out. Convert kernel I/O accordingly. 
- q_b = to_bhsd(q_s, i_perm) - k_b = to_bhsd(k_s, i_perm) - v_b = to_bhsd(v_s, i_perm) + # Reference: bshd in / bshd out (matches kernel layout, no permute needed) + out_ref, lse_ref = _ref_attn(q, k, v, is_causal=is_causal, sink=sink) - if head_dim == 64: - out_ref_bhsd, lse_ref = ref_with_sink(q_b, k_b, v_b, scale, is_causal, sink) - else: - out_ref_bhsd, lse_ref = ref_standard(q_b, k_b, v_b, scale, is_causal) - - out_asm_bhsd = to_bhsd(out_kernel, o_perm) - - checkAllclose(out_asm_bhsd, out_ref_bhsd, rtol=1e-2, atol=1e-2, - msg=f"out mismatch (d={head_dim}, causal={is_causal})") - checkAllclose(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, - msg=f"lse mismatch (d={head_dim}, causal={is_causal})") + _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, + msg=f"out mismatch (d={head_dim}, causal={is_causal})") + _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, + msg=f"lse mismatch (d={head_dim}, causal={is_causal})") def test_fmha_fwd_f16_ops_layer(): - """Direct ops-layer call (sbhd in, bshd out, D64 with non-zero sink).""" + """Direct ops-layer call: bshd qkv (sbhd memory layout), D64 + non-zero sink.""" device = "cuda" torch.manual_seed(0) sq, batch, hq, hk, sk, d = 128, 1, 8, 2, 2048, 64 - q_s = make_sbhd(sq, batch, hq, d, dtype=torch.bfloat16, device=device) - k_s = make_sbhd(sk, batch, hk, d, dtype=torch.bfloat16, device=device) - v_s = make_sbhd(sk, batch, hk, d, dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=d, + dtype=torch.bfloat16, device=device) scale = 1.0 / math.sqrt(d) sink = _d64_sink(hq, device) - i_perm, o_perm = 2, 0 out_kernel, lse_asm = aiter.fmha_fwd_f16_asm( - q_s, k_s, v_s, scale, False, True, - i_perm=i_perm, o_perm=o_perm, sink=sink, + q, k, v, scale, False, True, sink=sink, ) - out_ref, lse_ref = ref_with_sink( - to_bhsd(q_s, i_perm), to_bhsd(k_s, i_perm), to_bhsd(v_s, i_perm), - scale, False, sink, - ) - checkAllclose(to_bhsd(out_kernel, o_perm), out_ref, rtol=1e-2, atol=1e-2) - 
checkAllclose(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) + out_ref, lse_ref = _ref_attn(q, k, v, is_causal=False, sink=sink) + _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2) + _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) def test_fmha_fwd_f16_d64_requires_sink(): - """Calling D64 without a sink tensor must raise an error.""" + """Direct ops-layer call without sink on D64 must raise the C++ check. + + Note: when going through aiter.flash_attn_func, the dispatcher auto-fills + a zero sink for D64, so this error path is unreachable from the public + API — we exercise it via the lower-level ops stub. + """ device = "cuda" - q = make_sbhd(128, 1, 4, 64, dtype=torch.bfloat16, device=device) - k = make_sbhd(2048, 1, 4, 64, dtype=torch.bfloat16, device=device) - v = make_sbhd(2048, 1, 4, 64, dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd(layout=0, sq=128, sk=2048, batch=1, + hq=4, hk=4, d=64, + dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(64) with pytest.raises(RuntimeError, match="D64.*sink"): - aiter.fmha_fwd_f16(q, k, v, sink=None) + aiter.fmha_fwd_f16_asm(q, k, v, scale, False, True, sink=None) + + +# --------------------------------------------------------------------------- +# Memory-layout tests: API takes only bshd shape, but the kernel reads strides +# directly so non-contiguous bshd views (backed by sbhd / bhsd memory) must +# also produce correct results. 3 layouts x 2 head_dim = 6 cases. 
+# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("layout", [0, 1, 2]) +def test_fmha_fwd_f16_layout(layout, head_dim): + device = "cuda" + torch.manual_seed(0) + batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 + + q, k, v = make_qkv_bshd(layout=layout, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(head_dim) + sink = _d64_sink(hq, device) if head_dim == 64 else None + + _r = aiter.flash_attn_func( + q, k, v, + softmax_scale=scale, causal=False, return_lse=True, sink_ptr=sink, + ) + out_kernel, lse_asm = _r[0], _r[1] + + out_ref, lse_ref = _ref_attn(q, k, v, is_causal=False, sink=sink) + + _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, + msg=f"out mismatch (layout={layout}, d={head_dim})") + _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, + msg=f"lse mismatch (layout={layout}, d={head_dim})") + + +# --------------------------------------------------------------------------- +# Integration test: aiter.flash_attn_func -> mha._flash_attn_forward dispatcher +# -> our fmha_fwd_f16_asm branch. Verifies the public-API path on gfx1250 +# matches a direct ops-layer call bit-for-bit (same kernel, same args). +# --------------------------------------------------------------------------- + +def _is_gfx1250() -> bool: + try: + from aiter.jit.utils.chip_info import get_gfx + return get_gfx() == "gfx1250" + except Exception: + return False + + +@pytest.mark.skipif(not _is_gfx1250(), + reason="flash_attn_func dispatch to fmha_fwd_f16_asm only on gfx1250") +@pytest.mark.parametrize("head_dim", [64, 128]) +@pytest.mark.parametrize("is_causal", [False, True]) +def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): + device = "cuda" + torch.manual_seed(0) + batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 + + # bshd input (flash_attn_func contract); contiguous. 
+ q, k, v = make_qkv_bshd(layout=0, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(head_dim) + sink_ptr = _d64_sink(hq, device) if head_dim == 64 else None + + # Direct ops-layer + out_direct, lse_direct = aiter.fmha_fwd_f16_asm( + q, k, v, scale, is_causal, True, sink=sink_ptr, + ) + + # Through public API + result = aiter.flash_attn_func( + q, k, v, + softmax_scale=scale, + causal=is_causal, + return_lse=True, + sink_ptr=sink_ptr, + ) + out_via, lse_via = result[0], result[1] + + # Same kernel, same args -> bit-identical (cast to fp32 to avoid bf16 + # element-wise hang in some ROCm builds). + do = (out_via.float() - out_direct.float()).abs().max().item() + dl = (lse_via.float() - lse_direct.float()).abs().max().item() + assert do == 0.0, ( + f"flash_attn_func != fmha_fwd_f16_asm " + f"(d={head_dim}, causal={is_causal}) max|dO|={do}" + ) + assert dl == 0.0, ( + f"lse via flash_attn_func != direct " + f"(d={head_dim}, causal={is_causal}) max|dLSE|={dl}" + ) # --------------------------------------------------------------------------- @@ -264,17 +344,17 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): device = "cuda" torch.manual_seed(0) - # perf_d64 / perf_d128 in run.sh: batch=2 kv_head_num=8 gqa=8 → hq=64 + # perf_d64 / perf_d128 in run.sh: batch=2 kv_head_num=8 gqa=8 -> hq=64 sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 - q_s = make_sbhd(sq, batch, hq, head_dim, dtype=torch.bfloat16, device=device) - k_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) - v_s = make_sbhd(sk, batch, hk, head_dim, dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=device) scale = 1.0 / math.sqrt(head_dim) sink = _d64_sink(hq, device) if head_dim == 64 else None _, us = run_perftest( - aiter.fmha_fwd_f16, - q_s, k_s, v_s, + aiter.fmha_fwd_f16_asm, + q, k, v, scale, is_causal, 
False, num_iters=10, num_warmup=2, sink=sink, @@ -293,7 +373,7 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, - description="Run aiter.fmha_fwd_f16 on a single shape and dump kernel args.", + description="Run aiter.fmha_fwd_f16_asm on a single shape and dump kernel args.", ) parser.add_argument("-b", "--batch", type=int, default=1, help="batch size (default 1)") @@ -309,10 +389,10 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): help="head dim, 64 or 128 (default 128)") parser.add_argument("-c", "--causal", action="store_true", help="enable causal mask") -parser.add_argument("-i", "--i_perm", type=int, choices=[0, 1, 2], default=2, - help="input layout: 0=bshd 1=bhsd 2=sbhd (default 2)") -parser.add_argument("-o", "--o_perm", type=int, choices=[0, 1, 2], default=0, - help="output layout: 0=bshd 1=bhsd 2=sbhd (default 0)") +parser.add_argument("-l", "--layout", type=int, choices=[0, 1, 2], default=0, + help="input memory layout: 0=bshd 1=bhsd 2=sbhd (default 0)\n" + "(API always sees bshd shape; non-zero layout returns a\n" + "non-contiguous bshd view of the underlying memory)") parser.add_argument("--ref", action="store_true", help="also run PyTorch reference and print max diff") parser.add_argument("--perf", action="store_true", @@ -329,38 +409,31 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): causal = args.causal assert hq % hk == 0, "q_head_num must be a multiple of kv_head_num" print(f"Shape: b={b} hq={hq} hk={hk} sq={sq} sk={sk} d={d} causal={causal} " - f"i_perm={args.i_perm} o_perm={args.o_perm}", flush=True) + f"layout={args.layout}", flush=True) - q_s = make_sbhd(sq, b, hq, d, dtype=torch.bfloat16, device=device) - k_s = make_sbhd(sk, b, hk, d, dtype=torch.bfloat16, device=device) - v_s = make_sbhd(sk, b, hk, d, dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd(layout=args.layout, sq=sq, sk=sk, batch=b, + hq=hq, hk=hk, d=d, + dtype=torch.bfloat16, 
device=device) scale = 1.0 / math.sqrt(d) sink = _d64_sink(hq, device) if d == 64 else None torch.cuda.synchronize() import time as _t t0 = _t.time() - out_kernel, lse_asm = aiter.fmha_fwd_f16( - q_s, k_s, v_s, scale, causal, True, - i_perm=args.i_perm, o_perm=args.o_perm, sink=sink, + out_kernel, lse_asm = aiter.fmha_fwd_f16_asm( + q, k, v, scale, causal, True, sink=sink, ) torch.cuda.synchronize() print(f"asm time: {(_t.time()-t0)*1000:.2f} ms", flush=True) print(f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", flush=True) if args.ref: - # Convert kernel I/O to bhsd; ref is always bhsd-in / bhsd-out. - q_b = to_bhsd(q_s, args.i_perm) - k_b = to_bhsd(k_s, args.i_perm) - v_b = to_bhsd(v_s, args.i_perm) - if d == 64: - out_ref, lse_ref = ref_with_sink(q_b, k_b, v_b, scale, causal, sink) - else: - out_ref, lse_ref = ref_standard(q_b, k_b, v_b, scale, causal) - # cast asm output to fp32 BEFORE permute to avoid bf16 contiguous hang - out_asm_bhsd = to_bhsd(out_kernel.float(), args.o_perm) + # ref takes bshd directly (matches kernel layout). 
+ out_ref, lse_ref = _ref_attn(q, k, v, is_causal=causal, sink=sink) + # cast asm output to fp32 to avoid bf16 element-wise hang in some ROCm builds + out_kernel_f = out_kernel.float() out_ref_f = out_ref.float() - diff_o = (out_asm_bhsd - out_ref_f).abs().max().item() + diff_o = (out_kernel_f - out_ref_f).abs().max().item() diff_l = (lse_asm - lse_ref).abs().max().item() # Pass criterion (bf16 attention conventional thresholds): # |dO| <= 2e-2 |dLSE| <= 2e-2 @@ -375,10 +448,10 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): if args.perf: _, us = run_perftest( - aiter.fmha_fwd_f16, - q_s, k_s, v_s, scale, causal, False, + aiter.fmha_fwd_f16_asm, + q, k, v, scale, causal, False, num_iters=10, num_warmup=2, - i_perm=args.i_perm, o_perm=args.o_perm, sink=sink, + sink=sink, ) flops = 2.0 * b * hq * sq * sk * (2 * d) if causal: From 37805ac13d24b9c610e571c1a1b420015b494c4c Mon Sep 17 00:00:00 2001 From: tingchen Date: Mon, 4 May 2026 15:41:58 +0000 Subject: [PATCH 03/43] fmha_fwd_f16: refine API surface, fix perf timing on gfx1250, polish tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asm_fmha_fwd_f16.cu: - Set args.opt = 7 = (reverse_kv | double_q | remap_xy) so the packed s_opt SGPR matches the launch-time gdx/gdy swap and stays compatible with future _dq variants. Bits 0/1 are compile-time gated off in the shipped _brd_rxy / _cas_brd_rxy[_sink] builds, so this is a no-op for current .co files but documents the invariant. - Always push LSE into the returned vector (caller may ignore it when return_lse==false). Required to keep a fixed-arity 2-tuple return for torch.library / compile_ops schema inference. aiter/ops/mha.py: - fmha_fwd_f16_asm + gen_fake_tensors now declare -> Tuple[Tensor, Tensor] (drops the List import and a conditional return_lse branch in the fake tensor generator). Variadic Tuple[Tensor, ...] is rejected by torch's infer_schema; fixed-arity matches mxfp8 / fmha_v3_fwd convention. 
- Fix misleading docstrings: softmax_scale is forwarded as-is; only sink is multiplied by sqrt(qk_head_dim) to convert from AITER post-scale to the kernel's pre-scale raw-logit domain. op_tests/test_fmha_fwd_f16_asm.py: - Mxfp8-style helper extraction: run_kernel / run_ref / run_cli used consistently by the parametrized tests and the __main__ runner. - Add _bench (cuda.Event timing) and use it instead of run_perftest in the perf test + CLI; the torch.profiler / ROCTracer path on gfx1250 + ROCm 7.x silently drops kernel events ("ROCTracer produced duplicate flow start") so run_perftest reports 0 us / inf TFLOPS — invisible when running pytest without -s. _bench bypasses the profiler. - Add _nrms relative-error metric (matches op_tests/test_mha_mxfp8.py) printed alongside max-diff in correctness tests / CLI --ref output. - Add sanity asserts in test_fmha_fwd_f16_perf so a future regression in timing infra (us == 0 or inf TFLOPS) FAILs explicitly instead of silently PASSing. - Parametrize batch on test_fmha_fwd_f16_correctness ([1, 2]) to catch potential batch-stride bugs. - Move `import argparse` to the top of the file alongside other imports. - Keep _ref_attn as the in-file reference: attention_ref(upcast=True) works numerically but its returned lse is cast back to q.dtype (bf16, see test_mha_common.py:615), introducing ~1 ULP of bf16 quantization on lse (~0.03 absolute for sq=8192 d=128) that exceeds tight CLI thresholds. attention_ref is still imported (with noqa: F401) so the swap is one line if the upstream API stops casting lse. 
Co-authored-by: Cursor --- aiter/jit/optCompilerConfig.json | 2 +- aiter/ops/mha.py | 28 ++- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 18 +- op_tests/test_fmha_fwd_f16_asm.py | 364 ++++++++++++++++++---------- 4 files changed, 273 insertions(+), 139 deletions(-) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 42fe11a69e..81d48cc213 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -5,7 +5,7 @@ ], "flags_extra_cc": [], "flags_extra_hip": [], - "extra_ldflags": "['-L/opt/rocm/lib', '-Wl,-rpath,/opt/rocm/lib']", + "extra_ldflags": "None", "extra_include": [], "verbose": "False", "torch_exclude": "True", diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 10a31ccca5..38e370921c 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. -from typing import Any, List, Optional, Tuple +from typing import Any, Optional, Tuple import torch from torch import Generator, Tensor @@ -275,8 +275,10 @@ def fmha_v3_fwd( # API contract: q/k/v are **bshd shape** ([batch, seq, head, dim]); strides are # read directly from the tensor so non-contiguous bshd-shaped views (e.g. of # sbhd / bhsd allocations) are accepted. Only `tensor.stride(-1) == 1` is -# required. The .cu host driver multiplies softmax_scale by sqrt(head_dim) -# before kernel launch (kernel uses pre-scale convention). +# required. softmax_scale is forwarded to the kernel as-is (the kernel +# applies it internally to Q·K^T before softmax). sink (when provided) is +# in AITER post-scale convention; the .cu host driver multiplies it by +# sqrt(qk_head_dim) to convert to the kernel's pre-scale raw-logit domain. 
# --------------------------------------------------------------------------- def gen_fmha_fwd_f16_asm_fake_tensors( q: Tensor, @@ -287,7 +289,7 @@ def gen_fmha_fwd_f16_asm_fake_tensors( return_lse: bool, sink: Optional[Tensor] = None, out: Optional[Tensor] = None, -) -> List[Tensor]: +) -> Tuple[Tensor, Tensor]: batch, q_seq_len, q_head_num, _ = q.shape d_v = v.size(3) fake_out = ( @@ -295,12 +297,10 @@ def gen_fmha_fwd_f16_asm_fake_tensors( else torch.empty((batch, q_seq_len, q_head_num, d_v), dtype=q.dtype, device=q.device) ) - if return_lse: - fake_lse = torch.empty( - (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device - ) - return [fake_out, fake_lse] - return [fake_out] + fake_lse = torch.empty( + (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device + ) + return (fake_out, fake_lse) @compile_ops( @@ -317,7 +317,7 @@ def fmha_fwd_f16_asm( return_lse: bool, sink: Optional[Tensor] = None, out: Optional[Tensor] = None, -) -> List[Tensor]: ... +) -> Tuple[Tensor, Tensor]: ... def cmdGenFunc_mha_varlen_fwd( @@ -1412,8 +1412,10 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): if can_impl_fmha_fwd_f16(): # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, - # no API-side permute. sink_ptr forwarded as-is (post-scale); the .cu - # multiplies by sqrt(qk_head_dim) before kernel launch. + # no API-side permute. softmax_scale is forwarded as-is (kernel applies + # it internally to Q·K^T). sink_ptr is in AITER post-scale convention; + # the .cu host driver multiplies it by sqrt(qk_head_dim) to convert to + # the kernel's pre-scale raw-logit domain before launch. 
sink_for_kernel = sink_ptr if hdim_q == 64 and sink_for_kernel is None: # D64 kernels always read SINK; auto-fill zero-logit so callers diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index 80f98df3d8..698ede97dd 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -257,7 +257,16 @@ std::vector fmha_fwd_f16(at::Tensor& q, args.stride_k_seq = stride_k_seq; args.stride_k_head = stride_k_head; args.stride_k_batch = stride_k_batch; - args.opt = 0; + // s_opt SGPR (kernarg dword @ offset 0xF0): packs three host-side switches. + // Bit layout must stay in lockstep with poc_kl/.../fmha_fwd_f16.cpp::opt_packed + // and the S_OPT_BIT_* defines in BF16_FMHA_FWD_*.sp3: + // bit0: reverse_kv (compile-time gated by CAS_MASK build; ignored by mask=0 kernels) + // bit1: double_q (compile-time gated by DOUBLE_Q build; ignored by non-_dq kernels) + // bit2: remap_xy (must be 1 — we swap gdx/gdy at launch below) + // 7 = 0b111 enables all three. Safe for the four shipped _brd_rxy / + // _cas_brd_rxy [_sink] .co binaries because bits 0/1 are compile-time + // gated off in those builds; bit2 matches the gdx/gdy swap on launch. + args.opt = 7; args.lse = return_lse ? 1 : 0; args.kv_seq_len = kv_seq_len; args.qk_head_dim = qk_head_dim; @@ -353,7 +362,12 @@ std::vector fmha_fwd_f16(at::Tensor& q, std::vector ret; ret.push_back(out); - if (return_lse) ret.push_back(lse); + // Always return LSE in slot [1]. When return_lse==false the kernel skips + // writing it (args.lse=0) so the data is undefined; callers that don't + // need LSE should simply ignore the second tensor. Keeping a fixed + // 2-tuple return matches torch.library schema requirements (compile_ops + // / infer_schema only accepts fixed-arity Tuple). 
+ ret.push_back(lse); return ret; } diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 26d737d2dd..3a17a93f3b 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -32,7 +32,10 @@ from __future__ import annotations +import argparse import math +import sys +import time as _t from typing import Optional import pytest @@ -45,30 +48,44 @@ import aiter from aiter.test_common import checkAllclose, run_perftest +from aiter.test_mha_common import attention_ref # noqa: F401 (kept for easy swap-back; see doc-block below) # --------------------------------------------------------------------------- -# Reference implementations. Inputs accepted as bshd (matches kernel API); -# output `out` is bshd; `lse` is [b, hq, sq] (matches kernel layout). +# Reference implementation. Inputs accepted as bshd (matches kernel API); +# output `out` is bshd, `lse` is [b, hq, sq] (matches kernel layout). +# +# We default to the in-file `_ref_attn` rather than +# `aiter.test_mha_common.attention_ref` because the latter casts its +# returned `lse` back to q.dtype (bf16) — see test_mha_common.py:615 — +# even when called with upcast=True. That round-trip introduces ~1 bf16 +# ULP of quantization on lse (~3e-2 for sq=8192 d=128), which exceeds +# tight comparison thresholds. `_ref_attn` keeps lse in fp32 and +# matches the kernel to ~5e-6 (essentially fp32 noise floor). # -# NB: we intentionally *do not* use `aiter.test_mha_common.attention_ref` -# here — although it's mathematically equivalent (sink as virtual KV with -# zero V, see test_mha_common.py:584), running it on gfx1250 + ROCm 7.13 -# triggers a downstream driver wedge that causes the *next* ASM kernel -# launch to hang. The pure-einsum impl below is hand-derived from -# fmha_merge_sink_rowwise, runs reliably on gfx1250, and produces -# bit-identical numerics to attention_ref(... .float() ...). 
+# attention_ref is still imported above so it is trivial to swap back +# when (a) the upstream API stops casting lse to bf16, or (b) you only +# need rtol-based comparison (rtol=1% absorbs the bf16 quantization). +# +# Historical aside: an earlier ROCm 7.13 driver could enter a wedged +# state after many ASM kernel launches, after which ANY GPU op (incl. +# attention_ref) would hang in uninterruptible sleep until +# `rocm-smi --gpureset`. The wedge is environmental, not a property +# of attention_ref itself. # --------------------------------------------------------------------------- -def _ref_attn(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): - """bshd-in / bshd-out attention reference, sink optional. - Math: attn = Q @ K^T, scale = 1/sqrt(d), - denom = sum(exp((attn - max) * scale)) [+ exp((sink_raw - max) * scale)], +def _ref_attn(q, k, v, *, is_causal: bool, sink: "Optional[torch.Tensor]" = None): + """bshd-in / bshd-out attention reference, sink optional. Pure-einsum + fp32 implementation; lse is returned in fp32 (matches kernel's output). + + Math: attn = Q @ K^T, scale = 1/sqrt(d), + denom = sum(exp((attn - max) * scale)) + [+ exp((sink_raw - max) * scale)], out = (exp((attn - max) * scale) / denom) @ V, lse = max * scale + log(denom). sink (optional): [hq] fp32, AITER post-scale; converted internally to - pre-scale raw via × sqrt(d) to match kernel ABI. + pre-scale raw via x sqrt(d) to match kernel ABI. 
""" b, sq, hq, d = q.shape _, sk, hk, _ = k.shape @@ -77,14 +94,14 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): v = v.repeat_interleave(hq // hk, dim=2) qf, kf, vf = q.float(), k.float(), v.float() scale = 1.0 / math.sqrt(d) - attn = torch.einsum("bshd,bkhd->bhsk", qf, kf) # raw, no scale + attn = torch.einsum("bshd,bkhd->bhsk", qf, kf) if is_causal: m = torch.triu(torch.ones(sq, sk, dtype=torch.bool, device=q.device), sk - sq + 1) attn = attn.masked_fill(m, float("-inf")) - max_attn, _ = attn.max(dim=-1) # [b, hq, sq] + max_attn, _ = attn.max(dim=-1) if sink is not None: - sink_raw = sink.float() * math.sqrt(d) # [hq] + sink_raw = sink.float() * math.sqrt(d) sink_raw_bhs = sink_raw[None, :, None].expand(b, hq, sq) max_total = torch.maximum(max_attn, sink_raw_bhs) else: @@ -95,13 +112,15 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): denom_total = denom_real + sink_term else: denom_total = denom_real - probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) \ - / denom_total.unsqueeze(-1) + probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze(-1) out = torch.einsum("bhsk,bkhd->bshd", probs, vf).to(q.dtype) - lse = torch.log(denom_total) + max_total * scale # [b, hq, sq] + lse = torch.log(denom_total) + max_total * scale return out, lse + + + def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = ""): """bf16-safe wrapper around checkAllclose. @@ -116,6 +135,45 @@ def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = " checkAllclose(a32, b32, rtol=rtol, atol=atol, msg=msg) +def _nrms(actual: torch.Tensor, expected: torch.Tensor) -> float: + """Normalized RMS error on fp32 CPU tensors (avoids bf16 GPU element-wise hang). 
+ + Definition matches op_tests/test_mha_mxfp8.py: + nrms = sqrt(sum((|a-b| / max(|b|, eps))^2)) / + (sqrt(numel) * max(|a|.max, |b|.max, eps)) + A small relative metric (~1e-3 for bf16, ~1e-6 for fp32) regardless of + output magnitude — useful complement to the absolute max-diff check. + """ + a32 = actual.detach().float().cpu() + b32 = expected.detach().float().cpu() + abs_diff = (a32 - b32).abs() + eps = 1e-7 + max_item = max(a32.abs().max().item(), b32.abs().max().item(), eps) + sq_diff = (abs_diff / b32.abs().clamp(min=eps)).pow(2) + return (sq_diff.sum().sqrt() / (math.sqrt(b32.numel()) * max_item)).item() + + +def _bench(fn, *args, num_iters: int = 10, num_warmup: int = 2, **kwargs) -> float: + """CUDA-Event-based per-iter timing (us). + + Bypasses run_perftest because torch.profiler / ROCTracer drops kernel + events on gfx1250 + ROCm 7.x (warning: "ROCTracer produced duplicate + flow start"), making run_perftest report 0 us / inf TFLOPS. + """ + for _ in range(num_warmup): + fn(*args, **kwargs) + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(num_iters): + fn(*args, **kwargs) + end.record() + end.synchronize() + return start.elapsed_time(end) * 1000.0 / num_iters # ms->us, per-iter + + + # --------------------------------------------------------------------------- # Layout helpers # --------------------------------------------------------------------------- @@ -152,26 +210,63 @@ def make_qkv_bshd(layout: int, sq: int, sk: int, batch: int, hq: int, hk: int, d return q, k, v +def _d64_sink(hq: int, device: str) -> torch.Tensor: + """Non-zero sink for D64: fixed per-head values in AITER post-scale domain. + + Values in [0.5, 2.0]; varies across heads to exercise broadcast. 
+ """ + return torch.linspace(0.5, 2.0, hq, dtype=torch.float32, device=device) + + # --------------------------------------------------------------------------- -# Correctness tests (sbhd input → bshd output, compare against bhsd reference) +# Kernel / reference helpers (mxfp8-style: one-line wrappers used by tests). # --------------------------------------------------------------------------- -def _d64_sink(hq: int, device: str) -> torch.Tensor: - """Non-zero sink for D64: fixed per-head values in AITER post-scale domain.""" - # Use values in [0.5, 2.0] post-scale; vary across heads for thorough test - return torch.linspace(0.5, 2.0, hq, dtype=torch.float32, device=device) +def run_kernel(q, k, v, *, scale: float, is_causal: bool, + sink: Optional[torch.Tensor] = None, + via: str = "ops"): + """Call the kernel and return (out, lse). + + via = "ops" → low-level aiter.fmha_fwd_f16_asm + via = "public" → public aiter.flash_attn_func (dispatcher → asm path) + """ + if via == "ops": + return aiter.fmha_fwd_f16_asm( + q, k, v, scale, is_causal, True, sink=sink, + ) + if via == "public": + r = aiter.flash_attn_func( + q, k, v, + softmax_scale=scale, causal=is_causal, + return_lse=True, sink_ptr=sink, + ) + return r[0], r[1] + raise ValueError(f"unknown via={via!r}") +def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): + """Reference (out, lse) computed on the same bshd tensors via the in-file + `_ref_attn`. See doc-block above for why we don't use + `aiter.test_mha_common.attention_ref` directly. 
+ """ + return _ref_attn(q, k, v, is_causal=is_causal, sink=sink) + + +# --------------------------------------------------------------------------- +# Correctness tests (sbhd input → bshd output, compare against bhsd reference) +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) @pytest.mark.parametrize( - "batch,hq,hk,sq,sk", + "hq,hk,sq,sk", [ - # Shapes from run.sh aligned tests: batch=1, kv_head_num=4, gqa=16 + # Shapes from run.sh aligned tests: kv_head_num=4, gqa=16 # → q_head_num = 4 * 16 = 64 - (1, 8, 1, 128, 2048), # aligned (test_d64 / test_d128) - (1, 8, 1, 130, 2048), # q unaligned: sq not mult of 128 - (1, 8, 1, 128, 2300), # kv unaligned: sk not mult of 256 + (8, 1, 128, 2048), # aligned (test_d64 / test_d128) + (8, 1, 130, 2048), # q unaligned: sq not mult of 128 + (8, 1, 128, 2300), # kv unaligned: sk not mult of 256 ], ) def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): @@ -189,20 +284,19 @@ def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): # D128 -> no sink (kernel ignores it) sink = _d64_sink(hq, device) if head_dim == 64 else None - _r = aiter.flash_attn_func( - q, k, v, - softmax_scale=scale, causal=is_causal, - return_lse=True, sink_ptr=sink, + out_kernel, lse_asm = run_kernel( + q, k, v, scale=scale, is_causal=is_causal, sink=sink, via="public", ) - out_kernel, lse_asm = _r[0], _r[1] + out_ref, lse_ref = run_ref(q, k, v, is_causal=is_causal, sink=sink) - # Reference: bshd in / bshd out (matches kernel layout, no permute needed) - out_ref, lse_ref = _ref_attn(q, k, v, is_causal=is_causal, sink=sink) + nrms_o = _nrms(out_kernel, out_ref) + print(f"[corr d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}] " + f"nrms(out)={nrms_o:.3e}") _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, - msg=f"out mismatch 
(d={head_dim}, causal={is_causal})") + msg=f"out mismatch (d={head_dim}, causal={is_causal}, b={batch})") _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, - msg=f"lse mismatch (d={head_dim}, causal={is_causal})") + msg=f"lse mismatch (d={head_dim}, causal={is_causal}, b={batch})") def test_fmha_fwd_f16_ops_layer(): @@ -217,11 +311,11 @@ def test_fmha_fwd_f16_ops_layer(): scale = 1.0 / math.sqrt(d) sink = _d64_sink(hq, device) - out_kernel, lse_asm = aiter.fmha_fwd_f16_asm( - q, k, v, scale, False, True, sink=sink, + out_kernel, lse_asm = run_kernel( + q, k, v, scale=scale, is_causal=False, sink=sink, via="ops", ) + out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) - out_ref, lse_ref = _ref_attn(q, k, v, is_causal=False, sink=sink) _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2) _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2) @@ -261,13 +355,10 @@ def test_fmha_fwd_f16_layout(layout, head_dim): scale = 1.0 / math.sqrt(head_dim) sink = _d64_sink(hq, device) if head_dim == 64 else None - _r = aiter.flash_attn_func( - q, k, v, - softmax_scale=scale, causal=False, return_lse=True, sink_ptr=sink, + out_kernel, lse_asm = run_kernel( + q, k, v, scale=scale, is_causal=False, sink=sink, via="public", ) - out_kernel, lse_asm = _r[0], _r[1] - - out_ref, lse_ref = _ref_attn(q, k, v, is_causal=False, sink=sink) + out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, msg=f"out mismatch (layout={layout}, d={head_dim})") @@ -303,22 +394,14 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): hq=hq, hk=hk, d=head_dim, dtype=torch.bfloat16, device=device) scale = 1.0 / math.sqrt(head_dim) - sink_ptr = _d64_sink(hq, device) if head_dim == 64 else None + sink = _d64_sink(hq, device) if head_dim == 64 else None - # Direct ops-layer - out_direct, lse_direct = aiter.fmha_fwd_f16_asm( - q, k, v, scale, is_causal, True, sink=sink_ptr, + out_direct, lse_direct = run_kernel( + q, k, v, scale=scale, 
is_causal=is_causal, sink=sink, via="ops", ) - - # Through public API - result = aiter.flash_attn_func( - q, k, v, - softmax_scale=scale, - causal=is_causal, - return_lse=True, - sink_ptr=sink_ptr, + out_via, lse_via = run_kernel( + q, k, v, scale=scale, is_causal=is_causal, sink=sink, via="public", ) - out_via, lse_via = result[0], result[1] # Same kernel, same args -> bit-identical (cast to fp32 to avoid bf16 # element-wise hang in some ROCm builds). @@ -352,24 +435,104 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): scale = 1.0 / math.sqrt(head_dim) sink = _d64_sink(hq, device) if head_dim == 64 else None - _, us = run_perftest( + us = _bench( aiter.fmha_fwd_f16_asm, q, k, v, scale, is_causal, False, - num_iters=10, num_warmup=2, sink=sink, + num_iters=10, num_warmup=2, ) flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) if is_causal: flops /= 2.0 tflops = flops / (us * 1e-6) / 1e12 print(f"[perf] d={head_dim} causal={is_causal}: {us:.1f}us, {tflops:.2f} TFLOPS") + # Sanity: catch silent-PASS when timing infrastructure breaks (e.g. profiler + # / ROCTracer drops events → us=0, TFLOPS=inf). Without these asserts the + # test would PASS with bogus numbers. + assert us > 0.0, ( + f"perf timing returned us={us}; timing path broken " + f"(run with -s to see live numbers)" + ) + assert math.isfinite(tflops) and 0 < tflops < 5000, ( + f"TFLOPS={tflops} not finite / out of plausible range; " + f"likely broken timing" + ) # --------------------------------------------------------------------------- -# __main__: CLI single-shape runner +# CLI single-shape runner: shared by `__main__` invocation and ad-hoc usage. # --------------------------------------------------------------------------- -import argparse + +def run_cli(*, batch: int, hq: int, hk: int, sq: int, sk: int, head_dim: int, + causal: bool = False, layout: int = 0, + do_ref: bool = False, do_perf: bool = False) -> int: + """Single-shape runner. + + Returns 0 on success, 1 if --ref check fails. 
Prints a one-line summary + of kernel shape / time and (if requested) ref / perf metrics. + """ + device = "cuda" + torch.manual_seed(0) + assert hq % hk == 0, "q_head_num must be a multiple of kv_head_num" + + print(f"Shape: b={batch} hq={hq} hk={hk} sq={sq} sk={sk} d={head_dim} " + f"causal={causal} layout={layout}", flush=True) + + q, k, v = make_qkv_bshd(layout=layout, sq=sq, sk=sk, batch=batch, + hq=hq, hk=hk, d=head_dim, + dtype=torch.bfloat16, device=device) + scale = 1.0 / math.sqrt(head_dim) + sink = _d64_sink(hq, device) if head_dim == 64 else None + torch.cuda.synchronize() + + t0 = _t.time() + out_kernel, lse_asm = run_kernel( + q, k, v, scale=scale, is_causal=causal, sink=sink, via="ops", + ) + torch.cuda.synchronize() + print(f"asm time: {(_t.time()-t0)*1000:.2f} ms", flush=True) + print(f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", + flush=True) + + rc = 0 + if do_ref: + out_ref, lse_ref = run_ref(q, k, v, is_causal=causal, sink=sink) + diff_o = (out_kernel.float() - out_ref.float()).abs().max().item() + diff_l = (lse_asm.float() - lse_ref.float()).abs().max().item() + nrms_o = _nrms(out_kernel, out_ref) + # Pass criterion (bf16 attention conventional thresholds): + # |dO| <= 2e-2 |dLSE| <= 2e-2 + ok_o = diff_o <= 2e-2 + ok_l = diff_l <= 2e-2 + print(f"ref: max|dO|={diff_o:.4f} {'OK' if ok_o else 'FAIL'} " + f"max|dLSE|={diff_l:.4f} {'OK' if ok_l else 'FAIL'} " + f"nrms(O)={nrms_o:.3e}", + flush=True) + if not (ok_o and ok_l): + rc = 1 + + if do_perf: + us = _bench( + aiter.fmha_fwd_f16_asm, + q, k, v, scale, causal, False, + sink=sink, + num_iters=10, num_warmup=2, + ) + flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) + if causal: + flops /= 2.0 + tflops = flops / (us * 1e-6) / 1e12 + print(f"perf: {us:.1f} us ({tflops:.2f} TFLOPS)", flush=True) + # CLI surfaces the same breakage pytest would: us=0 / TFLOPS=inf + # signals broken timing infra (profiler / ROCTracer event drop). 
+ if not (us > 0.0 and math.isfinite(tflops) and 0 < tflops < 5000): + print(f"perf: WARNING — bogus timing (us={us}, tflops={tflops})", + flush=True) + rc = 1 + + return rc + parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -394,67 +557,22 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): "(API always sees bshd shape; non-zero layout returns a\n" "non-contiguous bshd view of the underlying memory)") parser.add_argument("--ref", action="store_true", - help="also run PyTorch reference and print max diff") + help="also run PyTorch reference and print max diff + nrms") parser.add_argument("--perf", action="store_true", help="run perf benchmark for this shape (10 iters, 2 warmup)") if __name__ == "__main__": args = parser.parse_args() - - device = "cuda" - torch.manual_seed(0) - - b, hq, hk = args.batch, args.q_head_num, args.kv_head_num - sq, sk, d = args.seqlen_q, args.seqlen_k, args.head_dim - causal = args.causal - assert hq % hk == 0, "q_head_num must be a multiple of kv_head_num" - print(f"Shape: b={b} hq={hq} hk={hk} sq={sq} sk={sk} d={d} causal={causal} " - f"layout={args.layout}", flush=True) - - q, k, v = make_qkv_bshd(layout=args.layout, sq=sq, sk=sk, batch=b, - hq=hq, hk=hk, d=d, - dtype=torch.bfloat16, device=device) - scale = 1.0 / math.sqrt(d) - sink = _d64_sink(hq, device) if d == 64 else None - torch.cuda.synchronize() - - import time as _t - t0 = _t.time() - out_kernel, lse_asm = aiter.fmha_fwd_f16_asm( - q, k, v, scale, causal, True, sink=sink, + rc = run_cli( + batch=args.batch, + hq=args.q_head_num, + hk=args.kv_head_num, + sq=args.seqlen_q, + sk=args.seqlen_k, + head_dim=args.head_dim, + causal=args.causal, + layout=args.layout, + do_ref=args.ref, + do_perf=args.perf, ) - torch.cuda.synchronize() - print(f"asm time: {(_t.time()-t0)*1000:.2f} ms", flush=True) - print(f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", flush=True) - - if args.ref: - # ref takes bshd directly (matches kernel 
layout). - out_ref, lse_ref = _ref_attn(q, k, v, is_causal=causal, sink=sink) - # cast asm output to fp32 to avoid bf16 element-wise hang in some ROCm builds - out_kernel_f = out_kernel.float() - out_ref_f = out_ref.float() - diff_o = (out_kernel_f - out_ref_f).abs().max().item() - diff_l = (lse_asm - lse_ref).abs().max().item() - # Pass criterion (bf16 attention conventional thresholds): - # |dO| <= 2e-2 |dLSE| <= 2e-2 - ok_o = diff_o <= 2e-2 - ok_l = diff_l <= 2e-2 - print(f"ref: max|dO|={diff_o:.4f} {'OK' if ok_o else 'FAIL'} " - f"max|dLSE|={diff_l:.4f} {'OK' if ok_l else 'FAIL'}", - flush=True) - if not (ok_o and ok_l): - import sys - sys.exit(1) - - if args.perf: - _, us = run_perftest( - aiter.fmha_fwd_f16_asm, - q, k, v, scale, causal, False, - num_iters=10, num_warmup=2, - sink=sink, - ) - flops = 2.0 * b * hq * sq * sk * (2 * d) - if causal: - flops /= 2.0 - tflops = flops / (us * 1e-6) / 1e12 - print(f"perf: {us:.1f} us ({tflops:.2f} TFLOPS)", flush=True) + sys.exit(rc) From edb9b77bb9dad3d994fad37fa9bd65966491acb4 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 18:14:28 +0800 Subject: [PATCH 04/43] reformat python files --- aiter/jit/utils/cpp_extension.py | 1 + aiter/ops/mha.py | 59 ++-- op_tests/test_fmha_fwd_f16_asm.py | 455 ++++++++++++++++++++++-------- 3 files changed, 371 insertions(+), 144 deletions(-) diff --git a/aiter/jit/utils/cpp_extension.py b/aiter/jit/utils/cpp_extension.py index d784cdcb7d..fdb66bab49 100644 --- a/aiter/jit/utils/cpp_extension.py +++ b/aiter/jit/utils/cpp_extension.py @@ -110,6 +110,7 @@ def get_hip_version(): content = f.read() if "HIP_VERSION_MAJOR" in content: import re + major = re.search(r"HIP_VERSION_MAJOR\s+(\d+)", content) minor = re.search(r"HIP_VERSION_MINOR\s+(\d+)", content) patch = re.search(r"HIP_VERSION_PATCH\s+(\d+)", content) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 38e370921c..2e186cf5c4 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -15,6 +15,7 @@ ) from ..utility 
import dtypes + def cmdGenFunc_mha_fwd( q: Tensor, k: Tensor, @@ -216,7 +217,8 @@ def mha_fwd( v_descale: Optional[Tensor] = None, sink_ptr: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def gen_fmha_v3_fwd_fake_tensors( @@ -266,7 +268,8 @@ def fmha_v3_fwd( k_descale: Optional[Tensor] = None, v_descale: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... # --------------------------------------------------------------------------- @@ -293,9 +296,11 @@ def gen_fmha_fwd_f16_asm_fake_tensors( batch, q_seq_len, q_head_num, _ = q.shape d_v = v.size(3) fake_out = ( - out if out is not None - else torch.empty((batch, q_seq_len, q_head_num, d_v), - dtype=q.dtype, device=q.device) + out + if out is not None + else torch.empty( + (batch, q_seq_len, q_head_num, d_v), dtype=q.dtype, device=q.device + ) ) fake_lse = torch.empty( (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device @@ -317,7 +322,8 @@ def fmha_fwd_f16_asm( return_lse: bool, sink: Optional[Tensor] = None, out: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor]: + ... def cmdGenFunc_mha_varlen_fwd( @@ -550,7 +556,8 @@ def mha_varlen_fwd( cu_seqlens_q_padded: Optional[torch.Tensor] = None, cu_seqlens_k_padded: Optional[torch.Tensor] = None, sink_ptr: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... 
def gen_fmha_v3_varlen_fwd_fake_tensor( @@ -583,7 +590,6 @@ def gen_fmha_v3_varlen_fwd_fake_tensor( cu_seqlens_q_padded: Optional[torch.Tensor] = None, cu_seqlens_k_padded: Optional[torch.Tensor] = None, ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - device = q.device dtype = q.dtype @@ -650,7 +656,8 @@ def fmha_v3_varlen_fwd( gen: Optional[torch.Generator] = None, cu_seqlens_q_padded: Optional[torch.Tensor] = None, cu_seqlens_k_padded: Optional[torch.Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def cmdGenFunc_mha_bwd( @@ -861,7 +868,8 @@ def mha_bwd( gen: Optional[Generator] = None, sink: Optional[Tensor] = None, d_sink: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def gen_fmha_v3_bwd_fake_tensors( @@ -913,7 +921,8 @@ def fmha_v3_bwd( alibi_slopes: Optional[Tensor] = None, rng_state: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def cmdGenFunc_mha_varlen_bwd( @@ -1226,7 +1235,8 @@ def mha_varlen_bwd( cu_seqlens_k_padded: Optional[Tensor] = None, sink: Optional[Tensor] = None, d_sink: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def gen_fmha_v3_varlen_bwd_fake_tensor( @@ -1297,7 +1307,8 @@ def fmha_v3_varlen_bwd( gen: Optional[Generator] = None, cu_seqlens_q_padded: Optional[Tensor] = None, cu_seqlens_k_padded: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... 
def maybe_contiguous(x): @@ -1327,7 +1338,6 @@ def _flash_attn_forward( sink_ptr: Optional[Tensor] = None, out: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - batch_size, seqlen_q, nhead_q, hdim_q = q.shape _, seqlen_k, nhead_k, hdim_v = v.shape if sink_ptr is not None: @@ -1380,7 +1390,7 @@ def can_impl_fmha_fwd_f16(): # gfx1250 ASM bf16 forward (fmha_fwd_f16_asm). Single-shot batched # (no varlen / dropout / swa / quant / alibi / bias). Sink logits # (per-Q-head fp32) supported; sink-token (sink_size) not supported. - ret = (get_gfx() == "gfx1250") + ret = get_gfx() == "gfx1250" ret = ret and (q.dtype == dtypes.bf16) ret = ret and (hdim_q in (64, 128)) ret = ret and (hdim_v == hdim_q) @@ -1422,9 +1432,14 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): # who don't care about sink still hit this fast path. sink_for_kernel = torch.zeros(nhead_q, dtype=torch.float32, device=q.device) _r = fmha_fwd_f16_asm( - q, k, v, - float(softmax_scale), bool(causal), True, - sink_for_kernel, out, + q, + k, + v, + float(softmax_scale), + bool(causal), + True, + sink_for_kernel, + out, ) out_ = _r[0] softmax_lse = _r[1] @@ -1497,7 +1512,6 @@ def can_impl_fmha_v3_bwd( deterministic: bool, is_v3_atomic_fp32: Optional[bool] = True, ) -> bool: - _, seqlen_q, nhead_q, hdim_q = q.shape _, seqlen_k, nhead_k, hdim_v = v.shape batch_stride_q = q.stride(0) @@ -2147,7 +2161,6 @@ def _flash_attn_varlen_forward( zero_tensors: bool = False, sink_ptr: Optional[Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - _, nhead_q, hdim_q = q.shape batch_size = cu_seqlens_q.numel() - 1 @@ -2317,7 +2330,6 @@ def _flash_attn_varlen_backward( sink: Optional[Tensor] = None, d_sink: Optional[Tensor] = None, ) -> torch.Tensor: - _, nhead_q, hdim_q = q.shape nhead_k = v.shape[-2] @@ -2556,7 +2568,6 @@ def forward( sink_ptr=sink_ptr, ) if is_grad: - assert return_lse ctx.save_for_backward( q, k, v, out_padded, 
softmax_lse, cu_seqlens_q, cu_seqlens_k, rng_state @@ -2953,7 +2964,8 @@ def mha_batch_prefill( seqlen_k: Optional[Tensor] = None, sink_ptr: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + ... def _mha_batch_prefill( @@ -2989,7 +3001,6 @@ def _mha_batch_prefill( ] = None, # [num_block, num_kv_head, 2] per-page K/V descales sink_ptr: Optional[Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - q, k, v = [maybe_contiguous(x) for x in (q, k, v)] out, softmax_lse, S_dmask, rng_state = mha_batch_prefill( q, diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 3a17a93f3b..14767058f7 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -48,7 +48,9 @@ import aiter from aiter.test_common import checkAllclose, run_perftest -from aiter.test_mha_common import attention_ref # noqa: F401 (kept for easy swap-back; see doc-block below) +from aiter.test_mha_common import ( + attention_ref, +) # noqa: F401 (kept for easy swap-back; see doc-block below) # --------------------------------------------------------------------------- @@ -96,8 +98,9 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: "Optional[torch.Tensor]" = None scale = 1.0 / math.sqrt(d) attn = torch.einsum("bshd,bkhd->bhsk", qf, kf) if is_causal: - m = torch.triu(torch.ones(sq, sk, dtype=torch.bool, device=q.device), - sk - sq + 1) + m = torch.triu( + torch.ones(sq, sk, dtype=torch.bool, device=q.device), sk - sq + 1 + ) attn = attn.masked_fill(m, float("-inf")) max_attn, _ = attn.max(dim=-1) if sink is not None: @@ -112,15 +115,14 @@ def _ref_attn(q, k, v, *, is_causal: bool, sink: "Optional[torch.Tensor]" = None denom_total = denom_real + sink_term else: denom_total = denom_real - probs = torch.exp((attn - max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze(-1) + probs = torch.exp((attn - 
max_total.unsqueeze(-1)) * scale) / denom_total.unsqueeze( + -1 + ) out = torch.einsum("bhsk,bkhd->bshd", probs, vf).to(q.dtype) lse = torch.log(denom_total) + max_total * scale return out, lse - - - def _cmp(a: torch.Tensor, b: torch.Tensor, *, rtol=1e-2, atol=1e-2, msg: str = ""): """bf16-safe wrapper around checkAllclose. @@ -147,9 +149,9 @@ def _nrms(actual: torch.Tensor, expected: torch.Tensor) -> float: a32 = actual.detach().float().cpu() b32 = expected.detach().float().cpu() abs_diff = (a32 - b32).abs() - eps = 1e-7 + eps = 1e-7 max_item = max(a32.abs().max().item(), b32.abs().max().item(), eps) - sq_diff = (abs_diff / b32.abs().clamp(min=eps)).pow(2) + sq_diff = (abs_diff / b32.abs().clamp(min=eps)).pow(2) return (sq_diff.sum().sqrt() / (math.sqrt(b32.numel()) * max_item)).item() @@ -164,22 +166,31 @@ def _bench(fn, *args, num_iters: int = 10, num_warmup: int = 2, **kwargs) -> flo fn(*args, **kwargs) torch.cuda.synchronize() start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) start.record() for _ in range(num_iters): fn(*args, **kwargs) end.record() end.synchronize() - return start.elapsed_time(end) * 1000.0 / num_iters # ms->us, per-iter - + return start.elapsed_time(end) * 1000.0 / num_iters # ms->us, per-iter # --------------------------------------------------------------------------- # Layout helpers # --------------------------------------------------------------------------- -def make_qkv_bshd(layout: int, sq: int, sk: int, batch: int, hq: int, hk: int, d: int, - dtype=torch.bfloat16, device: str = "cuda"): + +def make_qkv_bshd( + layout: int, + sq: int, + sk: int, + batch: int, + hq: int, + hk: int, + d: int, + dtype=torch.bfloat16, + device: str = "cuda", +): """Allocate (q, k, v) in `layout` memory, return **bshd-shaped views**. The API only accepts bshd shape ([b, s, h, d]). 
But the kernel reads @@ -193,18 +204,30 @@ def make_qkv_bshd(layout: int, sq: int, sk: int, batch: int, hq: int, hk: int, d 1 = bhsd → underlying [b,h,s,d], permute(0,2,1,3) → bshd view 2 = sbhd → underlying [s,b,h,d], permute(1,0,2,3) → bshd view """ - if layout == 0: # bshd allocation, naturally contiguous + if layout == 0: # bshd allocation, naturally contiguous q = torch.randn(batch, sq, hq, d, dtype=dtype, device=device) k = torch.randn(batch, sk, hk, d, dtype=dtype, device=device) v = torch.randn(batch, sk, hk, d, dtype=dtype, device=device) elif layout == 1: # bhsd allocation, view as bshd - q = torch.randn(batch, hq, sq, d, dtype=dtype, device=device).permute(0, 2, 1, 3) - k = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute(0, 2, 1, 3) - v = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute(0, 2, 1, 3) + q = torch.randn(batch, hq, sq, d, dtype=dtype, device=device).permute( + 0, 2, 1, 3 + ) + k = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute( + 0, 2, 1, 3 + ) + v = torch.randn(batch, hk, sk, d, dtype=dtype, device=device).permute( + 0, 2, 1, 3 + ) elif layout == 2: # sbhd allocation, view as bshd - q = torch.randn(sq, batch, hq, d, dtype=dtype, device=device).permute(1, 0, 2, 3) - k = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute(1, 0, 2, 3) - v = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute(1, 0, 2, 3) + q = torch.randn(sq, batch, hq, d, dtype=dtype, device=device).permute( + 1, 0, 2, 3 + ) + k = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute( + 1, 0, 2, 3 + ) + v = torch.randn(sk, batch, hk, d, dtype=dtype, device=device).permute( + 1, 0, 2, 3 + ) else: raise ValueError(f"unsupported layout={layout}") return q, k, v @@ -222,9 +245,17 @@ def _d64_sink(hq: int, device: str) -> torch.Tensor: # Kernel / reference helpers (mxfp8-style: one-line wrappers used by tests). 
# --------------------------------------------------------------------------- -def run_kernel(q, k, v, *, scale: float, is_causal: bool, - sink: Optional[torch.Tensor] = None, - via: str = "ops"): + +def run_kernel( + q, + k, + v, + *, + scale: float, + is_causal: bool, + sink: Optional[torch.Tensor] = None, + via: str = "ops", +): """Call the kernel and return (out, lse). via = "ops" → low-level aiter.fmha_fwd_f16_asm @@ -232,13 +263,23 @@ def run_kernel(q, k, v, *, scale: float, is_causal: bool, """ if via == "ops": return aiter.fmha_fwd_f16_asm( - q, k, v, scale, is_causal, True, sink=sink, + q, + k, + v, + scale, + is_causal, + True, + sink=sink, ) if via == "public": r = aiter.flash_attn_func( - q, k, v, - softmax_scale=scale, causal=is_causal, - return_lse=True, sink_ptr=sink, + q, + k, + v, + softmax_scale=scale, + causal=is_causal, + return_lse=True, + sink_ptr=sink, ) return r[0], r[1] raise ValueError(f"unknown via={via!r}") @@ -256,6 +297,7 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): # Correctness tests (sbhd input → bshd output, compare against bhsd reference) # --------------------------------------------------------------------------- + @pytest.mark.parametrize("batch", [1, 2]) @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) @@ -264,9 +306,9 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): [ # Shapes from run.sh aligned tests: kv_head_num=4, gqa=16 # → q_head_num = 4 * 16 = 64 - (8, 1, 128, 2048), # aligned (test_d64 / test_d128) - (8, 1, 130, 2048), # q unaligned: sq not mult of 128 - (8, 1, 128, 2300), # kv unaligned: sk not mult of 256 + (8, 1, 128, 2048), # aligned (test_d64 / test_d128) + (8, 1, 130, 2048), # q unaligned: sq not mult of 128 + (8, 1, 128, 2300), # kv unaligned: sk not mult of 256 ], ) def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): @@ -275,9 +317,17 @@ def 
test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): # Allocate in sbhd memory but return bshd-shaped views (kernel reads # strides directly so non-contiguous bshd views work). - q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=2, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(head_dim) # D64 -> non-zero sink (exercises ENABLE_SINK code path) @@ -285,18 +335,36 @@ def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): sink = _d64_sink(hq, device) if head_dim == 64 else None out_kernel, lse_asm = run_kernel( - q, k, v, scale=scale, is_causal=is_causal, sink=sink, via="public", + q, + k, + v, + scale=scale, + is_causal=is_causal, + sink=sink, + via="public", ) out_ref, lse_ref = run_ref(q, k, v, is_causal=is_causal, sink=sink) nrms_o = _nrms(out_kernel, out_ref) - print(f"[corr d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}] " - f"nrms(out)={nrms_o:.3e}") + print( + f"[corr d={head_dim} causal={is_causal} b={batch} sq={sq} sk={sk}] " + f"nrms(out)={nrms_o:.3e}" + ) - _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, - msg=f"out mismatch (d={head_dim}, causal={is_causal}, b={batch})") - _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, - msg=f"lse mismatch (d={head_dim}, causal={is_causal}, b={batch})") + _cmp( + out_kernel, + out_ref, + rtol=1e-2, + atol=1e-2, + msg=f"out mismatch (d={head_dim}, causal={is_causal}, b={batch})", + ) + _cmp( + lse_asm, + lse_ref, + rtol=1e-2, + atol=1e-2, + msg=f"lse mismatch (d={head_dim}, causal={is_causal}, b={batch})", + ) def test_fmha_fwd_f16_ops_layer(): @@ -305,14 +373,28 @@ def test_fmha_fwd_f16_ops_layer(): torch.manual_seed(0) sq, batch, hq, hk, sk, d = 128, 1, 8, 2, 2048, 64 - q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=d, - 
dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=2, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=d, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(d) - sink = _d64_sink(hq, device) + sink = _d64_sink(hq, device) out_kernel, lse_asm = run_kernel( - q, k, v, scale=scale, is_causal=False, sink=sink, via="ops", + q, + k, + v, + scale=scale, + is_causal=False, + sink=sink, + via="ops", ) out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) @@ -328,9 +410,17 @@ def test_fmha_fwd_f16_d64_requires_sink(): API — we exercise it via the lower-level ops stub. """ device = "cuda" - q, k, v = make_qkv_bshd(layout=0, sq=128, sk=2048, batch=1, - hq=4, hk=4, d=64, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=0, + sq=128, + sk=2048, + batch=1, + hq=4, + hk=4, + d=64, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(64) with pytest.raises(RuntimeError, match="D64.*sink"): aiter.fmha_fwd_f16_asm(q, k, v, scale, False, True, sink=None) @@ -342,6 +432,7 @@ def test_fmha_fwd_f16_d64_requires_sink(): # also produce correct results. 3 layouts x 2 head_dim = 6 cases. 
# --------------------------------------------------------------------------- + @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("layout", [0, 1, 2]) def test_fmha_fwd_f16_layout(layout, head_dim): @@ -349,21 +440,45 @@ def test_fmha_fwd_f16_layout(layout, head_dim): torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 - q, k, v = make_qkv_bshd(layout=layout, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=layout, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(head_dim) - sink = _d64_sink(hq, device) if head_dim == 64 else None + sink = _d64_sink(hq, device) if head_dim == 64 else None out_kernel, lse_asm = run_kernel( - q, k, v, scale=scale, is_causal=False, sink=sink, via="public", + q, + k, + v, + scale=scale, + is_causal=False, + sink=sink, + via="public", ) out_ref, lse_ref = run_ref(q, k, v, is_causal=False, sink=sink) - _cmp(out_kernel, out_ref, rtol=1e-2, atol=1e-2, - msg=f"out mismatch (layout={layout}, d={head_dim})") - _cmp(lse_asm, lse_ref, rtol=1e-2, atol=1e-2, - msg=f"lse mismatch (layout={layout}, d={head_dim})") + _cmp( + out_kernel, + out_ref, + rtol=1e-2, + atol=1e-2, + msg=f"out mismatch (layout={layout}, d={head_dim})", + ) + _cmp( + lse_asm, + lse_ref, + rtol=1e-2, + atol=1e-2, + msg=f"lse mismatch (layout={layout}, d={head_dim})", + ) # --------------------------------------------------------------------------- @@ -372,16 +487,20 @@ def test_fmha_fwd_f16_layout(layout, head_dim): # matches a direct ops-layer call bit-for-bit (same kernel, same args). 
# --------------------------------------------------------------------------- + def _is_gfx1250() -> bool: try: from aiter.jit.utils.chip_info import get_gfx + return get_gfx() == "gfx1250" except Exception: return False -@pytest.mark.skipif(not _is_gfx1250(), - reason="flash_attn_func dispatch to fmha_fwd_f16_asm only on gfx1250") +@pytest.mark.skipif( + not _is_gfx1250(), + reason="flash_attn_func dispatch to fmha_fwd_f16_asm only on gfx1250", +) @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): @@ -390,17 +509,37 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 # bshd input (flash_attn_func contract); contiguous. - q, k, v = make_qkv_bshd(layout=0, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=0, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(head_dim) - sink = _d64_sink(hq, device) if head_dim == 64 else None + sink = _d64_sink(hq, device) if head_dim == 64 else None out_direct, lse_direct = run_kernel( - q, k, v, scale=scale, is_causal=is_causal, sink=sink, via="ops", + q, + k, + v, + scale=scale, + is_causal=is_causal, + sink=sink, + via="ops", ) out_via, lse_via = run_kernel( - q, k, v, scale=scale, is_causal=is_causal, sink=sink, via="public", + q, + k, + v, + scale=scale, + is_causal=is_causal, + sink=sink, + via="public", ) # Same kernel, same args -> bit-identical (cast to fp32 to avoid bf16 @@ -421,6 +560,7 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): # Performance tests # --------------------------------------------------------------------------- + @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) def test_fmha_fwd_f16_perf(head_dim, 
is_causal): @@ -429,18 +569,31 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): # perf_d64 / perf_d128 in run.sh: batch=2 kv_head_num=8 gqa=8 -> hq=64 sq, batch, hq, hk, sk = 8192, 2, 64, 8, 8192 - q, k, v = make_qkv_bshd(layout=2, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=2, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(head_dim) - sink = _d64_sink(hq, device) if head_dim == 64 else None + sink = _d64_sink(hq, device) if head_dim == 64 else None us = _bench( aiter.fmha_fwd_f16_asm, - q, k, v, - scale, is_causal, False, + q, + k, + v, + scale, + is_causal, + False, sink=sink, - num_iters=10, num_warmup=2, + num_iters=10, + num_warmup=2, ) flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) if is_causal: @@ -455,8 +608,7 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): f"(run with -s to see live numbers)" ) assert math.isfinite(tflops) and 0 < tflops < 5000, ( - f"TFLOPS={tflops} not finite / out of plausible range; " - f"likely broken timing" + f"TFLOPS={tflops} not finite / out of plausible range; " f"likely broken timing" ) @@ -464,9 +616,20 @@ def test_fmha_fwd_f16_perf(head_dim, is_causal): # CLI single-shape runner: shared by `__main__` invocation and ad-hoc usage. # --------------------------------------------------------------------------- -def run_cli(*, batch: int, hq: int, hk: int, sq: int, sk: int, head_dim: int, - causal: bool = False, layout: int = 0, - do_ref: bool = False, do_perf: bool = False) -> int: + +def run_cli( + *, + batch: int, + hq: int, + hk: int, + sq: int, + sk: int, + head_dim: int, + causal: bool = False, + layout: int = 0, + do_ref: bool = False, + do_perf: bool = False, +) -> int: """Single-shape runner. Returns 0 on success, 1 if --ref check fails. 
Prints a one-line summary @@ -476,48 +639,75 @@ def run_cli(*, batch: int, hq: int, hk: int, sq: int, sk: int, head_dim: int, torch.manual_seed(0) assert hq % hk == 0, "q_head_num must be a multiple of kv_head_num" - print(f"Shape: b={batch} hq={hq} hk={hk} sq={sq} sk={sk} d={head_dim} " - f"causal={causal} layout={layout}", flush=True) + print( + f"Shape: b={batch} hq={hq} hk={hk} sq={sq} sk={sk} d={head_dim} " + f"causal={causal} layout={layout}", + flush=True, + ) - q, k, v = make_qkv_bshd(layout=layout, sq=sq, sk=sk, batch=batch, - hq=hq, hk=hk, d=head_dim, - dtype=torch.bfloat16, device=device) + q, k, v = make_qkv_bshd( + layout=layout, + sq=sq, + sk=sk, + batch=batch, + hq=hq, + hk=hk, + d=head_dim, + dtype=torch.bfloat16, + device=device, + ) scale = 1.0 / math.sqrt(head_dim) - sink = _d64_sink(hq, device) if head_dim == 64 else None + sink = _d64_sink(hq, device) if head_dim == 64 else None torch.cuda.synchronize() t0 = _t.time() out_kernel, lse_asm = run_kernel( - q, k, v, scale=scale, is_causal=causal, sink=sink, via="ops", + q, + k, + v, + scale=scale, + is_causal=causal, + sink=sink, + via="ops", ) torch.cuda.synchronize() print(f"asm time: {(_t.time()-t0)*1000:.2f} ms", flush=True) - print(f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", - flush=True) + print( + f"out.shape={tuple(out_kernel.shape)} lse.shape={tuple(lse_asm.shape)}", + flush=True, + ) rc = 0 if do_ref: out_ref, lse_ref = run_ref(q, k, v, is_causal=causal, sink=sink) - diff_o = (out_kernel.float() - out_ref.float()).abs().max().item() - diff_l = (lse_asm.float() - lse_ref.float()).abs().max().item() - nrms_o = _nrms(out_kernel, out_ref) + diff_o = (out_kernel.float() - out_ref.float()).abs().max().item() + diff_l = (lse_asm.float() - lse_ref.float()).abs().max().item() + nrms_o = _nrms(out_kernel, out_ref) # Pass criterion (bf16 attention conventional thresholds): # |dO| <= 2e-2 |dLSE| <= 2e-2 ok_o = diff_o <= 2e-2 ok_l = diff_l <= 2e-2 - print(f"ref: 
max|dO|={diff_o:.4f} {'OK' if ok_o else 'FAIL'} " - f"max|dLSE|={diff_l:.4f} {'OK' if ok_l else 'FAIL'} " - f"nrms(O)={nrms_o:.3e}", - flush=True) + print( + f"ref: max|dO|={diff_o:.4f} {'OK' if ok_o else 'FAIL'} " + f"max|dLSE|={diff_l:.4f} {'OK' if ok_l else 'FAIL'} " + f"nrms(O)={nrms_o:.3e}", + flush=True, + ) if not (ok_o and ok_l): rc = 1 if do_perf: us = _bench( aiter.fmha_fwd_f16_asm, - q, k, v, scale, causal, False, + q, + k, + v, + scale, + causal, + False, sink=sink, - num_iters=10, num_warmup=2, + num_iters=10, + num_warmup=2, ) flops = 2.0 * batch * hq * sq * sk * (2 * head_dim) if causal: @@ -527,8 +717,9 @@ def run_cli(*, batch: int, hq: int, hk: int, sq: int, sk: int, head_dim: int, # CLI surfaces the same breakage pytest would: us=0 / TFLOPS=inf # signals broken timing infra (profiler / ROCTracer event drop). if not (us > 0.0 and math.isfinite(tflops) and 0 < tflops < 5000): - print(f"perf: WARNING — bogus timing (us={us}, tflops={tflops})", - flush=True) + print( + f"perf: WARNING — bogus timing (us={us}, tflops={tflops})", flush=True + ) rc = 1 return rc @@ -538,28 +729,52 @@ def run_cli(*, batch: int, hq: int, hk: int, sq: int, sk: int, head_dim: int, formatter_class=argparse.RawTextHelpFormatter, description="Run aiter.fmha_fwd_f16_asm on a single shape and dump kernel args.", ) -parser.add_argument("-b", "--batch", type=int, default=1, - help="batch size (default 1)") -parser.add_argument("-n", "--q_head_num", type=int, default=8, - help="q_head_num (default 8)") -parser.add_argument("-kn", "--kv_head_num", type=int, default=1, - help="kv_head_num (default 1, must divide q_head_num)") -parser.add_argument("-q", "--seqlen_q", type=int, default=128, - help="q seq length (default 128)") -parser.add_argument("-k", "--seqlen_k", type=int, default=2048, - help="kv seq length (default 2048)") -parser.add_argument("-d", "--head_dim", type=int, choices=[64, 128], default=128, - help="head dim, 64 or 128 (default 128)") -parser.add_argument("-c", 
"--causal", action="store_true", - help="enable causal mask") -parser.add_argument("-l", "--layout", type=int, choices=[0, 1, 2], default=0, - help="input memory layout: 0=bshd 1=bhsd 2=sbhd (default 0)\n" - "(API always sees bshd shape; non-zero layout returns a\n" - "non-contiguous bshd view of the underlying memory)") -parser.add_argument("--ref", action="store_true", - help="also run PyTorch reference and print max diff + nrms") -parser.add_argument("--perf", action="store_true", - help="run perf benchmark for this shape (10 iters, 2 warmup)") +parser.add_argument("-b", "--batch", type=int, default=1, help="batch size (default 1)") +parser.add_argument( + "-n", "--q_head_num", type=int, default=8, help="q_head_num (default 8)" +) +parser.add_argument( + "-kn", + "--kv_head_num", + type=int, + default=1, + help="kv_head_num (default 1, must divide q_head_num)", +) +parser.add_argument( + "-q", "--seqlen_q", type=int, default=128, help="q seq length (default 128)" +) +parser.add_argument( + "-k", "--seqlen_k", type=int, default=2048, help="kv seq length (default 2048)" +) +parser.add_argument( + "-d", + "--head_dim", + type=int, + choices=[64, 128], + default=128, + help="head dim, 64 or 128 (default 128)", +) +parser.add_argument("-c", "--causal", action="store_true", help="enable causal mask") +parser.add_argument( + "-l", + "--layout", + type=int, + choices=[0, 1, 2], + default=0, + help="input memory layout: 0=bshd 1=bhsd 2=sbhd (default 0)\n" + "(API always sees bshd shape; non-zero layout returns a\n" + "non-contiguous bshd view of the underlying memory)", +) +parser.add_argument( + "--ref", + action="store_true", + help="also run PyTorch reference and print max diff + nrms", +) +parser.add_argument( + "--perf", + action="store_true", + help="run perf benchmark for this shape (10 iters, 2 warmup)", +) if __name__ == "__main__": args = parser.parse_args() From ce1268ee1f1dc276b6cd2cba2635c1c1a2bb5be8 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 
2026 18:33:57 +0800 Subject: [PATCH 05/43] reformat --- aiter/ops/mha.py | 31 +++++++++++-------------------- op_tests/test_fmha_fwd_f16_asm.py | 1 - 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 2e186cf5c4..130e95babb 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -217,8 +217,7 @@ def mha_fwd( v_descale: Optional[Tensor] = None, sink_ptr: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def gen_fmha_v3_fwd_fake_tensors( @@ -268,8 +267,8 @@ def fmha_v3_fwd( k_descale: Optional[Tensor] = None, v_descale: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... + # --------------------------------------------------------------------------- @@ -322,8 +321,7 @@ def fmha_fwd_f16_asm( return_lse: bool, sink: Optional[Tensor] = None, out: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor]: ... def cmdGenFunc_mha_varlen_fwd( @@ -556,8 +554,7 @@ def mha_varlen_fwd( cu_seqlens_q_padded: Optional[torch.Tensor] = None, cu_seqlens_k_padded: Optional[torch.Tensor] = None, sink_ptr: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def gen_fmha_v3_varlen_fwd_fake_tensor( @@ -656,8 +653,7 @@ def fmha_v3_varlen_fwd( gen: Optional[torch.Generator] = None, cu_seqlens_q_padded: Optional[torch.Tensor] = None, cu_seqlens_k_padded: Optional[torch.Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def cmdGenFunc_mha_bwd( @@ -868,8 +864,7 @@ def mha_bwd( gen: Optional[Generator] = None, sink: Optional[Tensor] = None, d_sink: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... 
+) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def gen_fmha_v3_bwd_fake_tensors( @@ -921,8 +916,7 @@ def fmha_v3_bwd( alibi_slopes: Optional[Tensor] = None, rng_state: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def cmdGenFunc_mha_varlen_bwd( @@ -1235,8 +1229,7 @@ def mha_varlen_bwd( cu_seqlens_k_padded: Optional[Tensor] = None, sink: Optional[Tensor] = None, d_sink: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def gen_fmha_v3_varlen_bwd_fake_tensor( @@ -1307,8 +1300,7 @@ def fmha_v3_varlen_bwd( gen: Optional[Generator] = None, cu_seqlens_q_padded: Optional[Tensor] = None, cu_seqlens_k_padded: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def maybe_contiguous(x): @@ -2964,8 +2956,7 @@ def mha_batch_prefill( seqlen_k: Optional[Tensor] = None, sink_ptr: Optional[Tensor] = None, gen: Optional[Generator] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - ... +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... def _mha_batch_prefill( diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 14767058f7..26f697f4f3 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -52,7 +52,6 @@ attention_ref, ) # noqa: F401 (kept for easy swap-back; see doc-block below) - # --------------------------------------------------------------------------- # Reference implementation. Inputs accepted as bshd (matches kernel API); # output `out` is bshd, `lse` is [b, hq, sq] (matches kernel layout). 
From b9af1ac99959c7b829965bfe5b0eed5562c380eb Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 18:40:22 +0800 Subject: [PATCH 06/43] reformat --- aiter/ops/mha.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 130e95babb..65b05ecd5c 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -268,7 +268,6 @@ def fmha_v3_fwd( v_descale: Optional[Tensor] = None, gen: Optional[Generator] = None, ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ... - # --------------------------------------------------------------------------- From 3866ad1620af99eedfa40bbf0edb23a1caddbace Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 20:56:55 +0800 Subject: [PATCH 07/43] move module import to top of file --- op_tests/test_fmha_fwd_f16_asm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 26f697f4f3..f1bae7e60a 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -41,17 +41,17 @@ import pytest import torch +import aiter +from aiter.test_common import checkAllclose, run_perftest +#from aiter.test_mha_common import ( +# attention_ref, +#) # noqa: F401 (kept for easy swap-back; see doc-block below) + pytestmark = pytest.mark.skipif( not torch.cuda.is_available(), reason="ROCm/HIP GPU not available", ) -import aiter -from aiter.test_common import checkAllclose, run_perftest -from aiter.test_mha_common import ( - attention_ref, -) # noqa: F401 (kept for easy swap-back; see doc-block below) - # --------------------------------------------------------------------------- # Reference implementation. Inputs accepted as bshd (matches kernel API); # output `out` is bshd, `lse` is [b, hq, sq] (matches kernel layout). 
From 9741a1b9a32ec273b2a784adb9ac480a61bc6e1a Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 21:14:46 +0800 Subject: [PATCH 08/43] reformat --- op_tests/test_fmha_fwd_f16_asm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index f1bae7e60a..4b12027e41 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -42,9 +42,11 @@ import torch import aiter + from aiter.test_common import checkAllclose, run_perftest #from aiter.test_mha_common import ( # attention_ref, +# #) # noqa: F401 (kept for easy swap-back; see doc-block below) pytestmark = pytest.mark.skipif( From 31f77ffe5b5d7c0e2bcb80939987c06d63c24274 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 21:21:53 +0800 Subject: [PATCH 09/43] reformat --- op_tests/test_fmha_fwd_f16_asm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 4b12027e41..475ce80d06 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -44,10 +44,11 @@ import aiter from aiter.test_common import checkAllclose, run_perftest + #from aiter.test_mha_common import ( # attention_ref, # -#) # noqa: F401 (kept for easy swap-back; see doc-block below) +# ) # noqa: F401 (kept for easy swap-back; see doc-block below) pytestmark = pytest.mark.skipif( not torch.cuda.is_available(), From dd98467c4aeeeae582bcd6e7a3672fb7bb68d285 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 21:27:09 +0800 Subject: [PATCH 10/43] reformat --- op_tests/test_fmha_fwd_f16_asm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 475ce80d06..3bf7afbc5e 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -45,7 +45,7 @@ from aiter.test_common import checkAllclose, run_perftest 
-#from aiter.test_mha_common import ( +# from aiter.test_mha_common import ( # attention_ref, # # ) # noqa: F401 (kept for easy swap-back; see doc-block below) From a5bcd1c4ef0fe70ff9756a5b527982505ab4abf5 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 21:37:04 +0800 Subject: [PATCH 11/43] remove unused import --- op_tests/test_fmha_fwd_f16_asm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 3bf7afbc5e..c05a6b4afc 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -43,7 +43,7 @@ import aiter -from aiter.test_common import checkAllclose, run_perftest +from aiter.test_common import checkAllclose # from aiter.test_mha_common import ( # attention_ref, From 82e9a293f915587b64d3fd12d1c6b34b27f7f590 Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 7 May 2026 21:44:49 +0800 Subject: [PATCH 12/43] sync 3rdparty/composable_kernel submodule pin with main (fdf4bb7f) Co-authored-by: Cursor --- 3rdparty/composable_kernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/composable_kernel b/3rdparty/composable_kernel index 6b1d184e66..fdf4bb7fcc 160000 --- a/3rdparty/composable_kernel +++ b/3rdparty/composable_kernel @@ -1 +1 @@ -Subproject commit 6b1d184e66e143a6fa0c1b7d049c373a082ecc4c +Subproject commit fdf4bb7fcc984811cef48ce817d89aac064b984a From db4b417b11c2152c44a746f3822b361bf1e1529f Mon Sep 17 00:00:00 2001 From: tingchen Date: Fri, 8 May 2026 17:44:15 +0800 Subject: [PATCH 13/43] fmha_fwd_f16: gfx1250-only check --- aiter/jit/utils/build_targets.py | 2 +- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/aiter/jit/utils/build_targets.py b/aiter/jit/utils/build_targets.py index c20ace6290..5fa0f5d53c 100644 --- a/aiter/jit/utils/build_targets.py +++ b/aiter/jit/utils/build_targets.py @@ -39,7 +39,7 @@ GFX_CU_NUM_MAP = { "gfx942": 304, # 
MI300X (SPX, full GPU); MI308X shares gfx942 — use CU_NUM override "gfx950": 256, # MI350 - "gfx1250": 256, # MI450 + "gfx1250": 256, } diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index 698ede97dd..b86530f0dd 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -1,7 +1,7 @@ // SPDX-License-Identifier: MIT // Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. // -// ASM FMHA forward (BF16, gfx1250 / MI4xx) — ported from poc_kl/mi400/fmha_fwd_f16. +// ASM FMHA forward (BF16, gfx1250) — ported from poc_kl fmha_fwd_f16. // // Layout: q/k/v expected in **bshd shape** ([batch, seq, head, dim]). The // kernel reads per-dim strides directly from the input tensor, so callers may @@ -293,6 +293,9 @@ std::vector fmha_fwd_f16(at::Tensor& q, // when sequences are aligned), so there's no runtime branch on alignment. const std::string dtype = "bf16"; const std::string arch_id = get_gpu_arch(); + if(arch_id != "gfx1250"){ + AITER_CHECK(false, __func__, ": fmha_fwd_f16 is only supported on gfx1250"); + } CFG* cfg_map = &cfg_fmha_fwd_f16; static SynchronizedCache impl_ptr_map; From b684c053c995690bc256d422bf179cdb1ee89738 Mon Sep 17 00:00:00 2001 From: tingchen Date: Sat, 9 May 2026 11:38:33 +0800 Subject: [PATCH 14/43] add arch id guard --- op_tests/test_fmha_fwd_f16_asm.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index c05a6b4afc..5912861243 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -44,6 +44,7 @@ import aiter from aiter.test_common import checkAllclose +from aiter.jit.utils.chip_info import get_gfx # from aiter.test_mha_common import ( # attention_ref, @@ -314,6 +315,8 @@ def run_ref(q, k, v, *, is_causal: bool, sink: Optional[torch.Tensor] = None): ], ) def test_fmha_fwd_f16_correctness(batch, hq, hk, 
sq, sk, head_dim, is_causal): + if get_gfx() not in ["gfx1250"]: + return device = "cuda" torch.manual_seed(0) @@ -371,6 +374,8 @@ def test_fmha_fwd_f16_correctness(batch, hq, hk, sq, sk, head_dim, is_causal): def test_fmha_fwd_f16_ops_layer(): """Direct ops-layer call: bshd qkv (sbhd memory layout), D64 + non-zero sink.""" + if get_gfx() not in ["gfx1250"]: + return device = "cuda" torch.manual_seed(0) @@ -411,6 +416,8 @@ def test_fmha_fwd_f16_d64_requires_sink(): a zero sink for D64, so this error path is unreachable from the public API — we exercise it via the lower-level ops stub. """ + if get_gfx() not in ["gfx1250"]: + return device = "cuda" q, k, v = make_qkv_bshd( layout=0, @@ -438,6 +445,8 @@ def test_fmha_fwd_f16_d64_requires_sink(): @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("layout", [0, 1, 2]) def test_fmha_fwd_f16_layout(layout, head_dim): + if get_gfx() not in ["gfx1250"]: + return device = "cuda" torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 @@ -490,22 +499,11 @@ def test_fmha_fwd_f16_layout(layout, head_dim): # --------------------------------------------------------------------------- -def _is_gfx1250() -> bool: - try: - from aiter.jit.utils.chip_info import get_gfx - - return get_gfx() == "gfx1250" - except Exception: - return False - - -@pytest.mark.skipif( - not _is_gfx1250(), - reason="flash_attn_func dispatch to fmha_fwd_f16_asm only on gfx1250", -) @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): + if get_gfx() not in ["gfx1250"]: + return device = "cuda" torch.manual_seed(0) batch, hq, hk, sq, sk = 1, 8, 1, 128, 2048 @@ -566,6 +564,8 @@ def test_fmha_fwd_f16_via_flash_attn_func(head_dim, is_causal): @pytest.mark.parametrize("head_dim", [64, 128]) @pytest.mark.parametrize("is_causal", [False, True]) def test_fmha_fwd_f16_perf(head_dim, is_causal): + if get_gfx() not in 
["gfx1250"]: + return device = "cuda" torch.manual_seed(0) From 7cc452ed701d29179971a294fd43772c41643986 Mon Sep 17 00:00:00 2001 From: tingchen Date: Sat, 9 May 2026 19:59:30 +0800 Subject: [PATCH 15/43] runtime guard gfx1250 --- op_tests/test_fmha_fwd_f16_asm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 5912861243..41a27d497d 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -44,7 +44,7 @@ import aiter from aiter.test_common import checkAllclose -from aiter.jit.utils.chip_info import get_gfx +from aiter.jit.utils.chip_info import get_gfx_runtime as get_gfx # from aiter.test_mha_common import ( # attention_ref, From 279b557427f004e8f4cd9297bafee84b1123fcad Mon Sep 17 00:00:00 2001 From: tingchen Date: Sun, 10 May 2026 22:38:47 +0800 Subject: [PATCH 16/43] add arch guard in main --- op_tests/test_fmha_fwd_f16_asm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/op_tests/test_fmha_fwd_f16_asm.py b/op_tests/test_fmha_fwd_f16_asm.py index 41a27d497d..6d4c2a4786 100644 --- a/op_tests/test_fmha_fwd_f16_asm.py +++ b/op_tests/test_fmha_fwd_f16_asm.py @@ -779,6 +779,8 @@ def run_cli( ) if __name__ == "__main__": + if get_gfx() not in ["gfx1250"]: + sys.exit(0) args = parser.parse_args() rc = run_cli( batch=args.batch, From 60d3555672d4b24df450ee6fce69347d5e3771e5 Mon Sep 17 00:00:00 2001 From: tingchen Date: Tue, 12 May 2026 16:32:35 +0800 Subject: [PATCH 17/43] reorg fmha_fwd_f16 integration --- aiter/jit/optCompilerConfig.json | 3 +- aiter/ops/mha.py | 115 ++++++--- csrc/include/rocm_ops.hpp | 12 - csrc/include/torch/fmha_fwd_f16.h | 36 --- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 314 ++++++++++--------------- csrc/pybind/fmha_fwd_f16_asm_pybind.cu | 9 - 6 files changed, 206 insertions(+), 283 deletions(-) delete mode 100644 csrc/include/torch/fmha_fwd_f16.h delete mode 100644 csrc/pybind/fmha_fwd_f16_asm_pybind.cu diff --git 
a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 81d48cc213..3f1afaa476 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -1089,8 +1089,7 @@ }, "module_fmha_fwd_f16_asm": { "srcs": [ - "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_f16.cu'", - "f'{AITER_CSRC_DIR}/pybind/fmha_fwd_f16_asm_pybind.cu'" + "f'{AITER_CSRC_DIR}/py_itfs_cu/asm_fmha_fwd_f16.cu'" ], "flags_extra_cc": [ "'-DENABLE_CK=0'" diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index 65b05ecd5c..bc8674de87 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -277,40 +277,32 @@ def fmha_v3_fwd( # read directly from the tensor so non-contiguous bshd-shaped views (e.g. of # sbhd / bhsd allocations) are accepted. Only `tensor.stride(-1) == 1` is # required. softmax_scale is forwarded to the kernel as-is (the kernel -# applies it internally to Q·K^T before softmax). sink (when provided) is -# in AITER post-scale convention; the .cu host driver multiplies it by -# sqrt(qk_head_dim) to convert to the kernel's pre-scale raw-logit domain. +# applies it internally to Q·K^T before softmax). +# +# Memory-allocation policy: all GPU tensors (out, lse, sink) are allocated on +# the Python side; the C++ entry point performs only pointer + stride +# bookkeeping and kernel launch (no torch dependency). The public wrapper +# `fmha_fwd_f16_asm` below handles allocation and the AITER-post-scale → +# kernel-pre-scale conversion for sink (multiply by sqrt(qk_head_dim)). 
# --------------------------------------------------------------------------- -def gen_fmha_fwd_f16_asm_fake_tensors( +@compile_ops( + "module_fmha_fwd_f16_asm", + fc_name="fmha_fwd_f16_asm", + ffi_type="ctypes", +) +def _fmha_fwd_f16_asm( q: Tensor, k: Tensor, v: Tensor, + out: Tensor, + lse: Tensor, + sink: Tensor, softmax_scale: float, is_causal: bool, return_lse: bool, - sink: Optional[Tensor] = None, - out: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor]: - batch, q_seq_len, q_head_num, _ = q.shape - d_v = v.size(3) - fake_out = ( - out - if out is not None - else torch.empty( - (batch, q_seq_len, q_head_num, d_v), dtype=q.dtype, device=q.device - ) - ) - fake_lse = torch.empty( - (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device - ) - return (fake_out, fake_lse) +) -> None: ... -@compile_ops( - "module_fmha_fwd_f16_asm", - fc_name="fmha_fwd_f16_asm", - gen_fake=gen_fmha_fwd_f16_asm_fake_tensors, -) def fmha_fwd_f16_asm( q: Tensor, k: Tensor, @@ -320,7 +312,65 @@ def fmha_fwd_f16_asm( return_lse: bool, sink: Optional[Tensor] = None, out: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor]: ... +) -> Tuple[Tensor, Tensor]: + """Public wrapper: allocates `out`/`lse`/`sink` buffers as needed and + forwards to the ctypes-backed kernel entry point. + + Contract details: + * `sink` (caller) is in AITER post-scale convention. This wrapper + converts it to the kernel's pre-scale raw-logit domain by multiplying + by sqrt(qk_head_dim) before launch. + * The kernel always accesses `ptr_LSE`, so an LSE buffer is always + allocated even when `return_lse=False`; in that case the contents are + undefined and callers should ignore the returned `lse`. + * D64 kernels (`_rxy_sink`) compile ENABLE_SINK=1 and read `sink`; + callers MUST pass an explicit sink for D64. + * D128 kernels (`_rxy`) compile ENABLE_SINK=0 and ignore `sink`; the + kernarg slot must still be a valid non-null pointer, so we always + allocate a zero buffer when none is supplied. 
+ """ + batch, q_seq_len, q_head_num, qk_head_dim = q.shape + v_head_dim = v.size(3) + + if out is None: + out = torch.empty( + (batch, q_seq_len, q_head_num, v_head_dim), + dtype=q.dtype, + device=q.device, + ) + + lse = torch.empty( + (batch, q_head_num, q_seq_len), dtype=torch.float32, device=q.device + ) + + if sink is not None: + # AITER post-scale → kernel pre-scale. + sink_for_kernel = (sink * (qk_head_dim**0.5)).to(torch.float32).contiguous() + elif qk_head_dim == 64: + raise RuntimeError( + "fmha_fwd_f16_asm: D64 kernels require an explicit `sink` tensor " + f"of shape [q_head_num]={q_head_num} fp32 (AITER post-scale " + "convention). Pass `sink=torch.zeros(q_head_num, dtype=torch.float32)` " + "if you want a zero-logit sink." + ) + else: + # D128: kernel never reads sink contents but slot must be non-null. + sink_for_kernel = torch.zeros( + q_head_num, dtype=torch.float32, device=q.device + ) + + _fmha_fwd_f16_asm( + q, + k, + v, + out, + lse, + sink_for_kernel, + float(softmax_scale), + bool(is_causal), + bool(return_lse), + ) + return out, lse def cmdGenFunc_mha_varlen_fwd( @@ -1415,14 +1465,15 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): # gfx1250 ASM bf16 path: q/k/v are bshd; kernel reads strides directly, # no API-side permute. softmax_scale is forwarded as-is (kernel applies # it internally to Q·K^T). sink_ptr is in AITER post-scale convention; - # the .cu host driver multiplies it by sqrt(qk_head_dim) to convert to - # the kernel's pre-scale raw-logit domain before launch. + # the public `fmha_fwd_f16_asm` wrapper multiplies it by + # sqrt(qk_head_dim) and auto-fills the D64 zero-sink case internally, + # so we just forward the user's sink_ptr here. sink_for_kernel = sink_ptr if hdim_q == 64 and sink_for_kernel is None: - # D64 kernels always read SINK; auto-fill zero-logit so callers - # who don't care about sink still hit this fast path. 
+ # D64 kernels always read SINK; pass an explicit zero-logit so the + # wrapper does not raise on us. sink_for_kernel = torch.zeros(nhead_q, dtype=torch.float32, device=q.device) - _r = fmha_fwd_f16_asm( + out_, softmax_lse = fmha_fwd_f16_asm( q, k, v, @@ -1432,8 +1483,6 @@ def _validate_cu(name: str, x: Optional[torch.Tensor]): sink_for_kernel, out, ) - out_ = _r[0] - softmax_lse = _r[1] S_dmask = torch.empty((0,), dtype=torch.float32, device=q.device) rng_state = torch.empty((2,), dtype=torch.int64, device=q.device) elif can_impl_fmha_v3_fwd() and seqlen_q > 128: # Prefer CK for decode cases diff --git a/csrc/include/rocm_ops.hpp b/csrc/include/rocm_ops.hpp index ba564544b8..43a9a96cc6 100644 --- a/csrc/include/rocm_ops.hpp +++ b/csrc/include/rocm_ops.hpp @@ -822,18 +822,6 @@ namespace py = pybind11; py::arg("sink") = std::nullopt, \ py::arg("d_sink") = std::nullopt); -#define FMHA_FWD_F16_ASM_PYBIND \ - m.def("fmha_fwd_f16_asm", \ - &aiter::torch_itfs::fmha_fwd_f16, \ - py::arg("q"), \ - py::arg("k"), \ - py::arg("v"), \ - py::arg("softmax_scale"), \ - py::arg("is_causal"), \ - py::arg("return_lse"), \ - py::arg("sink") = std::nullopt, \ - py::arg("out") = std::nullopt); - #define MHA_FWD_ASM_PYBIND \ m.def("fmha_v3_fwd", \ &aiter::torch_itfs::fmha_v3_fwd, \ diff --git a/csrc/include/torch/fmha_fwd_f16.h b/csrc/include/torch/fmha_fwd_f16.h deleted file mode 100644 index 2c04fb9963..0000000000 --- a/csrc/include/torch/fmha_fwd_f16.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -// SPDX-License-Identifier: MIT -// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. -#include - -namespace aiter { -namespace torch_itfs { - -// ASM FMHA forward (BF16, gfx1250). 
-// -// API contract: q/k/v have **bshd shape**: -// q : [batch, seq_q, q_head_num, qk_head_dim] -// k : [batch, seq_k, kv_head_num, qk_head_dim] -// v : [batch, seq_k, kv_head_num, v_head_dim] -// out (returned): [batch, seq_q, q_head_num, v_head_dim] -// -// The kernel reads strides directly from `tensor.stride(...)`, so callers may -// pass a non-contiguous bshd-shaped view of an sbhd / bhsd allocation — -// strides will correctly reflect the underlying memory layout. Only -// `tensor.stride(-1) == 1` (last-dim contiguous) is required. -// -// sink: optional per-Q-head fp32 tensor [q_head_num], AITER post-scale -// convention (same domain as Q·K^T * softmax_scale). Internally -// converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim). -std::vector fmha_fwd_f16( - at::Tensor& q, - const at::Tensor& k, - const at::Tensor& v, - float softmax_scale, - bool is_causal, - bool return_lse, - std::optional sink_ = std::nullopt, - std::optional out_ = std::nullopt); - -} // namespace torch_itfs -} // namespace aiter diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index b86530f0dd..501636d552 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -9,22 +9,25 @@ // kernel will follow the strides correctly. Only `tensor.stride(-1) == 1` // (last-dim contiguous) is required, matching flash_attn_func semantics. // -// sink convention (AITER / CK-Tile post-scale): -// The user passes sink in the same domain as Q*K^T * softmax_scale (post-scale). -// The kernel expects pre-scale raw logits. This file converts: -// sink_raw = sink_user * sqrt(qk_head_dim) -#include -#include +// Memory-allocation policy: +// All tensors (q, k, v, out, lse, sink) are allocated by the Python caller. +// This C++ entry point performs **only pointer + stride bookkeeping and +// kernel launch** — no GPU memory allocation, no temporary tensors, no torch +// dependency. 
In particular, the AITER post-scale → pre-scale conversion +// for `sink` (multiply by sqrt(qk_head_dim)) is the caller's responsibility: +// pass `sink` already in the kernel's pre-scale raw-logit domain. +// +// sink slot semantics (still enforced here): +// D64 `_rxy_sink` kernels compile ENABLE_SINK=1 → `sink` MUST be non-null. +// D128 `_rxy` kernels compile ENABLE_SINK=0 → `sink` slot must still be +// a valid non-null pointer (kernarg layout requires it), but +// the kernel never reads its contents. Pass a zero buffer. +#include "aiter_tensor.h" +#include "aiter_ctypes_error.h" +#include "asm_fmha_fwd_f16_configs.hpp" #include #include #include -#include - -#include "aiter_hip_common.h" -#include "asm_fmha_fwd_f16_configs.hpp" - -namespace aiter { -namespace torch_itfs { // Kernel argument block (ABI = FmhaFwdKernelArgsBase in fmha_fwd_f16.cpp). // kernarg_size = 528 B (33 slots × 16 B, including ptr_SINK at the end). @@ -91,7 +94,7 @@ static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, if (cfg.mask != mask_flag) continue; return el.first; } - TORCH_CHECK(false, + AITER_CHECK(false, "fmha_fwd_f16_asm: no kernel for dtype=", dtype, " hdim_q=", hdim_q, " hdim_v=", hdim_v, " mask=", mask_flag, @@ -101,152 +104,132 @@ static std::string get_heuristic_kernel_fmha_fwd_f16(const std::string& dtype, // ---- main entry ------------------------------------------------------------ -// API contract: q/k/v have **bshd shape**, i.e. q.shape = [batch, seq_q, hq, d], -// k/v.shape = [batch, seq_k, hk, d]. The kernel reads strides directly from -// `tensor.stride(...)`, so the underlying memory layout is whatever the user -// arranged — they may pass a non-contiguous bshd-shaped view of an sbhd / bhsd -// allocation, and the kernel will follow strides correctly. Only `stride(-1) -// == 1` (last dim contiguous) is required, matching flash_attn_func. +AITER_CTYPES_ERROR_DEF + +// C ABI: every tensor is caller-allocated. 
No GPU memory is allocated here; +// no torch dependency. +// +// q/k/v have **bshd shape**, i.e. q.shape = [batch, seq_q, hq, d], k/v.shape = +// [batch, seq_k, hk, d]. Kernel reads strides directly from the tensor, so +// non-contiguous bshd-shaped views backed by sbhd / bhsd memory work — only +// `stride(-1) == 1` is required. // -// sink: optional [q_head_num] fp32 tensor in AITER post-scale convention. -// Internally converted to pre-scale: sink_raw = sink_user * sqrt(qk_head_dim). -std::vector fmha_fwd_f16(at::Tensor& q, - const at::Tensor& k, - const at::Tensor& v, - float softmax_scale, - bool is_causal, - bool return_lse, - std::optional sink_, - std::optional out_) +// out : [batch, q_seq_len, q_head_num, v_head_dim] bf16, last dim contiguous. +// lse : [batch, q_head_num, q_seq_len] fp32. Always required by kernel ABI +// (kernel may touch ptr_LSE even when return_lse=0); pass a buffer of +// the right size regardless of whether you read it. +// sink : [q_head_num] fp32 in the kernel's pre-scale raw-logit domain. +// Required for D64 (ENABLE_SINK=1). For D128 (ENABLE_SINK=0) the +// slot must still be a valid non-null pointer of the right size, but +// contents are ignored — pass a zero buffer. 
+AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( + fmha_fwd_f16_asm, + (aiter_tensor_t* q, + aiter_tensor_t* k, + aiter_tensor_t* v, + aiter_tensor_t* out, + aiter_tensor_t* lse, + aiter_tensor_t* sink, + float softmax_scale, + int is_causal, + int return_lse, + hipStream_t stream), + (q, k, v, out, lse, sink, softmax_scale, is_causal, return_lse, stream)) { - // ---- basic validation -------------------------------------------------- - TORCH_CHECK(q.dim() == 4 && k.dim() == 4 && v.dim() == 4, + // ---- arch + dtype validation ------------------------------------------ + const std::string arch_id = get_gpu_arch(); + AITER_CHECK(arch_id == "gfx1250", + "fmha_fwd_f16_asm: only supported on gfx1250, got ", arch_id); + + AITER_CHECK(q && k && v && out && lse && sink, + "fmha_fwd_f16_asm: q/k/v/out/lse/sink must all be non-null"); + AITER_CHECK(q->dtype() == AITER_DTYPE_bf16 && + k->dtype() == AITER_DTYPE_bf16 && + v->dtype() == AITER_DTYPE_bf16, + "fmha_fwd_f16_asm: q/k/v must be bf16"); + AITER_CHECK(out->dtype() == AITER_DTYPE_bf16, + "fmha_fwd_f16_asm: out must be bf16"); + AITER_CHECK(lse->dtype() == AITER_DTYPE_fp32, + "fmha_fwd_f16_asm: lse must be fp32"); + AITER_CHECK(sink->dtype() == AITER_DTYPE_fp32, + "fmha_fwd_f16_asm: sink must be fp32"); + + AITER_CHECK(q->dim() == 4 && k->dim() == 4 && v->dim() == 4, "fmha_fwd_f16_asm: q/k/v must be 4-D tensors (bshd shape)"); - TORCH_CHECK(q.stride(-1) == 1 && k.stride(-1) == 1 && v.stride(-1) == 1, + AITER_CHECK(q->stride(-1) == 1 && k->stride(-1) == 1 && v->stride(-1) == 1, "fmha_fwd_f16_asm: q/k/v must have contiguous last dim"); - TORCH_CHECK(q.scalar_type() == at::kBFloat16, - "fmha_fwd_f16_asm: only bf16 is supported"); - TORCH_CHECK(k.scalar_type() == at::kBFloat16 && v.scalar_type() == at::kBFloat16, - "fmha_fwd_f16_asm: k/v must also be bf16"); // ---- dimension extraction (bshd) --------------------------------------- - const int batch = (int)q.size(0); - const int q_seq_len = (int)q.size(1); - const int q_head_num = 
(int)q.size(2); - const int qk_head_dim = (int)q.size(3); + const int batch = (int)q->size(0); + const int q_seq_len = (int)q->size(1); + const int q_head_num = (int)q->size(2); + const int qk_head_dim = (int)q->size(3); - const int kv_seq_len = (int)k.size(1); - const int kv_head_num = (int)k.size(2); - const int v_head_dim = (int)v.size(3); + const int kv_seq_len = (int)k->size(1); + const int kv_head_num = (int)k->size(2); + const int v_head_dim = (int)v->size(3); - TORCH_CHECK((int)k.size(0) == batch, "k batch mismatch"); - TORCH_CHECK((int)v.size(0) == batch, "v batch mismatch"); - TORCH_CHECK((int)k.size(3) == qk_head_dim, "k head_dim mismatch"); - TORCH_CHECK((int)v.size(1) == kv_seq_len, "v seq_len mismatch with k"); - TORCH_CHECK((int)v.size(2) == kv_head_num, "v head_num mismatch with k"); - TORCH_CHECK(q_head_num % kv_head_num == 0, "q_head_num must be a multiple of kv_head_num"); - TORCH_CHECK(qk_head_dim == 64 || qk_head_dim == 128, + AITER_CHECK((int)k->size(0) == batch, "fmha_fwd_f16_asm: k batch mismatch"); + AITER_CHECK((int)v->size(0) == batch, "fmha_fwd_f16_asm: v batch mismatch"); + AITER_CHECK((int)k->size(3) == qk_head_dim, "fmha_fwd_f16_asm: k head_dim mismatch"); + AITER_CHECK((int)v->size(1) == kv_seq_len, "fmha_fwd_f16_asm: v seq_len mismatch with k"); + AITER_CHECK((int)v->size(2) == kv_head_num, "fmha_fwd_f16_asm: v head_num mismatch with k"); + AITER_CHECK(q_head_num % kv_head_num == 0, "fmha_fwd_f16_asm: q_head_num must be a multiple of kv_head_num"); + AITER_CHECK(qk_head_dim == 64 || qk_head_dim == 128, "fmha_fwd_f16_asm: only head_dim 64 or 128 supported, got ", qk_head_dim); - TORCH_CHECK(v_head_dim == qk_head_dim, + AITER_CHECK(v_head_dim == qk_head_dim, "fmha_fwd_f16_asm: v_head_dim must equal qk_head_dim"); + AITER_CHECK(out->dim() == 4 && + (int)out->size(0) == batch && (int)out->size(1) == q_seq_len && + (int)out->size(2) == q_head_num && (int)out->size(3) == v_head_dim, + "fmha_fwd_f16_asm: out shape must be [batch, 
q_seq_len, q_head_num, v_head_dim]"); + AITER_CHECK(out->stride(-1) == 1, + "fmha_fwd_f16_asm: out must have contiguous last dim"); + + AITER_CHECK(lse->dim() == 3 && + (int)lse->size(0) == batch && + (int)lse->size(1) == q_head_num && + (int)lse->size(2) == q_seq_len, + "fmha_fwd_f16_asm: lse shape must be [batch, q_head_num, q_seq_len]"); + + AITER_CHECK(sink->dim() == 1 && (int)sink->size(0) == q_head_num, + "fmha_fwd_f16_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); + const int gqa = q_head_num / kv_head_num; const int mask_flag = is_causal ? 1 : 0; // ---- stride extraction (in bytes), bshd dim layout -------------------- // bshd: dim0=b, dim1=s, dim2=h, dim3=d - const int elem_size = q.element_size(); // 2 for bf16 + const int elem_size = (int)q->element_size(); // 2 for bf16 - const int stride_q_batch = (int)q.stride(0) * elem_size; - const int stride_q_seq = (int)q.stride(1) * elem_size; - const int stride_q_head = (int)q.stride(2) * elem_size; - - const int stride_k_batch = (int)k.stride(0) * elem_size; - const int stride_k_seq = (int)k.stride(1) * elem_size; - const int stride_k_head = (int)k.stride(2) * elem_size; - - const int stride_v_batch = (int)v.stride(0) * elem_size; - const int stride_v_seq = (int)v.stride(1) * elem_size; - const int stride_v_head = (int)v.stride(2) * elem_size; - - const int sub_Q = 128; // ts_qo: Q-tile size used by all kernels - const int stride_q_tg = sub_Q * stride_q_seq; - const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout + const int stride_q_batch = (int)q->stride(0) * elem_size; + const int stride_q_seq = (int)q->stride(1) * elem_size; + const int stride_q_head = (int)q->stride(2) * elem_size; - // ---- output allocation (bshd) ----------------------------------------- - at::Tensor out; - if (out_.has_value()) - { - out = out_.value(); - TORCH_CHECK(out.dim() == 4 && - (int)out.size(0) == batch && (int)out.size(1) == q_seq_len && - (int)out.size(2) == q_head_num && 
(int)out.size(3) == v_head_dim, - "fmha_fwd_f16_asm: pre-allocated out shape must be " - "[batch, q_seq_len, q_head_num, v_head_dim]"); - TORCH_CHECK(out.stride(-1) == 1 && out.scalar_type() == q.scalar_type(), - "fmha_fwd_f16_asm: out must have contiguous last dim and same dtype as q"); - } - else - { - out = at::empty({batch, q_seq_len, q_head_num, v_head_dim}, q.options()); - } + const int stride_k_batch = (int)k->stride(0) * elem_size; + const int stride_k_seq = (int)k->stride(1) * elem_size; + const int stride_k_head = (int)k->stride(2) * elem_size; - const int stride_o_batch = (int)out.stride(0) * elem_size; - const int stride_o_seq = (int)out.stride(1) * elem_size; - const int stride_o_head = (int)out.stride(2) * elem_size; + const int stride_v_batch = (int)v->stride(0) * elem_size; + const int stride_v_seq = (int)v->stride(1) * elem_size; + const int stride_v_head = (int)v->stride(2) * elem_size; - // ---- LSE allocation (fixed layout [batch, q_head_num, q_seq_len] fp32) - - // Always allocate even when not returned: the kernel may access ptr_LSE. - at::Tensor lse = at::empty({batch, q_head_num, q_seq_len}, - q.options().dtype(at::kFloat)); + const int stride_o_batch = (int)out->stride(0) * elem_size; + const int stride_o_seq = (int)out->stride(1) * elem_size; + const int stride_o_head = (int)out->stride(2) * elem_size; - // ---- sink buffer ------------------------------------------------------- - // D64 `_rxy_sink` kernels (ENABLE_SINK=1): ptr_SINK is actively read. - // Sink must be provided for D64; passing a zero buffer silently passes - // logit=0 through the sink path (which still exercises the code path but - // is numerically equivalent to a very negative logit after max-subtraction). - // We therefore REQUIRE an explicit sink for D64 so callers are aware. - // - // D128 `_rxy` kernels (ENABLE_SINK=0): ptr_SINK is compiled out; the slot - // must still be a valid non-null pointer, but values are irrelevant. 
- // Zeros are used when no sink is supplied for D128. - // - // sink_ is in AITER post-scale convention (same domain as Q·K^T * scale). - // Convert to pre-scale for kernel: sink_raw = sink_user * sqrt(qk_head_dim). - at::Tensor sink; - if (sink_.has_value()) - { - TORCH_CHECK(sink_.value().dim() == 1 && sink_.value().size(0) == q_head_num, - "fmha_fwd_f16_asm: sink must be 1-D with size q_head_num (", q_head_num, ")"); - TORCH_CHECK(sink_.value().scalar_type() == at::kFloat, - "fmha_fwd_f16_asm: sink must be fp32"); - // AITER post-scale → pre-scale: multiply by sqrt(qk_head_dim) - float pre_scale = std::sqrt(static_cast(qk_head_dim)); - sink = (sink_.value() * pre_scale).contiguous(); - } - else if (qk_head_dim == 64) - { - // D64 _rxy_sink kernels always compute the sink path (ENABLE_SINK=1). - // Require an explicit sink so callers know it is active. - TORCH_CHECK(false, - "fmha_fwd_f16_asm: D64 (_rxy_sink) kernels require an explicit `sink` " - "tensor of shape [q_head_num]=", q_head_num, " fp32 (AITER post-scale " - "convention). Pass `sink=torch.zeros(q_head_num, dtype=torch.float32)` " - "if you want a zero-logit sink."); - } - else - { - // D128 _rxy kernels: ENABLE_SINK=0, ptr_SINK is ignored by the kernel. 
- sink = at::zeros({q_head_num}, q.options().dtype(at::kFloat)); - } + const int sub_Q = 128; // ts_qo: Q-tile size used by all kernels + const int stride_q_tg = sub_Q * stride_q_seq; + const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout // ---- kernel args ------------------------------------------------------- KernelArgs args = {}; - args.ptr_O = out.data_ptr(); - args.ptr_Q = q.data_ptr(); - args.ptr_K = k.data_ptr(); - args.ptr_V = v.data_ptr(); - args.ptr_LSE = lse.data_ptr(); + args.ptr_O = out->data_ptr(); + args.ptr_Q = q->data_ptr(); + args.ptr_K = k->data_ptr(); + args.ptr_V = v->data_ptr(); + args.ptr_LSE = lse->data_ptr(); args.scalar_f = softmax_scale; args.q_seq_len = q_seq_len; args.stride_q_seq = stride_q_seq; @@ -283,7 +266,7 @@ std::vector fmha_fwd_f16(at::Tensor& q, args.stride_lse_head = stride_lse_head; args.ptr_QSeqPad = nullptr; args.ptr_KSeqPad = nullptr; - args.ptr_SINK = sink.data_ptr(); + args.ptr_SINK = sink->data_ptr(); size_t arg_size = sizeof(args); @@ -291,18 +274,14 @@ std::vector fmha_fwd_f16(at::Tensor& q, // Always use the _brd (border) kernel variant: it handles both aligned // and unaligned q_seq_len/kv_seq_len uniformly (border path is a no-op // when sequences are aligned), so there's no runtime branch on alignment. 
- const std::string dtype = "bf16"; - const std::string arch_id = get_gpu_arch(); - if(arch_id != "gfx1250"){ - AITER_CHECK(false, __func__, ": fmha_fwd_f16 is only supported on gfx1250"); - } - CFG* cfg_map = &cfg_fmha_fwd_f16; + const std::string dtype = "bf16"; + CFG* cfg_map = &cfg_fmha_fwd_f16; static SynchronizedCache impl_ptr_map; const std::string kernel_key = get_heuristic_kernel_fmha_fwd_f16( dtype, qk_head_dim, v_head_dim, mask_flag, arch_id, cfg_map); auto it = cfg_map->find(kernel_key); - TORCH_CHECK(it != cfg_map->end(), + AITER_CHECK(it != cfg_map->end(), "fmha_fwd_f16_asm: kernel not found in CFG: ", kernel_key); const char* name = it->second.knl_name.c_str(); @@ -319,40 +298,6 @@ std::vector fmha_fwd_f16(at::Tensor& q, // All _rxy kernels use remap_xy=1: swap gdx↔gdy at launch so that // bid.x indexes heads and bid.y indexes Q-tiles. - auto stream = at::hip::getCurrentHIPStream().stream(); - - // ---- DEBUG DUMP ------------------------------------------------------- - fprintf(stderr, - "\n[fmha_fwd_f16 DEBUG] kernel_key=%s co=%s arg_size=%zu\n" - " KernelArgs:\n" - " ptr_O=%p ptr_Q=%p ptr_K=%p ptr_V=%p ptr_LSE=%p\n" - " scalar_f=%g\n" - " q_seq_len=%d kv_seq_len=%d q_head_num=%d gqa=%d\n" - " qk_head_dim=%d v_head_dim=%d opt=%d lse=%d\n" - " stride_q_seq=%d stride_q_tg=%d stride_q_head=%d stride_q_batch=%d\n" - " stride_k_seq=%d stride_k_head=%d stride_k_batch=%d\n" - " stride_v_seq=%d stride_v_head=%d stride_v_batch=%d\n" - " stride_o_seq=%d stride_o_head=%d stride_o_batch=%d\n" - " ptr_QSeq=%p ptr_KSeq=%p stride_lse_head=%d\n" - " ptr_QSeqPad=%p ptr_KSeqPad=%p ptr_SINK=%p\n" - " Launch dims (after rxy swap): gdx(head)=%d gdy(Qtile)=%d gdz(batch)=%d\n" - " bdx=%d bdy=1 bdz=1\n" - " Pre-swap: gdx(Qtile)=%d gdy(head)=%d gdz(batch)=%d\n", - kernel_key.c_str(), co_name, arg_size, - args.ptr_O, args.ptr_Q, args.ptr_K, args.ptr_V, args.ptr_LSE, - args.scalar_f, - args.q_seq_len, args.kv_seq_len, args.q_head_num, args.gqa, - args.qk_head_dim, 
args.v_head_dim, args.opt, args.lse, - args.stride_q_seq, args.stride_q_tg, args.stride_q_head, args.stride_q_batch, - args.stride_k_seq, args.stride_k_head, args.stride_k_batch, - args.stride_v_seq, args.stride_v_head, args.stride_v_batch, - args.stride_o_seq, args.stride_o_head, args.stride_o_batch, - args.ptr_QSeq, args.ptr_KSeq, args.stride_lse_head, - args.ptr_QSeqPad, args.ptr_KSeqPad, args.ptr_SINK, - gdy, gdx, gdz, bdx, - gdx, gdy, gdz); - fflush(stderr); - impl_ptr->launch_kernel({&args, &arg_size, gdy, // launch_gdx = head count (swapped) @@ -362,17 +307,4 @@ std::vector fmha_fwd_f16(at::Tensor& q, 1, 1, stream}); - - std::vector ret; - ret.push_back(out); - // Always return LSE in slot [1]. When return_lse==false the kernel skips - // writing it (args.lse=0) so the data is undefined; callers that don't - // need LSE should simply ignore the second tensor. Keeping a fixed - // 2-tuple return matches torch.library schema requirements (compile_ops - // / infer_schema only accepts fixed-arity Tuple). - ret.push_back(lse); - return ret; } - -} // namespace torch_itfs -} // namespace aiter diff --git a/csrc/pybind/fmha_fwd_f16_asm_pybind.cu b/csrc/pybind/fmha_fwd_f16_asm_pybind.cu deleted file mode 100644 index 62257c87ac..0000000000 --- a/csrc/pybind/fmha_fwd_f16_asm_pybind.cu +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. 
-#include "rocm_ops.hpp" -#include "torch/fmha_fwd_f16.h" - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) -{ - FMHA_FWD_F16_ASM_PYBIND; -} From e14c5e09cae144efd1ce5834546e00a6ecc7cabb Mon Sep 17 00:00:00 2001 From: tingchen Date: Tue, 12 May 2026 16:50:43 +0800 Subject: [PATCH 18/43] reformat --- aiter/ops/mha.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/aiter/ops/mha.py b/aiter/ops/mha.py index bc8674de87..45993e6620 100644 --- a/aiter/ops/mha.py +++ b/aiter/ops/mha.py @@ -355,9 +355,7 @@ def fmha_fwd_f16_asm( ) else: # D128: kernel never reads sink contents but slot must be non-null. - sink_for_kernel = torch.zeros( - q_head_num, dtype=torch.float32, device=q.device - ) + sink_for_kernel = torch.zeros(q_head_num, dtype=torch.float32, device=q.device) _fmha_fwd_f16_asm( q, From e0af956ed9738e5002430984582ec170683d681e Mon Sep 17 00:00:00 2001 From: tingchen Date: Thu, 14 May 2026 15:20:51 +0800 Subject: [PATCH 19/43] ENABLE_CK=0 for module_aiter_core to bypass ck compile --- aiter/jit/optCompilerConfig.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index 83842745ba..cbcb7c8094 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -3,8 +3,12 @@ "srcs": [ "f'{AITER_CSRC_DIR}/pybind/aiter_core_pybind.cu'" ], - "flags_extra_cc": [], - "flags_extra_hip": [], + "flags_extra_cc": [ + "'-DENABLE_CK=0'" + ], + "flags_extra_hip": [ + "'-DENABLE_CK=0'" + ], "extra_ldflags": "None", "extra_include": [], "verbose": "False", From 869fb632eebb379ba4b211c93ee620bebbffcdc3 Mon Sep 17 00:00:00 2001 From: HaonanWang98 Date: Thu, 14 May 2026 10:46:47 +0000 Subject: [PATCH 20/43] Revert CK submodule pin to match main --- 3rdparty/composable_kernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/composable_kernel b/3rdparty/composable_kernel index 207a95d5e4..10cb6916c3 160000 --- 
a/3rdparty/composable_kernel +++ b/3rdparty/composable_kernel @@ -1 +1 @@ -Subproject commit 207a95d5e4081316f3fb18a035b3918c118367c4 +Subproject commit 10cb6916c34f957e81e8472c085603b4427baab9 From 4a592212f89cd2b4761f63d693f479f40d1c68c1 Mon Sep 17 00:00:00 2001 From: HaonanWang98 Date: Fri, 15 May 2026 13:45:53 +0000 Subject: [PATCH 21/43] update kernel and .cu to v8 --- csrc/py_itfs_cu/asm_fmha_fwd_f16.cu | 164 +++++++++--------- ...HA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co | Bin 68776 -> 0 bytes ...WD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co | Bin 83256 -> 0 bytes ...FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co | Bin 0 -> 67528 bytes ...D128_1TG_4W_32mx4_256nx1_rxy_cas_brd_v8.co | Bin 0 -> 81872 bytes ...WD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co | Bin 58040 -> 0 bytes ...64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co | Bin 72512 -> 0 bytes ...D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co | Bin 0 -> 56528 bytes ...1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co | Bin 0 -> 71136 bytes hsa/gfx1250/fmha_fwd_f16/fmha_fwd_f16.csv | 8 +- op_tests/test_fmha_fwd_f16_asm.py | 97 +++++++++-- 11 files changed, 172 insertions(+), 97 deletions(-) delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_cas_brd_rxy.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_brd_v8.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_rxy_cas_brd_v8.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_brd_rxy_sink.co delete mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_cas_brd_rxy_sink.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_brd_v8.co create mode 100755 hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D64_1TG_4W_32mx4_256nx1_rxy_sink_cas_brd_v8.co diff --git a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu 
b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu index 501636d552..f1a8db965a 100644 --- a/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu +++ b/csrc/py_itfs_cu/asm_fmha_fwd_f16.cu @@ -29,47 +29,59 @@ #include #include -// Kernel argument block (ABI = FmhaFwdKernelArgsBase in fmha_fwd_f16.cpp). -// kernarg_size = 528 B (33 slots × 16 B, including ptr_SINK at the end). -struct __attribute__((packed)) KernelArgs +// Kernel argument block — packed ABI (132 B = 0x84), matches +// FmhaFwdKernelArgsBase in poc_kl/mi400/fmha_fwd_f16/fmha_fwd_f16.cpp +// and the .args YAML emitted into the v8 .s patched HSA metadata. +// +// Field names mirror poc_kl: short names (d_addr / q_seqs / k_hs / ...) +// rather than the older 528-B slot-padded layout we used pre-v8. +// +// d = output O +// q/k/v_seqs = stride along seq dim (bytes) +// q/k/v_hs = stride along head dim (bytes) +// q/k/v_bas = stride along batch dim (bytes) +// q_ts = stride between Q-tiles (sub_Q * q_seqs) +// lse_hs = stride per Q head for LSE (q_seq_len * 4) +// opt = packed switches: bit0 reverse_kv, bit1 double_q, +// bit2 remap_xy. We swap gdx/gdy at launch, so bit2=1. +// sink_addr = per-Q-head f32 sink logits (pre-scale). Read only by +// D64 `_rxy_sink_*` kernels (ENABLE_SINK=1). For D128 +// the slot must still be valid (kernarg layout) but is +// ignored; we pass a zero buffer. 
+#pragma pack(push, 1) +struct KernelArgs { - void* ptr_O; p2 _padO; - void* ptr_Q; p2 _padQ; - void* ptr_K; p2 _padK; - void* ptr_V; p2 _padV; - void* ptr_LSE; p2 _padLSE; - float scalar_f; p3 _padSc; - int q_seq_len; p3 _p0; - int stride_q_seq; p3 _p1; - int stride_q_tg; p3 _p2; - int stride_q_head; p3 _p3; - int stride_q_batch; p3 _p4; - int gqa; p3 _p5; - int stride_k_seq; p3 _p6; - int stride_k_head; p3 _p7; - int stride_k_batch; p3 _p8; - int opt; p3 _p9; - int lse; p3 _p10; - int kv_seq_len; p3 _p11; - int qk_head_dim; p3 _p12; - int v_head_dim; p3 _p13; - int q_head_num; p3 _p14; - int stride_v_seq; p3 _p15; - int stride_v_head; p3 _p16; - int stride_v_batch; p3 _p17; - int stride_o_seq; p3 _p18; - int stride_o_head; p3 _p19; - int stride_o_batch; p3 _p20; - void* ptr_QSeq; p2 _padQSeq; - void* ptr_KSeq; p2 _padKSeq; - int stride_lse_head;p3 _p21; - void* ptr_QSeqPad; p2 _padQSeqPad; - void* ptr_KSeqPad; p2 _padKSeqPad; - // per-Q-head f32 sink logits (pre-scale raw domain). - // D64 `_rxy_sink` kernels: ENABLE_SINK reads this at UCONST offset 0x200. - // D128 `_rxy` kernels: slot must exist for kernarg_size=528 but is unused. 
- void* ptr_SINK; p2 _padSINK; + void* d_addr; // off 0x00 s_D_addr + const void* q_addr; // off 0x08 s_Q_addr + const void* k_addr; // off 0x10 s_K_addr + const void* v_addr; // off 0x18 s_V_addr + void* lse_addr; // off 0x20 s_LSE_addr + float scalar; // off 0x28 s_scalar + int q_seq_len; // off 0x2C s_Q_seq_len + int q_seqs; // off 0x30 s_Q_Seqs + int q_ts; // off 0x34 s_Q_Ts + int q_hs; // off 0x38 s_Q_Hs + int q_bas; // off 0x3C s_Q_BAs + int gqa; // off 0x40 s_gqa + int k_seqs; // off 0x44 s_K_Seqs + int k_hs; // off 0x48 s_K_Hs + int k_bas; // off 0x4C s_K_BAs + int opt; // off 0x50 s_opt (bits 0..2) + int lse; // off 0x54 s_LSE (1 = write LSE) + int kv_seq_len; // off 0x58 s_KV_seq_len + int q_head_num; // off 0x5C s_Q_head_num + int v_seqs; // off 0x60 s_V_Seqs + int v_hs; // off 0x64 s_V_Hs + int v_bas; // off 0x68 s_V_BAs + int d_seqs; // off 0x6C s_D_Seqs (== O stride along seq) + int d_hs; // off 0x70 s_D_Hs + int d_bas; // off 0x74 s_D_BAs + int lse_hs; // off 0x78 s_LSE_Hs + void* sink_addr; // off 0x7C s_SINK_buf_addr }; +#pragma pack(pop) +static_assert(sizeof(KernelArgs) == 0x84, + "fmha_fwd_f16_asm: KernelArgs must be 132B packed (matches v8 .args)"); // ---- helpers --------------------------------------------------------------- @@ -224,49 +236,45 @@ AITER_CTYPES_DEFINE_ENTRYPOINT_VOID( const int stride_lse_head = q_seq_len * (int)sizeof(float); // fixed layout // ---- kernel args ------------------------------------------------------- - KernelArgs args = {}; - args.ptr_O = out->data_ptr(); - args.ptr_Q = q->data_ptr(); - args.ptr_K = k->data_ptr(); - args.ptr_V = v->data_ptr(); - args.ptr_LSE = lse->data_ptr(); - args.scalar_f = softmax_scale; - args.q_seq_len = q_seq_len; - args.stride_q_seq = stride_q_seq; - args.stride_q_tg = stride_q_tg; - args.stride_q_head = stride_q_head; - args.stride_q_batch = stride_q_batch; - args.gqa = gqa; - args.stride_k_seq = stride_k_seq; - args.stride_k_head = stride_k_head; - args.stride_k_batch = 
stride_k_batch; - // s_opt SGPR (kernarg dword @ offset 0xF0): packs three host-side switches. - // Bit layout must stay in lockstep with poc_kl/.../fmha_fwd_f16.cpp::opt_packed - // and the S_OPT_BIT_* defines in BF16_FMHA_FWD_*.sp3: + // ABI = FmhaFwdKernelArgsBase from poc_kl/mi400/fmha_fwd_f16/fmha_fwd_f16.cpp + // (132 B packed). Field naming follows the poc_kl source-of-truth. + KernelArgs args; + memset(&args, 0, sizeof(args)); + args.d_addr = out->data_ptr(); + args.q_addr = q->data_ptr(); + args.k_addr = k->data_ptr(); + args.v_addr = v->data_ptr(); + args.lse_addr = lse->data_ptr(); + args.scalar = softmax_scale; + args.q_seq_len = q_seq_len; + args.q_seqs = stride_q_seq; + args.q_ts = stride_q_tg; + args.q_hs = stride_q_head; + args.q_bas = stride_q_batch; + args.gqa = gqa; + args.k_seqs = stride_k_seq; + args.k_hs = stride_k_head; + args.k_bas = stride_k_batch; + // s_opt SGPR: packs three host-side switches. Bit layout matches + // poc_kl/.../fmha_fwd_f16.cpp::opt_packed and the S_OPT_BIT_* defines: // bit0: reverse_kv (compile-time gated by CAS_MASK build; ignored by mask=0 kernels) // bit1: double_q (compile-time gated by DOUBLE_Q build; ignored by non-_dq kernels) // bit2: remap_xy (must be 1 — we swap gdx/gdy at launch below) - // 7 = 0b111 enables all three. Safe for the four shipped _brd_rxy / - // _cas_brd_rxy [_sink] .co binaries because bits 0/1 are compile-time + // 7 = 0b111 enables all three. Safe for the four shipped _rxy_brd / + // _rxy_cas_brd [_sink] .co binaries because bits 0/1 are compile-time // gated off in those builds; bit2 matches the gdx/gdy swap on launch. - args.opt = 7; - args.lse = return_lse ? 
1 : 0; - args.kv_seq_len = kv_seq_len; - args.qk_head_dim = qk_head_dim; - args.v_head_dim = v_head_dim; - args.q_head_num = q_head_num; - args.stride_v_seq = stride_v_seq; - args.stride_v_head = stride_v_head; - args.stride_v_batch = stride_v_batch; - args.stride_o_seq = stride_o_seq; - args.stride_o_head = stride_o_head; - args.stride_o_batch = stride_o_batch; - args.ptr_QSeq = nullptr; - args.ptr_KSeq = nullptr; - args.stride_lse_head = stride_lse_head; - args.ptr_QSeqPad = nullptr; - args.ptr_KSeqPad = nullptr; - args.ptr_SINK = sink->data_ptr(); + args.opt = 7; + args.lse = return_lse ? 1 : 0; + args.kv_seq_len = kv_seq_len; + args.q_head_num = q_head_num; + args.v_seqs = stride_v_seq; + args.v_hs = stride_v_head; + args.v_bas = stride_v_batch; + args.d_seqs = stride_o_seq; + args.d_hs = stride_o_head; + args.d_bas = stride_o_batch; + args.lse_hs = stride_lse_head; + args.sink_addr = sink->data_ptr(); size_t arg_size = sizeof(args); diff --git a/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co b/hsa/gfx1250/fmha_fwd_f16/BF16_FMHA_FWD_D128_1TG_4W_32mx4_256nx1_brd_rxy.co deleted file mode 100755 index 03ea794cb2e8d2148c05bdcd24be103a0c012d4b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 68776 zcmeHw3wTu3wg1VPGb37C0j)+DU_eBkA%yTo7@h(L5s-(dH3<+N@-zfQj3*Nah)69e zVrsE7TC9hZ{rpJjTRM^+os<3HuiO?SG|fU^Z)(!I(s-X2_fKZ`E2F; z!a8g3wbxl|owe3JbIw_NmyVk}-pt4dTZa7e8Ffa6drS~^k1JbA>qI}>1{j}GhH*UKnf~Lslszdh|E0mh z7fQ2++VfTOM*d(G6DhOyz9f@Sj;tt!Y`QdqRSaOtZ1?|UMvplIQ$qekaV z&tw(MD_&i&G_PRZyuwwhb{7>E%+I^MXmN2NdgjkBLR;Rdl?C$(zrAqjin#?#pU7HX zu&nTjmBmGQ*FBlFV!?t{g~i4bS*sSWD{S?htThEoR~P0jS-gDy(?Xs%clCk=g+=M7 zc}ksT#qyvxNSZXPKqXl6}T!Y2k%=k{4$3_2cx3!lymaM!BMu ztL7CfEhx%cphr%Xk(T$OR?fX6PhDc&SbWFI!f!3^pW`z5&TVTDUx~K=a8z z3qH@RDlS?)zc5eOC0VUJ3sz4jSrsolDb%6JS+M$UlGP#xNm8ss+s=YnRg&4C4U46#3Qw|lIm0*$9($IoX&fpqXS6;GR!`sNntA@>WnQsDXTfXF8o6R=tg_FB z)osFR`RWv@!e_xsyQY{d>6$NROg#%`+BaQIJ>2rof}3{EHR;aza>mNDV5c=*ks5?A 
zXGG3|nbvegS`@yVvF$9lX-!u&)--k|?9T31ppx_2wCf9R)6>e!8C7!D|MR;Jbt*gM zRPocE%AEk8b+bA%L~81LIivP(Z>8^OD@`q!f6`EOr&H|h0XJJl98%+{?`+v{0B zeD;4T0_8rm^N;^X+en17H@b25Lvg*7M5p^t{hBsfYoYb;>L!LAyT~v`8W+aPLDNeA zfNX;9v($8cI8v37$hbF=fYf~rK0Wxz=wrMOH~7fNGTsjXMgX%3(H{m3`;7Oe5^rt1 zpHIAv@%~Ccqpk6Ngzdn$0mcDi#6e$0i2gX+0c!z`i;ef|2*KwVU>Fb%^idqJHDKg& zoDVPzx~aqg^Vtu$641B==VL$MHueL?h?75Hal(A?a^x*D5;zbv9y^c`h&)ICG_EP% zA2l}ZHzOAHy->l{E)2Oa+l65lPIY0v3s<@@;=*k%jJdFiFmT|)wDU&vdH1<=hg^8n zg?}{gG~#Cm+O>*2CoUN|U>KVYL=9u$YGcFxRf$&WTCGZ)|6pqv&S#YGZ-sWt*u4Mz zYpflu#`||zek)?-=0+j|2CUCDcI+QuJiOmGA%91{+lHqyil;9T*b#vNMhD8vtb}zB zkH=XS+BWVVkQjjTBr>e84=f)raKKjU?tvTqUmsLHaL~Z5{<{Zl9CTpNy@L{iwi;o^ zd;8anP8oo`-!ShrjB+E8<1@bSny)k3&a)<&F>8l4&e~xbSH5PRk8^CZc7&nRapH*# zgXfKeT^#){MCACm+h@G6=|IE`IMItXA1I0Y54Ktq3DY(_zL3WuId0A4upD2)<9s~R z!FeL)hoyR~f%fBu&{#YU$uW5zcgK?Fag1F*>g>nn0AnCF>dXB@XuZMY5{kfQ<@OfIupP5ROWJ4(KEL0}4b znCDxj|GW##GAp3kVf_Opa37%GzZrZZ2hN{t?HFsjel&P+Br;^k`tz?t+Z4BL=+H=H z*s%5V@5|udTHeH%@+}7PqKRv7lx1KxnRuAjJ7Qko@cLQwk8iLa@FBzCH3=9e_74qT z_GerY4mAe5$GpDJqc39o0)L11$0p6(5xaKQj>M$dI}8MI;@TS&gd+DQA`2^wY|}_A zY#%XN!{^GZjaI_H!{`E@@NuIno|!IO7zwm*A1O0-g5RU~`{w?H5#5i|L>4MMhPKD> z_c;E(g}+_+djfyo#^00sJ107;xa*SW0uN6lfN#avj4qKbTdzo5q51$-Th~NaMbkCb zbt_|O68QKNenrzQ(M|Pr^LN|2y=`oNC=o*Y$P<7V8_p0sK}cSKGNTgbe+qxQA@6sT zyzLdf2W?N|?-~4k7k^dwdlrA+!{7J!_eu0o=gUfDsq+C>vSmfGw)Rc*RegY}Ejy8| zXtHD3O5SV1N6VX&$WeVc{v61=XMgWRZzV6Fl2`CVhTwQ04*y}kiWqN0-lO|@ts=&+ zaQwCX!**AXOc>ktx6YZdCSlyazv4$nPb7>7kh?CtFkw8n-|`z731d5SMCLIn_7lWR z|Bjd$*fGZc^|CR6N6N-*zGdSrn{O!}W4sAH-`al>{OXIQ92K6|ErGA|AJ%Av{a0J({Hsl{loik>^FVAOF5R1V+r-x z$k2FcWavTs)!^?C{%Y}e7=K6bcNBkh`1=k1>hbqS{2jyJ`}q3^e+{KdwxLxx-iyC# z{O!Zv8S;c|kx$7(+2m95&^FOf&j*|b9z`x-jz?G`SIM04=y^kJ>i-b(|D))`ji<{0 zNgT2KJnT*UA@&1joLk5>=JRNOX=v#fldxyd{*I9$iB9X8 zqpWRR%4dP59cgBqq#5e(85x%7<t|nI))M)zG-53~$KP45 z4CAE(Cv714%P<<=e$+5xA%VNX0xRafJS6a7SYS=Qz(Wy%wK0K*;{uP= z2|TLofn(UCIwWvkSYSL~;K7K%nwY>tae=jU0uL*D$njb8Q1a9o+O>A)py&oJ9PRVg$TI5wV=)m$9{K;{*9mmr@arNpvJ*eqp&_&b7z+cl-{#h=4CFGnv>!h3~W(yy- 
zCr+Gfp9OnTo+|Ls^psQ6vt84(o%Zhy`nYTVTpU-s_UE;TyY?qdwQGMK*L41#vOnqL zonKYc+~UDSR6A~bG~3M((?;x^!&o( z%Fgep{6_j%=e8X*eI;@c^PBK|PvtirS7p4X@*C+ZGun3G{6hX!$aTzDiYI88-*{Zz z<|KVpo0Ig_;IHYcK*Rh-K9rgHieq`qe5Kmm{HEI7{HF9mPS*4>=%VRk;LrR<`WW~# zzmdKYax!1By%KUVU+H${E8Wig#`Y@k(e#v4)3aUEvzGXWEmt((?;x z^!&o(n$9^LIKPuV-g&yRKj~{=E81Up=5*lv&g0sQoDQ6?NnevOUD==XwYV0vr{W13 z+MmZq+MJ}XZF7?T2>5IITF}t`S@>{%nuKXEc2rx@h`1_|yKR zkApw$Px>0jNqe%r26ED#x}Ema?X*AJYr#j;Q%+6Kc1_Rr^xrbwdzX~V>bd*wUXfnw zvl7p!-!gcfv86qK%S_<>@>HYWGR=t|X5Xj`V^o;OSHAHv=*ErObl|U~ea{Elk4i~fKF*~@|K{?>{WY6U@_MP}B(I;p z)R@-z`KAMwG3s#S0@DZ@8LwGG+8V^mfOjB1Oz`!i=a^RW7`w6}g_~pd=3%&_>JK_TbzZ-aa;)B{6TVT_3EyK^ZoO>CxKWxMG=?dFe z6UA2d1HY2EE%*b#yAi)u@CSikMf^I!w*&7^e45}70q;Tldck)9?@4^7;12`uMSS*2 zf7piWL!WGLee!?6+Gcwub@#n zLqW52rh-=KECmClvjL3@Px@3Yukm8J=Jcrn#Fq$8pBhMfnc(!PLBv-GPM;b~{5HYq zQ$vWa5}ZCYl=y1F=~Kgq-=6AIDSi#xrua4R6u$D_={pBm0@PzZA zf=20f1BE3tpBl?+e2-jn`qVh$_XK-%;@7z5DSiz+#jk;<_%-kpzXqP-*T7T!8o2gr zuTPyT8N5Dqu4K?Y6<0oT+<9KX2ImC@PdGnO&?r5qpjrA;1+CJ5Q!r5aGeECTP3ATJ zx?FSm)V0LFAvk?%3h_q;r%zo+JSsSSYAW%^1gB3;BmOPH=~L5*KOs1M>U!c&rutNh zU*npm_%-kpzXqP-*T7T!8hDCd15fd5;M%XfK6S2S@cPubl0o|vUpzBUy`o@)^QwX; zoL?wtlpa>lEPYKutMqjR1Ep^OdVOjJukllI&FND!iGN3M`qV7qPYX_;noaz>g43sN zApWf2^r<<-zb`m_>PF%}5S%`B6Y(FW`c#Tv@VHSiR_2A<;Az*GDhc#2;GPw{Ku z+ONGnb*^Oa`qa6SK|jADEHQe%9vrsLx(l$Y{RlqT>G^u^n|@@K1LiJy%i0Jy{LbUn zCcv@xw)1ZVocxWU{;vYg_|A=fj_)+;JwL}^HSI$`$M-Eb?&tVHx3vv${K&FP0~~+N z{p|uAf8Ap_0gj*j?7#rWcOEC>d(4*c{R(CLklSVasEsmy;=?k2+7mK<&JSh$6*J#c z=k34xxS!`6^U$U0JlFpqC%|{)#b~>v*NxU{K#b>e{>yH zT;h2?{j%hff1!O~Yx}?UKG}Bj>m{AX;(O0luYD7fK~&MMOZs{7AKp@a%JJo2 z_)~A%OxkXKy%^(;W84mmdswyKnDm`99jC9ynU2eae$l=o`|Qu%bl~B>efyN`2ky%% zJy8N2(7?Nmh?(o`M%+fsuFgzccftgGoRII2qQ2p!)ISP6+S$4IzE+Mcqx&J)B(t4w zpwq5jc5BA|!}z|CIzJ5Bwsu$N6Zg0Qw6H^FTVFS)ZRb#H#!yRXse4Ol^)(HS?Q|&k zN~@h60XqVQ07HOX0K2@>dS_R_uE2x#V5b|7yWzNlJ=E!rZ6weUJ95Am9pJ>!XY>(X&3v_!2#j zMHyeBXML3MC3@CJX&2G6K1#cYp2woJi|AP&rCmhN`Y7!pde%p27t!-rly;%de-`DY zuYVThPJgfd`8UF+|Mptt1yG 
z`KGcP=U4~wIzQIL_uKN_Lu-7edo#W{;Wt$Jn?ruD!S8eCqn+O)8Tg*Z*e&0l;mz=& z;MHbuu)Wd=N!!7&z?z`3?2X_s(;8wIIh7dC?{)YMmVs}2@`dJ52hemJX<9?=VkaVP zwK0K*E$|MFGlRqIHO^BQPr3AWTXCT|(h=iBV@zwfz1FFdwxgZU*5w-0y4qgna16d5 z?x4P>Mn8O}`Z)Mr(#H&PUI|ItzOcafB^Y~2FVo^YlYu--o$<|ahndhhUuX_~88nyn zH7(9R5oxQ52|UyQ-kAf;Am^h>jHk}{PPxNO=o}ZC+DkD$v%hI^o~o0!!xN%oco-WAP1ihu!!_Dl9bde1Qie0&BcF_%Vih z;#=tsyYVesOlVSciA!5;oxsCh9rj>6^~Cqp9TNEd$`G0s+xcnS_p9p!nG^U1D__R9 z*v^(a9^Y|yNI+*Tt5-%Xx7W^ojHBN8=DP#GlQN)t0N1bUwPtXHUE+AZn+oCjbhS-u zr0qC+Twlib;T`xrl>wbQqrcmArWLj$j{3gph_~`C8gQM*A&eN_B<6GtI*U+dGF%A2FG3B!ME@o zc0)$5{9buKas3+K$#>Wd8NKp*<(=j7!?*GscHLx;5VyR ze&&3yjM{I#vU+9XH+(8KpkD&=cx6qI&GljWMxr^{y!_H+Q*n>+%o+SGy%fI9Z!y#J zcrn`fU8Bn5hh#49itE?=Ces>kZ*@Y_wks^Kl0KXTAHK%EPvvpO48P@6d0c4rT#4~n zd8ReN-sVK4ttuvPFMT^3zCF=?K;?1DrN94-3r%%5jL)8DT9fSUPMx&vqi^THw{3fe z!!f8skUWll&i$NU7?X_EkhJX#3yjgu-QdHFSLSi*jM@ds<3h8GbB~VMh_qG41n!}4 zbcb&EpTZ@oVizRGwDX3G%A^4xPJW zT<#Lnnqu#C+DBMx@zj3$^+5Rbb@n$^&gQuoSNty4Kz{6jF#}ha)>J#@=&XvwHOX9`4gY7XbAHn?&ACYNV+=uF##avbF}69s>1(3pLp_el^Iq8*(^^*MdGbT8 zj>_|1*~weW%A8O6P{*TkzE^g}s+N^GKgkby)hoMKR_6aCKlqndcJ_JYW$q_W)Bz=P zzn0%C?`+VKCu)R}xnIklBCnU9oBOr=UU@YyuiTsyys~nBD8V_^eg2>2UY@==9KJcz z-tB~>?O<47O>bPI;dh$WEPIc`-vZ?rtL{&DUo13-hGYDQGBY^ae#VJNTWw6>;XW8Y zqSUl*u&dM@!g$ud1>!<;WCX^KzQ+vCvA^fkN!!sZj0OKXz)Bp|d z=Xo7-;d6}LkhJX!3ygDK({XFS2RR<~9%>%R7n*~)7(ehfiQ|a0)x-oI;=HC~nDYqr zMvX{E{vL=6P3=I8A9{zxbe*&v=6t5(nDYnqMLkJ}-S{0~2u+LSehPji*Xq3v7vnl| z9wCl86g7|J3%?fY!FhywpmwDr@Aq4(2j>s!fjSqt&p$&wc%QHA54qKS{u#>6d4%Ip zPb2sFGXAvo=lnr^QG3&o_xNF4ukrVr!JF)VQTN@GkXPyZli~Zkx2KM%&FR4VdiwMf zTwD6))Vs_e$4-ZjbH12?`GWWK>RLc1-qSNr&vN3XH>!MTFGQTL?gigdlw_28UA{-}*ob4I#e>3VSPNb*NsrhT zrR%`CgFI1VrRI)wo%D0HbX_=?@EoYeQgcbVZt1$5@SX>Gc+j*AAo?o%qkYoDu8$)&IFD?{c;5f;w+QJ# zzb6-TlfQvyA%Ih&_)RV2s|eA*cOcrQeha^20jED{T70&|_)h*N5&CKsONiOQpqud> z)5@3e`-T9Y^8?+H^PMdJ9p)F85*HO(iGCoBYL&ok5NnayfIN!*1&FQG; z8{(+D>&RzH9M_a?Vc}Ov*pzOZcc>d`_SC$SFZ7LcV|!D&asHuhsPpT{=S&e9pQ0Pv zF!q$!iF1&;PSAyOP)z90_&RYeA|KQicH}dsI+3N3E^Kc~7tTqv1?myioHPvA@5UdR 
zmd|em>hS)$-SfWSJAl?teQHjk-l$jX$T{gZ?0?Z0{4c_neZfqBcA%ceulUq_q^=d@ z;(YXb@_5Y`{3H9{^acM+_#eLDF~VOXr@OJEVh}m~J<|Wy7kr=n@A`rt68^~-{D|-` zJ~bbazoN%{be!~Ru0X%a?SO~}#sK4;@yXmy`Bi)&w}0}%2j?BZxVF#wf^Pvf{J+u3g%~F0@Ho*n~D{ zfDJewkvD2IJMy^{ZPM1YNn6(@ZDAAIpaC|}a-)v3BY(5dCT(4tw1rLhEKBQ1oj5Nk z{;)mgrBmssb>h54`B4wrkbkxN+Rl{>x~}hB%HXZ@oDbVrFUs{|oo6BOmjq{>=K|s{3(h*vg~Wd@IO{wY5r0K+ z)_E=_{tLlb=Xopf*92#s=Mv&?ob(6QH_|5?T%Tl}Cw;QP^-0!w(kB~SpJbgUeX_yz zN!EGNCmUR!WSu8{vcdI9)_Kw=8(g1cohN;=0Y0hy;atj~{o!27Aa$N=QRlhNt@A8# z>pUH|&NJdjo#%Q->O9LGsq@_Ac(gQ2UY{NX-0Rb$fqQ*=3~=q!`Wy0dDTDro{9MW)bqQd9jWu&umtK9%CvxaKK-4LrrKfv5O2@D#rWp5oWQQ~VmZ_G_U|1&U3vZb)Myp)Ol`lymg-RsS;k}kL8-v zr|u&DiQx1phxlIwr%#mO9xEb)F?|ou}j0 zc}5(m^IY%XU4hC|Qs=qJ@z#0Lr^tuZO^&zDlRmYT*Z6X| z=Jctr5^pa!ed<2qc&o!aM4$Q^@lJx%r?wIAEI57Ye&SaMPM>;!_?3dwryeAJRjN;= z_%*J1ieCdy@oV5IehoauuYsrdHSiR_2Cn_u>r>}S2Cq+@D;cy;NuB39x6ZS~t@Cu; zI?sqBb)M^;*W5fMb)K6XZ=EN7YCErS54q;_sfUR75}ZD@gLogo=~E9A?<+Wcs)Bew z!Rb?9C*EIh`qU%D2MA7|`UdeqsXmqB*SO{>ehoauuYsrdHSiR_2A<;Az*GDhxb|zW zPn|0nygqfVWYFJN*WtNZY{>df`WvrcsHCJGZIweF?__jU?_J`h-$Cc8otvITpV1 zmPI*M{Q5nM)hW@zkj&jApdOmue#o_XBVs)&++tyf}e`-FMi`gORXdCuA@soyzRH* z|9AXvTR-1p(hf1$f!|j0++}___dS=j_w#)BUn%GN`k+CIp88foU()kj^Im(;BHvXX z9Je@r!4G@`B2f9W_+UJ@a{Gj`#J7woiFqAT_W|VhCZaH z-_QAh_&}i5dLN73IBV(Ed1}k>M}p+_4Oh> z`QP-@R~6qSzkN&Pi#z@QtK_=(%8M1fzCOEM{>{n%jO*W3^lz7cbMk)-dM*F%k54Z@ z>9zc%Kb`!f*YYP^{>{n%G5k^c6X{Q9e=R$&YjbwjGM`RfEvuG+IgHgeU#$(Xz3QJq1S2jIQ#Bndcair1m=L>xmA=*-8)bm0O z?g3O@7uwU>FaY1+b*jbO&v7x~eOepp{9lcEU*&CAwo})U^uSZ{L z)%6*7exRM0&)TB>;aBRfVxChmNqM~O&{NwX;ca*GN-g=LJ-z1sW6yaE0?pmEnb6)IddgH z3jIORxjxG8&f_w+h7i0Ci5=>r{GM3nSK3SENujM3n?MeHS6nCK4vT$`Mfn}E&abo+ zbEP5U8p+Rg${&*c#`42AwB7hkQ}SLM`CjEz8K0t0Sa{VCrt70~YYcgxx5^D z{5to>IR7JeE`^^hh41Ctac4P>m*cp=egW@*+1bqy2 zUfI2}Chv10AL+cZdu2`D-$Fj-S+DF~S(Ep*j1ACvW%tUOyq~3fp!3SkTV9Kt8YB`de48yu8NrL9g6iIknxq@_OadKBsa#_wsV*^!Q@<4fd1bURDl# z$ARxGwqHeUjB48z7FZcUUj%)(+J_xAw=90VQ@-@=S&zQ;@W&rO=4s$v57mZPs6 z@LOg7m$Mzm+d)@s 
z*C+FJ9P%;`??B%U^sTmk?^NKp0>^9YKPK~a4dnyfBj|es{BF1Ztn#(Q(y^$@?QTA- zg^bL@5=-?_=5~pl`Y2;n=j$Vok#rJ6^-;#E=zlEA*wp!&Hf0_bd(=l6o1%Yxl(DGu zHEl>bu}6KBF)8{Vi_)GtU(<%n!(Q3Fvg&+I8;+fXiM;AFe$a$cN8t1NPS2Z)JD=@Hs9rAgc^#C{js~KEtA9F&|b}%fk z=4O1WeDjY@>kj)p<&&3?FTO#hPv#5Fp|61EmIJ1>&VJvCNLzCMKGyx@|I`fLX@98d z)F>D0aJYwETxgHng7Nu3F|88&Bd1QAxC3NxNI zgN)ITv?cceoPn{dYhaABZh$&tt%{&pt3qfG&IAo>9T>Y2X-n=G$l8$`eqaU}yNns? z%surOGjXAMN!#JsXydo)jAhn1%QYwps^=Ndm$eF7W<$nQhXn5P%FMMY zIF|eM?Z!9sVWDZXzoAzL)(}un?(3)4s*o{itqQG6T-uWRAbNFR4FUD!UVv(?3ZZSW zomoGit`lTdHEc4z#dc=h0LOFBLA6$eE2~$=I%!L>GwTJYH`b~Mde^F$|FRi$?1q$D z=6qbA`7dEXYx{)5dN%6Iy$tnQ6$R*D@N?9(*hZ;$FCfOpBGj~%*=8y0+SIk-zJaJC zn2RxseqmbcZL5^Nek*F!HrRnu)(?=siZy(LKM!=bzF}JBcB@j>u91i8!}s>Av0I9| zpN;kf*c(9BQD8iB9^kk3iVwg0$G7%3fDhx5^S~S&Gwv9-tXWe$-TDCfT0ZIz7`Hcq zj`79#WNiTXVXcawUaMj*>Ju2BH-U~Z$2eq70QqrWM!ia{Ag{9buk2cUSucB}*N%I}qz-~W>z_jTm=|6cj|y+3(it%{&ttD-6S`JI1~2fmeO zt)G^kIi0wcQSXnV(^pls`H_~0JqAA<*o`Sr%rzG!1m`9qF&C4%MHa(s>gL%9+ z_%*dwMK0P^tqJou_RQmb`FvczrEicRa#nWyt*e2ev9o~9n$GgPfr(Lz1AR)w-ZW#H3f@-ab7Rt@FDyT2_JXQG}bzTc`&5C|!T3hYRQtw(s^zmZ&_*doi^}Ibmm1@)S8*%GtoNAl0Ova$@67k) z$9-URzF&zLV7zmV({auDNbzGFVLoDv6@ee)n{%ALE?PeBAIn_tm7Q^|Wo51>KkhBd zT9KhOJ#68Ppr zcF$7oJ&LyE9;i!kjY>W-tsQpnQq~!d#aMM8f_WrgXb+WOyp!PzK5S=|Mx-sdSL!m1 zM?r;EVP~s(gfXpu55$G`h=cL#&-VqtZs(NNN!!uo7#H#RtVirzHGfcM{tjSWu>l(7 zUA68*F?^2k8iP^RD@e#BoI0lKZoAZqqT$xrBOS zy$ZGNQCw(i*I@j*k0qw-r0p>0G#$sBL#QwJKUFovLepZoSwAe->b;<@A12OyQ#qHM zsUDn5s0a62RqIu>P!G-_)Pwu7s(bx2)Pwi>%KngB-RsNv7Tce53CDB)S9P!7Lb*AI zP+zQ9(Shq#L~y+}webbNVGmUI-dm7Y>HAyZ`@FxWj@(O@wZZi1`*3aPo7?_s208Wt z_&Dc`?U*xoU$3qO_iJ_6tJvZ4s{lWae+1)Kz#rD1=TqxdkiUu@+~2Q6d-)f9)@^{B zF7l{iSP%pdo;zo_N zpL^(XZXn;}dKIo5j61zvh2qT^f$UzrD2G=M{vJ?#VROzA>3XH>!MTFGxlb?Wigdlw z_28UA{@lx#b4I#e>3VSPNb*NsrhTrR%`CgFMxqz>qmzC;bdRT^G(J zJO}p==3J7lTe>c+4Njg1vZw3j)kX8=T%y;jn2d9&-x=HFI~A^1QH1Ni`C^}CpzU&g 
zqauBktI)pXGS|lu8=OaGU_9@C)Or;{zo!^s2tmlfO%ZUe>EX%&rC9_Fg_KU&imd1Ne@*~x&M~wJ)~ir+Or6lnItYka<_p?}?HIq8d4o2h&1nM($`2A&Np&hbCT;L5$8V9oNqX;Dc!=tuadAS-8k=1H|{a5e!~cTBi-_4 zd?VdB|4=vXORd(cknt(HMP%$LuM_7Wb)BG#TCYOr&-glVE+QZ9g{{`B5Lp`OQYZWy z>B2dQw&4ERoRjXv^?PKH@09CRP;c(9-IDbx)U|?KEnTmIyj2V$r?+Ii3i4OKu$OAf^5&lEoRiY8WBPUDoRs7Z*-xdT z)=BeD*NyX()=%s6SzoUr$l8%1Qg`|JzT~bQu!g~xVVA$h-snHyBKz%fy^2eC&K`e% z8NBONbfrH`6d69>m)x}ju1yDCn_+_gE%!#({_y#}?@MmwiJx+>`xL(h zp5oWQQ~VmZ_G_Pn|0nv`)MJK>_R@I1O36fKh&t<`yNvrD(d+PNn==+_=$-ebHW`F2c>s5^9_hXOlr~mgWls)W+ z+}<{jT(1Il;CJ#o_o$6>?uie#3-El?o{;m+`QboCuWIw4FX?%%D`vjuC*S_7kNY`( z%tM!|ao7JKN6}NiD(VM%#>1f7K2-E0%S4WA?r$65xa%IfG{AR>)TbKyke+^j`G8l| zp6Q*hdEHNc?=kZ&KhN85;d^R534I+|*FpY6Rtx`8cMJcC4+;NiPhJ|(bs#awPkQoi zKkik!*;zo~`Zw+B+%T-t{V8oGWt(X!tIlx^k_e)vjC-v?Istj7k5=^(vs3nlq$-SNRwI z)H^OTm4q0xhieOHetM3e?rM$@T3N3`=#?I;Az!g~y$a|~d$NX4^U|^z!lQzaGBVHd znpI;yFVljDS7svP?!)~pb^LtW9{75zbbuoIRswS?etxEuPqp|68I zRMxC`6Khro&5`ct>yEyT_Hc)7pg-CZZ9Q?0PWA{@>syX%?KKf<5 zXR_uw279SoDYUg>6UbrrcIsr@VX@D#=w)_RvgWxGc2YIZGOm&QhK$`8lK#f>!#C7i z2D^D{pDU2_RbG|xDf)zkR}EphK03d~ko!44UuaYGVH?J!=%e#%CFTH?Uu9e)eIhdU zl-EP&+bYb9D&Goyiu`fm-Q@MqdA9;{rOLZPn<9Um@Jx|k=ieCb0gyj?!#{e%_d<5q z$-;3Kjyv0uOJM2I6=i zj<2*QC2OE#ke4}eF!~0gubVyD8H(ef;CGchC0PSqN%=rG9DTz<*WI3~YM>>?>Z6%< zwyM))3|4Xe2Cc+cee`lW$B~$V|J%9A8t7`sNIHqFW6}2ZKu7eik1}R;4K!`aJS_I8 zk1}RO|N1CnGl=jp$(6k}*y;pXxtjT*_$VWP_>|R-u_qdRc zdDbhtSJvdcEn@?8UfI2}Chut}ALzWYGZ%a1P2SJa??LO8-z%@WkA-|*x%Kz1UU}7h zEadab?UhsIR>nJ+ZEBzBHP8H*8VtBeWUBe7E-Wh`>Mwhzb0g{DeuR3BxG)=Ar5u}yuHv6;L_ zhJ9527MhmJ?dI)T*vXr>t1*wLyes@#tcT9qan2{m+bz{Y=j|HIAI!JL8S0_)buH#$ zj?X_sxpls-#{A2C-BP)AzK-)g1>+{cpC-XK``WiSlW{y5zLjm~C-Zd;>_WOJ=$nGR ze)e2vDvqb(ILDr!%-6NB1L>xtZ#ww(w--1wa6AKax%Q%DzK%m)=HXfBn}xms_N~qh zIKBbL1MQ{Bd|gBNKzAehZUnzU_Hvc4C6uU9^GUk-bD<@Cy>?#t^xz;+L@lW+ZUn zLL6rVBF|;;A^0`r`=iFD{bs~^AJ~Nowsv92h1tf1a}ri00Rn5YwGlgz-#gF$IM$)r za^U$Xk^O2uFe1Pa=YXvD(Q93J*S7GkVc}h~!n-zwcTEb`8R<0!yz2^h*AU>j^tu73 zS|`A}EqU6ihw!fZz`7B=zJqtI1aEyl>xOi#`>AT&d9J2v+R4Ak8g{hn 
zdU~!0@L7?SvGG}vRlOU+Z)9&>KYdEq@q6p_z4iItI(+(+uD|!zO_PtVmG;&}laH>2 z_SQXDQk{xwVhezjV#Aw??@H?Yb7(TYJpGg)`^@XJO z)_$I@j*#(ns@hJ*#y^@)jGKQnopeljYdF(uHq&b}(`zv~KbW4H%k)~y=IY#72Xv}^ zm`}Iw^6Bc2nik|1imXQc(PtNU+^kI^5C_LNZVP=(&%My; zBlV8$olf3wvD9DeN<2d% zP0XdKbZPjV3i1e#)IH+e6FNz$sFN$9L4nf_Z@14o7)}=WD z8f~BE_-OkyN2Bf29F2-8yq8xw22kY~!7D?8@8P!w=udvj!S4)!R}&_4431Tf0fc`i zzr~0NzAG;M$?q=e1g|0lz2q2aOMWX65`>)~CTMjG@f|IFp6^;!j&Wm=?>==5@tvFEFc2FZ`_{Yb<2 zdOC)h+g)md}7yjsdN<51-YOhR@Dx&CzK4G)FVnA8Ynm zZ18F`IM`n41hK|MaF}Tgv5TAzIPN&ow1(Qn4u6w}#+kuk_8O-n+CyVZYq-7E>4f7h z*O=DT_Bw~p4!T}z21nQ>P6+K?ZPOZQJ5FaDce~EC!gj>r@0spc=V_F^-syt&?$?{v zXuDkT?}>Gu#@L&jE70C^wrP#EH!GgKv9`@Pdy9H^!QTg$^f7~czj6uaF6m`je4p}V z9ADbkwD>-SzqK<5m_fcjxfJc0{Y{JSPcFl8yFsSK_a*$TNuBw=Bopn_neRs~$1!!L zEqJaF)@`CK+M%5~(-!341?xD`7VXha{b>vG?25H(Xbaw-d{$&-j{2;~s(({=h24AK zWLo3xt;(me;8WMw_c>RhJuA<&CfM7QPi4cWCfW};-O!#r&$K4l+m%n{z^80`hjSI$ za~7J`WV=G~$pxQl?MIyMXwO|@T2t(uiqAmsxz7Hk(*x}TSD4mRJEr&y2A^s6<4#Ys z4_;+j)9qb~ekka#x4-T5Li^C$O^f@_@V=Mv#kgRM(B|FXUyKpPnvN0rS9kaqV}!A$ zV}$w4M+QkGBe2ihx(v>M5$?UznBqdAAOG* z%KVIH3Z-z4wJ;7wjr?m*54oO+iTB<*zYW}cpbSRnl@ zm;MHq{zlOE1^>}^n^rd9*n5$K0mpyMOkHo!>lho1cJgJ6(Qik?Z^?Tsc$3%o#mMb|6P96K1DpijjFZXkI($=H z`l&AcbkH*wFjg7+v^8U$w&l3wvsjleW#u}1jE#RZoftR&XgcYb;+ub6(JsxcKi1rOW9jQu{a-zc{b$jaamr`0|1A2d=K_ZNoCHukClS0d zB>0}N;8ld^-x`oCFZR(RaoKj}Zdj6&Jjc5cr-t!K(;?%X1QGt1zT( zXGriEA$f!auOuXoe8H;-$%EG}B5f6fXggy$ls&Llw0ceg2>p_KriTPS6qf$vUg-IP zA0Y(2Jd4G#dQJieIg7cuV9xfl3w$i2LzRvnN0QbyR&q-Vw?s2W2lYmA)2WXCueh$zajeZW$ z98K~$$wbkM`vR+H78Bj)Boj~ChkF#O=NnDXYWq~Xw&NbYcc$|$%lI!svHA5Yx{7IH^s-LbD4<1&zDVaw6l(%gou9!#3>Y-1~p&8mpSUVV-9i=6Tj66UX_gt@j(|y@rt=xL|Ub)oRkX3wD%@yC5)y zJk0Yg(|_ItW|OiPnzuEznLBT3;ftBZ6;y&6s~jz#oVN$^$zB7U)Neh(tz?T(5OAjrOk@ zoidC^jW4`rCNhlH_>j2VuyRb|oihw`*f2X`ltf9T6AAuVQ zr{W1?<9^clao%`D`t*6%8-x3$@MX|ZvK4Tnb;T81+3xm( zU+avniLQ#SYpmJdxXC<=K zxq(Ag%fNm=&FW5i>ee^WSB+D;XJv`*efw@@yW6k$Whb%~U3M&cYk5-lXOp_;Byv=L zjz0&w?}4uJy}1Q{%IusSFi|s}iJEZ(Yp!Sdvjgk5B}A0FOzF7TmRw9 
z@khXq{>Nwh3Gn#;`iwsVHed~){{}pf;JRphhtTS^hhd}E*L_&pdge#?J6<|%)TRR; z!%m<4r2ahCIB~t}hG%`5?l)VRM~Oe@%j9!{hJU3VfUX@Ie(Fp2G0(p;o-)FoD_|^}7##?|#f9x~f2K?m#pK%nB zcKi=O+VPiwwBxS;X-C?bcBGwYN7|Woq@8I;+L?BwooPqfnRcX|X~!Mvy-;I2eS61A zJMI4V=XUx7ejhjGPoMnZq@5DjtK^^EpS<-o$M~(9QS$bqb%qfl{0-+)zc4;>MjhmH!(Ll{GvhcM>&2dAe=VBZ0Q^v&=Y0eL=t&~%~~Z9b4ag7mpZcH%z3 zE3*+kxz3h8?%unkgf#fv1%J`c|2{)D2=Maa-;pl~{DJ@@(}jK)&c8?wE^zU6F8ts` zVg(OFZolQWk9BbuO@fg)@*AzN4lAbhjEqmd^hF~B|A65S2e<{rKVL>C^CEx8j9x_k zdg+LtUHj;u?B@pbu6pQ${$2VmTVC9E<%)TEOP1y>D=b>*9`{|gI5#`H?}BBE3i1}* zK0j|kzkxN|RTzkzwmi1k_bj`9pD~nd#S~#yb3!S0{rl#z@P<#2j&jW={0rKj4>0&jU76_-=KK|3JS6d`xh4E44F5- zFgH7A!GOUFat0OlA2N4fcK_VM1q%vt2KAs(PW4Y+7qgWSxl$Q2Nk*)#?>xikoNQ7D z!|{I`176Tt+QR}2BWwAJ;zA>9{vFF#-LcHbTDW|5)}n${iwyUFwig!}Sw$=67Zev5 zS;d8GixqXjvc>bzT)4DoMc&GS`STYqUx@a?xvLlEEnU2v9Fxa+iUTaTtZ+F7<2=O$ za{(8jU5*4AXLj-O>~YhljGLS{e&&=hpJHg%lKDp7*f~>1T|04%HpdE$E$`DTtdbx&(+GsbsRGsKTIT6=wZ1nZ8q>+ z2yuJz{5rlh47r4Qyp~_XPU!b}#_N2dq0T2R8>NFjCcFxN)JyZ%{;FXZecpdzFM{K~ z_~Uq;?~-wV->du=d0$}ie-eBC4ZtzSQV0Ii@fGn4;Yw*%HsYbSpBq+KGg#X6_+xH- z9&^XH)``{SlXiNn?J4rR-ASOwr@LtS_&M(Q?Bqx}=pPWu1!?ViyoQTUG5+iBcsy@R z`KQNeScKl>wewI8K2Ip}-8T9u>9qZ{|I$CS?Oaj?D%|n-!I|<;pI0}QVMxj8AVh#a&va- zarXCl+LLeOiIXQH-Z+twm3c^voixr3h2qMQ|H4k46AF$g;=%Etky1J#!m>e5GyK1~ z^KGXH=ULb2HI|vcn32t(%l}wxuBi(e{V(2J3XH_X^na`e7aZq84IQUrBiSpsppjNR z`sxf1&fkRqcA&0je0?o~_IEW(+E3s<3R-s+X}Z(j7LOO~xz zFn`$##mnd4zUYOOYgWy>_Qm2Aix;n6w8nX%c=gh?i<3P{2eop+z4P=Co5D5su3Yr}rB{{&Mt*R|yw!{D zn73@v@_F-(ozkf?SUvu*c^2kOOIPGi!nZ=nvoK~_x}wo6d@Ho|EUcN9u4vlQ^qH7DyIX-uu4_}LFS^4l zD{qCWyoV!zd%XLn9sn?2^tXgsBO3bDuw=Y`0X5QkZ_bggCk2kCT zo4bzxrSFJwE1Xl9zijD}<;{NDq?}dWa+_Bd-#-7Id5f3LUo-FS6{~IwjPsm=GrTkY zVECwUeFw}Ncg^^b^Ty2{GjB}al0ozO&bWGBzuEJyEV=!je)CHD4_JOr-+4FBUp;TZ zs)h4b-E;3eZ|t2gR$GtXwtk<`s7?Uvuk= z#jEeVeZh)lAB_BZS1i75VRiAH%a<-*vFdi+s(Hto75Yi>ovRnEp11H`v~B6l^H#5! 
zfAei`;O;Ny1{(gu;=AYHwP^9G73g60Q|z_kCV?zdWmvHR(m ze<}`mA$NSi{*RddQQ!QvVFB$oSIt=VFV)$9pYWeWZ-#NqKb~TFlmEV6$2(uII>xPu zLoCp{X{WjAFSD;cf$rf89A|`cex@8c&CCY!1bW}3qI2WPs!%rca5f9Cdpn=v|DQX( zoKG?QpF73Qr!mMRWFJbLk3+`8&Zkp|n>(MD5?|>P%SLxMP;XCRwHCR^irka6fuA%-mFdB~NJPD@;m=OMT9JY<@f z_953~-6vXOeBETxzNquuzEDx}HOVtdcmvUwn5f$S5=cp#?)vNVt@ z1DOou)|LF0rpI!1_S`3$qhrHP`QBzISI*|W=U%P0Hyan(t|BU_`t?gD z`}bep&)L4Wzw^}I@c7d0r9qkFh|O@eXnPVJG^(h)Ol8%BJRYkQ$~NxppY4z9WJBt^ z1Iqgk=)XlhFkoZky93Jy3>>f}^1#521NRMlcwlzm7AMYl(Dq%YLkP0x`|iVzQ|=U% zgq?4_8}5j*bJRpPt+uPNYP;)P{;qp2uCY~ZkE2aTh_fMw*Ghm*bW^F2yq+Tqo&%mN0c4lpk?KNAWl& z$FzAQs7sqiPvv#_b9?%lS=+M{XK!~f$+Op7rzINla5lN5!s+8W*(Gh0PIGXsOl?%z$abd_Y=YxX zXRHECmL!YXwoR5fJ7Diw{C#h4)=BNfWs*xYPNVEO{5_Aq@8fSL{$9Y}pW^Suy&bb1 z^}Oqp?F5Erv%p(WH`FQFY0G8V%k()&UDi3%WA*UGVn@+TXSs??&0n_FX6|i}m$@wQt4A;w`yv=NcVsn|=Ab|c%!k+mI?8zly(V z{O!Tt8QO$z$**Zc_sQ3^Vfw^Kvu<&{dKNK@YZ1Z+VydiN&z>{HWBZ@PI`u5h;l^37 zQ#p*7!|^75lIMXncMucZ*HQkfA&Jp0W%t~8zmxHuE@v2{{C6qz1LVm*40^J!3|eR6QN!+H{4ZS7P( z3p#D6Gvm0KqR zh^-GT$*MAE4A+Kd_r5+kxi_2Lc6xO-=htc7s&(1)+0)BT$N$Tn^yyvW5A0XS`PIJT zeIRkxaT7WryMWObd$Ju|^?2gg{BD`Fyd#6_k{MeaJn8^Kcku{|v4SobIOYeMhn{-=H|#)AE+Z9<3r&*MWE9@nqE@VNdV*f;vM z&|&}64t-{SvM!I=pSnEge_bB*Kg%=VjM2}aEk-{B`|N+}XJDWGPyHJB$^K+{4g6$( znsWB1DQEw)ycTwhKK(TMEI0Zr=UQ|5>8>>g^4E=1u1`taC0c9KnXEINPj{X91N!8i zXpI>YtT7^Y#zj_^irk$PS(O&~YDQ#rok&?@aIDuDkvrld)1@MJCPh}JMefdstf~_! zYm6LM#6+I44INXzrVYKuEDqKfk^AE!Yf42PNQ$gYi#(VSd8kgLtT8y&YmCS}agmu) zk^7S(YtkYQWJK21iIg=)j;muL&)A0UkFRONtS`ebAI>K?m=rF#~4t-|4VqG3HUg`26zUlHHzUg)$CL8@U+G6z6u+R8L{WR<| zzEQstellLMyb^vgUYT;nD^t$+#_}rIG5Yk==(F7Dvz(lq1bwr<%)l|@Gy9W$W!4w! 
znDvFnH62UZaeb$LrsFi-|J1KRU$OtiW=T7)?>w#zm9*n}P5qkCG~NHyufW~{=)&XrwHF@OKLq|f7+qX>`&I^G5b@O2mP`zn9{xs$6f0oz6j?t%|MxW(IpXH7BGF^MFTQ{rwt_OM~ zd#o?czM}Usc<%C0Tkd7XbA5TK$zJB9b++c$Y167r*^KLEl%38#%ihbJ^csI)zp%f< zz3HZX*Wal2U3)I~fr7_hUtSgop2K9_X54pX$3pKY%D%v}nXLPHf@KelZCdv0hnki- zzuvS@IU^57LLn#Wgx-yw+rrTp`|5VU?F^2Doc6%&4L&F2bO7#P@VOx;1{^c^TOp?- za7TmxB!tI}p?5nOjJ^G3z?T8je_rz_d}xWXmS0??eK-mqE(X5XVEAwe@FfPrhf9Gk zH5fj$0&Zn6d}s~a+FR z#Rg|x{2Uv4x3|Heu+s;)kHO)v(-*j}!I7|20$gJ7Ibr8Y;46Vg zn)m`AY)&7A4>qTd!UvnvN8y9b>7($$=JZkcU~~E?e6Trv6h0VE^S3fRw+rY4e{(Ce z58+>I+J`mDpQ1T(#Gk5VgFj8nWBzn4ox}_+-NZ~SRbrNwMTyyvPWU*d`eBUwnK8$B z_6P27FvfEL@Bo7`o&$jg8jSHA1U$%KjOSqB!3JYIhX4;T7~?qMzq@w^K7DuXee!-0nzjPV=+Ji=g%XB;?gFvfEv z@JNF(o}++A8I1894Llmy=9JBAa?0j4Ic4*joU(aMPT9OBr)*x6Q#P;3DZ^`G7~@H1%rTzV0AFJ;#&a_8WP>rD*8*Q_FvfEV@DzhFo>PIR z8jSIr20YDRjOTRV>A*IpY+jR7Hm}Jko7d!&&1-VX<~2EG^O~Hpc}-3kUfY~1ln*wi z3gv_06n`XUoO)Z!2LBx`kNJPCrIR?QrJHzHOO<#}%c8{lkT$1gV2o#&F~@k$1fFRy z#&Z_%EQ2wgvw>$DjPbk<_&S3zo^ycb7>x0p3q03gjOX>h*8|&}vUyET*}NvFY+jR7 zHm}Jko7d!&&1-VX<~2EGcx`j4P(Ik4DwGe`-=y|cspHR_MqgiEX5IyI%OVd)^gBOY z-gohCPXzDyC>QUR1<#`RJ_z6SaYu&o-T@km{m)VAeiDYRJfkYdGZ(E3JDba!p0C+_ z+}5vZcr5P|{rXo;bXY`GlAbs^1F!%h*6Z|+Z_x$P_!A}5l&#$f(ybYLpel=C_lfc~btLcKb19Q)> zW(s}^n0tOT`#2xAVtk?~YX~^`7%(~65ODHwU~;k{;N%m)p9lCZ^?5kHAlw#eqiqR)%AjXVD9(T zje--v-0!P-f|J19@2mNO%YeDxS2qh@56u0(S|oS_F!%dvNejH@CC@$9eHqT<^!>m# zr+r|X(+Oak(@9{P(`CRmr`H49oZbLzIK6pH;e9xOb%D>Z3fT|mANV~y$2vb~2gaED zT(wljoSfo*U)?5{oZ^08-7c7%;(lMP5KK;Szpw5POippXuT~2tr?}r&cM2w_xZhWI z=X0tj%M7nE=6SpZ&f_(39` zV@^(Szpow^OippXuf8LgoZ^08Jt~--;(lK}CYYS!eqTKyn4IE%Up*<9oZ^08J(bU? 
zJYHkW^LP!M$7|p`UIXXx8aR*FzLRVr_Sm*mc=RVbJcfc z%*iS4_ti6k$tmvl)w6=hDem`GN-#Oa{l0omFgeBjzWTmka*F$X^@3n>iu--_Vm_zx zc#Sd7<27&|uYvP;4V=eo;5=Rf=kXfY@Y?27p?t78RVW_}r@q#6EQ?b!IL0r@n3Gd8 ziGLuNoZ@pV^|D}cYBuo?1(Q>Jj-`Gin4IEsEcFw?Rk)r=oH z#P_Qo6W_0W?vf(bZ{1(&cf4Bacbh5o`!13CgYT63BOj3Z6P}d%Q(u(&ZO6W&uiInM zhq|waZv0I5vG+oYB7Q&e$KFQ1d;G`VMm;a_E-87r?#m;3oXfw}wrES+qdkw8-SFPJ zj$`mmbhCHAhaWdmT6b-Z=WG7?L(NmxFaOrR_GF()*$wZlLA@iWw;lDK(&ck=+&R;F zW<1WcUO%)i)q6y*z5O=rd#ZQuUhDP(hl>-()&WC0c&|3;_VagP-X`77{!ENJWfmN# zVAVx*RK~>jCsht2W^Qew)61nse?yg>PHg=BJ?B!t3mR85}o2 z7yZzxMYxOKqGPN%=g{fuvhAmro(~g+s$^ghinfSgN#9Tg6#Bm^BtWbI|E0( zL4Fq;cfoNxZ;0O&$6ayU-W%q3$8mS)cJPM#J#pL<FG^ zuh^IN!j9J`Xm5S0wO10f7j`kO(%!?Vw%&lCz4a;fkF>Ww#r~1@)~DD%(%!=<_K&o; zKE?jw*nX4!C*Quw{+l^499$U=PIdF9`EeY_aopXT;g7=cDDbX_H_IP`<1r}j>0RfK z!|^yA_wwfY<8eG5$Hm?a{zM#4gnn=DCVvu+C!xHLSL#p3@njtL^%nS3a6ARaCEh}R z8jhzy|4MJMKLf`zP~OkG#h-=aSvcF6?#q0sPjP<9 ze5p@ye#v|}oZ|eF`BIJ{@irkqLS(z5O zJ0r5HPNaN8D92}QgM4e zatg@4vN^sTzX9Jt@w-0r9W1{g3So>FqMYBzIrzrR*(KjY;+NwC(W~6(AaA7~ld}DB zku_20_V=ShT{YNSqduq zclj@&KK(M^5@tl_PMC!o&pP;~y`BEXALqfP>Lak* zvX>j>dJ~hfJ#mqlmZ;mZhpV{$gb;t(W_$FsS%Ispi|_K=?ZS6^anZ??irk+RS!1^$f;wy`zTa=R3*T3!MJKN< z87Zr+6M4{X!*0}PJMqnbyX2?M*EsEdRzZE_7HF46n~krw zkH&AipChQpb|Vi#JAS+8pzTE%zs}dV(c#`Y-~N^_hVkj_xoU*x`?~`!BWFQ7e&gq$ z%^h*R%eAhGdr4n^^LI$T-{bt~8gZi|z4iXf0f&(vpq+zuc0!%5(_J;nEBDFu?&rGE z(cUI}Hz56~=Ni6^ybOBXXS-^Qw;A6O1oMFN5Z_AD-=2TsM#p*&>HC~xz?_e~2Rah) zk8@@wzKv`JJDiWaCu)sj&O6TAF9W`5J$yGwuEl;Aj&k0%fga}<=jXJ*UK-zx&V*m& zdZ#bKdVaQr9_JkA;dH}a&O7)>pE@56M>!vPPh*e4k$^kM&(Llce6;Pp@w-wz zH_*N;{IPw_^DW>oxsg4YZ??TgzUg_7`ivReV=n`@`F(KXIKBqu{MJ^-@dL6(cgFbj zyxvvgye)oA%67&@R+7WT;PBPnBRYLUnRHu zfZG$i$8{X1U*?+%?xb1n{ebzy442k17&wB!{FPxK{ zt1&6t5f_c{JCNr?$LUh3f7HPX#H`F|w&P{g)BO*>8K2M8Z^mcVNA3|FpU>29#%Z=2`AXVl5u;Dn@7<`! 
z_9C~5#CNp$3XECF5?4*~D)QnxIo=N(zs7r7$7wxI@K?uow7Dzl^}Eeglf4~&+a&XU zytJ3R9spin>wQnhY+j4=ir;EGh>zV+XTS8lbO*ImjGa4&r7H)EjcQt7dqWzMXT6_dUEH zGV2Fpy}l;bgdo=U#602p!8Ojr0LFS=1NmHXvA!7dfiZwE?K^E7Um5f1 zA9Bm+m~Z>exoUi6%+J|FT(y0-eP#U5*#p09-+9jVm$9EVk((wL`;Gs$zq6r7o5)*} zi~Yv`Jb!I_LF_mF+x{9`w%=S6Y+t!Ptiv_+eg416y*#-&4BVXQ?eb$%wm&YirYFW| z*nO^=mj(2dUVe(cvt*+<2w7vJEjx!$XK?V!*4J_WJVfe!C8Igb6nInLdfl1I{W*f{(w7V9w()&CN1&+*EKVTxsI^i$Xn8$`+%Sbfs&B8(%~5n|*<(d$U5*gNfZ za2;VgkWZyO@Apr)9b7-y4&+{u`}{L(2k-NB|HE&6pMQpaa~)xQybEDUL|4ZL@PefcL_a}k-ytii?kuRnl@9W9w$rxL5bISd0ly#?p z<6JLhV7=fyy&enr#Cv+i=~;oj>tK)d=c0aZ@L|*gi0grUJ$JB1^g;QUhq2#;9QUY; zOupR?xIz6Q)Z_g=?Q4Ck5A=g`tWCI?zTY1dV7)E`JTv$Eod2T`V>v&@V1AJ2~T%y+QT)HF@ozJ z=Q`I0+DD!oy*AM9k-*-uzz@#57U9nmTpzS;&JnB+cDow4gL?sO7k$pXK;w2bZU@&2 z+D0xOy;d}CSL1eY&7ghc>(OgQ<90P}2iK0AeZ*z>KilX>3OxGh{ucn#zT(rZcMwl!|cG5b33y>Z*@wivryOLR_7)ZKz>$sXhIZ+0Lj zW!vz1ehJ2Z{1dKnoWs9q?KnGQkT_R)CCVp06>uDLgX_pP)aU&V_afAT{_cLzoAeBx zg+NYDVNWgft0-~))d47<@_p=MA*a3QDn47{{Lbx3M88_kCCu4D(3|lCSCvZrJ%fQ~ z{SbRv$m@RMDn46cJGdvAiP)Ni9=?ICxF~!i!TYf|)*qH#=r}vlaxTsLEzW1%Uzi(Q zTWACMmD=;3i+a%C$#s+So9iFvcSh=0QsVq>u5aus#s~Hd*BJH*ax>{QrcU&$^mQ<2 z886s3EJyuU88_HR>~r=B*BbT(@;SBFYmL;;Yl|awcf{meleTcZk#Q|)Z`K=Pij6JMINh2QUo@>(I^Zc9P=>Ma9D;#YV=~GnC_r&N+m7Eq)Y^ehAs{({S`7 z;@84@P15#*HR=Db{LjPD=D+Kx`=v$DFAeAyu1B@PhPCH&DfUT=pif!^ebNGb!ais~9~i%pTdY0zEbNmOL7%ihpYU0hX(QXj zbxGSt-*a91sy3Q7ab2SS$Whjw&#F$;M$;y)Nwkd|X6^axierDGHkvkZP0HCuT<5r- zsEwvgT&HsO!A*`k+sJaejiyappJ*R>(c1I5R^xFrZDgCcCeb$XrnTp@E%MtQN7F{r zCay_2+wi^dIGQ$^HW}OGv^{=Yr%d}yd;XBQ$8Ny;lQ*0z?^8yQ_v=RB8x2O@ubY5x zG8lQk<^j($7Y3Gf)9LW zbf&@Zf$xmYG8jJaozd9_!w0@IdY!@Wf$xmYF&IAZozb}l!w0@IdOfiDMvwW=3gv_O zMz4@Qm~S>C9YfAS^vyyuUYCWOMZk*;M&7T*z>5t=-mfLVOAJQduUmj`F&KHjmI5y| z7L1-{i_9xM09>2DG?`FXj= z?sc7I7~f@Pyzg_J+ktO4_qTd!UvnvN8y9b>7($$=JZkcU~~E?e6Trv6h0VE=kgj9 z(1%=Jg97>>xianva*wSIa*wSGa*z2z?y;mVxyRPynE`WZl>3r%gE5}>0pAB~bIRs5 zIc4*joU(aMPT9OBr)*x6Q#P;3DVx{il;O3_sY3Z+bE;527)~WMSJno($JY7lHTL}t 
zS|)wTJ+|JL++*ebCSAVC&&xdqPOZZjuQOwg@w^}SeuFWdKCo{v#xnt&Fc{;R1Wp=^ z@hk%_GZ^E!9(cXM7|#vB8-Q(2*}NvFY+jR7Hm}Jko7d!&&1-VX<~2EG^O~G8ytX-2 zC?9N270L(0Dfwn|ZIF9xU66as4|0zseaStx-p6x&9j7Gs*d{+O_ZT=;jxjDbV~+9M z2)xl?jOQlcO$K8;9{_&9V2tNx;LQeOJRbyp&|r+`L%x0J1o#nyF`kbCKWZ?>b1U#xgE5|u z0Y7Fi#`AID#|_4KJ^}m$u+1r(*W{GVYjVowH92MTnw+wEO-|XoCZ}v(lT(J*Hm3^Z zgUzWz`CvFD-)yc8a*wSGa*z2z?y;mVxyRP~?`kfU`;vQXlb@G+44m4AG2Ujz9OL;U z@RJ5(JhuaHHyGpj6!239V>~N>D-6bXei!(=24g&*27cOLjOR1J&j8z;vUyET*}NvF zY+jR7Hm}Jko7d!&&1-VX<~2EGcx`j4P(Ik4DwGf3^xR`NfK%$V;Jabug5om+^=H9% z!@zuIp#D7gZWx%)4Aefs%ss|u2I_Ue%ss|u2I?1rnR|@S4Ad_LGxr#u8K}QJ{@j8& zw`QQM;U&>w?lC^cY6!l4X6`XQ$7%?^eP-@4KF4YZzI|ryF+Rs?2)=!0?lC^cY6!l4 zX6`XQ$7%?^eP-@4KF4anx6kIeMIn7K&n*h+gL#g{++z#TH|k9p3+5i-&g;5Jg>npEXaOr$+$WoQ?zAoE`~m zb9xl8&FRs=hSMgmK_PuGc?}BbgXGHin$NM$m)v9I6!*F6V;OUDiu--_&w|M*?)TNd z3MQwx-&g-Gn4IE%U;T$*a*F$X^`C;tDem{x?*x-m-0!RZ%I8!buQBF%yavwWHEztd#r?jzG@ny>yvCU4@ftXf*T8wa z2F~L(a2~IL^LPzxcx`j4P(Ik4DwGd~Q<)%6N$#;Te2!&tY757>wTwAA#r?i&E0~<( zeqZ5DV&)qqr?}r&9R!n8-0!Q7g2^fF_tj;B$tmvl)#ZZ8Dem{x75SXX<2A-SkJrF? 
zyavwWHEvw)L!;qushCf>yxe9XOubz&u9PjY%kIDI(;V57Bo1aI<=v+o0Mh>HX)!;We@I3Sm zj72%~AdQ1$9;LX{tEI&GgOgCcrR~t7YaqAvxW4Ff)UO`=+YY6ocZk=Y_xdM_*I)AX zXNu*h_YtqV;_r?qUia?*Xc3u;_Z1JJZpGjQ9cE75v@Z!g{1#Q+59gU9tv@8&!E(IM zxhE~>GXqf0cJVrF$5gx%X!?bAs0aQ1ygu_R@w#kJM(Wp4;`{;n!S=DeY!B_2zNDQx z(XXXHvw!ps-i>Cv-cu|`{e$#l$=e?)`my5gK2fZ1`jYR#710LU*KK`=T}7MrIbyF# z`&iEQ#pHaG_7&lMK>C&L_Pn-3y%V)BF7<0DPt-ozp?{U#-q3bRMZZb=Sbn1Ru|CJT zs@rcnWYM0a)Nj%rma{!+Ip3r`d^b^#d$$E0_--QgPSl=^)c?xyq8&Z%=wJPgqv$tj zPo3CrGG45&#~b}Rv;FrTaNrBMkO*C>x&Oe0Z6h6$9oBcsK3|6U=aBc0cDyn|-aplM zU}QG1BCPjl$4eu8kDcwQMti7F-p~1|@P7VZzNg6d zrSA&g@A$`0)GN?8<2dsbv48LH#D3X1V*kO5#r|VgTpFRi887P7{`J3nN87#aA3oIa z;=ag#X}=!6{35My#%EVx|78BZ67ai<{0{7&%>OT-Z~Wi&#i{+LzVV;>r}Cfr#{X0M#^_jdS+eExSXq^gzl?O$nlMx&!K;40@f7KspdHzLvIg{ zuX1j9Z&=4M$Ym;HSUn+FL!g5-q=Ic_Zc4^%`jtdEV$6=ToKKrIgYD95hMeCy=HX>M zGon*Ti8{M^J!8+T5p26&BSg1~I(4G2+tDBX^`n2moMPMApS!Ro7+c0SM{HD3(nrQw z_IWkdd9E8VIiIF)JciEBxRh1ezL|9*gL?tiqmJ>vRP?JTQI_YUSr=+>51`|^=$^6< zi@*(zQ!Unh)=P`+Q}&^W|J7Lcb=(eo`>H-PaX!QQ9>iEXhvTsO))p}ile;x&_e`9( zb2aw6ZEn$QGX`iM-{Wt5zA}gd8Tx~`*W`RK2WoJAt`nW1!(5E^+Q9g~B&o_?>5LqQ z$j?}b_(Pv+;gfQn-Fpk`AWviKR;|mX&z_F$y5s-TEzE~loT&UKnfHg_ALDu(=c4yg zZ#V1wis3f=Vmxbs@~7Uezk+d2&q?}Ym!qAgAF_6N5LaqxALWhnKz?Ce$3firs=ScY z%Uu_GjdMAEVO@VF&VNo|iV?URGE9r*fPQf=*2ZsK5i4mh|>?z3+|zPlTc`|c*ln;=<^__RMJ z=W9w)UW)PskPDC(k3Ln-dAY}TD&{PZ_$d1OrOovz<~+zq-5N^RIw1W}pW?UdCcd)2 zbet63TImz`fp6ODq~1a4pTjAB!*1d$`-!pAk$O%1XF2_k$@!-KgBzyb_&r_jUL5gW z$5g4G*Pgi8s-bM$9ur&Bi2JNxD!O^?VHxV>wa3KPO2mI1Tcuu;_9Uh5SB{5?wN+RX zb*vTrJpVId`-J0RVr~W2NgZ=VH_!h%v6<(;iM?sA|A?K-z_VrGUa6Pym*aRjj^}%C z;GO2+UWfMt&|8UfD{*dtx8Gld<5l41&E8*VJv}by59+PKxi!#R=+*dl;rK2bFY^9M z=PMxp((sot@gAJJ2j>=h2mG}-UJH9my#KBB7y~NlAN1DY+&bvp;??SJwPlXgr*sa4 zAU;)b?S`(*vHBEqAIO{n|B>?`cOMKNsV8&maEjlqOZ)3noU^(6U*3Blu1bH@r#NS& z{q-r%(cFD6?=_$&{c$+Oxhn0iPq9C#r(*;;20hz%+t=KEF8rgO?Yr%3?*113G0xh) z+rH-RYdJTdXZvpZn!BH+f6%jiXDqh;&E3nA_t3TdxBb;I2L9Q8oA0b`e>ukFpzXKq zr|CD_U)w*!IUVD(+m7GYE(LEK$;ae_J3hFx)O!bcS#;UXxX8*R&Lwf~R_~y%*OsNv 
zcPN!}yVv8~dhqc!?>#>$WmRdBua@InIqEI*4(T{vyY!6?897(I3FkK9-0j|new~!< z*^IKyC|mA*q~rLZrN7m2+`)OgpIaewq&~$t7n8Cbagk}6BlRiHITOFDmx7y!qoty= zQ|8X$6z5`6$|}<$cgx(VPjN1q_?kRKxQ12n|=^=1)h4+8_EjZo+Zmsm{a`Czb{X)G*aPASDyTkjf zzZJ(@alFd=`&_)PML$sQah!V`_EvlU&)=^x{(?Yr%(iP!XxdbaPjuO?n=|Db34&UkJ6YvMJ1 zWW2WhxBX?jrhm5Iwx9Zb9DdpU+WzU-iT<+vwEbe7)?7pGRK{wPKlPf6pVKir%5~rb z`Sm)ZT<2>+P79}#@88ME zQqehZBXn-s=c={dr+!k(YSSWhp48G`y3zZ*&vcF?`o$bD4suy!MCZ^=s9*XESFQ6t z_v@tWqj@M>@RzQ--#g;74)Ubx@Ae&>M|{_LQfIvEMma}gQnn{9GBX2pnNx#vlsPNd zX5>kY>O85UvwtRZm}i4?Hz{Q`X^}d&=ygAJqnx{(Gi)>Rq((VsGNMyE3-y_wgLAr0 z$_~y(8S^A?E;GNLjKQj?ex8B$GEau_*^xTcF_C+0pP46>b&)4Ex(oRm;-Zr&6}dkt zvc_%$^H;E)$dek?c~V6uuPqrVtF04x&~5|sSFoMPlN!}|Qbp&q`Nez6peKdaC{mfhyY&Y_xM(sSQ3*U01zSoeKN3Rs)v+!50N_fY7 z=2BvNkta23@}$nk`Nh9RJ{8YN=sc<8kLO9f1^JZ9JU79dN_uSICv#XVK%HCu+Ewd4 zm0*6QTah1WgIAPb?h4x1a}D_)Zie2i@4KqpYnEW%B-+sDkn4f@ot7cz%tr6L1oxuM ziNyKHb%1#uv>oQ0K%R%|V2AUO>%bfwbKY^@GGCImNxdL^9_JV5C-YL! z9`d9{O`g;R$c@4Ic|G(v=Qt0UkAn8noOke%J}pG<49-X9maxZwc~Z4a^rgv@YW%nT zWey5$6Me@V6t@4I8^&Meo}fMCNsTi1gzZ0bP0$ANq()7i)D!v791}Sk$kSl^&zNra zv9V|SYWv1~60}u?_L)4X#@9UG0uCcjYLxdP%zt!3-)wu0e4{U{&zQkyDLsKDhf!bt zT<(~B*Zokg^PDh_kCFTGg&4nOzj4(j@BBne%67&@>O85-!QltIZzuSB3g=8NCz0sv zUWEF~|JGHTz2=FelvSlg>O83{!R-gV776AgqF>CJqT{&eR4+#TmH*dO4|y#UbyBAD zq^<(DANDRuunzL1N*u>|-lubY;hf}LjY-*#xJaERbul>1`N}xXHX~1}#BtHt$+gGK z*`$R zZFHR0;{<Jcw0@f|P9nOv!Nf!mLI?RDJd^*FaouGFQdch|qWYO5DZ z*tt^g0k0qPI_Y@MYw0>cuGCvm=bry`)#G00MB`kkYjLe7yeJfb*SeoSD~LkF-6`BdkY^ajRgD^P6j&85iRpa;556Z~M+zZ+vB}r#<9K z)v@08owkjyjQjKtxl(o9w|!?UH@-6N=jSqat+AY1IKLNdCvBi@t-!4D>WDY zjsLd4+*8sfa;4_tzwtlMU)x>~|Be5)zs8pBH`fK*SFRDma7}%$zgX_)c|WlZ+!j=;*EBPaxrVU4$dww^`RhdIwEbr8IvJ}^q9%78F>+Al9-}+wv4E^R>!urUSD);(Q z|CIgDHH7U&uGDtSm72tOZMrZVea0K0@4X*FTqXCnfcw0^XB&|#HOjnnBJ$9sO-*ZR1hrXQSRCRge`0oLn6z%xBRf*89VF_!bA9P@)b-}Eu| z2aucpRdS_j+re5uTMuC#FwSnln!xz+NKk((>T^w??`hsk!FSqw9QL@ba_!+d$r!