/* * Copyright 2024 Intel Corporation * SPDX-License-Identifier: MIT */ /** * \file * Identify sequences of logical operations to convert to bfi * * It is difficult for opt_algebraic to match general expressions like * * (a & some_constant) | (b & ~some_constant) * * Common cases like some_constant = 0x7fffffff can be added, but this may * miss other opportunities. This pass implements that general pattern * matching. * * Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass. * * Future work may also detect cases like: * * (a & some_constant) | ~(b | some_constant) * ~((a | some_constant) & (b | ~some_constant)) * etc. */ #include "nir_builder.h" static bool parse_iand(nir_scalar alu, nir_scalar *value, uint64_t *mask) { if (nir_scalar_alu_op(alu) == nir_op_iand) { /* If both source are constants, do not perform the conversion. There * are lowerings in opt_algebraic that can generate this pattern on * platforms that set has_bfi and avoid_ternary_with_two_constants. * Undoing that lowering would result in infinite optimization loops. */ nir_scalar left = nir_scalar_chase_alu_src(alu, 0); nir_scalar right = nir_scalar_chase_alu_src(alu, 1); if (nir_scalar_is_const(left) && nir_scalar_is_const(right)) return false; if (nir_scalar_is_const(left)) { *mask = nir_scalar_as_uint(left); *value = right; return true; } else if (nir_scalar_is_const(right)) { *mask = nir_scalar_as_uint(right); *value = left; return true; } } else if (nir_scalar_alu_op(alu) == nir_op_extract_u16 || nir_scalar_alu_op(alu) == nir_op_extract_u8) { /* There may be leftovers from opt_algebraic that haven't been constant * folded yet. */ nir_scalar left = nir_scalar_chase_alu_src(alu, 0); if (nir_scalar_is_const(left)) return false; if (nir_scalar_as_uint(nir_scalar_chase_alu_src(alu, 1)) == 0) { *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0xffff : 0xff; *value = left; return true; } } return false; } static bool nir_opt_generate_bfi_instr(nir_builder *b, nir_alu_instr *alu, UNUSED void *cb_data) { /* Since none of the source bits will overlap, these are equvalent. */ if (alu->op != nir_op_ior && alu->op != nir_op_ixor && alu->op != nir_op_iadd) return false; if (alu->def.bit_size == 1) return false; /* bfi only supports 32bit. */ if (!b->shader->options->has_bitfield_select && alu->def.bit_size != 32) return false; nir_scalar insert[NIR_MAX_VEC_COMPONENTS]; nir_scalar base[NIR_MAX_VEC_COMPONENTS]; nir_const_value mask_cvals[NIR_MAX_VEC_COMPONENTS]; for (unsigned i = 0; i < alu->def.num_components; i++) { nir_scalar alu_scalar = nir_get_scalar(&alu->def, i); nir_scalar left = nir_scalar_chase_alu_src(alu_scalar, 0); nir_scalar right = nir_scalar_chase_alu_src(alu_scalar, 1); if (!nir_scalar_is_alu(left) || !nir_scalar_is_alu(right)) return false; nir_scalar src1; nir_scalar src2; uint64_t mask1; uint64_t mask2; if (!parse_iand(left, &src1, &mask1)) return false; if (!parse_iand(right, &src2, &mask2)) return false; if (mask1 != (~mask2 & BITFIELD64_MASK(alu->def.bit_size))) return false; /* The mask used by the bfi instruction must be odd. When the mask is odd, * the implict shift applied by the bfi is by zero bits. Since one of the * masks must be odd, the rule can always be applied. * * bitfield_select does not have this restriction, but don't do it for vectors * because swapping only part of the components would hurt. */ uint64_t mask; if (b->shader->options->has_bitfield_select && alu->def.num_components > 1) { /* Just pick one. */ mask = mask1; insert[i] = src1; base[i] = src2; } else if ((mask1 & 1) != 0) { /* Because mask1 == ~mask2. */ assert((mask2 & 1) == 0); mask = mask1; insert[i] = src1; base[i] = src2; } else { /* Because mask1 == ~mask2. */ assert((mask2 & 1) != 0); mask = mask2; insert[i] = src2; base[i] = src1; } mask_cvals[i] = nir_const_value_for_uint(mask, alu->def.bit_size); } b->cursor = nir_before_instr(&alu->instr); nir_def *mask_vec = nir_build_imm(b, alu->def.num_components, alu->def.bit_size, mask_cvals); nir_def *insert_vec = nir_vec_scalars(b, insert, alu->def.num_components); nir_def *base_vec = nir_vec_scalars(b, base, alu->def.num_components); nir_def *bfi; if (b->shader->options->has_bitfield_select) { bfi = nir_bitfield_select(b, mask_vec, insert_vec, base_vec); } else { assert(b->shader->options->has_bfi); bfi = nir_bfi(b, mask_vec, insert_vec, base_vec); } nir_def_replace(&alu->def, bfi); return true; } bool nir_opt_generate_bfi(nir_shader *shader) { if (!shader->options->has_bfi && !shader->options->has_bitfield_select) return false; return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr, nir_metadata_control_flow, NULL); }