About Social Code
aboutsummaryrefslogtreecommitdiff
path: root/src/compiler/nir/nir_opt_generate_bfi.c
blob: 63289d9a05b2ec761843a56dc91e47cb1cbb8b0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

/**
 * \file
 * Identify sequences of logical operations to convert to bfi
 *
 * It is difficult for opt_algebraic to match general expressions like
 *
 *    (a & some_constant) | (b & ~some_constant)
 *
 * Common cases like some_constant = 0x7fffffff can be added, but this may
 * miss other opportunities. This pass implements that general pattern
 * matching.
 *
 * Either nir_op_bfi or nir_op_bitfield_select may be generated by this pass.
 *
 * Future work may also detect cases like:
 *
 *    (a & some_constant) | ~(b | some_constant)
 *    ~((a | some_constant) & (b | ~some_constant))
 *    etc.
 */

#include "nir_builder.h"

/**
 * Try to interpret \p alu as "value & constant mask".
 *
 * Two shapes are recognized:
 *
 *  - iand with exactly one constant source. The constant becomes the mask and
 *    the other source becomes the value.
 *  - extract_u8 / extract_u16 of a non-constant value with a constant index
 *    of zero, which is treated as a mask of 0xff / 0xffff.
 *
 * \param alu    Scalar to inspect. Its opcode is queried unconditionally, so
 *               the caller must already know that it is produced by an ALU
 *               instruction.
 * \param value  Receives the non-constant operand on success.
 * \param mask   Receives the constant mask on success.
 *
 * \return true if one of the shapes matched. When false is returned, neither
 *         \p value nor \p mask has been written.
 */
static bool
parse_iand(nir_scalar alu, nir_scalar *value, uint64_t *mask)
{
   if (nir_scalar_alu_op(alu) == nir_op_iand) {
      nir_scalar left = nir_scalar_chase_alu_src(alu, 0);
      nir_scalar right = nir_scalar_chase_alu_src(alu, 1);
      const bool left_is_const = nir_scalar_is_const(left);
      const bool right_is_const = nir_scalar_is_const(right);

      /* If both sources are constants, do not perform the conversion. There
       * are lowerings in opt_algebraic that can generate this pattern on
       * platforms that set has_bfi and avoid_ternary_with_two_constants.
       * Undoing that lowering would result in infinite optimization loops.
       */
      if (left_is_const && right_is_const)
         return false;

      if (left_is_const) {
         *mask = nir_scalar_as_uint(left);
         *value = right;
         return true;
      }

      if (right_is_const) {
         *mask = nir_scalar_as_uint(right);
         *value = left;
         return true;
      }

      /* Neither source is constant, so there is no mask to extract. */
      return false;
   }

   if (nir_scalar_alu_op(alu) == nir_op_extract_u16 ||
       nir_scalar_alu_op(alu) == nir_op_extract_u8) {
      /* There may be leftovers from opt_algebraic that haven't been constant
       * folded yet.
       */
      nir_scalar src = nir_scalar_chase_alu_src(alu, 0);
      if (nir_scalar_is_const(src))
         return false;

      /* Only read the index as an integer once it is known to be a constant,
       * matching how the iand sources are handled above. A non-constant
       * index simply means the pattern does not match.
       */
      nir_scalar index = nir_scalar_chase_alu_src(alu, 1);
      if (!nir_scalar_is_const(index) || nir_scalar_as_uint(index) != 0)
         return false;

      *mask = nir_scalar_alu_op(alu) == nir_op_extract_u16 ? 0xffff : 0xff;
      *value = src;
      return true;
   }

   return false;
}

/**
 * Per-instruction callback handed to nir_shader_alu_pass().
 *
 * Matches, for every component of \p alu,
 *
 *    (x & mask) <op> (y & ~mask)      with <op> in { ior, ixor, iadd }
 *
 * and replaces the whole instruction with a single bitfield_select or bfi.
 * All components have to match; otherwise nothing is changed.
 *
 * \param b        Builder used to emit the replacement before \p alu.
 * \param alu      Candidate instruction.
 * \param cb_data  Unused.
 *
 * \return true if \p alu was replaced, false if it was left alone.
 */
static bool
nir_opt_generate_bfi_instr(nir_builder *b,
                           nir_alu_instr *alu,
                           UNUSED void *cb_data)
{
   /* The two masked operands never share a set bit, so or, xor and add all
    * produce the same result and are equivalent here.
    */
   switch (alu->op) {
   case nir_op_ior:
   case nir_op_ixor:
   case nir_op_iadd:
      break;
   default:
      return false;
   }

   const unsigned bit_size = alu->def.bit_size;
   const unsigned num_components = alu->def.num_components;

   if (bit_size == 1)
      return false;

   const bool has_bitfield_select = b->shader->options->has_bitfield_select;

   /* Without bitfield_select only bfi is left, and bfi only supports 32bit. */
   if (!has_bitfield_select && bit_size != 32)
      return false;

   /* The mask used by the bfi instruction must be odd. When the mask is odd,
    * the implicit shift applied by the bfi is by zero bits. Since one of the
    * two masks must be odd, the rule can always be applied by choosing which
    * operand is the "insert" one.
    *
    * bitfield_select does not have this restriction, but the operands are
    * not reordered for vectors because swapping only part of the components
    * would hurt.
    */
   const bool keep_operand_order = has_bitfield_select && num_components > 1;

   nir_const_value mask_cvals[NIR_MAX_VEC_COMPONENTS];
   nir_scalar insert[NIR_MAX_VEC_COMPONENTS];
   nir_scalar base[NIR_MAX_VEC_COMPONENTS];

   for (unsigned c = 0; c < num_components; c++) {
      nir_scalar result = nir_get_scalar(&alu->def, c);
      nir_scalar lhs = nir_scalar_chase_alu_src(result, 0);
      nir_scalar rhs = nir_scalar_chase_alu_src(result, 1);

      if (!(nir_scalar_is_alu(lhs) && nir_scalar_is_alu(rhs)))
         return false;

      nir_scalar lhs_value;
      nir_scalar rhs_value;
      uint64_t lhs_mask;
      uint64_t rhs_mask;

      if (!parse_iand(lhs, &lhs_value, &lhs_mask) ||
          !parse_iand(rhs, &rhs_value, &rhs_mask))
         return false;

      /* The masks must be exact complements within the result's bit size. */
      if (lhs_mask != (~rhs_mask & BITFIELD64_MASK(bit_size)))
         return false;

      bool swap;
      if (keep_operand_order) {
         /* Just pick one. */
         swap = false;
      } else {
         /* Use whichever mask is odd as the mask of the new instruction. */
         swap = (lhs_mask & 1) == 0;

         /* Because lhs_mask == ~rhs_mask, exactly one of them is odd. */
         assert(((rhs_mask & 1) != 0) == swap);
      }

      insert[c] = swap ? rhs_value : lhs_value;
      base[c] = swap ? lhs_value : rhs_value;
      mask_cvals[c] = nir_const_value_for_uint(swap ? rhs_mask : lhs_mask,
                                               bit_size);
   }

   b->cursor = nir_before_instr(&alu->instr);

   nir_def *mask_vec = nir_build_imm(b, num_components, bit_size, mask_cvals);
   nir_def *insert_vec = nir_vec_scalars(b, insert, num_components);
   nir_def *base_vec = nir_vec_scalars(b, base, num_components);

   nir_def *replacement;

   if (!has_bitfield_select) {
      assert(b->shader->options->has_bfi);

      replacement = nir_bfi(b, mask_vec, insert_vec, base_vec);
   } else {
      replacement = nir_bitfield_select(b, mask_vec, insert_vec, base_vec);
   }

   nir_def_replace(&alu->def, replacement);
   return true;
}

/**
 * Entry point of the pass.
 *
 * Does nothing unless the shader's compiler options advertise at least one
 * of has_bfi or has_bitfield_select. Otherwise nir_opt_generate_bfi_instr is
 * handed to nir_shader_alu_pass() with nir_metadata_control_flow.
 *
 * \return false when neither instruction is available, otherwise whatever
 *         nir_shader_alu_pass() returns.
 */
bool
nir_opt_generate_bfi(nir_shader *shader)
{
   const bool can_generate = shader->options->has_bfi ||
                             shader->options->has_bitfield_select;

   if (!can_generate)
      return false;

   return nir_shader_alu_pass(shader, nir_opt_generate_bfi_instr,
                              nir_metadata_control_flow, NULL);
}