/*
 * Copyright 2023 Valve Corporation
 * Copyright 2020 Raspberry Pi Ltd
 * SPDX-License-Identifier: MIT
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics_indices.h"

static void
rewrite_offset(nir_builder *b, nir_intrinsic_instr *instr,
               uint32_t type_sz, uint32_t offset_src, nir_def *size)
{
   /* Compute the maximum offset being accessed and if it is out of bounds
    * rewrite it to 0 to ensure the access is within bounds.
    */
   const uint32_t access_size = instr->num_components * type_sz;
   nir_def *max_access_offset =
      nir_iadd_imm(b, instr->src[offset_src].ssa, access_size - 1);
   nir_def *offset =
      nir_bcsel(b, nir_uge(b, max_access_offset, size), nir_imm_int(b, 0),
                instr->src[offset_src].ssa);

   /* Rewrite offset */
   nir_src_rewrite(&instr->src[offset_src], offset);
}

/*
 * Wrap an intrinsic in an if, predicated on a "valid" condition. If the
 * intrinsic produces a destination, it will be zero in the invalid case.
 */
static void
wrap_in_if(nir_builder *b, nir_intrinsic_instr *instr, nir_def *valid)
{
   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
   nir_def *res, *zero;

   if (has_dest) {
      zero = nir_imm_zero(b, instr->def.num_components,
                          instr->def.bit_size);
   }

   nir_push_if(b, valid);
   {
      nir_instr *orig = nir_instr_clone(b->shader, &instr->instr);
      nir_builder_instr_insert(b, orig);

      if (has_dest)
         res = &nir_instr_as_intrinsic(orig)->def;
   }
   nir_pop_if(b, NULL);

   if (has_dest)
      nir_def_rewrite_uses(&instr->def, nir_if_phi(b, res, zero));

   /* We've cloned and wrapped, so drop original instruction */
   nir_instr_remove(&instr->instr);
}

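/* Bounds-check UBO/SSBO loads: clamp the offset (src[1]) against the size
 * queried for the bound resource (src[0]).
 */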
static void
lower_buffer_load(nir_builder *b, nir_intrinsic_instr *instr)
{
   uint32_t type_sz = instr->def.bit_size / 8;
   nir_def *size;
   nir_def *index = instr->src[0].ssa;

   if (instr->intrinsic == nir_intrinsic_load_ubo) {
      size = nir_get_ubo_size(b, 32, index);
   } else {
      size = nir_get_ssbo_size(b, index);
   }

   rewrite_offset(b, instr, type_sz, 1, size);
}

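/* Bounds-check SSBO stores: the value is src[0], the buffer index src[1],
 * and the offset src[2].
 */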
static void
lower_buffer_store(nir_builder *b, nir_intrinsic_instr *instr)
{
   uint32_t type_sz = nir_src_bit_size(instr->src[0]) / 8;
   rewrite_offset(b, instr, type_sz, 2,
                  nir_get_ssbo_size(b, instr->src[1].ssa));
}

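/* Bounds-check SSBO atomics; the access size is hard-coded to 4 bytes. */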
static void
lower_buffer_atomic(nir_builder *b, nir_intrinsic_instr *instr)
{
   rewrite_offset(b, instr, 4, 1, nir_get_ssbo_size(b, instr->src[0].ssa));
}

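/* Bounds-check shared-memory loads, stores and atomics against the shader's
 * declared shared_size.
 */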
static void
lower_buffer_shared(nir_builder *b, nir_intrinsic_instr *instr)
{
   uint32_t type_sz, offset_src;

   if (instr->intrinsic == nir_intrinsic_load_shared) {
      offset_src = 0;
      type_sz = instr->def.bit_size / 8;
   } else if (instr->intrinsic == nir_intrinsic_store_shared) {
      offset_src = 1;
      type_sz = nir_src_bit_size(instr->src[0]) / 8;
   } else {
      /* atomic */
      offset_src = 0;
      type_sz = 4;
   }

   rewrite_offset(b, instr, type_sz, offset_src,
                  nir_imm_int(b, b->shader->info.shared_size));
}

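/* Bounds-check image loads/stores/atomics by comparing the coordinate (and,
 * for multisampled images, the sample index) against the image size, then
 * predicating the access on the result. "deref" selects the *_deref_* size
 * and samples query intrinsics.
 */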
static void
lower_image(nir_builder *b, nir_intrinsic_instr *instr, bool deref)
{
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   uint32_t num_coords = nir_image_intrinsic_coord_components(instr);
   bool is_array = nir_intrinsic_image_array(instr);
   nir_def *coord = instr->src[1].ssa;

   /* Get image size. imageSize for cubes returns the size of a single face. */
   unsigned size_components = num_coords;
   if (dim == GLSL_SAMPLER_DIM_CUBE && !is_array)
      size_components -= 1;

   nir_def *size = nir_image_size(b, size_components, 32,
                                  instr->src[0].ssa, nir_imm_int(b, 0),
                                  .image_array = is_array, .image_dim = dim);
   if (deref) {
      nir_def_as_intrinsic(size)->intrinsic =
         nir_intrinsic_image_deref_size;
   }

   if (dim == GLSL_SAMPLER_DIM_CUBE) {
      nir_def *z = is_array ? nir_imul_imm(b, nir_channel(b, size, 2), 6)
                            : nir_imm_int(b, 6);
      size = nir_vec3(b, nir_channel(b, size, 0), nir_channel(b, size, 1), z);
   }

   nir_def *in_bounds = nir_ball(b, nir_ult(b, coord, size));

   if (dim == GLSL_SAMPLER_DIM_MS) {
      nir_def *sample = instr->src[2].ssa;
      nir_def *samples = nir_image_samples(b, 32, instr->src[0].ssa,
                                           .image_array = is_array,
                                           .image_dim = dim);
      if (deref) {
         nir_def_as_intrinsic(samples)->intrinsic =
            nir_intrinsic_image_deref_samples;
      }

      in_bounds = nir_iand(b, in_bounds, nir_ult(b, sample, samples));
   }

   /* Only execute if coordinates are in-bounds. Otherwise, return zero. */
   wrap_in_if(b, instr, in_bounds);
}

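/* Options threaded through nir_shader_intrinsics_pass: the driver-provided
 * filter callback and its user data.
 */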
struct pass_opts {
   nir_intrin_filter_cb filter;
   const void *data;
};

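/* Per-intrinsic entry point: skip anything the driver's filter rejects, then
 * dispatch to the appropriate lowering above.
 */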
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *_opts)
{
   const struct pass_opts *opts = _opts;
   if (!opts->filter(intr, opts->data))
      return false;

   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      lower_image(b, intr, false);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
      lower_image(b, intr, true);
      return true;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      lower_buffer_load(b, intr);
      return true;

   case nir_intrinsic_store_ssbo:
      lower_buffer_store(b, intr);
      return true;

   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      lower_buffer_atomic(b, intr);
      return true;

   case nir_intrinsic_store_shared:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
      /* Vulkan's robustBufferAccess feature is only concerned with buffers
       * that are bound through descriptor sets, so shared memory is not
       * included, but this lowering may be useful for debugging.
       */
      lower_buffer_shared(b, intr);
      return true;

   default:
      UNREACHABLE("driver requested lowering for unsupported intrinsic");
   }
}

/*
 * Buffer/image robustness lowering with robustBufferAccess/robustImageAccess
 * semantics. This is sufficient for GL, but not for D3D. However, Vulkan
 * drivers get buffer robustness lowered via nir_lower_explicit_io.
 */

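/*
 * Illustrative usage sketch (the callback name and call site below are
 * hypothetical, not part of this file): a driver would typically run the
 * pass with a filter selecting the access types it wants bounds-checked,
 * roughly:
 *
 *    static bool
 *    filter_all(const nir_intrinsic_instr *intr, const void *data)
 *    {
 *       return true;
 *    }
 *
 *    NIR_PASS(progress, shader, nir_lower_robust_access, filter_all, NULL);
 */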
bool
nir_lower_robust_access(nir_shader *s, nir_intrin_filter_cb filter,
                        const void *data)
{
   struct pass_opts opt = { .filter = filter, .data = data };

   return nir_shader_intrinsics_pass(s, lower, nir_metadata_none, &opt);
}