About Social Code
aboutsummaryrefslogtreecommitdiff
path: root/src/gallium/frontends/nine/nine_shader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/frontends/nine/nine_shader.c')
-rw-r--r--src/gallium/frontends/nine/nine_shader.c4274
1 files changed, 0 insertions, 4274 deletions
diff --git a/src/gallium/frontends/nine/nine_shader.c b/src/gallium/frontends/nine/nine_shader.c
deleted file mode 100644
index cc70f6fd958..00000000000
--- a/src/gallium/frontends/nine/nine_shader.c
+++ /dev/null
@@ -1,4274 +0,0 @@
-/*
- * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
- * Copyright 2013 Christoph Bumiller
- * SPDX-License-Identifier: MIT
- */
-
-#include "nine_shader.h"
-
-#include "device9.h"
-#include "nine_debug.h"
-#include "nine_state.h"
-#include "vertexdeclaration9.h"
-
-#include "util/bitscan.h"
-#include "util/macros.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_dump.h"
-#include "nir/tgsi_to_nir.h"
-#include "nir.h"
-
-#define DBG_CHANNEL DBG_SHADER
-
-#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
-
-
-struct shader_translator;
-
-typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
-
-static inline const char *d3dsio_to_string(unsigned opcode);
-
-
-#define NINED3D_SM1_VS 0xfffe
-#define NINED3D_SM1_PS 0xffff
-
-#define NINE_MAX_COND_DEPTH 64
-#define NINE_MAX_LOOP_DEPTH 64
-
-#define NINED3DSP_END 0x0000ffff
-
-#define NINED3DSPTYPE_FLOAT4 0
-#define NINED3DSPTYPE_INT4 1
-#define NINED3DSPTYPE_BOOL 2
-
-#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
-
-#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
-#define NINED3DSP_WRITEMASK_SHIFT 16
-
-#define NINED3DSHADER_INST_PREDICATED (1 << 28)
-
-#define NINED3DSHADER_REL_OP_GT 1
-#define NINED3DSHADER_REL_OP_EQ 2
-#define NINED3DSHADER_REL_OP_GE 3
-#define NINED3DSHADER_REL_OP_LT 4
-#define NINED3DSHADER_REL_OP_NE 5
-#define NINED3DSHADER_REL_OP_LE 6
-
-#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
-#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
-
-#define NINED3DSI_TEXLD_PROJECT 0x1
-#define NINED3DSI_TEXLD_BIAS 0x2
-
-#define NINED3DSP_WRITEMASK_0 0x1
-#define NINED3DSP_WRITEMASK_1 0x2
-#define NINED3DSP_WRITEMASK_2 0x4
-#define NINED3DSP_WRITEMASK_3 0x8
-#define NINED3DSP_WRITEMASK_ALL 0xf
-
-#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
-
-#define NINE_SWIZZLE4(x,y,z,w) \
- TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
-
-#define NINE_APPLY_SWIZZLE(src, s) \
- ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
-
-#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
-#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
-#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
-
-/*
- * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
- * BIAS <= PS 1.4 (x-0.5)
- * BIASNEG <= PS 1.4 (-(x-0.5))
- * SIGN <= PS 1.4 (2(x-0.5))
- * SIGNNEG <= PS 1.4 (-2(x-0.5))
- * COMP <= PS 1.4 (1-x)
- * X2 = PS 1.4 (2x)
- * X2NEG = PS 1.4 (-2x)
- * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
- * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
- * ABS >= SM 3.0 (abs(x))
- * ABSNEG >= SM 3.0 (-abs(x))
- * NOT >= SM 2.0 pedication only
- */
-#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
-#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
-
-static const char *sm1_mod_str[] =
-{
- [NINED3DSPSM_NONE] = "",
- [NINED3DSPSM_NEG] = "-",
- [NINED3DSPSM_BIAS] = "bias",
- [NINED3DSPSM_BIASNEG] = "biasneg",
- [NINED3DSPSM_SIGN] = "sign",
- [NINED3DSPSM_SIGNNEG] = "signneg",
- [NINED3DSPSM_COMP] = "comp",
- [NINED3DSPSM_X2] = "x2",
- [NINED3DSPSM_X2NEG] = "x2neg",
- [NINED3DSPSM_DZ] = "dz",
- [NINED3DSPSM_DW] = "dw",
- [NINED3DSPSM_ABS] = "abs",
- [NINED3DSPSM_ABSNEG] = "-abs",
- [NINED3DSPSM_NOT] = "not"
-};
-
-static void
-sm1_dump_writemask(BYTE mask)
-{
- if (mask & 1) DUMP("x"); else DUMP("_");
- if (mask & 2) DUMP("y"); else DUMP("_");
- if (mask & 4) DUMP("z"); else DUMP("_");
- if (mask & 8) DUMP("w"); else DUMP("_");
-}
-
-static void
-sm1_dump_swizzle(BYTE s)
-{
- char c[4] = { 'x', 'y', 'z', 'w' };
- DUMP("%c%c%c%c",
- c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
-}
-
-static const char sm1_file_char[] =
-{
- [D3DSPR_TEMP] = 'r',
- [D3DSPR_INPUT] = 'v',
- [D3DSPR_CONST] = 'c',
- [D3DSPR_ADDR] = 'A',
- [D3DSPR_RASTOUT] = 'R',
- [D3DSPR_ATTROUT] = 'D',
- [D3DSPR_OUTPUT] = 'o',
- [D3DSPR_CONSTINT] = 'I',
- [D3DSPR_COLOROUT] = 'C',
- [D3DSPR_DEPTHOUT] = 'D',
- [D3DSPR_SAMPLER] = 's',
- [D3DSPR_CONST2] = 'c',
- [D3DSPR_CONST3] = 'c',
- [D3DSPR_CONST4] = 'c',
- [D3DSPR_CONSTBOOL] = 'B',
- [D3DSPR_LOOP] = 'L',
- [D3DSPR_TEMPFLOAT16] = 'h',
- [D3DSPR_MISCTYPE] = 'M',
- [D3DSPR_LABEL] = 'X',
- [D3DSPR_PREDICATE] = 'p'
-};
-
-static void
-sm1_dump_reg(BYTE file, INT index)
-{
- switch (file) {
- case D3DSPR_LOOP:
- DUMP("aL");
- break;
- case D3DSPR_COLOROUT:
- DUMP("oC%i", index);
- break;
- case D3DSPR_DEPTHOUT:
- DUMP("oDepth");
- break;
- case D3DSPR_RASTOUT:
- DUMP("oRast%i", index);
- break;
- case D3DSPR_CONSTINT:
- DUMP("iconst[%i]", index);
- break;
- case D3DSPR_CONSTBOOL:
- DUMP("bconst[%i]", index);
- break;
- default:
- DUMP("%c%i", sm1_file_char[file], index);
- break;
- }
-}
-
-struct sm1_src_param
-{
- INT idx;
- struct sm1_src_param *rel;
- BYTE file;
- BYTE swizzle;
- BYTE mod;
- BYTE type;
- union {
- DWORD d[4];
- float f[4];
- int i[4];
- BOOL b;
- } imm;
-};
-static void
-sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
-
-struct sm1_dst_param
-{
- INT idx;
- struct sm1_src_param *rel;
- BYTE file;
- BYTE mask;
- BYTE mod;
- int8_t shift; /* sint4 */
- BYTE type;
-};
-
-static inline void
-assert_replicate_swizzle(const struct ureg_src *reg)
-{
- assert(reg->SwizzleY == reg->SwizzleX &&
- reg->SwizzleZ == reg->SwizzleX &&
- reg->SwizzleW == reg->SwizzleX);
-}
-
-static void
-sm1_dump_immediate(const struct sm1_src_param *param)
-{
- switch (param->type) {
- case NINED3DSPTYPE_FLOAT4:
- DUMP("{ %f %f %f %f }",
- param->imm.f[0], param->imm.f[1],
- param->imm.f[2], param->imm.f[3]);
- break;
- case NINED3DSPTYPE_INT4:
- DUMP("{ %i %i %i %i }",
- param->imm.i[0], param->imm.i[1],
- param->imm.i[2], param->imm.i[3]);
- break;
- case NINED3DSPTYPE_BOOL:
- DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static void
-sm1_dump_src_param(const struct sm1_src_param *param)
-{
- if (param->file == NINED3DSPR_IMMEDIATE) {
- assert(!param->mod &&
- !param->rel &&
- param->swizzle == NINED3DSP_NOSWIZZLE);
- sm1_dump_immediate(param);
- return;
- }
-
- if (param->mod)
- DUMP("%s(", sm1_mod_str[param->mod]);
- if (param->rel) {
- DUMP("%c[", sm1_file_char[param->file]);
- sm1_dump_src_param(param->rel);
- DUMP("+%i]", param->idx);
- } else {
- sm1_dump_reg(param->file, param->idx);
- }
- if (param->mod)
- DUMP(")");
- if (param->swizzle != NINED3DSP_NOSWIZZLE) {
- DUMP(".");
- sm1_dump_swizzle(param->swizzle);
- }
-}
-
-static void
-sm1_dump_dst_param(const struct sm1_dst_param *param)
-{
- if (param->mod & NINED3DSPDM_SATURATE)
- DUMP("sat ");
- if (param->mod & NINED3DSPDM_PARTIALP)
- DUMP("pp ");
- if (param->mod & NINED3DSPDM_CENTROID)
- DUMP("centroid ");
- if (param->shift < 0)
- DUMP("/%u ", 1 << -param->shift);
- if (param->shift > 0)
- DUMP("*%u ", 1 << param->shift);
-
- if (param->rel) {
- DUMP("%c[", sm1_file_char[param->file]);
- sm1_dump_src_param(param->rel);
- DUMP("+%i]", param->idx);
- } else {
- sm1_dump_reg(param->file, param->idx);
- }
- if (param->mask != NINED3DSP_WRITEMASK_ALL) {
- DUMP(".");
- sm1_dump_writemask(param->mask);
- }
-}
-
-struct sm1_semantic
-{
- struct sm1_dst_param reg;
- BYTE sampler_type;
- D3DDECLUSAGE usage;
- BYTE usage_idx;
-};
-
-struct sm1_op_info
-{
- /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
- * should be ignored completely */
- unsigned sio;
- unsigned opcode; /* TGSI_OPCODE_x */
-
- /* versions are still set even handler is set */
- struct {
- unsigned min;
- unsigned max;
- } vert_version, frag_version;
-
- /* number of regs parsed outside of special handler */
- unsigned ndst;
- unsigned nsrc;
-
- /* some instructions don't map perfectly, so use a special handler */
- translate_instruction_func handler;
-};
-
-struct sm1_instruction
-{
- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
- BYTE flags;
- BOOL coissue;
- BOOL predicated;
- BYTE ndst;
- BYTE nsrc;
- struct sm1_src_param src[4];
- struct sm1_src_param src_rel[4];
- struct sm1_src_param pred;
- struct sm1_src_param dst_rel[1];
- struct sm1_dst_param dst[1];
-
- const struct sm1_op_info *info;
-};
-
-static void
-sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
-{
- unsigned i;
-
- /* no info stored for these: */
- if (insn->opcode == D3DSIO_DCL)
- return;
- for (i = 0; i < indent; ++i)
- DUMP(" ");
-
- if (insn->predicated) {
- DUMP("@");
- sm1_dump_src_param(&insn->pred);
- DUMP(" ");
- }
- DUMP("%s", d3dsio_to_string(insn->opcode));
- if (insn->flags) {
- switch (insn->opcode) {
- case D3DSIO_TEX:
- DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
- break;
- default:
- DUMP("_%x", insn->flags);
- break;
- }
- }
- if (insn->coissue)
- DUMP("_co");
- DUMP(" ");
-
- for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
- sm1_dump_dst_param(&insn->dst[i]);
- DUMP(" ");
- }
-
- for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
- sm1_dump_src_param(&insn->src[i]);
- DUMP(" ");
- }
- if (insn->opcode == D3DSIO_DEF ||
- insn->opcode == D3DSIO_DEFI ||
- insn->opcode == D3DSIO_DEFB)
- sm1_dump_immediate(&insn->src[0]);
-
- DUMP("\n");
-}
-
-struct sm1_local_const
-{
- INT idx;
- struct ureg_src reg;
- float f[4]; /* for indirect addressing of float constants */
-};
-
-struct shader_translator
-{
- const DWORD *byte_code;
- const DWORD *parse;
- const DWORD *parse_next;
-
- struct ureg_program *ureg;
-
- /* shader version */
- struct {
- BYTE major;
- BYTE minor;
- } version;
- unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
- unsigned num_constf_allowed;
- unsigned num_consti_allowed;
- unsigned num_constb_allowed;
-
- bool native_integers;
- bool inline_subroutines;
- bool want_texcoord;
- bool shift_wpos;
- bool wpos_is_sysval;
- bool face_is_sysval_integer;
- bool mul_zero_wins;
- bool always_output_pointsize;
- bool no_vs_window_space;
- unsigned texcoord_sn;
-
- struct sm1_instruction insn; /* current instruction */
-
- struct {
- struct ureg_dst *r;
- struct ureg_dst oPos;
- struct ureg_dst oPos_out; /* the real output when doing streamout or clipplane emulation */
- struct ureg_dst oFog;
- struct ureg_dst oPts;
- struct ureg_dst oCol[4];
- struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
- struct ureg_dst oDepth;
- struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
- struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
- struct ureg_src vPos;
- struct ureg_src vFace;
- struct ureg_src s;
- struct ureg_dst p;
- struct ureg_dst address;
- struct ureg_dst a0;
- struct ureg_dst predicate;
- struct ureg_dst predicate_tmp;
- struct ureg_dst predicate_dst;
- struct ureg_dst tS[8]; /* texture stage registers */
- struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
- struct ureg_dst t[8]; /* scratch TEMPs */
- struct ureg_src vC[2]; /* PS color in */
- struct ureg_src vT[8]; /* PS texcoord in */
- struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
- struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
- } regs;
- unsigned num_temp; /* ARRAY_SIZE(regs.r) */
- unsigned num_scratch;
- unsigned loop_depth;
- unsigned loop_depth_max;
- unsigned cond_depth;
- unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
- unsigned cond_labels[NINE_MAX_COND_DEPTH];
- bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
- bool predicated_activated;
-
- unsigned *inst_labels; /* LABEL op */
- unsigned num_inst_labels;
-
- unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
-
- struct sm1_local_const *lconstf;
- unsigned num_lconstf;
- struct sm1_local_const *lconsti;
- unsigned num_lconsti;
- struct sm1_local_const *lconstb;
- unsigned num_lconstb;
-
- bool slots_used[NINE_MAX_CONST_ALL_VS];
- unsigned *slot_map;
- unsigned num_slots;
-
- bool indirect_const_access;
- bool failure;
-
- struct nine_vs_output_info output_info[16];
- int num_outputs;
-
- struct nine_shader_info *info;
-
- int16_t op_info_map[D3DSIO_BREAKP + 1];
-};
-
-#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
-#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
-
-#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
-
-static void
-sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
-
-static void
-sm1_instruction_check(const struct sm1_instruction *insn)
-{
- if (insn->opcode == D3DSIO_CRS)
- {
- if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
- {
- DBG("CRS.mask.w\n");
- }
- }
-}
-
-static void
-nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
- int mask, int output_index)
-{
- tx->output_info[tx->num_outputs].output_semantic = Usage;
- tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
- tx->output_info[tx->num_outputs].mask = mask;
- tx->output_info[tx->num_outputs].output_index = output_index;
- tx->num_outputs++;
-}
-
-static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
-{
- struct ureg_src src;
-
- if (tx->slot_map)
- idx = tx->slot_map[idx];
- /* vswp constant handling: we use two buffers
- * to fit all the float constants. The special handling
- * doesn't need to be elsewhere, because all the instructions
- * accessing the constants directly are VS1, and swvp
- * is VS >= 2 */
- if (tx->info->swvp_on && idx >= 4096) {
- /* TODO: swvp rel is broken if many constants are used */
- src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
- src = ureg_src_dimension(src, 1);
- } else {
- src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
- src = ureg_src_dimension(src, 0);
- }
-
- if (!tx->info->swvp_on)
- tx->slots_used[idx] = true;
- if (tx->info->const_float_slots < (idx + 1))
- tx->info->const_float_slots = idx + 1;
- if (tx->num_slots < (idx + 1))
- tx->num_slots = idx + 1;
-
- return src;
-}
-
-static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
-{
- struct ureg_src src;
-
- if (tx->info->swvp_on) {
- src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
- src = ureg_src_dimension(src, 2);
- } else {
- unsigned slot_idx = tx->info->const_i_base + idx;
- if (tx->slot_map)
- slot_idx = tx->slot_map[slot_idx];
- src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
- src = ureg_src_dimension(src, 0);
- tx->slots_used[slot_idx] = true;
- tx->info->int_slots_used[idx] = true;
- if (tx->num_slots < (slot_idx + 1))
- tx->num_slots = slot_idx + 1;
- }
-
- if (tx->info->const_int_slots < (idx + 1))
- tx->info->const_int_slots = idx + 1;
-
- return src;
-}
-
-static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
-{
- struct ureg_src src;
-
- char r = idx / 4;
- char s = idx & 3;
-
- if (tx->info->swvp_on) {
- src = ureg_src_register(TGSI_FILE_CONSTANT, r);
- src = ureg_src_dimension(src, 3);
- } else {
- unsigned slot_idx = tx->info->const_b_base + r;
- if (tx->slot_map)
- slot_idx = tx->slot_map[slot_idx];
- src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
- src = ureg_src_dimension(src, 0);
- tx->slots_used[slot_idx] = true;
- tx->info->bool_slots_used[idx] = true;
- if (tx->num_slots < (slot_idx + 1))
- tx->num_slots = slot_idx + 1;
- }
- src = ureg_swizzle(src, s, s, s, s);
-
- if (tx->info->const_bool_slots < (idx + 1))
- tx->info->const_bool_slots = idx + 1;
-
- return src;
-}
-
-static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
-{
- struct ureg_src src;
-
- unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
- (tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
-
- if (!tx->info->swvp_on && tx->slot_map)
- slot_idx = tx->slot_map[slot_idx];
- src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
- src = ureg_src_dimension(src, 0);
-
- if (!tx->info->swvp_on)
- tx->slots_used[slot_idx] = true;
- if (tx->num_slots < (slot_idx + 1))
- tx->num_slots = slot_idx + 1;
-
- return src;
-}
-
-static bool
-tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
-{
- INT i;
-
- if (index < 0 || index >= tx->num_constf_allowed) {
- tx->failure = true;
- return false;
- }
- for (i = 0; i < tx->num_lconstf; ++i) {
- if (tx->lconstf[i].idx == index) {
- *src = tx->lconstf[i].reg;
- return true;
- }
- }
- return false;
-}
-static bool
-tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
-{
- int i;
-
- if (index < 0 || index >= tx->num_consti_allowed) {
- tx->failure = true;
- return false;
- }
- for (i = 0; i < tx->num_lconsti; ++i) {
- if (tx->lconsti[i].idx == index) {
- *src = tx->lconsti[i].reg;
- return true;
- }
- }
- return false;
-}
-static bool
-tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
-{
- int i;
-
- if (index < 0 || index >= tx->num_constb_allowed) {
- tx->failure = true;
- return false;
- }
- for (i = 0; i < tx->num_lconstb; ++i) {
- if (tx->lconstb[i].idx == index) {
- *src = tx->lconstb[i].reg;
- return true;
- }
- }
- return false;
-}
-
-static void
-tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
-{
- unsigned n;
-
- FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
-
- for (n = 0; n < tx->num_lconstf; ++n)
- if (tx->lconstf[n].idx == index)
- break;
- if (n == tx->num_lconstf) {
- if ((n % 8) == 0) {
- tx->lconstf = REALLOC(tx->lconstf,
- (n + 0) * sizeof(tx->lconstf[0]),
- (n + 8) * sizeof(tx->lconstf[0]));
- assert(tx->lconstf);
- }
- tx->num_lconstf++;
- }
- tx->lconstf[n].idx = index;
- tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
-
- memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
-}
-static void
-tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
-{
- unsigned n;
-
- FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
-
- for (n = 0; n < tx->num_lconsti; ++n)
- if (tx->lconsti[n].idx == index)
- break;
- if (n == tx->num_lconsti) {
- if ((n % 8) == 0) {
- tx->lconsti = REALLOC(tx->lconsti,
- (n + 0) * sizeof(tx->lconsti[0]),
- (n + 8) * sizeof(tx->lconsti[0]));
- assert(tx->lconsti);
- }
- tx->num_lconsti++;
- }
-
- tx->lconsti[n].idx = index;
- tx->lconsti[n].reg = tx->native_integers ?
- ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
- ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
-}
-static void
-tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
-{
- unsigned n;
-
- FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
-
- for (n = 0; n < tx->num_lconstb; ++n)
- if (tx->lconstb[n].idx == index)
- break;
- if (n == tx->num_lconstb) {
- if ((n % 8) == 0) {
- tx->lconstb = REALLOC(tx->lconstb,
- (n + 0) * sizeof(tx->lconstb[0]),
- (n + 8) * sizeof(tx->lconstb[0]));
- assert(tx->lconstb);
- }
- tx->num_lconstb++;
- }
-
- tx->lconstb[n].idx = index;
- tx->lconstb[n].reg = tx->native_integers ?
- ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
- ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
-}
-
-static inline struct ureg_dst
-tx_scratch(struct shader_translator *tx)
-{
- if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
- tx->failure = true;
- return tx->regs.t[0];
- }
- if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
- tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
- return tx->regs.t[tx->num_scratch++];
-}
-
-static inline struct ureg_dst
-tx_scratch_scalar(struct shader_translator *tx)
-{
- return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
-}
-
-static inline struct ureg_src
-tx_src_scalar(struct ureg_dst dst)
-{
- struct ureg_src src = ureg_src(dst);
- int c = ffs(dst.WriteMask) - 1;
- if (dst.WriteMask == (1 << c))
- src = ureg_scalar(src, c);
- return src;
-}
-
-static inline void
-tx_temp_alloc(struct shader_translator *tx, INT idx)
-{
- assert(idx >= 0);
- if (idx >= tx->num_temp) {
- unsigned k = tx->num_temp;
- unsigned n = idx + 1;
- tx->regs.r = REALLOC(tx->regs.r,
- k * sizeof(tx->regs.r[0]),
- n * sizeof(tx->regs.r[0]));
- for (; k < n; ++k)
- tx->regs.r[k] = ureg_dst_undef();
- tx->num_temp = n;
- }
- if (ureg_dst_is_undef(tx->regs.r[idx]))
- tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
-}
-
-static inline void
-tx_addr_alloc(struct shader_translator *tx, INT idx)
-{
- assert(idx == 0);
- if (ureg_dst_is_undef(tx->regs.address))
- tx->regs.address = ureg_DECL_address(tx->ureg);
- if (ureg_dst_is_undef(tx->regs.a0))
- tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
-}
-
-static inline bool
-TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
- unsigned target, struct ureg_src src0,
- struct ureg_src src1, INT idx)
-{
- struct ureg_dst tmp;
- struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
-
- if (!(tx->info->fetch4 & (1 << idx)))
- return false;
-
- /* TODO: needs more tests, but this feature is not much used at all */
-
- tmp = tx_scratch(tx);
- ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
- NULL, 0, src_tg4, 3);
- ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
- return true;
-}
-
-/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
- * the projection should be applied on the texture. It doesn't
- * apply on texkill.
- * The doc is very imprecise here (it says the projection is done
- * before rasterization, thus in vs, which seems wrong since ps instructions
- * are affected differently)
- * For now we only apply to the ps TEX instruction and TEXBEM.
- * Perhaps some other instructions would need it */
-static inline void
-apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
- struct ureg_src src, INT idx)
-{
- struct ureg_dst tmp;
- unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
-
- /* no projection */
- if (dim == 1) {
- ureg_MOV(tx->ureg, dst, src);
- } else {
- tmp = tx_scratch_scalar(tx);
- ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
- ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
- }
-}
-
-static inline void
-TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
- unsigned target, struct ureg_src src0,
- struct ureg_src src1, INT idx)
-{
- unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
- struct ureg_dst tmp;
- bool shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
-
- /* dim == 1: no projection
- * Looks like must be disabled when it makes no
- * sense according the texture dimensions
- */
- if (dim == 1 || (dim <= target && !shadow)) {
- ureg_TEX(tx->ureg, dst, target, src0, src1);
- } else if (dim == 4) {
- ureg_TXP(tx->ureg, dst, target, src0, src1);
- } else {
- tmp = tx_scratch(tx);
- apply_ps1x_projection(tx, tmp, src0, idx);
- ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
- }
-}
-
-static inline void
-tx_texcoord_alloc(struct shader_translator *tx, INT idx)
-{
- assert(IS_PS);
- assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
- if (ureg_src_is_undef(tx->regs.vT[idx]))
- tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
- TGSI_INTERPOLATE_PERSPECTIVE);
-}
-
-static inline unsigned *
-tx_bgnloop(struct shader_translator *tx)
-{
- tx->loop_depth++;
- if (tx->loop_depth_max < tx->loop_depth)
- tx->loop_depth_max = tx->loop_depth;
- assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
- return &tx->loop_labels[tx->loop_depth - 1];
-}
-
-static inline unsigned *
-tx_endloop(struct shader_translator *tx)
-{
- assert(tx->loop_depth);
- tx->loop_depth--;
- ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
- ureg_get_instruction_number(tx->ureg));
- return &tx->loop_labels[tx->loop_depth];
-}
-
-static struct ureg_dst
-tx_get_loopctr(struct shader_translator *tx, bool loop_or_rep)
-{
- const unsigned l = tx->loop_depth - 1;
-
- if (!tx->loop_depth)
- {
- DBG("loop counter requested outside of loop\n");
- return ureg_dst_undef();
- }
-
- if (ureg_dst_is_undef(tx->regs.rL[l])) {
- /* loop or rep ctr creation */
- tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
- if (loop_or_rep)
- tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
- tx->loop_or_rep[l] = loop_or_rep;
- }
- /* loop - rep - endloop - endrep not allowed */
- assert(tx->loop_or_rep[l] == loop_or_rep);
-
- return tx->regs.rL[l];
-}
-
-static struct ureg_dst
-tx_get_loopal(struct shader_translator *tx)
-{
- int loop_level = tx->loop_depth - 1;
-
- while (loop_level >= 0) {
- /* handle loop - rep - endrep - endloop case */
- if (tx->loop_or_rep[loop_level])
- /* the aL value is in the Y component (nine implementation) */
- return tx->regs.aL[loop_level];
- loop_level--;
- }
-
- DBG("aL counter requested outside of loop\n");
- return ureg_dst_undef();
-}
-
-static inline unsigned *
-tx_cond(struct shader_translator *tx)
-{
- assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
- tx->cond_depth++;
- return &tx->cond_labels[tx->cond_depth - 1];
-}
-
-static inline unsigned *
-tx_elsecond(struct shader_translator *tx)
-{
- assert(tx->cond_depth);
- return &tx->cond_labels[tx->cond_depth - 1];
-}
-
-static inline void
-tx_endcond(struct shader_translator *tx)
-{
- assert(tx->cond_depth);
- tx->cond_depth--;
- ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
- ureg_get_instruction_number(tx->ureg));
-}
-
-static inline struct ureg_dst
-nine_ureg_dst_register(unsigned file, int index)
-{
- return ureg_dst(ureg_src_register(file, index));
-}
-
-static inline struct ureg_src
-nine_get_position_input(struct shader_translator *tx)
-{
- struct ureg_program *ureg = tx->ureg;
-
- if (tx->wpos_is_sysval)
- return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
- else
- return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
- 0, TGSI_INTERPOLATE_LINEAR);
-}
-
-static struct ureg_src
-tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src src;
- struct ureg_dst tmp;
-
- assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
- (param->file == D3DSPR_INPUT && tx->version.major == 3));
-
- switch (param->file)
- {
- case D3DSPR_TEMP:
- tx_temp_alloc(tx, param->idx);
- src = ureg_src(tx->regs.r[param->idx]);
- break;
- /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
- case D3DSPR_ADDR:
- if (IS_VS) {
- assert(param->idx == 0);
- /* the address register (vs only) must be
- * assigned before use */
- assert(!ureg_dst_is_undef(tx->regs.a0));
- /* Round to lowest for vs1.1 (contrary to the doc), else
- * round to nearest */
- if (tx->version.major < 2 && tx->version.minor < 2)
- ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
- else
- ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
- src = ureg_src(tx->regs.address);
- } else {
- if (tx->version.major < 2 && tx->version.minor < 4) {
- /* no subroutines, so should be defined */
- src = ureg_src(tx->regs.tS[param->idx]);
- } else {
- tx_texcoord_alloc(tx, param->idx);
- src = tx->regs.vT[param->idx];
- }
- }
- break;
- case D3DSPR_INPUT:
- if (IS_VS) {
- src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
- } else {
- if (tx->version.major < 3) {
- src = ureg_DECL_fs_input_centroid(
- ureg, TGSI_SEMANTIC_COLOR, param->idx,
- tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE,
- tx->info->force_color_in_centroid ?
- TGSI_INTERPOLATE_LOC_CENTROID : 0,
- 0, 1);
- } else {
- if(param->rel) {
- /* Copy all inputs (non consecutive)
- * to temp array (consecutive).
- * This is not good for performance.
- * A better way would be to have inputs
- * consecutive (would need implement alternative
- * way to match vs outputs and ps inputs).
- * However even with the better way, the temp array
- * copy would need to be used if some inputs
- * are not GENERIC or if they have different
- * interpolation flag. */
- if (ureg_src_is_undef(tx->regs.v_consecutive)) {
- int i;
- tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
- for (i = 0; i < 10; i++) {
- if (!ureg_src_is_undef(tx->regs.v[i]))
- ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
- else
- ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
- }
- }
- src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
- } else {
- assert(param->idx < ARRAY_SIZE(tx->regs.v));
- src = tx->regs.v[param->idx];
- }
- }
- }
- if (param->rel)
- src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
- break;
- case D3DSPR_PREDICATE:
- if (ureg_dst_is_undef(tx->regs.predicate)) {
- /* Forbidden to use the predicate register before being set */
- tx->failure = true;
- tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
- }
- src = ureg_src(tx->regs.predicate);
- break;
- case D3DSPR_SAMPLER:
- assert(param->mod == NINED3DSPSM_NONE);
- /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
- src = ureg_DECL_sampler(ureg, param->idx);
- break;
- case D3DSPR_CONST:
- if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
- src = nine_float_constant_src(tx, param->idx);
- if (param->rel) {
- tx->indirect_const_access = true;
- src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
- }
- }
- if (!IS_VS && tx->version.major < 2) {
- /* ps 1.X clamps constants */
- tmp = tx_scratch(tx);
- ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
- ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
- src = ureg_src(tmp);
- }
- break;
- case D3DSPR_CONST2:
- case D3DSPR_CONST3:
- case D3DSPR_CONST4:
- DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
- assert(!"CONST2/3/4");
- src = ureg_imm1f(ureg, 0.0f);
- break;
- case D3DSPR_CONSTINT:
- /* relative addressing only possible for float constants in vs */
- if (!tx_lconsti(tx, &src, param->idx))
- src = nine_integer_constant_src(tx, param->idx);
- break;
- case D3DSPR_CONSTBOOL:
- if (!tx_lconstb(tx, &src, param->idx))
- src = nine_boolean_constant_src(tx, param->idx);
- break;
- case D3DSPR_LOOP:
- if (ureg_dst_is_undef(tx->regs.address))
- tx->regs.address = ureg_DECL_address(ureg);
- if (!tx->native_integers)
- ureg_ARR(ureg, tx->regs.address,
- ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
- else
- ureg_UARL(ureg, tx->regs.address,
- ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
- src = ureg_src(tx->regs.address);
- break;
- case D3DSPR_MISCTYPE:
- switch (param->idx) {
- case D3DSMO_POSITION:
- if (ureg_src_is_undef(tx->regs.vPos))
- tx->regs.vPos = nine_get_position_input(tx);
- if (tx->shift_wpos) {
- /* TODO: do this only once */
- struct ureg_dst wpos = tx_scratch(tx);
- ureg_ADD(ureg, wpos, tx->regs.vPos,
- ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
- src = ureg_src(wpos);
- } else {
- src = tx->regs.vPos;
- }
- break;
- case D3DSMO_FACE:
- if (ureg_src_is_undef(tx->regs.vFace)) {
- if (tx->face_is_sysval_integer) {
- tmp = ureg_DECL_temporary(ureg);
- tx->regs.vFace =
- ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
-
- /* convert bool to float */
- ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
- ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
- tx->regs.vFace = ureg_src(tmp);
- } else {
- tx->regs.vFace = ureg_DECL_fs_input(ureg,
- TGSI_SEMANTIC_FACE, 0,
- TGSI_INTERPOLATE_CONSTANT);
- }
- tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
- }
- src = tx->regs.vFace;
- break;
- default:
- assert(!"invalid src D3DSMO");
- break;
- }
- break;
- case D3DSPR_TEMPFLOAT16:
- break;
- default:
- assert(!"invalid src D3DSPR");
- }
-
- switch (param->mod) {
- case NINED3DSPSM_DW:
- tmp = tx_scratch(tx);
- /* NOTE: app is not allowed to read w with this modifier */
- ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
- ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_DZ:
- tmp = tx_scratch(tx);
- /* NOTE: app is not allowed to read z with this modifier */
- ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
- ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
- src = ureg_src(tmp);
- break;
- default:
- break;
- }
-
- if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
- src = ureg_swizzle(src,
- (param->swizzle >> 0) & 0x3,
- (param->swizzle >> 2) & 0x3,
- (param->swizzle >> 4) & 0x3,
- (param->swizzle >> 6) & 0x3);
-
- switch (param->mod) {
- case NINED3DSPSM_ABS:
- src = ureg_abs(src);
- break;
- case NINED3DSPSM_ABSNEG:
- src = ureg_negate(ureg_abs(src));
- break;
- case NINED3DSPSM_NEG:
- src = ureg_negate(src);
- break;
- case NINED3DSPSM_BIAS:
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_BIASNEG:
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_NOT:
- if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
- tmp = tx_scratch(tx);
- ureg_NOT(ureg, tmp, src);
- src = ureg_src(tmp);
- break;
- } else { /* predicate */
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
- src = ureg_src(tmp);
- }
- FALLTHROUGH;
- case NINED3DSPSM_COMP:
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_DZ:
- case NINED3DSPSM_DW:
- /* Already handled*/
- break;
- case NINED3DSPSM_SIGN:
- tmp = tx_scratch(tx);
- ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_SIGNNEG:
- tmp = tx_scratch(tx);
- ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_X2:
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, src, src);
- src = ureg_src(tmp);
- break;
- case NINED3DSPSM_X2NEG:
- tmp = tx_scratch(tx);
- ureg_ADD(ureg, tmp, src, src);
- src = ureg_negate(ureg_src(tmp));
- break;
- default:
- assert(param->mod == NINED3DSPSM_NONE);
- break;
- }
-
- return src;
-}
-
-static struct ureg_dst
-_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
-{
- struct ureg_dst dst;
-
- switch (param->file)
- {
- case D3DSPR_TEMP:
- assert(!param->rel);
- tx_temp_alloc(tx, param->idx);
- dst = tx->regs.r[param->idx];
- break;
- /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
- case D3DSPR_ADDR:
- assert(!param->rel);
- if (tx->version.major < 2 && !IS_VS) {
- if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
- tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
- dst = tx->regs.tS[param->idx];
- } else
- if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
- tx_texcoord_alloc(tx, param->idx);
- dst = ureg_dst(tx->regs.vT[param->idx]);
- } else {
- tx_addr_alloc(tx, param->idx);
- dst = tx->regs.a0;
- }
- break;
- case D3DSPR_RASTOUT:
- assert(!param->rel);
- switch (param->idx) {
- case 0:
- if (ureg_dst_is_undef(tx->regs.oPos)) {
- if (tx->info->clip_plane_emulation > 0) {
- tx->regs.oPos = ureg_DECL_temporary(tx->ureg);
- } else {
- tx->regs.oPos = tx->regs.oPos_out;
- }
- }
- dst = tx->regs.oPos;
- break;
- case 1:
- if (ureg_dst_is_undef(tx->regs.oFog))
- tx->regs.oFog =
- ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
- dst = tx->regs.oFog;
- break;
- case 2:
- if (ureg_dst_is_undef(tx->regs.oPts))
- tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
- dst = tx->regs.oPts;
- break;
- default:
- assert(0);
- break;
- }
- break;
- /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
- case D3DSPR_OUTPUT:
- if (tx->version.major < 3) {
- assert(!param->rel);
- dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
- } else {
- assert(!param->rel); /* TODO */
- assert(param->idx < ARRAY_SIZE(tx->regs.o));
- dst = tx->regs.o[param->idx];
- }
- break;
- case D3DSPR_ATTROUT: /* VS */
- case D3DSPR_COLOROUT: /* PS */
- assert(param->idx >= 0 && param->idx < 4);
- assert(!param->rel);
- tx->info->rt_mask |= 1 << param->idx;
- if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
- /* ps < 3: oCol[0] will have fog blending afterward
- * ps: oCol[0] might have alphatest afterward */
- if (!IS_VS && param->idx == 0) {
- tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
- } else {
- tx->regs.oCol[param->idx] =
- ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
- }
- }
- dst = tx->regs.oCol[param->idx];
- if (IS_VS && tx->version.major < 3)
- dst = ureg_saturate(dst);
- break;
- case D3DSPR_DEPTHOUT:
- assert(!param->rel);
- if (ureg_dst_is_undef(tx->regs.oDepth))
- tx->regs.oDepth =
- ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
- TGSI_WRITEMASK_Z, 0, 1);
- dst = tx->regs.oDepth; /* XXX: must write .z component */
- break;
- case D3DSPR_PREDICATE:
- if (ureg_dst_is_undef(tx->regs.predicate))
- tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
- dst = tx->regs.predicate;
- break;
- case D3DSPR_TEMPFLOAT16:
- DBG("unhandled D3DSPR: %u\n", param->file);
- break;
- default:
- assert(!"invalid dst D3DSPR");
- break;
- }
- if (param->rel)
- dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
-
- if (param->mask != NINED3DSP_WRITEMASK_ALL)
- dst = ureg_writemask(dst, param->mask);
- if (param->mod & NINED3DSPDM_SATURATE)
- dst = ureg_saturate(dst);
-
- if (tx->predicated_activated) {
- tx->regs.predicate_dst = dst;
- dst = tx->regs.predicate_tmp;
- }
-
- return dst;
-}
-
-static struct ureg_dst
-tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
-{
- if (param->shift) {
- tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
- return tx->regs.tdst;
- }
- return _tx_dst_param(tx, param);
-}
-
-static void
-tx_apply_dst0_modifiers(struct shader_translator *tx)
-{
- struct ureg_dst rdst;
- float f;
-
- if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
- return;
- rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
-
- assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
-
- if (tx->insn.dst[0].shift < 0)
- f = 1.0f / (1 << -tx->insn.dst[0].shift);
- else
- f = 1 << tx->insn.dst[0].shift;
-
- ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
-}
-
-static struct ureg_src
-tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
-{
- struct ureg_src src;
-
- assert(!param->shift);
- assert(!(param->mod & NINED3DSPDM_SATURATE));
-
- switch (param->file) {
- case D3DSPR_INPUT:
- if (IS_VS) {
- src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
- } else {
- assert(!param->rel);
- assert(param->idx < ARRAY_SIZE(tx->regs.v));
- src = tx->regs.v[param->idx];
- }
- break;
- default:
- src = ureg_src(tx_dst_param(tx, param));
- break;
- }
- if (param->rel)
- src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
-
- if (!param->mask)
- WARN("mask is 0, using identity swizzle\n");
-
- if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
- char s[4];
- int n;
- int c;
- for (n = 0, c = 0; c < 4; ++c)
- if (param->mask & (1 << c))
- s[n++] = c;
- assert(n);
- for (c = n; c < 4; ++c)
- s[c] = s[n - 1];
- src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
- }
- return src;
-}
-
-static HRESULT
-NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst;
- struct ureg_src src[2];
- struct sm1_src_param *src_mat = &tx->insn.src[1];
- unsigned i;
-
- dst = tx_dst_param(tx, &tx->insn.dst[0]);
- src[0] = tx_src_param(tx, &tx->insn.src[0]);
-
- for (i = 0; i < n; i++)
- {
- const unsigned m = (1 << i);
-
- src[1] = tx_src_param(tx, src_mat);
- src_mat->idx++;
-
- if (!(dst.WriteMask & m))
- continue;
-
- /* XXX: src == dst case ? */
-
- switch (k) {
- case 3:
- ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
- break;
- case 4:
- ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
- break;
- default:
- DBG("invalid operation: M%ux%u\n", m, n);
- break;
- }
- }
-
- return D3D_OK;
-}
-
-#define VNOTSUPPORTED 0, 0
-#define V(maj, min) (((maj) << 8) | (min))
-
-static inline const char *
-d3dsio_to_string( unsigned opcode )
-{
- static const char *names[] = {
- "NOP",
- "MOV",
- "ADD",
- "SUB",
- "MAD",
- "MUL",
- "RCP",
- "RSQ",
- "DP3",
- "DP4",
- "MIN",
- "MAX",
- "SLT",
- "SGE",
- "EXP",
- "LOG",
- "LIT",
- "DST",
- "LRP",
- "FRC",
- "M4x4",
- "M4x3",
- "M3x4",
- "M3x3",
- "M3x2",
- "CALL",
- "CALLNZ",
- "LOOP",
- "RET",
- "ENDLOOP",
- "LABEL",
- "DCL",
- "POW",
- "CRS",
- "SGN",
- "ABS",
- "NRM",
- "SINCOS",
- "REP",
- "ENDREP",
- "IF",
- "IFC",
- "ELSE",
- "ENDIF",
- "BREAK",
- "BREAKC",
- "MOVA",
- "DEFB",
- "DEFI",
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- "TEXCOORD",
- "TEXKILL",
- "TEX",
- "TEXBEM",
- "TEXBEML",
- "TEXREG2AR",
- "TEXREG2GB",
- "TEXM3x2PAD",
- "TEXM3x2TEX",
- "TEXM3x3PAD",
- "TEXM3x3TEX",
- NULL,
- "TEXM3x3SPEC",
- "TEXM3x3VSPEC",
- "EXPP",
- "LOGP",
- "CND",
- "DEF",
- "TEXREG2RGB",
- "TEXDP3TEX",
- "TEXM3x2DEPTH",
- "TEXDP3",
- "TEXM3x3",
- "TEXDEPTH",
- "CMP",
- "BEM",
- "DP2ADD",
- "DSX",
- "DSY",
- "TEXLDD",
- "SETP",
- "TEXLDL",
- "BREAKP"
- };
-
- if (opcode < ARRAY_SIZE(names)) return names[opcode];
-
- switch (opcode) {
- case D3DSIO_PHASE: return "PHASE";
- case D3DSIO_COMMENT: return "COMMENT";
- case D3DSIO_END: return "END";
- default:
- return NULL;
- }
-}
-
-#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
-#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
- (inst).vert_version.max | \
- (inst).frag_version.min | \
- (inst).frag_version.max)
-
-#define SPECIAL(name) \
- NineTranslateInstruction_##name
-
-#define DECL_SPECIAL(name) \
- static HRESULT \
- NineTranslateInstruction_##name( struct shader_translator *tx )
-
-static HRESULT
-NineTranslateInstruction_Generic(struct shader_translator *);
-
-DECL_SPECIAL(NOP)
-{
- /* Nothing to do. NOP was used to avoid hangs
- * with very old d3d drivers. */
- return D3D_OK;
-}
-
-DECL_SPECIAL(SUB)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
-
- ureg_ADD(ureg, dst, src0, ureg_negate(src1));
- return D3D_OK;
-}
-
-DECL_SPECIAL(ABS)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
-
- ureg_MOV(ureg, dst, ureg_abs(src));
- return D3D_OK;
-}
-
-DECL_SPECIAL(XPD)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
-
- ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
- ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
- TGSI_SWIZZLE_X, 0),
- ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
- TGSI_SWIZZLE_Y, 0));
- ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
- ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
- TGSI_SWIZZLE_Y, 0),
- ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
- TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
- ureg_src(dst));
- ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
- ureg_imm1f(ureg, 1));
- return D3D_OK;
-}
-
-DECL_SPECIAL(M4x4)
-{
- return NineTranslateInstruction_Mkxn(tx, 4, 4);
-}
-
-DECL_SPECIAL(M4x3)
-{
- return NineTranslateInstruction_Mkxn(tx, 4, 3);
-}
-
-DECL_SPECIAL(M3x4)
-{
- return NineTranslateInstruction_Mkxn(tx, 3, 4);
-}
-
-DECL_SPECIAL(M3x3)
-{
- return NineTranslateInstruction_Mkxn(tx, 3, 3);
-}
-
-DECL_SPECIAL(M3x2)
-{
- return NineTranslateInstruction_Mkxn(tx, 3, 2);
-}
-
-DECL_SPECIAL(CMP)
-{
- ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[2]),
- tx_src_param(tx, &tx->insn.src[1]));
- return D3D_OK;
-}
-
-DECL_SPECIAL(CND)
-{
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_dst cgt;
- struct ureg_src cnd;
-
- /* the coissue flag was a tip for compilers to advise to
- * execute two operations at the same time, in cases
- * the two executions had the same dst with different channels.
- * It has no effect on current hw. However it seems CND
- * is affected. The handling of this very specific case
- * handled below mimic wine behaviour */
- if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
- ureg_MOV(tx->ureg,
- dst, tx_src_param(tx, &tx->insn.src[1]));
- return D3D_OK;
- }
-
- cnd = tx_src_param(tx, &tx->insn.src[0]);
- cgt = tx_scratch(tx);
-
- if (tx->version.major == 1 && tx->version.minor < 4)
- cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
-
- ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
-
- ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
- tx_src_param(tx, &tx->insn.src[1]),
- tx_src_param(tx, &tx->insn.src[2]));
- return D3D_OK;
-}
-
-DECL_SPECIAL(CALL)
-{
- assert(tx->insn.src[0].idx < tx->num_inst_labels);
- ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
- return D3D_OK;
-}
-
-DECL_SPECIAL(CALLNZ)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
-
- if (!tx->native_integers)
- ureg_IF(ureg, src, tx_cond(tx));
- else
- ureg_UIF(ureg, src, tx_cond(tx));
- ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
- tx_endcond(tx);
- ureg_ENDIF(ureg);
- return D3D_OK;
-}
-
-DECL_SPECIAL(LOOP)
-{
- struct ureg_program *ureg = tx->ureg;
- unsigned *label;
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
- struct ureg_dst ctr;
- struct ureg_dst aL;
- struct ureg_dst tmp;
- struct ureg_src ctrx;
-
- label = tx_bgnloop(tx);
- ctr = tx_get_loopctr(tx, true);
- aL = tx_get_loopal(tx);
- ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
-
- /* src: num_iterations*/
- ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
- ureg_scalar(src, TGSI_SWIZZLE_X));
- /* al: unused - start_value of al - step for al - unused */
- ureg_MOV(ureg, aL, src);
- ureg_BGNLOOP(tx->ureg, label);
- tmp = tx_scratch_scalar(tx);
- /* Initially ctr.x contains the number of iterations.
- * ctr.y will contain the updated value of al.
- * We decrease ctr.x at the end of every iteration,
- * and stop when it reaches 0. */
-
- if (!tx->native_integers) {
- /* case src and ctr contain floats */
- /* to avoid precision issue, we stop when ctr <= 0.5 */
- ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
- ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- } else {
- /* case src and ctr contain integers */
- ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
- ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- }
- ureg_BRK(ureg);
- tx_endcond(tx);
- ureg_ENDIF(ureg);
- return D3D_OK;
-}
-
-DECL_SPECIAL(RET)
-{
- /* RET as a last instruction could be safely ignored.
- * Remove it to prevent crashes/warnings in case underlying
- * driver doesn't implement arbitrary returns.
- */
- if (*(tx->parse_next) != NINED3DSP_END) {
- ureg_RET(tx->ureg);
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(ENDLOOP)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst ctr = tx_get_loopctr(tx, true);
- struct ureg_dst al = tx_get_loopal(tx);
- struct ureg_dst dst_ctrx, dst_al;
- struct ureg_src src_ctr, al_counter;
-
- dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
- dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
- src_ctr = ureg_src(ctr);
- al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
-
- /* ctr.x -= 1
- * al.y (aL) += step */
- if (!tx->native_integers) {
- ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
- ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
- } else {
- ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
- ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
- }
- ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
- return D3D_OK;
-}
-
-DECL_SPECIAL(LABEL)
-{
- unsigned k = tx->num_inst_labels;
- unsigned n = tx->insn.src[0].idx;
- assert(n < 2048);
- if (n >= k)
- tx->inst_labels = REALLOC(tx->inst_labels,
- k * sizeof(tx->inst_labels[0]),
- n * sizeof(tx->inst_labels[0]));
-
- tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
- return D3D_OK;
-}
-
-DECL_SPECIAL(SINCOS)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_dst tmp = tx_scratch_scalar(tx);
-
- assert(!(dst.WriteMask & 0xc));
-
- /* Copying to a temporary register avoids src/dst aliasing.
- * src is supposed to have replicated swizzle. */
- ureg_MOV(ureg, tmp, src);
-
- /* z undefined, w untouched */
- ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
- tx_src_scalar(tmp));
- ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
- tx_src_scalar(tmp));
- return D3D_OK;
-}
-
-DECL_SPECIAL(SGN)
-{
- ureg_SSG(tx->ureg,
- tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]));
- return D3D_OK;
-}
-
-DECL_SPECIAL(REP)
-{
- struct ureg_program *ureg = tx->ureg;
- unsigned *label;
- struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_dst ctr;
- struct ureg_dst tmp;
- struct ureg_src ctrx;
-
- label = tx_bgnloop(tx);
- ctr = ureg_writemask(tx_get_loopctr(tx, false), NINED3DSP_WRITEMASK_0);
- ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
-
- /* NOTE: rep must be constant, so we don't have to save the count */
- assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
-
- /* rep: num_iterations - 0 - 0 - 0 */
- ureg_MOV(ureg, ctr, rep);
- ureg_BGNLOOP(ureg, label);
- tmp = tx_scratch_scalar(tx);
- /* Initially ctr.x contains the number of iterations.
- * We decrease ctr.x at the end of every iteration,
- * and stop when it reaches 0. */
-
- if (!tx->native_integers) {
- /* case src and ctr contain floats */
- /* to avoid precision issue, we stop when ctr <= 0.5 */
- ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
- ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- } else {
- /* case src and ctr contain integers */
- ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
- ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
- }
- ureg_BRK(ureg);
- tx_endcond(tx);
- ureg_ENDIF(ureg);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(ENDREP)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst ctr = tx_get_loopctr(tx, false);
- struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
- struct ureg_src src_ctr = ureg_src(ctr);
-
- /* ctr.x -= 1 */
- if (!tx->native_integers)
- ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
- else
- ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
-
- ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
- return D3D_OK;
-}
-
-DECL_SPECIAL(ENDIF)
-{
- tx_endcond(tx);
- ureg_ENDIF(tx->ureg);
- return D3D_OK;
-}
-
-DECL_SPECIAL(IF)
-{
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
-
- if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
- ureg_UIF(tx->ureg, src, tx_cond(tx));
- else
- ureg_IF(tx->ureg, src, tx_cond(tx));
-
- return D3D_OK;
-}
-
-static inline unsigned
-sm1_insn_flags_to_tgsi_setop(BYTE flags)
-{
- switch (flags) {
- case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
- case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
- case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
- case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
- case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
- case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
- default:
- assert(!"invalid comparison flags");
- return TGSI_OPCODE_SGT;
- }
-}
-
-DECL_SPECIAL(IFC)
-{
- const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
- struct ureg_src src[2];
- struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
- src[0] = tx_src_param(tx, &tx->insn.src[0]);
- src[1] = tx_src_param(tx, &tx->insn.src[1]);
- ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
- ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
- return D3D_OK;
-}
-
-DECL_SPECIAL(ELSE)
-{
- ureg_ELSE(tx->ureg, tx_elsecond(tx));
- return D3D_OK;
-}
-
-DECL_SPECIAL(BREAKC)
-{
- const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
- struct ureg_src src[2];
- struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
- src[0] = tx_src_param(tx, &tx->insn.src[0]);
- src[1] = tx_src_param(tx, &tx->insn.src[1]);
- ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
- ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
- ureg_BRK(tx->ureg);
- tx_endcond(tx);
- ureg_ENDIF(tx->ureg);
- return D3D_OK;
-}
-
-static const char *sm1_declusage_names[] =
-{
- [D3DDECLUSAGE_POSITION] = "POSITION",
- [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
- [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
- [D3DDECLUSAGE_NORMAL] = "NORMAL",
- [D3DDECLUSAGE_PSIZE] = "PSIZE",
- [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
- [D3DDECLUSAGE_TANGENT] = "TANGENT",
- [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
- [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
- [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
- [D3DDECLUSAGE_COLOR] = "COLOR",
- [D3DDECLUSAGE_FOG] = "FOG",
- [D3DDECLUSAGE_DEPTH] = "DEPTH",
- [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
-};
-
-static inline unsigned
-sm1_to_nine_declusage(struct sm1_semantic *dcl)
-{
- return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
-}
-
-static void
-sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
- bool tc,
- struct sm1_semantic *dcl)
-{
- BYTE index = dcl->usage_idx;
-
- /* For everything that is not matching to a TGSI_SEMANTIC_****,
- * we match to a TGSI_SEMANTIC_GENERIC with index.
- *
- * The index can be anything UINT16 and usage_idx is BYTE,
- * so we can fit everything. It doesn't matter if indices
- * are close together or low.
- *
- *
- * POSITION >= 1: 10 * index + 7
- * COLOR >= 2: 10 * (index-1) + 8
- * FOG: 16
- * TEXCOORD[0..15]: index
- * BLENDWEIGHT: 10 * index + 19
- * BLENDINDICES: 10 * index + 20
- * NORMAL: 10 * index + 21
- * TANGENT: 10 * index + 22
- * BINORMAL: 10 * index + 23
- * TESSFACTOR: 10 * index + 24
- */
-
- switch (dcl->usage) {
- case D3DDECLUSAGE_POSITION:
- case D3DDECLUSAGE_POSITIONT:
- case D3DDECLUSAGE_DEPTH:
- if (index == 0) {
- sem->Name = TGSI_SEMANTIC_POSITION;
- sem->Index = 0;
- } else {
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 7;
- }
- break;
- case D3DDECLUSAGE_COLOR:
- if (index < 2) {
- sem->Name = TGSI_SEMANTIC_COLOR;
- sem->Index = index;
- } else {
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * (index-1) + 8;
- }
- break;
- case D3DDECLUSAGE_FOG:
- assert(index == 0);
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 16;
- break;
- case D3DDECLUSAGE_PSIZE:
- assert(index == 0);
- sem->Name = TGSI_SEMANTIC_PSIZE;
- sem->Index = 0;
- break;
- case D3DDECLUSAGE_TEXCOORD:
- assert(index < 16);
- if (index < 8 && tc)
- sem->Name = TGSI_SEMANTIC_TEXCOORD;
- else
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = index;
- break;
- case D3DDECLUSAGE_BLENDWEIGHT:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 19;
- break;
- case D3DDECLUSAGE_BLENDINDICES:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 20;
- break;
- case D3DDECLUSAGE_NORMAL:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 21;
- break;
- case D3DDECLUSAGE_TANGENT:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 22;
- break;
- case D3DDECLUSAGE_BINORMAL:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 23;
- break;
- case D3DDECLUSAGE_TESSFACTOR:
- sem->Name = TGSI_SEMANTIC_GENERIC;
- sem->Index = 10 * index + 24;
- break;
- case D3DDECLUSAGE_SAMPLE:
- sem->Name = TGSI_SEMANTIC_COUNT;
- sem->Index = 0;
- break;
- default:
- unreachable("Invalid DECLUSAGE.");
- break;
- }
-}
-
-#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
-#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
-#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
-#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
-static inline unsigned
-d3dstt_to_tgsi_tex(BYTE sampler_type)
-{
- switch (sampler_type) {
- case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
- case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
- case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
- case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
- default:
- assert(0);
- return TGSI_TEXTURE_UNKNOWN;
- }
-}
-static inline unsigned
-d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
-{
- switch (sampler_type) {
- case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
- case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
- case NINED3DSTT_VOLUME:
- case NINED3DSTT_CUBE:
- default:
- assert(0);
- return TGSI_TEXTURE_UNKNOWN;
- }
-}
-static inline unsigned
-ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
-{
- bool shadow = !!(info->sampler_mask_shadow & (1 << stage));
- switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
- case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
- case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
- case 3: return TGSI_TEXTURE_3D;
- default:
- return TGSI_TEXTURE_CUBE;
- }
-}
-
-static const char *
-sm1_sampler_type_name(BYTE sampler_type)
-{
- switch (sampler_type) {
- case NINED3DSTT_1D: return "1D";
- case NINED3DSTT_2D: return "2D";
- case NINED3DSTT_VOLUME: return "VOLUME";
- case NINED3DSTT_CUBE: return "CUBE";
- default:
- return "(D3DSTT_?)";
- }
-}
-
-static inline unsigned
-nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
-{
- switch (sem->Name) {
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_NORMAL:
- return TGSI_INTERPOLATE_LINEAR;
- case TGSI_SEMANTIC_BCOLOR:
- case TGSI_SEMANTIC_COLOR:
- return TGSI_INTERPOLATE_COLOR;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_TEXCOORD:
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_CLIPVERTEX:
- return TGSI_INTERPOLATE_PERSPECTIVE;
- case TGSI_SEMANTIC_EDGEFLAG:
- case TGSI_SEMANTIC_FACE:
- case TGSI_SEMANTIC_INSTANCEID:
- case TGSI_SEMANTIC_PCOORD:
- case TGSI_SEMANTIC_PRIMID:
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_VERTEXID:
- return TGSI_INTERPOLATE_CONSTANT;
- default:
- assert(0);
- return TGSI_INTERPOLATE_CONSTANT;
- }
-}
-
-DECL_SPECIAL(DCL)
-{
- struct ureg_program *ureg = tx->ureg;
- bool is_input;
- bool is_sampler;
- struct tgsi_declaration_semantic tgsi;
- struct sm1_semantic sem;
- sm1_read_semantic(tx, &sem);
-
- is_input = sem.reg.file == D3DSPR_INPUT;
- is_sampler =
- sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
-
- DUMP("DCL ");
- sm1_dump_dst_param(&sem.reg);
- if (is_sampler)
- DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
- else
- if (tx->version.major >= 3)
- DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
- else
- if (sem.usage | sem.usage_idx)
- DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
- else
- DUMP("\n");
-
- if (is_sampler) {
- const unsigned m = 1 << sem.reg.idx;
- ureg_DECL_sampler(ureg, sem.reg.idx);
- tx->info->sampler_mask |= m;
- tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
- d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
- d3dstt_to_tgsi_tex(sem.sampler_type);
- return D3D_OK;
- }
-
- sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
- if (IS_VS) {
- if (is_input) {
- /* linkage outside of shader with vertex declaration */
- ureg_DECL_vs_input(ureg, sem.reg.idx);
- assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
- tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
- tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
- /* NOTE: preserving order in case of indirect access */
- } else
- if (tx->version.major >= 3) {
- /* SM2 output semantic determined by file */
- assert(sem.reg.mask != 0);
- if (sem.usage == D3DDECLUSAGE_POSITIONT)
- tx->info->position_t = true;
- assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
- assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
- tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
- ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
- nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
- if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
- sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
- tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: probably not good declare it twice */
- tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
- tx->regs.oPos = tx->regs.o[sem.reg.idx];
- }
-
- if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
- tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
- tx->regs.oPts = tx->regs.o[sem.reg.idx];
- }
- }
- } else {
- if (is_input && tx->version.major >= 3) {
- unsigned interp_flag;
- unsigned interp_location = 0;
- /* SM3 only, SM2 input semantic determined by file */
- assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
- assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
- /* PositionT and tessfactor forbidden */
- if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
- return D3DERR_INVALIDCALL;
-
- if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
- /* Position0 is forbidden (likely because vPos already does that) */
- if (sem.usage == D3DDECLUSAGE_POSITION)
- return D3DERR_INVALIDCALL;
- /* Following code is for depth */
- tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
- return D3D_OK;
- }
-
- if (sem.reg.mod & NINED3DSPDM_CENTROID ||
- (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
- interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
- interp_flag = nine_tgsi_to_interp_mode(&tgsi);
- /* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
- * and those who support it do the same replacement we do */
- if (interp_flag == TGSI_INTERPOLATE_COLOR)
- interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
-
- tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
- ureg, tgsi.Name, tgsi.Index,
- interp_flag,
- interp_location, 0, 1);
- } else
- if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
- /* FragColor or FragDepth */
- assert(sem.reg.mask != 0);
- ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
- 0, 1);
- }
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(DEF)
-{
- tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
- return D3D_OK;
-}
-
-DECL_SPECIAL(DEFB)
-{
- tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
- return D3D_OK;
-}
-
-DECL_SPECIAL(DEFI)
-{
- tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
- return D3D_OK;
-}
-
-DECL_SPECIAL(POW)
-{
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[2] = {
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[1])
- };
- /* Anything^0 is 1, including 0^0.
- * Assume mul_zero_wins drivers already have
- * this behaviour. Emulate for the others. */
- if (tx->mul_zero_wins) {
- ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
- } else {
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
- ureg_CMP(tx->ureg, dst,
- ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
- tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
- }
- return D3D_OK;
-}
-
-/* Tests results on Win 10:
- * NV (NVIDIA GeForce GT 635M)
- * AMD (AMD Radeon HD 7730M)
- * INTEL (Intel(R) HD Graphics 4000)
- * PS2 and PS3:
- * RCP and RSQ can generate inf on NV and AMD.
- * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
- * NV: log not clamped
- * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
- * INTEL: log(0) is -FLT_MAX and log(inf) is 127
- * All devices have 0*anything = 0
- *
- * INTEL VS2 and VS3: same behaviour.
- * Some differences VS2 and VS3 for constants defined with inf/NaN.
- * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
- * VS2 seems to clamp to zero (may be test failure).
- * AMD VS2: unknown, VS3: very likely behaviour of PS3
- * NV VS2 and VS3: very likely behaviour of PS3
- * For both, Inf in VS becomes NaN is PS
- * "Very likely" because the test was less extensive.
- *
- * Thus all clamping can be removed for shaders 2 and 3,
- * as long as 0*anything = 0.
- * Else clamps to enforce 0*anything = 0 (anything being then
- * neither inf or NaN, the user being unlikely to pass them
- * as constant).
- * The status for VS1 and PS1 is unknown.
- */
-
-DECL_SPECIAL(RCP)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
- ureg_RCP(ureg, tmp, src);
- if (!tx->mul_zero_wins) {
- /* FLT_MAX has issues with Rayman */
- ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
- ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(RSQ)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
- ureg_RSQ(ureg, tmp, ureg_abs(src));
- if (!tx->mul_zero_wins)
- ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
- return D3D_OK;
-}
-
-DECL_SPECIAL(LOG)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- ureg_LG2(ureg, tmp, ureg_abs(src));
- if (tx->mul_zero_wins) {
- ureg_MOV(ureg, dst, tx_src_scalar(tmp));
- } else {
- ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(LIT)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst tmp = tx_scratch(tx);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- ureg_LIT(ureg, tmp, src);
- /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
- * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
- * it 0^0 if src.w=0, which value is driver dependent. */
- ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
- ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
- ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
- ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
- return D3D_OK;
-}
-
-DECL_SPECIAL(NRM)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- struct ureg_src nrm = tx_src_scalar(tmp);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- ureg_DP3(ureg, tmp, src, src);
- ureg_RSQ(ureg, tmp, nrm);
- if (!tx->mul_zero_wins)
- ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
- ureg_MUL(ureg, dst, src, nrm);
- return D3D_OK;
-}
-
-DECL_SPECIAL(DP2ADD)
-{
- struct ureg_dst tmp = tx_scratch_scalar(tx);
- struct ureg_src dp2 = tx_src_scalar(tmp);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[3];
- int i;
- for (i = 0; i < 3; ++i)
- src[i] = tx_src_param(tx, &tx->insn.src[i]);
- assert_replicate_swizzle(&src[2]);
-
- ureg_DP2(tx->ureg, tmp, src[0], src[1]);
- ureg_ADD(tx->ureg, dst, src[2], dp2);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXCOORD)
-{
- struct ureg_program *ureg = tx->ureg;
- const unsigned s = tx->insn.dst[0].idx;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
-
- tx_texcoord_alloc(tx, s);
- ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
- ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXCOORD_ps14)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
-
- assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
-
- ureg_MOV(ureg, dst, src);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXKILL)
-{
- struct ureg_src reg;
-
- if (tx->version.major > 1 || tx->version.minor > 3) {
- reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
- } else {
- tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
- reg = tx->regs.vT[tx->insn.dst[0].idx];
- }
- if (tx->version.major < 2)
- reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
- ureg_KILL_IF(tx->ureg, reg);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXBEM)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_dst tmp, tmp2, texcoord;
- struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
- struct ureg_src bumpenvlscale, bumpenvloffset;
- const int m = tx->insn.dst[0].idx;
-
- assert(tx->version.major == 1);
-
- sample = ureg_DECL_sampler(ureg, m);
- tx->info->sampler_mask |= 1 << m;
-
- tx_texcoord_alloc(tx, m);
-
- tmp = tx_scratch(tx);
- tmp2 = tx_scratch(tx);
- texcoord = tx_scratch(tx);
- /*
- * Bump-env-matrix:
- * 00 is X
- * 01 is Y
- * 10 is Z
- * 11 is W
- */
- c8m = nine_special_constant_src(tx, m);
- c16m2 = nine_special_constant_src(tx, 8+m/2);
-
- m00 = NINE_APPLY_SWIZZLE(c8m, X);
- m01 = NINE_APPLY_SWIZZLE(c8m, Y);
- m10 = NINE_APPLY_SWIZZLE(c8m, Z);
- m11 = NINE_APPLY_SWIZZLE(c8m, W);
-
- /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
- if (m % 2 == 0) {
- bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
- bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
- } else {
- bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
- bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
- }
-
- apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
-
- /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
- NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
- /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
- NINE_APPLY_SWIZZLE(src, Y),
- NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
-
- /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
- NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
- /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
- NINE_APPLY_SWIZZLE(src, Y),
- NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
-
- /* Now the texture coordinates are in tmp.xy */
-
- if (tx->insn.opcode == D3DSIO_TEXBEM) {
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
- } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
- /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
- ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
- ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
- bumpenvlscale, bumpenvloffset);
- ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
- }
-
- tx->info->bumpenvmat_needed = 1;
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXREG2AR)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src sample;
- const int m = tx->insn.dst[0].idx;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- sample = ureg_DECL_sampler(ureg, m);
- tx->info->sampler_mask |= 1 << m;
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXREG2GB)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src sample;
- const int m = tx->insn.dst[0].idx;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- sample = ureg_DECL_sampler(ureg, m);
- tx->info->sampler_mask |= 1 << m;
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXM3x2PAD)
-{
- return D3D_OK; /* this is just padding */
-}
-
-DECL_SPECIAL(TEXM3x2TEX)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src sample;
- const int m = tx->insn.dst[0].idx - 1;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
- tx_texcoord_alloc(tx, m+1);
-
- /* performs the matrix multiplication */
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
-
- sample = ureg_DECL_sampler(ureg, m + 1);
- tx->info->sampler_mask |= 1 << (m + 1);
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXM3x3PAD)
-{
- return D3D_OK; /* this is just padding */
-}
-
-DECL_SPECIAL(TEXM3x3SPEC)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
- struct ureg_src sample;
- struct ureg_dst tmp;
- const int m = tx->insn.dst[0].idx - 2;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
- tx_texcoord_alloc(tx, m+1);
- tx_texcoord_alloc(tx, m+2);
-
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
-
- sample = ureg_DECL_sampler(ureg, m + 2);
- tx->info->sampler_mask |= 1 << (m + 2);
- tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
-
- /* At this step, dst = N = (u', w', z').
- * We want dst to be the texture sampled at (u'', w'', z''), with
- * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
- ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
- /* at this step tmp.x = 1/N.N */
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
- /* at this step tmp.y = N.E */
- ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
- /* at this step tmp.x = N.E/N.N */
- ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
- ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
- /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
- ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXREG2RGB)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src sample;
- const int m = tx->insn.dst[0].idx;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- sample = ureg_DECL_sampler(ureg, m);
- tx->info->sampler_mask |= 1 << m;
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXDP3TEX)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_dst tmp;
- struct ureg_src sample;
- const int m = tx->insn.dst[0].idx;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
-
- tmp = tx_scratch(tx);
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
- ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
-
- sample = ureg_DECL_sampler(ureg, m);
- tx->info->sampler_mask |= 1 << m;
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXM3x2DEPTH)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_dst tmp;
- const int m = tx->insn.dst[0].idx - 1;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
- tx_texcoord_alloc(tx, m+1);
-
- tmp = tx_scratch(tx);
-
- /* performs the matrix multiplication */
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
-
- ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
- /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
- ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
- /* res = 'w' == 0 ? 1.0 : z/w */
- ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
- ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
- /* replace the depth for depth testing with the result */
- tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
- TGSI_WRITEMASK_Z, 0, 1);
- ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
- /* note that we write nothing to the destination, since it's disallowed to use it afterward */
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXDP3)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- const int m = tx->insn.dst[0].idx;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
-
- ureg_DP3(ureg, dst, tx->regs.vT[m], src);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXM3x3)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
- struct ureg_src sample;
- struct ureg_dst E, tmp;
- const int m = tx->insn.dst[0].idx - 2;
- ASSERTED const int n = tx->insn.src[0].idx;
- assert(m >= 0 && m > n);
-
- tx_texcoord_alloc(tx, m);
- tx_texcoord_alloc(tx, m+1);
- tx_texcoord_alloc(tx, m+2);
-
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
- ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
-
- switch (tx->insn.opcode) {
- case D3DSIO_TEXM3x3:
- ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
- break;
- case D3DSIO_TEXM3x3TEX:
- sample = ureg_DECL_sampler(ureg, m + 2);
- tx->info->sampler_mask |= 1 << (m + 2);
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
- break;
- case D3DSIO_TEXM3x3VSPEC:
- sample = ureg_DECL_sampler(ureg, m + 2);
- tx->info->sampler_mask |= 1 << (m + 2);
- E = tx_scratch(tx);
- tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
- ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
- ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
- ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
- /* At this step, dst = N = (u', w', z').
- * We want dst to be the texture sampled at (u'', w'', z''), with
- * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
- ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
- /* at this step tmp.x = 1/N.N */
- ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
- /* at this step tmp.y = N.E */
- ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
- /* at this step tmp.x = N.E/N.N */
- ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
- ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
- /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
- ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
- ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
- break;
- default:
- return D3DERR_INVALIDCALL;
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXDEPTH)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst r5;
- struct ureg_src r5r, r5g;
-
- assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
-
- /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
- * r5 won't be used afterward, thus we can use r5.ba */
- r5 = tx->regs.r[5];
- r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
- r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
-
- ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
- ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
- /* r5.r = r/g */
- ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
- r5r, ureg_imm1f(ureg, 1.0f));
- /* replace the depth for depth testing with the result */
- tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
- TGSI_WRITEMASK_Z, 0, 1);
- ureg_MOV(ureg, tx->regs.oDepth, r5r);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(BEM)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
- struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
- struct ureg_src m00, m01, m10, m11, c8m;
- const int m = tx->insn.dst[0].idx;
- struct ureg_dst tmp = tx_scratch(tx);
- /*
- * Bump-env-matrix:
- * 00 is X
- * 01 is Y
- * 10 is Z
- * 11 is W
- */
- c8m = nine_special_constant_src(tx, m);
- m00 = NINE_APPLY_SWIZZLE(c8m, X);
- m01 = NINE_APPLY_SWIZZLE(c8m, Y);
- m10 = NINE_APPLY_SWIZZLE(c8m, Z);
- m11 = NINE_APPLY_SWIZZLE(c8m, W);
- /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
- NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
- /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
- NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
-
- /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
- NINE_APPLY_SWIZZLE(src1, X), src0);
- /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
- ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
- NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
- ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
-
- tx->info->bumpenvmat_needed = 1;
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXLD)
-{
- struct ureg_program *ureg = tx->ureg;
- unsigned target;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[2] = {
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[1])
- };
- assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
- target = tx->sampler_targets[tx->insn.src[1].idx];
-
- if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
- return D3D_OK;
-
- switch (tx->insn.flags) {
- case 0:
- ureg_TEX(ureg, dst, target, src[0], src[1]);
- break;
- case NINED3DSI_TEXLD_PROJECT:
- ureg_TXP(ureg, dst, target, src[0], src[1]);
- break;
- case NINED3DSI_TEXLD_BIAS:
- ureg_TXB(ureg, dst, target, src[0], src[1]);
- break;
- default:
- assert(0);
- return D3DERR_INVALIDCALL;
- }
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXLD_14)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- const unsigned s = tx->insn.dst[0].idx;
- const unsigned t = ps1x_sampler_type(tx->info, s);
-
- tx->info->sampler_mask |= 1 << s;
- ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEX)
-{
- struct ureg_program *ureg = tx->ureg;
- const unsigned s = tx->insn.dst[0].idx;
- const unsigned t = ps1x_sampler_type(tx->info, s);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[2];
-
- tx_texcoord_alloc(tx, s);
-
- src[0] = tx->regs.vT[s];
- src[1] = ureg_DECL_sampler(ureg, s);
- tx->info->sampler_mask |= 1 << s;
-
- TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
-
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXLDD)
-{
- unsigned target;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[4] = {
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[1]),
- tx_src_param(tx, &tx->insn.src[2]),
- tx_src_param(tx, &tx->insn.src[3])
- };
- assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
- target = tx->sampler_targets[tx->insn.src[1].idx];
-
- if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
- return D3D_OK;
-
- ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
- return D3D_OK;
-}
-
-DECL_SPECIAL(TEXLDL)
-{
- unsigned target;
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[2] = {
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[1])
- };
- assert(tx->insn.src[1].idx >= 0 &&
- tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
- target = tx->sampler_targets[tx->insn.src[1].idx];
-
- if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
- return D3D_OK;
-
- ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
- return D3D_OK;
-}
-
-DECL_SPECIAL(SETP)
-{
- const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
- struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
- struct ureg_src src[2] = {
- tx_src_param(tx, &tx->insn.src[0]),
- tx_src_param(tx, &tx->insn.src[1])
- };
- ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
- return D3D_OK;
-}
-
-DECL_SPECIAL(BREAKP)
-{
- struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
- ureg_IF(tx->ureg, src, tx_cond(tx));
- ureg_BRK(tx->ureg);
- tx_endcond(tx);
- ureg_ENDIF(tx->ureg);
- return D3D_OK;
-}
-
-DECL_SPECIAL(PHASE)
-{
- return D3D_OK; /* we don't care about phase */
-}
-
-DECL_SPECIAL(COMMENT)
-{
- return D3D_OK; /* nothing to do */
-}
-
-
-#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
- { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
-
-static const struct sm1_op_info inst_table[] =
-{
- _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
- _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
- _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
- _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
- _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
- _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
- _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
- _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
- _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
- _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
- _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
- _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
- _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
- _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
- _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
- _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
- _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
- _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
- _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
- _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
-
- _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
- _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
- _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
- _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
- _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
-
- _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
- _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
- _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
- _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
- _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
- _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
-
- _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
-
- _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
- _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
- _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
- _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
- _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
-
- _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
- _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
-
- /* More flow control */
- _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
- _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
- _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
- _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
- _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
- _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
- _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
- _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
- /* we don't write to the address register, but a normal register (copied
- * when needed to the address register), thus we don't use ARR */
- _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
-
- _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
- _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
-
- _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
- _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
- _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
- _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
- _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
- _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
- _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
- _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
- _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
- _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
- _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
- _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
- _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
- _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
- _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
- _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
-
- _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
- _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
- _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
- _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
-
- _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
-
- /* More tex stuff */
- _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
- _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
- _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
- _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
- _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
- _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
-
- /* Misc */
- _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
- _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
- _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
- _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
- _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
- _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
- _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
- _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
- _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
-};
-
-static const struct sm1_op_info inst_phase =
- _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
-
-static const struct sm1_op_info inst_comment =
- _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
-
-static void
-create_op_info_map(struct shader_translator *tx)
-{
- const unsigned version = (tx->version.major << 8) | tx->version.minor;
- unsigned i;
-
- for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
- tx->op_info_map[i] = -1;
-
- if (tx->processor == PIPE_SHADER_VERTEX) {
- for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
- assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
- if (inst_table[i].vert_version.min <= version &&
- inst_table[i].vert_version.max >= version)
- tx->op_info_map[inst_table[i].sio] = i;
- }
- } else {
- for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
- assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
- if (inst_table[i].frag_version.min <= version &&
- inst_table[i].frag_version.max >= version)
- tx->op_info_map[inst_table[i].sio] = i;
- }
- }
-}
-
-static inline HRESULT
-NineTranslateInstruction_Generic(struct shader_translator *tx)
-{
- struct ureg_dst dst[1];
- struct ureg_src src[4];
- unsigned i;
-
- for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
- dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
- for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
- src[i] = tx_src_param(tx, &tx->insn.src[i]);
-
- ureg_insn(tx->ureg, tx->insn.info->opcode,
- dst, tx->insn.ndst,
- src, tx->insn.nsrc, 0);
- return D3D_OK;
-}
-
-static inline DWORD
-TOKEN_PEEK(struct shader_translator *tx)
-{
- return *(tx->parse);
-}
-
-static inline DWORD
-TOKEN_NEXT(struct shader_translator *tx)
-{
- return *(tx->parse)++;
-}
-
-static inline void
-TOKEN_JUMP(struct shader_translator *tx)
-{
- if (tx->parse_next && tx->parse != tx->parse_next) {
- WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
- tx->parse = tx->parse_next;
- }
-}
-
-static inline bool
-sm1_parse_eof(struct shader_translator *tx)
-{
- return TOKEN_PEEK(tx) == NINED3DSP_END;
-}
-
-static void
-sm1_read_version(struct shader_translator *tx)
-{
- const DWORD tok = TOKEN_NEXT(tx);
-
- tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
- tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
-
- switch (tok >> 16) {
- case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
- case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
- default:
- DBG("Invalid shader type: %x\n", tok);
- tx->processor = ~0;
- break;
- }
-}
-
-/* This is just to check if we parsed the instruction properly. */
-static void
-sm1_parse_get_skip(struct shader_translator *tx)
-{
- const DWORD tok = TOKEN_PEEK(tx);
-
- if (tx->version.major >= 2) {
- tx->parse_next = tx->parse + 1 /* this */ +
- ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
- } else {
- tx->parse_next = NULL; /* TODO: determine from param count */
- }
-}
-
-static void
-sm1_print_comment(const char *comment, UINT size)
-{
- if (!size)
- return;
- /* TODO */
-}
-
-static void
-sm1_parse_comments(struct shader_translator *tx, BOOL print)
-{
- DWORD tok = TOKEN_PEEK(tx);
-
- while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
- {
- const char *comment = "";
- UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
- tx->parse += size + 1;
-
- if (print)
- sm1_print_comment(comment, size);
-
- tok = TOKEN_PEEK(tx);
- }
-}
-
-static void
-sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
-{
- *reg = TOKEN_NEXT(tx);
-
- if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
- {
- if (tx->version.major < 2)
- *rel = (1 << 31) |
- ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
- ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
- D3DSP_NOSWIZZLE;
- else
- *rel = TOKEN_NEXT(tx);
- }
-}
-
-static void
-sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
-{
- int8_t shift;
- dst->file =
- (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
- (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
- dst->type = TGSI_RETURN_TYPE_FLOAT;
- dst->idx = tok & D3DSP_REGNUM_MASK;
- dst->rel = NULL;
- dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
- dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
- shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
- dst->shift = (shift & 0x7) - (shift & 0x8);
-}
-
-static void
-sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
-{
- src->file =
- ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
- ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
- src->type = TGSI_RETURN_TYPE_FLOAT;
- src->idx = tok & D3DSP_REGNUM_MASK;
- src->rel = NULL;
- src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
- src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
-
- switch (src->file) {
- case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
- case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
- case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
- default:
- break;
- }
-}
-
-static void
-sm1_parse_immediate(struct shader_translator *tx,
- struct sm1_src_param *imm)
-{
- imm->file = NINED3DSPR_IMMEDIATE;
- imm->idx = INT_MIN;
- imm->rel = NULL;
- imm->swizzle = NINED3DSP_NOSWIZZLE;
- imm->mod = 0;
- switch (tx->insn.opcode) {
- case D3DSIO_DEF:
- imm->type = NINED3DSPTYPE_FLOAT4;
- memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
- tx->parse += 4;
- break;
- case D3DSIO_DEFI:
- imm->type = NINED3DSPTYPE_INT4;
- memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
- tx->parse += 4;
- break;
- case D3DSIO_DEFB:
- imm->type = NINED3DSPTYPE_BOOL;
- memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
- tx->parse += 1;
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static void
-sm1_read_dst_param(struct shader_translator *tx,
- struct sm1_dst_param *dst,
- struct sm1_src_param *rel)
-{
- DWORD tok_dst, tok_rel = 0;
-
- sm1_parse_get_param(tx, &tok_dst, &tok_rel);
- sm1_parse_dst_param(dst, tok_dst);
- if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
- sm1_parse_src_param(rel, tok_rel);
- dst->rel = rel;
- }
-}
-
-static void
-sm1_read_src_param(struct shader_translator *tx,
- struct sm1_src_param *src,
- struct sm1_src_param *rel)
-{
- DWORD tok_src, tok_rel = 0;
-
- sm1_parse_get_param(tx, &tok_src, &tok_rel);
- sm1_parse_src_param(src, tok_src);
- if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
- assert(rel);
- sm1_parse_src_param(rel, tok_rel);
- src->rel = rel;
- }
-}
-
-static void
-sm1_read_semantic(struct shader_translator *tx,
- struct sm1_semantic *sem)
-{
- const DWORD tok_usg = TOKEN_NEXT(tx);
- const DWORD tok_dst = TOKEN_NEXT(tx);
-
- sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
- sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
- sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
-
- sm1_parse_dst_param(&sem->reg, tok_dst);
-}
-
-static void
-sm1_parse_instruction(struct shader_translator *tx)
-{
- struct sm1_instruction *insn = &tx->insn;
- HRESULT hr;
- DWORD tok;
- const struct sm1_op_info *info = NULL;
- unsigned i;
-
- sm1_parse_comments(tx, true);
- sm1_parse_get_skip(tx);
-
- tok = TOKEN_NEXT(tx);
-
- insn->opcode = tok & D3DSI_OPCODE_MASK;
- insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
- insn->coissue = !!(tok & D3DSI_COISSUE);
- insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
-
- if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
- int k = tx->op_info_map[insn->opcode];
- if (k >= 0) {
- assert(k < ARRAY_SIZE(inst_table));
- info = &inst_table[k];
- }
- } else {
- if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
- if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
- }
- if (!info) {
- DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
- TOKEN_JUMP(tx);
- return;
- }
- insn->info = info;
- insn->ndst = info->ndst;
- insn->nsrc = info->nsrc;
-
- /* check version */
- {
- unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
- unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
- unsigned ver = (tx->version.major << 8) | tx->version.minor;
- if (ver < min || ver > max) {
- DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
- min, ver, max);
- return;
- }
- }
-
- for (i = 0; i < insn->ndst; ++i)
- sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
- if (insn->predicated)
- sm1_read_src_param(tx, &insn->pred, NULL);
- for (i = 0; i < insn->nsrc; ++i)
- sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
-
- /* parse here so we can dump them before processing */
- if (insn->opcode == D3DSIO_DEF ||
- insn->opcode == D3DSIO_DEFI ||
- insn->opcode == D3DSIO_DEFB)
- sm1_parse_immediate(tx, &tx->insn.src[0]);
-
- sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
- sm1_instruction_check(insn);
-
- if (insn->predicated) {
- tx->predicated_activated = true;
- if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
- tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
- tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
- }
- }
-
- if (info->handler)
- hr = info->handler(tx);
- else
- hr = NineTranslateInstruction_Generic(tx);
- tx_apply_dst0_modifiers(tx);
-
- if (insn->predicated) {
- tx->predicated_activated = false;
- /* TODO: predicate might be allowed on outputs,
- * which cannot be src. Workaround it. */
- ureg_CMP(tx->ureg, tx->regs.predicate_dst,
- ureg_negate(tx_src_param(tx, &insn->pred)),
- ureg_src(tx->regs.predicate_tmp),
- ureg_src(tx->regs.predicate_dst));
- }
-
- if (hr != D3D_OK)
- tx->failure = true;
- tx->num_scratch = 0; /* reset */
-
- TOKEN_JUMP(tx);
-}
-
-#define GET_CAP(n) screen->caps.n
-#define GET_SHADER_CAP(n) screen->shader_caps[info->type].n
-
-static HRESULT
-tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
-{
- unsigned i;
-
- memset(tx, 0, sizeof(*tx));
-
- tx->info = info;
-
- tx->byte_code = info->byte_code;
- tx->parse = info->byte_code;
-
- for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
- info->input_map[i] = NINE_DECLUSAGE_NONE;
- info->num_inputs = 0;
-
- info->position_t = false;
- info->point_size = false;
-
- memset(tx->slots_used, 0, sizeof(tx->slots_used));
- memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
- memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
-
- tx->info->const_float_slots = 0;
- tx->info->const_int_slots = 0;
- tx->info->const_bool_slots = 0;
-
- info->sampler_mask = 0x0;
- info->rt_mask = 0x0;
-
- info->lconstf.data = NULL;
- info->lconstf.ranges = NULL;
-
- info->bumpenvmat_needed = 0;
-
- for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
- tx->regs.rL[i] = ureg_dst_undef();
- }
- tx->regs.address = ureg_dst_undef();
- tx->regs.a0 = ureg_dst_undef();
- tx->regs.p = ureg_dst_undef();
- tx->regs.oDepth = ureg_dst_undef();
- tx->regs.vPos = ureg_src_undef();
- tx->regs.vFace = ureg_src_undef();
- for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
- tx->regs.o[i] = ureg_dst_undef();
- for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
- tx->regs.oCol[i] = ureg_dst_undef();
- for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
- tx->regs.vC[i] = ureg_src_undef();
- for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
- tx->regs.vT[i] = ureg_src_undef();
-
- sm1_read_version(tx);
-
- info->version = (tx->version.major << 4) | tx->version.minor;
-
- tx->num_outputs = 0;
-
- create_op_info_map(tx);
-
- tx->ureg = ureg_create(info->type);
- if (!tx->ureg) {
- return E_OUTOFMEMORY;
- }
-
- tx->native_integers = GET_SHADER_CAP(integers);
- tx->inline_subroutines = !GET_SHADER_CAP(subroutines);
- tx->want_texcoord = GET_CAP(tgsi_texcoord);
- tx->shift_wpos = !GET_CAP(fs_coord_pixel_center_integer);
- tx->texcoord_sn = tx->want_texcoord ?
- TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
- tx->wpos_is_sysval = GET_CAP(fs_position_is_sysval);
- tx->face_is_sysval_integer = GET_CAP(fs_face_is_integer_sysval);
- tx->no_vs_window_space = !GET_CAP(vs_window_space_position);
- tx->mul_zero_wins = GET_CAP(legacy_math_rules);
-
- if (info->emulate_features) {
- tx->shift_wpos = true;
- tx->no_vs_window_space = true;
- tx->mul_zero_wins = false;
- }
-
- if (IS_VS) {
- tx->num_constf_allowed = NINE_MAX_CONST_F;
- } else if (tx->version.major < 2) {/* IS_PS v1 */
- tx->num_constf_allowed = 8;
- } else if (tx->version.major == 2) {/* IS_PS v2 */
- tx->num_constf_allowed = 32;
- } else {/* IS_PS v3 */
- tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
- }
-
- if (tx->version.major < 2) {
- tx->num_consti_allowed = 0;
- tx->num_constb_allowed = 0;
- } else {
- tx->num_consti_allowed = NINE_MAX_CONST_I;
- tx->num_constb_allowed = NINE_MAX_CONST_B;
- }
-
- if (info->swvp_on) {
- /* TODO: The values tx->version.major == 1 */
- tx->num_constf_allowed = 8192;
- tx->num_consti_allowed = 2048;
- tx->num_constb_allowed = 2048;
- }
-
- /* VS must always write position. Declare it here to make it the 1st output.
- * (Some drivers like nv50 are buggy and rely on that.)
- */
- if (IS_VS) {
- tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
- } else {
- ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
- if (!tx->shift_wpos)
- ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
- }
-
- if (tx->mul_zero_wins)
- ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
-
- /* Add additional definition of constants */
- if (info->add_constants_defs.c_combination) {
- unsigned i;
-
- assert(info->add_constants_defs.int_const_added);
- assert(info->add_constants_defs.bool_const_added);
- /* We only add constants that are used by the shader
- * and that are not defined in the shader */
- for (i = 0; i < NINE_MAX_CONST_I; ++i) {
- if ((*info->add_constants_defs.int_const_added)[i]) {
- DBG("Defining const i%i : { %i %i %i %i }\n", i,
- info->add_constants_defs.c_combination->const_i[i][0],
- info->add_constants_defs.c_combination->const_i[i][1],
- info->add_constants_defs.c_combination->const_i[i][2],
- info->add_constants_defs.c_combination->const_i[i][3]);
- tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
- }
- }
- for (i = 0; i < NINE_MAX_CONST_B; ++i) {
- if ((*info->add_constants_defs.bool_const_added)[i]) {
- DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
- tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
- }
- }
- }
- return D3D_OK;
-}
-
-static void
-tx_dtor(struct shader_translator *tx)
-{
- if (tx->slot_map)
- FREE(tx->slot_map);
- if (tx->num_inst_labels)
- FREE(tx->inst_labels);
- FREE(tx->lconstf);
- FREE(tx->regs.r);
- FREE(tx);
-}
-
-/* CONST[0].xyz = width/2, -height/2, zmax-zmin
- * CONST[1].xyz = x+width/2, y+height/2, zmin */
-static void
-shader_add_vs_viewport_transform(struct shader_translator *tx)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
- struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
- /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
-
- c0 = ureg_src_dimension(c0, 4);
- c1 = ureg_src_dimension(c1, 4);
- /* TODO: find out when we need to apply the viewport transformation or not.
- * Likely will be XYZ vs XYZRHW in vdecl_out
- * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
- * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
- */
- ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
-}
-
-static void
-shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
-{
- struct ureg_program *ureg = tx->ureg;
- struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
- struct ureg_src fog_vs, fog_color;
- struct ureg_dst fog_factor, depth;
-
- if (!tx->info->fog_enable) {
- ureg_MOV(ureg, dst_col, src_col);
- return;
- }
-
- if (tx->info->fog_mode != D3DFOG_NONE) {
- depth = tx_scratch_scalar(tx);
- if (tx->info->zfog)
- ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
- else /* wfog: use w. position's w contains 1/w */
- ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
- }
-
- fog_color = nine_special_constant_src(tx, 12);
- fog_params = nine_special_constant_src(tx, 13);
- fog_factor = tx_scratch_scalar(tx);
-
- if (tx->info->fog_mode == D3DFOG_LINEAR) {
- fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
- fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
- ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
- ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
- } else if (tx->info->fog_mode == D3DFOG_EXP) {
- fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
- ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
- ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
- ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
- } else if (tx->info->fog_mode == D3DFOG_EXP2) {
- fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
- ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
- ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
- ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
- ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
- } else {
- fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
- TGSI_INTERPOLATE_PERSPECTIVE),
- TGSI_SWIZZLE_X);
- ureg_MOV(ureg, fog_factor, fog_vs);
- }
-
- ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
- tx_src_scalar(fog_factor), src_col, fog_color);
- ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
-}
-
-static void
-shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
-{
- struct ureg_program *ureg = tx->ureg;
- unsigned cmp_op;
- struct ureg_src src[2];
- struct ureg_dst tmp = tx_scratch(tx);
- if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
- return;
- if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
- ureg_KILL(ureg);
- return;
- }
- cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
- src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
- src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
- ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
- ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
-}
-
-static void parse_shader(struct shader_translator *tx)
-{
- struct nine_shader_info *info = tx->info;
-
- while (!sm1_parse_eof(tx) && !tx->failure)
- sm1_parse_instruction(tx);
- tx->parse++; /* for byte_size */
-
- if (tx->failure)
- return;
-
- if (IS_PS) {
- struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
- struct ureg_dst tmp_oCol0;
- if (tx->version.major < 3) {
- tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
- if (tx->version.major < 2) {
- assert(tx->num_temp); /* there must be color output */
- info->rt_mask |= 0x1;
- shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
- } else {
- shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
- }
- } else {
- assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
- tmp_oCol0 = tx->regs.oCol[0];
- }
- shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
- ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
- }
-
- if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
- tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
- ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
- }
-
- if (info->position_t) {
- if (tx->no_vs_window_space) {
- ERR("POSITIONT is not yet implemented for your device.\n");
- } else {
- ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
- }
- }
-
- if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
- struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
- ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
- ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
- info->point_size = true;
- } else if (IS_VS && tx->always_output_pointsize) {
- struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
- ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
- info->point_size = true;
- }
-
- if (IS_VS && tx->info->clip_plane_emulation > 0) {
- struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
- int num_clipdist = ffs(tx->info->clip_plane_emulation);
- int i;
- /* TODO: handle undefined channels of oPos (w is not always written to I think. default is 1) *
- * Note in d3d9 it's not possible to output clipvert, so we don't need to check
- * for its existence */
- clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
- if (num_clipdist >= 5)
- clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
- ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
- for (i = 0; i < num_clipdist; i++) {
- assert(!ureg_dst_is_undef(clipdist[i>>2]));
- if (!(tx->info->clip_plane_emulation & (1 << i)))
- ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(tx->ureg, 0.f));
- else
- ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
- ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
- }
-
- ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
- }
-
- if (info->process_vertices)
- shader_add_vs_viewport_transform(tx);
-
- ureg_END(tx->ureg);
-}
-
-#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
-#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
-#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
-#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
-
-static const struct debug_named_value nine_shader_debug_options[] = {
- { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
- { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
- { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
- { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
- DEBUG_NAMED_VALUE_END /* must be last */
-};
-
-static inline bool
-nine_shader_get_debug_flag(uint64_t flag)
-{
- static uint64_t flags = 0;
- static bool first_run = true;
-
- if (unlikely(first_run)) {
- first_run = false;
- flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
-
- // Check old TGSI dump envvar too
- if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
- flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
- }
- }
-
- return !!(flags & flag);
-}
-
-static void
-nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
- struct pipe_screen *screen)
-{
- struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
-
- if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
- nir_print_shader(nir, stdout);
- }
-
- state->type = PIPE_SHADER_IR_NIR;
- state->tokens = NULL;
- state->ir.nir = nir;
- memset(&state->stream_output, 0, sizeof(state->stream_output));
-}
-
-static void *
-nine_ureg_create_shader(struct ureg_program *ureg,
- struct pipe_context *pipe,
- const struct pipe_stream_output_info *so)
-{
- struct pipe_shader_state state;
- const struct tgsi_token *tgsi_tokens;
- struct pipe_screen *screen = pipe->screen;
-
- tgsi_tokens = ureg_finalize(ureg);
- if (!tgsi_tokens)
- return NULL;
-
- assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
- enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
-
- bool use_nir = true;
-
- /* Allow user to override preferred IR, this is very useful for debugging */
- if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
- use_nir = false;
- if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
- use_nir = false;
-
- DUMP("shader type: %s, selected IR: %s\n",
- shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
- use_nir ? "NIR" : "TGSI");
-
- if (use_nir) {
- nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
- } else {
- pipe_shader_state_from_tgsi(&state, tgsi_tokens);
- }
-
- assert(state.tokens || state.ir.nir);
-
- if (so)
- state.stream_output = *so;
-
- state.report_compile_error = false;
-
- switch (shader_type) {
- case PIPE_SHADER_VERTEX:
- return pipe->create_vs_state(pipe, &state);
- case PIPE_SHADER_FRAGMENT:
- return pipe->create_fs_state(pipe, &state);
- default:
- unreachable("unsupported shader type");
- }
-}
-
-
-void *
-nine_create_shader_with_so_and_destroy(struct ureg_program *p,
- struct pipe_context *pipe,
- const struct pipe_stream_output_info *so)
-{
- void *result = nine_ureg_create_shader(p, pipe, so);
- ureg_destroy(p);
- return result;
-}
-
-HRESULT
-nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
-{
- struct shader_translator *tx;
- HRESULT hr = D3D_OK;
- const unsigned processor = info->type;
- struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
- unsigned *const_ranges = NULL;
-
- user_assert(processor != ~0, D3DERR_INVALIDCALL);
-
- tx = MALLOC_STRUCT(shader_translator);
- if (!tx)
- return E_OUTOFMEMORY;
-
- info->emulate_features = device->driver_caps.shader_emulate_features;
-
- if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
- hr = E_OUTOFMEMORY;
- goto out;
- }
- tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
-
- assert(IS_VS || !info->swvp_on);
-
- if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
- hr = D3DERR_INVALIDCALL;
- DBG("Unsupported shader version: %u.%u !\n",
- tx->version.major, tx->version.minor);
- goto out;
- }
- if (tx->processor != processor) {
- hr = D3DERR_INVALIDCALL;
- DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
- goto out;
- }
- DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
- tx->version.major, tx->version.minor);
-
- parse_shader(tx);
-
- if (tx->failure) {
- /* For VS shaders, we print the warning later,
- * we first try with swvp. */
- if (IS_PS)
- ERR("Encountered buggy shader\n");
- ureg_destroy(tx->ureg);
- hr = D3DERR_INVALIDCALL;
- goto out;
- }
-
- /* Recompile after compacting constant slots if possible */
- if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
- unsigned *slot_map;
- unsigned c;
- int i, j, num_ranges, prev;
-
- DBG("Recompiling shader for constant compaction\n");
- ureg_destroy(tx->ureg);
-
- if (tx->num_inst_labels)
- FREE(tx->inst_labels);
- FREE(tx->lconstf);
- FREE(tx->regs.r);
-
- num_ranges = 0;
- prev = -2;
- for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
- if (tx->slots_used[i]) {
- if (prev != i - 1)
- num_ranges++;
- prev = i;
- }
- }
- slot_map = MALLOC(NINE_MAX_CONST_ALL_VS * sizeof(unsigned));
- const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
- if (!slot_map || !const_ranges) {
- hr = E_OUTOFMEMORY;
- goto out;
- }
- c = 0;
- j = -1;
- prev = -2;
- for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
- if (tx->slots_used[i]) {
- if (prev != i - 1)
- j++;
- /* Initialize first slot of the range */
- if (!const_ranges[2*j+1])
- const_ranges[2*j] = i;
- const_ranges[2*j+1]++;
- prev = i;
- slot_map[i] = c++;
- }
- }
-
- if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
- hr = E_OUTOFMEMORY;
- goto out;
- }
- tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
- tx->slot_map = slot_map;
- parse_shader(tx);
- assert(!tx->failure);
-#if !defined(NDEBUG)
- i = 0;
- j = 0;
- while (const_ranges[i*2+1] != 0) {
- j += const_ranges[i*2+1];
- i++;
- }
- assert(j == tx->num_slots);
-#endif
- }
-
- /* record local constants */
- if (tx->num_lconstf && tx->indirect_const_access) {
- struct nine_range *ranges;
- float *data;
- int *indices;
- unsigned i, k, n;
-
- hr = E_OUTOFMEMORY;
-
- data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
- if (!data)
- goto out;
- info->lconstf.data = data;
-
- indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
- if (!indices)
- goto out;
-
- /* lazy sort, num_lconstf should be small */
- for (n = 0; n < tx->num_lconstf; ++n) {
- for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
- if (tx->lconstf[i].idx < tx->lconstf[k].idx)
- k = i;
- }
- indices[n] = tx->lconstf[k].idx;
- memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
- tx->lconstf[k].idx = INT_MAX;
- }
-
- /* count ranges */
- for (n = 1, i = 1; i < tx->num_lconstf; ++i)
- if (indices[i] != indices[i - 1] + 1)
- ++n;
- ranges = MALLOC(n * sizeof(ranges[0]));
- if (!ranges) {
- FREE(indices);
- goto out;
- }
- info->lconstf.ranges = ranges;
-
- k = 0;
- ranges[k].bgn = indices[0];
- for (i = 1; i < tx->num_lconstf; ++i) {
- if (indices[i] != indices[i - 1] + 1) {
- ranges[k].next = &ranges[k + 1];
- ranges[k].end = indices[i - 1] + 1;
- ++k;
- ranges[k].bgn = indices[i];
- }
- }
- ranges[k].end = indices[i - 1] + 1;
- ranges[k].next = NULL;
- assert(n == (k + 1));
-
- FREE(indices);
- hr = D3D_OK;
- }
-
- /* r500 */
- if (info->const_float_slots > device->max_vs_const_f &&
- (info->const_int_slots || info->const_bool_slots) &&
- !info->swvp_on)
- ERR("Overlapping constant slots. The shader is likely to be buggy\n");
-
-
- if (tx->indirect_const_access) { /* vs only */
- info->const_float_slots = device->max_vs_const_f;
- tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
- }
-
- if (!info->swvp_on) {
- info->const_used_size = sizeof(float[4]) * tx->num_slots;
- if (tx->num_slots)
- ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
- } else {
- ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
- ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
- ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
- ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
- }
-
- if (info->process_vertices)
- ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
-
- if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
- const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
- tgsi_dump(toks, 0);
- ureg_free_tokens(toks);
- }
-
- if (info->process_vertices) {
- NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
- tx->output_info,
- tx->num_outputs,
- &(info->so));
- info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
- } else
- info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
- if (!info->cso) {
- hr = D3DERR_DRIVERINTERNALERROR;
- FREE(info->lconstf.data);
- FREE(info->lconstf.ranges);
- goto out;
- }
-
- info->const_ranges = const_ranges;
- const_ranges = NULL;
- info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
-out:
- if (const_ranges)
- FREE(const_ranges);
- tx_dtor(tx);
- return hr;
-}