<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright © 2020 Google, Inc.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 -->

<isa>

<!--
	Cat7 Instructions:  barrier, cache, sleep, lock/unlock and alias instructions
 -->

<!--
	Common base for the cat7 bitsets that extend it.  Bits 61..63 are fixed
	to 111 and the (ss)/(jp)/(sy) flags sit at bits 44/59/60.  Bits 0..43 are
	don't-care here, and bits 45..58 are left for derived bitsets to define.
 -->
<bitset name="#instruction-cat7" extends="#instruction">
	<pattern low="0"  high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
	<pattern low="32" high="43">xxxxxxxxxxxx</pattern>
	<field   pos="44"          name="SS" type="bool" display="(ss)"/>
	<field   pos="59"          name="JP" type="bool" display="(jp)"/>
	<field   pos="60"          name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
</bitset>

<!--
	Shared layout of the barrier-style instructions (bar, fence): four
	one-bit flags at bits 51..54 that are printed as suffixes after the
	instruction name, in the order .g .l .r .w.
 -->
<bitset name="#instruction-cat7-barrier" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{G}{L}{R}{W}
	</display>
	<!-- Pattern is written MSB first: bit 49 must be 1, the rest of 45..50 is don't-care. -->
	<pattern low="45" high="50">x1xxxx</pattern>
	<field   pos="51"          name="W"  type="bool" display=".w"  />  <!-- write -->
	<field   pos="52"          name="R"  type="bool" display=".r"  />  <!-- read -->
	<field   pos="53"          name="L"  type="bool" display=".l"  />  <!-- local -->
	<field   pos="54"          name="G"  type="bool" display=".g"  />  <!-- global -->
	<!-- Each flag is taken from the same-named member of the instruction's cat7 data. -->
	<encode>
		<map name="W">src->cat7.w</map>
		<map name="R">src->cat7.r</map>
		<map name="L">src->cat7.l</map>
		<map name="G">src->cat7.g</map>
	</encode>
</bitset>

<!-- bar: barrier-layout instruction selected by opcode 0000 in bits 55..58. -->
<bitset name="bar" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0000</pattern>
</bitset>

<!-- fence: barrier-layout instruction selected by opcode 0001 in bits 55..58. -->
<bitset name="fence" extends="#instruction-cat7-barrier">
	<pattern low="55" high="58">0001</pattern>
</bitset>

<!--
	Suffix printed for the one-bit TYPE field of #instruction-cat7-data.
	NOTE(review): the meaning of ".shr" and ".all" is not documented in this file.
 -->
<enum name="#dccln-type">
	<value val="0" display=".shr"/>
	<value val="1" display=".all"/>
</enum>

<!--
	Shared layout of the data cache instructions (dccln, dcinv, dcflu):
	a one-bit TYPE selector at bit 51, printed as a suffix after the name.
 -->
<bitset name="#instruction-cat7-data" extends="#instruction-cat7">
	<display>
		{SY}{SS}{JP}{NAME}{TYPE}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field   pos="51"          name="TYPE" type="#dccln-type"/>
	<!-- Pattern is written MSB first: bit 52 must be 0, bits 53..54 are don't-care. -->
	<pattern low="52" high="54">xx0</pattern>
	<encode>
		<!-- TODO: read handle type -->
		<!-- Until then the encoder always emits TYPE=1, i.e. the ".all" variant. -->
		<map name="TYPE">1</map>
	</encode>
</bitset>

<!-- Values of the one-bit DURATION selector of the "sleep" instruction. -->
<enum name="#sleep-duration">
	<value val="0" display=".s">
		<doc>Short sleep</doc>
	</value>
	<value val="1" display=".l">
		<doc>Long sleep, around 20x longer than short</doc>
	</value>
</enum>

<!--
	sleep: opcode 0010 in bits 55..58, with a one-bit DURATION selector at
	bit 51.  The remaining bits in 45..54 are don't-care.
 -->
<bitset name="sleep" extends="#instruction-cat7">
	<doc>
		Short/Long Sleep
		TODO: how is it different from a bunch of nops?
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}{DURATION}
	</display>
	<pattern low="45" high="50">xxxxxx</pattern>
	<field   pos="51"          name="DURATION" type="#sleep-duration"/>
	<pattern low="52" high="54">xxx</pattern>
	<pattern low="55" high="58">0010</pattern>
	<encode>
		<!-- TODO: read duration -->
		<!-- Until then the encoder always emits DURATION=1, i.e. the ".l" variant. -->
		<map name="DURATION">1</map>
	</encode>
</bitset>

<!-- icinv: opcode 0011 in bits 55..58; bits 45..54 are all don't-care. -->
<bitset name="icinv" extends="#instruction-cat7">
	<doc>
		Seems to be an Instruction Cache Invalidate, supported by the fact
		that it considerably slows shader execution compared to
		data cache instructions.
	</doc>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">xxxxxxxxxx</pattern>
	<pattern low="55" high="58">0011</pattern>
</bitset>

<!-- dccln: data-layout instruction selected by opcode 0100 in bits 55..58. -->
<bitset name="dccln" extends="#instruction-cat7-data">
	<doc>Data (Cache?) Clean</doc>
	<pattern low="55" high="58">0100</pattern>
</bitset>

<!-- dcinv: data-layout instruction selected by opcode 0101 in bits 55..58. -->
<bitset name="dcinv" extends="#instruction-cat7-data">
	<doc>Data (Cache?) Invalidate</doc>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<!-- dcflu: data-layout instruction selected by opcode 0110 in bits 55..58. -->
<bitset name="dcflu" extends="#instruction-cat7-data">
	<doc>Data (Cache?) Flush</doc>
	<pattern low="55" high="58">0110</pattern>
</bitset>

<!--
	ccinv: uses the same opcode bits (0101 in 55..58) as dcinv.  The two are
	told apart by bits 51..54: ccinv fixes them to 1010 (bit 52 set), while
	the data cache layout requires bit 52 to be 0.  Only defined for gen 700
	and later.
 -->
<bitset name="ccinv" extends="#instruction-cat7">
	<doc>
		_Presumably_ invalidates workgroup-wide cache for image/buffer data access.
		So while "fence" is enough to synchronize data access inside a workgroup,
		for cross-workgroup synchronization we have to invalidate that cache.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<!-- Patterns are written MSB first: bit 49 must be 1 in the first one. -->
	<pattern low="45" high="50">x1xxxx</pattern>
	<pattern low="51" high="54">1010</pattern>
	<pattern low="55" high="58">0101</pattern>
</bitset>

<!--
	lock: opcode 0111 in bits 55..58, shared with unlock.  Bits 45..54 are
	fully fixed; lock has bit 51 clear where unlock has it set.  Only defined
	for gen 700 and later.
 -->
<bitset name="lock" extends="#instruction-cat7">
	<doc>
		lock and unlock are seen at the end of compute shaders:
			(sy)(jp)lock;
			unlock;
			end ;
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1000010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<!--
	unlock: opcode 0111 in bits 55..58, shared with lock.  Bits 45..54 are
	fully fixed; unlock has bit 51 set where lock has it clear.  See the doc
	of "lock" for the observed usage.  Only defined for gen 700 and later.
 -->
<bitset name="unlock" extends="#instruction-cat7">
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}
	</display>
	<pattern low="45" high="54">1001010000</pattern>
	<pattern low="55" high="58">0111</pattern>
</bitset>

<!--
	32-bit immediate source operand.  How the bits are interpreted and
	printed depends on the SRC_TYPE parameter supplied by the user of this
	bitset; the overrides below are tried before the default layout.
 -->
<bitset name="#alias-immed-src" size="32">
	<!-- SRC_TYPE 0: low 16 bits shown as a float wrapped in h(...). -->
	<override>
		<expr>
			{SRC_TYPE} == 0 /* b16 */
		</expr>
		<display>
			h({IMMED})
		</display>
		<field name="IMMED" low="0" high="15" type="float"/>
	</override>
	<!-- SRC_TYPE 1: all 32 bits shown as a float wrapped in (...). -->
	<override>
		<expr>
			{SRC_TYPE} == 1 /* b32 */
		</expr>
		<display>
			({IMMED})
		</display>
		<field name="IMMED" low="0" high="31" type="float"/>
	</override>

	<!--
		Default: all 32 bits shown as an unsigned integer.
		NOTE(review): the "alias" bitset declares SRC_TYPE as a one-bit field,
		so this default looks unreachable from there; confirm.
	 -->
	<display>
		{IMMED}
	</display>

	<field name="IMMED" low="0" high="31" type="uint"/>
	<!-- The encoder fills IMMED via extract_reg_uim() on the source register. -->
	<encode type="struct ir3_register *">
		<map name="IMMED">extract_reg_uim(src)</map>
	</encode>
</bitset>

<!--
	11-bit const register source operand.  The register is printed with an
	"h" prefix when the SRC_TYPE parameter is 0 (b16).
 -->
<bitset name="#alias-const-src" size="11">
	<display>
		{HALF}{CONST}
	</display>
	<field name="CONST" low="0" high="10" type="#reg-const"/>
	<!-- HALF occupies no bits; it is computed from SRC_TYPE for display only. -->
	<derived name="HALF" type="bool" display="h">
		<expr>
			({SRC_TYPE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="CONST">src</map>
	</encode>
</bitset>

<!--
	8-bit general purpose register source operand.  The register is printed
	with an "h" prefix when the SRC_TYPE parameter is 0 (b16).
 -->
<bitset name="#alias-gpr-src" size="8">
	<display>
		{HALF}{SRC}
	</display>
	<field name="SRC" low="0" high="7" type="#reg-gpr"/>
	<!-- HALF occupies no bits; it is computed from SRC_TYPE for display only. -->
	<derived name="HALF" type="bool" display="h">
		<expr>
			({SRC_TYPE} == 0) /* b16 */
		</expr>
	</derived>
	<encode type="struct ir3_register *">
		<map name="SRC">src</map>
	</encode>
</bitset>

<!-- Names printed for the SCOPE field of the "alias" instruction. -->
<enum name="#alias-scope">
	<doc>
		TODO: This mapping looks wrong and needs to be tested: values 0 and 2
		both display as "tex" and value 1 is not defined.
	</doc>
	<value val="0" display="tex"/>
	<value val="2" display="tex"/>
	<value val="3" display="rt"/>
	<value val="4" display="mem"/>
</enum>

<!-- Names printed for the SRC_TYPE field of the "alias" instruction. -->
<enum name="#alias-src-type">
	<doc>
		These types are clearly used by the blob.
		However, it seems that there may be f32/f16/i32/i16
		types but they are intertwined with the _scope_ bitfield.
		TODO: Check whether it matters.
	</doc>
	<value val="0" display="b16"/>
	<value val="1" display="b32"/>
</enum>

<!--
	Kind of source operand held by the "alias" instruction.  In that bitset
	value 0 selects the #alias-gpr-src layout, value 1 selects
	#alias-const-src, and any other value falls through to #alias-immed-src.
 -->
<enum name="#alias-src-reg-type">
	<value val="0" display="GPR"/>
	<value val="1" display="CONST"/>
	<value val="2" display="IMMED"/>
</enum>

<!--
	alias: cat7 instruction with opcode 1000 in bits 55..58.  Unlike the other
	cat7 bitsets in this file it extends #instruction directly and declares
	SS/JP/SY and the cat7 pattern itself.
	NOTE(review): presumably this is because #instruction-cat7 marks bits
	0..43 as don't-care while alias needs them for SRC, DST and UNK; confirm.
 -->
<bitset name="alias" extends="#instruction">
	<doc>
		Add an entry to the scope-specific "alias table", when instruction
		from that scope tries to access a source register it would search
		its alias table first.

		This allows to reduce the amount of data passed around when reading
		immediates/constants and reduce register pressure. In addition,
		the alias table could be populated in the preamble further reducing
		the amount of instructions being run.

		Used like this:
			alias.tex.b32.1 r40.x, (-1.456763);
			alias.tex.b32.0 r40.y, (0.056702);
			gather4g.s2en.mode6.base0 (f32)(xyzw)r0.x, r40.x, 1;
		Or this:
			alias.tex.b32.0 r2.y, c1.w;
			isam.s2en.mode6.base0.1d (f32)(xyzw)r46.z, r2.y, 0;
			(sy)stib.f32.2d.4.mode4.base0 r46.z, r2.y, 1;

		Notice the lack of nops between alias and the instruction
		that uses it.
	</doc>
	<gen min="700"/>
	<display>
		{SY}{SS}{JP}{NAME}.{SCOPE}.{SRC_TYPE}.{UNK} {DST}, {SRC}
	</display>

	<!--
		The layout of the low 32 bits depends on SRC_REG_TYPE.  The overrides
		are checked first; bits above the narrower register fields must be zero.
	 -->
	<!-- SRC_REG_TYPE 0 (GPR): 8-bit register in bits 0..7. -->
	<override>
		<expr>{SRC_REG_TYPE} == 0</expr>
		<field name="SRC" low="0" high="7" type="#alias-gpr-src">
			<param name="SRC_TYPE"/>
		</field>
		<pattern low="8" high="31">000000000000000000000000</pattern>
	</override>
	<!-- SRC_REG_TYPE 1 (CONST): 11-bit const register in bits 0..10. -->
	<override>
		<expr>{SRC_REG_TYPE} == 1</expr>
		<field name="SRC" low="0" high="10" type="#alias-const-src">
			<param name="SRC_TYPE"/>
		</field>
		<pattern low="11" high="31">000000000000000000000</pattern>
	</override>

	<!-- Default (any other SRC_REG_TYPE): 32-bit immediate in bits 0..31. -->
	<field name="SRC" low="0" high="31" type="#alias-immed-src">
		<param name="SRC_TYPE"/>
	</field>
	<field   low="32" high="39" name="DST" type="#reg-gpr"/>
	<field   low="40" high="43" name="UNK" type="uint"/>
	<field   pos="44"           name="SS" type="bool" display="(ss)"/>
	<pattern low="45" high="46">xx</pattern>
	<field   low="47" high="49" name="SCOPE" type="#alias-scope"/>
	<field   low="50" high="50" name="SRC_TYPE" type="#alias-src-type"/>
	<field   low="51" high="52" name="SRC_REG_TYPE" type="#alias-src-reg-type"/>
	<!-- Pattern is written MSB first: bit 54 must be 1, bit 53 is don't-care. -->
	<pattern low="53" high="54">1x</pattern>
	<pattern low="55" high="58">1000</pattern> <!-- OPC -->
	<field   pos="59"           name="JP" type="bool" display="(jp)"/>
	<field   pos="60"           name="SY" type="bool" display="(sy)"/>
	<pattern low="61" high="63">111</pattern>  <!-- cat7 -->
	<!--
		Encoding: SRC comes from the first source register, whose flags pick
		SRC_REG_TYPE (CONST is tested before IMMED, otherwise GPR).  UNK is
		taken from the second source register.  SRC_TYPE is hard-coded to 1
		(b32) for now.
	 -->
	<encode>
		<map name="SRC">src->srcs[0]</map>
		<map name="SRC_REG_TYPE">(src->srcs[0]->flags &amp; IR3_REG_CONST) ? 1 : ((src->srcs[0]->flags &amp; IR3_REG_IMMED) ? 2 : 0)</map>
		<map name="SRC_TYPE">1</map> <!-- TODO -->
		<map name="UNK">extract_reg_uim(src->srcs[1])</map>
		<map name="SCOPE">src->cat7.alias_scope</map>
	</encode>
</bitset>

</isa>