//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "C:/ame2014Iupdate/releases/2014.03/external/adobe/Iridas/IRIDASLIB/GPU/IRIDASPrimary.cu", 1405709992, 6569
	.file	2 "c:\\ame2014iupdate\\releases\\2014.03\\external\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1405710127, 191626
// Texture reference sampled by ShaderKernel_IRIDASPrimary at
// (x + 0.5, y + 0.5) to fetch the four-component input texel.
.global .texref texture0_RECT;
// Texture reference sampled by ShaderKernel_IRIDASPrimary with a u coordinate
// derived from the weighted sum of the input texel and a fixed v of 0.0625;
// three of the four returned components are used as blend factors.
.global .texref texture2_2D;
// ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local has been demoted
// (it is declared as a 192-byte .shared array inside the kernel body).
// NUL-terminated ASCII bytes spelling "__CUDA_FTZ"; not referenced by any
// instruction visible in this file.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

//-----------------------------------------------------------------------
// ShaderKernel_IRIDASPrimary
//
// One thread per pixel, x = ntid.x*ctaid.x + tid.x, y likewise.
//
// Parameters
//   param_0 (u64)  destination buffer (generic address, converted to global)
//   param_1 (u32)  destination row pitch in pixels (element = y*pitch + x)
//   param_2 (u32)  output format: non-zero -> 4 x f32 (16 bytes / pixel),
//                  zero -> 4 x f16 (8 bytes / pixel)
//   param_3 (u32)  exclusive upper bound for x
//   param_4 (u32)  exclusive upper bound for y
//   param_5 (u64)  table of 12 x 16-byte entries (read as v4.f32); copied
//                  into shared memory and referred to below as P[byte offset]
//   param_6 (u64)  declared but never read by this kernel
//   param_7 (u64)  declared but never read by this kernel
//   param_8 (u64)  array of u32 switches, read at byte offsets 0..36 and
//                  referred to below as F[byte offset]
//
// Data flow
//   texel t = tex(texture0_RECT, x+0.5, y+0.5)
//   w       = t.y*P[0].y + t.z*P[0].x + t.x*P[0].z     (weighted sum)
//   d       = (t.z, t.y, t.x) - w
//   F[0] != 0 : three stages k = 0..2 are evaluated and blended with the
//               x/y/z components fetched from texture2_2D.
//   F[0] == 0 : if F[16] != 0 a single stage is evaluated, otherwise the
//               texel passes through unchanged.
//   A stage computes c = (s*d + w) * gain + offset per channel, optionally
//   applies sign(c)*|c|^e, optionally rescales (c - w') around a freshly
//   computed weighted sum w'.
//   The fourth texel component (t.w) is always written through unchanged.
//
// Change relative to the compiler output
//   The shared-memory fill and bar.sync now execute BEFORE the bounds test.
//   Previously threads outside the image returned first, so (a) bar.sync
//   was reached by only part of the CTA, and (b) a CTA with fewer than 12
//   in-range tid.x values left part of P[] unwritten while in-range threads
//   still read offsets up to 184.
//
//   NOTE(review): the fill is indexed by %tid.x only, so it presumes
//   ntid.x >= 12 -- confirm at the launch site.
//-----------------------------------------------------------------------
.visible .entry ShaderKernel_IRIDASPrimary(
	.param .u64 ShaderKernel_IRIDASPrimary_param_0,
	.param .u32 ShaderKernel_IRIDASPrimary_param_1,
	.param .u32 ShaderKernel_IRIDASPrimary_param_2,
	.param .u32 ShaderKernel_IRIDASPrimary_param_3,
	.param .u32 ShaderKernel_IRIDASPrimary_param_4,
	.param .u64 ShaderKernel_IRIDASPrimary_param_5,
	.param .u64 ShaderKernel_IRIDASPrimary_param_6,
	.param .u64 ShaderKernel_IRIDASPrimary_param_7,
	.param .u64 ShaderKernel_IRIDASPrimary_param_8
)
{
	.reg .pred 	%p<40>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<32>;
	.reg .f32 	%f<360>;
	.reg .s64 	%rd<24>;
	// demoted variable: 12 x 16 bytes, the per-CTA copy of the param_5 table
	.shared .align 16 .b8 ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local[192];

	// ---- Load kernel arguments -------------------------------------------
	ld.param.u64 	%rd4, [ShaderKernel_IRIDASPrimary_param_0];
	ld.param.u32 	%r4, [ShaderKernel_IRIDASPrimary_param_1];
	ld.param.u32 	%r5, [ShaderKernel_IRIDASPrimary_param_2];
	ld.param.u32 	%r6, [ShaderKernel_IRIDASPrimary_param_3];
	ld.param.u32 	%r7, [ShaderKernel_IRIDASPrimary_param_4];
	ld.param.u64 	%rd6, [ShaderKernel_IRIDASPrimary_param_5];
	ld.param.u64 	%rd5, [ShaderKernel_IRIDASPrimary_param_8];
	cvta.to.global.u64 	%rd1, %rd5;	// %rd1 = F (switch array)
	cvta.to.global.u64 	%rd2, %rd6;	// %rd2 = parameter table in global memory
	.loc 1 64 1
	// ---- Pixel coordinates: %r2 = x, %r3 = y ------------------------------
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r1, %tid.x;
	mad.lo.s32 	%r2, %r8, %r9, %r1;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r3, %r10, %r11, %r12;
	.loc 1 64 1
	// ---- Fill P[] in shared memory ----------------------------------------
	// Done before the bounds test: the table does not depend on the pixel
	// position, and every thread of the CTA must reach the barrier below.
	// Threads with tid.x 0..11 each copy one 16-byte entry.
	setp.gt.u32	%p4, %r1, 11;
	@%p4 bra 	BB0_3;

	.loc 1 64 1
	mul.wide.u32 	%rd7, %r1, 16;
	mov.u64 	%rd8, ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local;
	add.s64 	%rd9, %rd8, %rd7;
	add.s64 	%rd10, %rd2, %rd7;
	ld.global.v4.f32 	{%f153, %f154, %f155, %f156}, [%rd10];
	st.shared.v4.f32 	[%rd9], {%f153, %f154, %f155, %f156};

BB0_3:
	.loc 1 64 1
	// All threads of the CTA arrive here, in range or not.
	bar.sync 	0;
	.loc 1 64 1
	// ---- Bounds test: leave if x >= param_3 or y >= param_4 ---------------
	setp.lt.s32	%p1, %r2, %r6;
	setp.lt.s32	%p2, %r3, %r7;
	and.pred  	%p3, %p1, %p2;
	.loc 1 64 1
	@!%p3 bra 	BB0_61;

BB0_1:
	.loc 1 64 1
	// Sample coordinates (x + 0.5, y + 0.5).
	cvt.rn.f32.s32	%f151, %r2;
	add.ftz.f32 	%f1, %f151, 0f3F000000;
	cvt.rn.f32.s32	%f152, %r3;
	add.ftz.f32 	%f2, %f152, 0f3F000000;
	.loc 1 64 105
	// inline asm
	tex.2d.v4.f32.f32 {%f161, %f162, %f163, %f164}, [texture0_RECT, {%f1, %f2}];
	// inline asm
	.loc 1 64 1
	// %f10 = w = t.y*P[0].y + t.z*P[0].x + t.x*P[0].z
	ld.shared.v4.f32 	{%f167, %f168, %f169, %f170}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	mul.ftz.f32 	%f171, %f162, %f168;
	fma.rn.ftz.f32 	%f172, %f163, %f167, %f171;
	fma.rn.ftz.f32 	%f10, %f161, %f169, %f172;
	.loc 1 64 1
	// Per-channel differences from w: %f11 = t.z-w, %f12 = t.y-w, %f13 = t.x-w
	sub.ftz.f32 	%f11, %f163, %f10;
	sub.ftz.f32 	%f12, %f162, %f10;
	sub.ftz.f32 	%f13, %f161, %f10;
	.loc 1 64 1
	// F[0] selects the three-stage path (non-zero) or the single-stage path.
	ld.global.u32 	%r13, [%rd1];
	setp.eq.s32	%p5, %r13, 0;
	.loc 1 64 105
	// Default result = unmodified texel (used when no stage is enabled).
	mov.f32 	%f359, %f161;
	mov.f32 	%f358, %f162;
	mov.f32 	%f357, %f163;
	.loc 1 64 1
	@%p5 bra 	BB0_44;

	.loc 1 64 1
	// ==== Three-stage path ==================================================
	// Blend factors come from texture2_2D at u = w*(1023/1024) + 0.5/1024,
	// v = 0.0625.  %f173/%f174/%f175 weight stages 0/1/2; %f176 is unused.
	fma.rn.ftz.f32 	%f177, %f10, 0f3F7FC000, 0f3A000000;
	mov.f32 	%f178, 0f3D800000;
	.loc 1 64 152
	// inline asm
	tex.2d.v4.f32.f32 {%f173, %f174, %f175, %f176}, [texture2_2D, {%f177, %f178}];
	// inline asm
	.loc 1 64 1
	// ---- Stage 0: scale P[16], gain P[32], offset P[80] -------------------
	ld.shared.f32 	%f179, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+16];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f180, %f179, %f11, %f10;
	fma.rn.ftz.f32 	%f181, %f179, %f12, %f10;
	fma.rn.ftz.f32 	%f182, %f179, %f13, %f10;
	.loc 1 64 1
	// F[4] enables the power step of stage 0.
	ld.global.u32 	%r14, [%rd1+4];
	setp.eq.s32	%p6, %r14, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f183, %f184, %f185, %f186}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+80];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f188, %f189, %f190, %f191}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+32];
	.loc 1 64 1
	// c = value * gain + offset, per channel
	fma.rn.ftz.f32 	%f339, %f180, %f188, %f183;
	fma.rn.ftz.f32 	%f340, %f181, %f189, %f184;
	fma.rn.ftz.f32 	%f341, %f182, %f190, %f185;
	.loc 1 64 1
	@%p6 bra 	BB0_15;

	.loc 1 64 1
	// Power step: c = sign(c) * |c|^e with e = P[128..136]; the magnitude is
	// forced to 0 when |c| is not greater than 0 (NaN takes the pow branch).
	setp.lt.ftz.f32	%p7, %f339, 0f00000000;
	selp.f32	%f21, 0fBF800000, 0f3F800000, %p7;
	setp.lt.ftz.f32	%p8, %f340, 0f00000000;
	selp.f32	%f22, 0fBF800000, 0f3F800000, %p8;
	setp.lt.ftz.f32	%p9, %f341, 0f00000000;
	selp.f32	%f23, 0fBF800000, 0f3F800000, %p9;
	.loc 2 2750 10
	abs.ftz.f32 	%f24, %f340;
	abs.ftz.f32 	%f25, %f341;
	.loc 1 64 1
	ld.shared.f32 	%f26, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+128];
	.loc 2 2750 10
	abs.ftz.f32 	%f27, %f339;
	.loc 1 64 1
	setp.gtu.ftz.f32	%p10, %f27, 0f00000000;
	@%p10 bra 	BB0_7;

	mov.f32 	%f336, 0f00000000;
	bra.uni 	BB0_8;

BB0_7:
	.loc 2 3600 10
	// |c|^e evaluated as ex2(e * lg2(|c|))
	lg2.approx.ftz.f32 	%f198, %f27;
	mul.ftz.f32 	%f199, %f26, %f198;
	ex2.approx.ftz.f32 	%f336, %f199;

BB0_8:
	.loc 1 64 1
	ld.shared.f32 	%f30, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+132];
	setp.gtu.ftz.f32	%p11, %f24, 0f00000000;
	@%p11 bra 	BB0_10;

	mov.f32 	%f337, 0f00000000;
	bra.uni 	BB0_11;

BB0_10:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f201, %f24;
	mul.ftz.f32 	%f202, %f30, %f201;
	ex2.approx.ftz.f32 	%f337, %f202;

BB0_11:
	.loc 1 64 1
	ld.shared.f32 	%f33, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+136];
	setp.gtu.ftz.f32	%p12, %f25, 0f00000000;
	@%p12 bra 	BB0_13;

	mov.f32 	%f338, 0f00000000;
	bra.uni 	BB0_14;

BB0_13:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f204, %f25;
	mul.ftz.f32 	%f205, %f33, %f204;
	ex2.approx.ftz.f32 	%f338, %f205;

BB0_14:
	.loc 1 64 1
	// Re-apply the saved signs.
	mul.ftz.f32 	%f339, %f336, %f21;
	mul.ftz.f32 	%f340, %f337, %f22;
	mul.ftz.f32 	%f341, %f338, %f23;

BB0_15:
	.loc 1 64 1
	// F[28] enables the rescale step of stage 0.
	ld.global.u32 	%r15, [%rd1+28];
	setp.eq.s32	%p13, %r15, 0;
	@%p13 bra 	BB0_17;

	.loc 1 64 1
	// w' = weighted sum of the stage result using P[0]; c = P[176]*(c-w') + w'
	ld.shared.v4.f32 	{%f206, %f207, %f208, %f209}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f211, %f340, %f207;
	fma.rn.ftz.f32 	%f213, %f339, %f206, %f211;
	fma.rn.ftz.f32 	%f215, %f341, %f208, %f213;
	.loc 1 64 1
	sub.ftz.f32 	%f216, %f339, %f215;
	sub.ftz.f32 	%f217, %f340, %f215;
	sub.ftz.f32 	%f218, %f341, %f215;
	.loc 1 64 1
	ld.shared.f32 	%f219, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+176];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f339, %f219, %f216, %f215;
	fma.rn.ftz.f32 	%f340, %f219, %f217, %f215;
	fma.rn.ftz.f32 	%f341, %f219, %f218, %f215;

BB0_17:
	.loc 1 64 1
	// Accumulator starts as blend.x * stage0.
	mul.ftz.f32 	%f48, %f173, %f339;
	mul.ftz.f32 	%f49, %f173, %f340;
	mul.ftz.f32 	%f50, %f173, %f341;
	.loc 1 64 1
	// ---- Stage 1: scale P[20], gain P[48], offset P[96] -------------------
	ld.shared.f32 	%f220, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+20];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f221, %f220, %f11, %f10;
	fma.rn.ftz.f32 	%f222, %f220, %f12, %f10;
	fma.rn.ftz.f32 	%f223, %f220, %f13, %f10;
	.loc 1 64 1
	// F[8] enables the power step of stage 1.
	ld.global.u32 	%r16, [%rd1+8];
	setp.eq.s32	%p14, %r16, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f224, %f225, %f226, %f227}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+96];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f229, %f230, %f231, %f232}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+48];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f345, %f221, %f229, %f224;
	fma.rn.ftz.f32 	%f346, %f222, %f230, %f225;
	fma.rn.ftz.f32 	%f347, %f223, %f231, %f226;
	.loc 1 64 1
	@%p14 bra 	BB0_28;

	.loc 1 64 1
	// Power step with exponents P[144..152]; same scheme as stage 0.
	setp.lt.ftz.f32	%p15, %f345, 0f00000000;
	selp.f32	%f54, 0fBF800000, 0f3F800000, %p15;
	setp.lt.ftz.f32	%p16, %f346, 0f00000000;
	selp.f32	%f55, 0fBF800000, 0f3F800000, %p16;
	setp.lt.ftz.f32	%p17, %f347, 0f00000000;
	selp.f32	%f56, 0fBF800000, 0f3F800000, %p17;
	.loc 2 2750 10
	abs.ftz.f32 	%f57, %f346;
	abs.ftz.f32 	%f58, %f347;
	.loc 1 64 1
	ld.shared.f32 	%f59, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+144];
	.loc 2 2750 10
	abs.ftz.f32 	%f60, %f345;
	.loc 1 64 1
	setp.gtu.ftz.f32	%p18, %f60, 0f00000000;
	@%p18 bra 	BB0_20;

	mov.f32 	%f342, 0f00000000;
	bra.uni 	BB0_21;

BB0_20:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f239, %f60;
	mul.ftz.f32 	%f240, %f59, %f239;
	ex2.approx.ftz.f32 	%f342, %f240;

BB0_21:
	.loc 1 64 1
	ld.shared.f32 	%f63, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+148];
	setp.gtu.ftz.f32	%p19, %f57, 0f00000000;
	@%p19 bra 	BB0_23;

	mov.f32 	%f343, 0f00000000;
	bra.uni 	BB0_24;

BB0_23:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f242, %f57;
	mul.ftz.f32 	%f243, %f63, %f242;
	ex2.approx.ftz.f32 	%f343, %f243;

BB0_24:
	.loc 1 64 1
	ld.shared.f32 	%f66, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+152];
	setp.gtu.ftz.f32	%p20, %f58, 0f00000000;
	@%p20 bra 	BB0_26;

	mov.f32 	%f344, 0f00000000;
	bra.uni 	BB0_27;

BB0_26:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f245, %f58;
	mul.ftz.f32 	%f246, %f66, %f245;
	ex2.approx.ftz.f32 	%f344, %f246;

BB0_27:
	.loc 1 64 1
	mul.ftz.f32 	%f345, %f342, %f54;
	mul.ftz.f32 	%f346, %f343, %f55;
	mul.ftz.f32 	%f347, %f344, %f56;

BB0_28:
	.loc 1 64 1
	// F[32] enables the rescale step of stage 1 (factor P[180]).
	ld.global.u32 	%r17, [%rd1+32];
	setp.eq.s32	%p21, %r17, 0;
	@%p21 bra 	BB0_30;

	.loc 1 64 1
	ld.shared.v4.f32 	{%f247, %f248, %f249, %f250}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f252, %f346, %f248;
	fma.rn.ftz.f32 	%f254, %f345, %f247, %f252;
	fma.rn.ftz.f32 	%f256, %f347, %f249, %f254;
	.loc 1 64 1
	sub.ftz.f32 	%f257, %f345, %f256;
	sub.ftz.f32 	%f258, %f346, %f256;
	sub.ftz.f32 	%f259, %f347, %f256;
	.loc 1 64 1
	ld.shared.f32 	%f260, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+180];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f345, %f260, %f257, %f256;
	fma.rn.ftz.f32 	%f346, %f260, %f258, %f256;
	fma.rn.ftz.f32 	%f347, %f260, %f259, %f256;

BB0_30:
	.loc 1 64 1
	// Accumulator += blend.y * stage1.
	fma.rn.ftz.f32 	%f81, %f174, %f345, %f48;
	fma.rn.ftz.f32 	%f82, %f174, %f346, %f49;
	fma.rn.ftz.f32 	%f83, %f174, %f347, %f50;
	.loc 1 64 1
	// ---- Stage 2: scale P[24], gain P[64], offset P[112] ------------------
	ld.shared.f32 	%f261, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+24];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f262, %f261, %f11, %f10;
	fma.rn.ftz.f32 	%f263, %f261, %f12, %f10;
	fma.rn.ftz.f32 	%f264, %f261, %f13, %f10;
	.loc 1 64 1
	// F[12] enables the power step of stage 2.
	ld.global.u32 	%r18, [%rd1+12];
	setp.eq.s32	%p22, %r18, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f265, %f266, %f267, %f268}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+112];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f270, %f271, %f272, %f273}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+64];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f351, %f262, %f270, %f265;
	fma.rn.ftz.f32 	%f352, %f263, %f271, %f266;
	fma.rn.ftz.f32 	%f353, %f264, %f272, %f267;
	.loc 1 64 1
	@%p22 bra 	BB0_41;

	.loc 1 64 1
	// Power step with exponents P[160..168]; same scheme as stage 0.
	setp.lt.ftz.f32	%p23, %f351, 0f00000000;
	selp.f32	%f87, 0fBF800000, 0f3F800000, %p23;
	setp.lt.ftz.f32	%p24, %f352, 0f00000000;
	selp.f32	%f88, 0fBF800000, 0f3F800000, %p24;
	setp.lt.ftz.f32	%p25, %f353, 0f00000000;
	selp.f32	%f89, 0fBF800000, 0f3F800000, %p25;
	.loc 2 2750 10
	abs.ftz.f32 	%f90, %f352;
	abs.ftz.f32 	%f91, %f353;
	.loc 1 64 1
	ld.shared.f32 	%f92, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+160];
	.loc 2 2750 10
	abs.ftz.f32 	%f93, %f351;
	.loc 1 64 1
	setp.gtu.ftz.f32	%p26, %f93, 0f00000000;
	@%p26 bra 	BB0_33;

	mov.f32 	%f348, 0f00000000;
	bra.uni 	BB0_34;

BB0_33:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f280, %f93;
	mul.ftz.f32 	%f281, %f92, %f280;
	ex2.approx.ftz.f32 	%f348, %f281;

BB0_34:
	.loc 1 64 1
	ld.shared.f32 	%f96, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+164];
	setp.gtu.ftz.f32	%p27, %f90, 0f00000000;
	@%p27 bra 	BB0_36;

	mov.f32 	%f349, 0f00000000;
	bra.uni 	BB0_37;

BB0_36:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f283, %f90;
	mul.ftz.f32 	%f284, %f96, %f283;
	ex2.approx.ftz.f32 	%f349, %f284;

BB0_37:
	.loc 1 64 1
	ld.shared.f32 	%f99, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+168];
	setp.gtu.ftz.f32	%p28, %f91, 0f00000000;
	@%p28 bra 	BB0_39;

	mov.f32 	%f350, 0f00000000;
	bra.uni 	BB0_40;

BB0_39:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f286, %f91;
	mul.ftz.f32 	%f287, %f99, %f286;
	ex2.approx.ftz.f32 	%f350, %f287;

BB0_40:
	.loc 1 64 1
	mul.ftz.f32 	%f351, %f348, %f87;
	mul.ftz.f32 	%f352, %f349, %f88;
	mul.ftz.f32 	%f353, %f350, %f89;

BB0_41:
	.loc 1 64 1
	// F[36] enables the rescale step of stage 2 (factor P[184]).
	ld.global.u32 	%r19, [%rd1+36];
	setp.eq.s32	%p29, %r19, 0;
	@%p29 bra 	BB0_43;

	.loc 1 64 1
	ld.shared.v4.f32 	{%f288, %f289, %f290, %f291}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f293, %f352, %f289;
	fma.rn.ftz.f32 	%f295, %f351, %f288, %f293;
	fma.rn.ftz.f32 	%f297, %f353, %f290, %f295;
	.loc 1 64 1
	sub.ftz.f32 	%f298, %f351, %f297;
	sub.ftz.f32 	%f299, %f352, %f297;
	sub.ftz.f32 	%f300, %f353, %f297;
	.loc 1 64 1
	ld.shared.f32 	%f301, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+184];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f351, %f301, %f298, %f297;
	fma.rn.ftz.f32 	%f352, %f301, %f299, %f297;
	fma.rn.ftz.f32 	%f353, %f301, %f300, %f297;

BB0_43:
	.loc 1 64 1
	// Final result = accumulator + blend.z * stage2.
	fma.rn.ftz.f32 	%f357, %f175, %f351, %f81;
	fma.rn.ftz.f32 	%f358, %f175, %f352, %f82;
	fma.rn.ftz.f32 	%f359, %f175, %f353, %f83;
	bra.uni 	BB0_58;

BB0_44:
	.loc 1 64 1
	// ==== Single-stage path (F[0] == 0) =====================================
	// F[16] == 0 leaves the texel untouched.
	ld.global.u32 	%r20, [%rd1+16];
	setp.eq.s32	%p30, %r20, 0;
	@%p30 bra 	BB0_58;

	.loc 1 64 1
	// Same parameters as stage 0: scale P[16], gain P[32], offset P[80].
	ld.shared.f32 	%f302, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+16];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f303, %f302, %f11, %f10;
	fma.rn.ftz.f32 	%f304, %f302, %f12, %f10;
	fma.rn.ftz.f32 	%f305, %f302, %f13, %f10;
	.loc 1 64 1
	// F[20] enables the power step.
	ld.global.u32 	%r21, [%rd1+20];
	setp.eq.s32	%p31, %r21, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f306, %f307, %f308, %f309}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+80];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f311, %f312, %f313, %f314}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+32];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f357, %f303, %f311, %f306;
	fma.rn.ftz.f32 	%f358, %f304, %f312, %f307;
	fma.rn.ftz.f32 	%f359, %f305, %f313, %f308;
	.loc 1 64 1
	@%p31 bra 	BB0_56;

	.loc 1 64 1
	// Power step with exponents P[128..136].
	setp.lt.ftz.f32	%p32, %f357, 0f00000000;
	selp.f32	%f120, 0fBF800000, 0f3F800000, %p32;
	setp.lt.ftz.f32	%p33, %f358, 0f00000000;
	selp.f32	%f121, 0fBF800000, 0f3F800000, %p33;
	setp.lt.ftz.f32	%p34, %f359, 0f00000000;
	selp.f32	%f122, 0fBF800000, 0f3F800000, %p34;
	.loc 2 2750 10
	abs.ftz.f32 	%f123, %f358;
	abs.ftz.f32 	%f124, %f359;
	.loc 1 64 1
	ld.shared.f32 	%f125, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+128];
	.loc 2 2750 10
	abs.ftz.f32 	%f126, %f357;
	.loc 1 64 1
	setp.gtu.ftz.f32	%p35, %f126, 0f00000000;
	@%p35 bra 	BB0_48;

	mov.f32 	%f354, 0f00000000;
	bra.uni 	BB0_49;

BB0_48:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f321, %f126;
	mul.ftz.f32 	%f322, %f125, %f321;
	ex2.approx.ftz.f32 	%f354, %f322;

BB0_49:
	.loc 1 64 1
	ld.shared.f32 	%f129, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+132];
	setp.gtu.ftz.f32	%p36, %f123, 0f00000000;
	@%p36 bra 	BB0_51;

	mov.f32 	%f355, 0f00000000;
	bra.uni 	BB0_52;

BB0_51:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f324, %f123;
	mul.ftz.f32 	%f325, %f129, %f324;
	ex2.approx.ftz.f32 	%f355, %f325;

BB0_52:
	.loc 1 64 1
	ld.shared.f32 	%f132, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+136];
	setp.gtu.ftz.f32	%p37, %f124, 0f00000000;
	@%p37 bra 	BB0_54;

	mov.f32 	%f356, 0f00000000;
	bra.uni 	BB0_55;

BB0_54:
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f327, %f124;
	mul.ftz.f32 	%f328, %f132, %f327;
	ex2.approx.ftz.f32 	%f356, %f328;

BB0_55:
	.loc 1 64 1
	mul.ftz.f32 	%f357, %f354, %f120;
	mul.ftz.f32 	%f358, %f355, %f121;
	mul.ftz.f32 	%f359, %f356, %f122;

BB0_56:
	.loc 1 64 1
	// F[24] enables the rescale step (factor P[176]).
	ld.global.u32 	%r22, [%rd1+24];
	setp.eq.s32	%p38, %r22, 0;
	@%p38 bra 	BB0_58;

	.loc 1 64 1
	// Reuses the P[0] weights still held in %f167..%f169.
	mul.ftz.f32 	%f329, %f358, %f168;
	fma.rn.ftz.f32 	%f330, %f357, %f167, %f329;
	fma.rn.ftz.f32 	%f331, %f359, %f169, %f330;
	.loc 1 64 1
	sub.ftz.f32 	%f332, %f357, %f331;
	sub.ftz.f32 	%f333, %f358, %f331;
	sub.ftz.f32 	%f334, %f359, %f331;
	.loc 1 64 1
	ld.shared.f32 	%f335, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+176];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f357, %f335, %f332, %f331;
	fma.rn.ftz.f32 	%f358, %f335, %f333, %f331;
	fma.rn.ftz.f32 	%f359, %f335, %f334, %f331;

BB0_58:
	.loc 1 64 1
	// ==== Store ==============================================================
	// Element index = y * pitch + x, sign-extended to 64 bits.
	mad.lo.s32 	%r31, %r3, %r4, %r2;
	.loc 1 64 1
	cvt.s64.s32	%rd3, %r31;
	.loc 1 64 1
	// param_2 == 0 selects the half-precision store at BB0_60.
	setp.eq.s32	%p39, %r5, 0;
	@%p39 bra 	BB0_60;

	cvta.to.global.u64 	%rd18, %rd4;
	.loc 1 64 1
	// 4 x f32: 16 bytes per element; fourth component is the untouched t.w.
	shl.b64 	%rd19, %rd3, 4;
	add.s64 	%rd20, %rd18, %rd19;
	st.global.v4.f32 	[%rd20], {%f359, %f358, %f357, %f164};
	bra.uni 	BB0_61;

BB0_60:
	cvta.to.global.u64 	%rd21, %rd4;
	.loc 1 64 1
	// 4 x f16: 8 bytes per element; each component rounded to nearest.
	shl.b64 	%rd22, %rd3, 3;
	add.s64 	%rd23, %rd21, %rd22;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f359;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f358;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f357;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f164;
	mov.b16 	%rs4, %temp;
}
	.loc 1 64 241
	st.global.v4.u16 	[%rd23], {%rs1, %rs2, %rs3, %rs4};

BB0_61:
	.loc 1 64 2
	ret;
}


