Search Unity

From Surface Shader to CG

Discussion in 'Shaders' started by AcidArrow, Oct 11, 2012.

  1. AcidArrow

    AcidArrow

    Joined:
    May 20, 2010
    Posts:
    11,799
    So, I started trying to optimize one of my surface shaders. I thought since I have some (really basic) understanding of CG, I would port it to CG and then I'll hopefully have better performance on iOS.

    The shader I started with is this:

    Code (csharp):
    1.  
    2. Shader "Reflective/Diffuse_Custom_Fresnel_Texture" { // Surface-shader version: textured diffuse plus a cubemap reflection scaled by a rim ("pseudo-fresnel") term
    3. Properties {
    4.     _MainTex ("Base (RGB) RefStrength (A)", 2D) = "white" {} // rgb = base color, a = reflection strength
    5.     _Cube ("Reflection Cubemap", Cube) = "_Skybox" { TexGen CubeReflect }
    6. }
    7. SubShader {
    8.     LOD 200
    9.     Tags { "RenderType"="Opaque" }
    10.    
    11. CGPROGRAM
    12. #pragma surface surf Lambert approxview noambient
    13. sampler2D _MainTex;
    14. samplerCUBE _Cube;
    15.  
    16.  // Inputs consumed by surf(): main texture UVs, a reflection vector and a view direction
    17. struct Input {
    18.     half2 uv_MainTex;
    19.     half3 worldRefl;
    20.     half3 viewDir;
    21. };
    22.  // Surface function named by the #pragma above: fills in o.Albedo and o.Emission
    23. void surf (Input IN, inout SurfaceOutput o) {
    24.     fixed4 tex = tex2D(_MainTex, IN.uv_MainTex);
    25.     fixed3 reflcol = texCUBE (_Cube, IN.worldRefl); // cubemap sample along the reflection vector
    26.     fixed rim = 1.1 - dot (IN.viewDir, o.Normal); // rim factor: grows as the view direction turns away from the normal
    27.     o.Albedo = tex.rgb;
    28.     o.Emission = reflcol.rgb * rim * tex.a; // reflection is added as emission, scaled by rim and by the texture alpha
    29. }
    30. ENDCG
    31. }
    32.    
    33. FallBack "Reflective/VertexLit"
    34. }
    35.  
    I wanted it to work with lightmaps, have a cubemap, and a pseudo-fresnel.

    So I started working on the CG version. After a while where everything was pink, I finally got it to work!

    Here is the shader:

    Code (csharp):
    1. Shader "Reflective/Diffuse_Custom_Fresnel_CG" { // Hand-written vertex/fragment version: lightmapped base texture plus cubemap reflection scaled by a rim factor
    2.  
    3. Properties {
    4.     _MainTex ("Base (RGB) RefStrength (A)", 2D) = "white" {} // rgb = base color, a = reflection strength
    5.     _Cube ("Reflection Cubemap", Cube) = "_Skybox" { TexGen CubeReflect }
    6. }
    7. SubShader {
    8.     LOD 200
    9.     Tags { "RenderType"="Opaque" "Queue"="Geometry" }
    10.         Pass {
    11.    
    12.                 CGPROGRAM
    13.                
    14.                 #pragma vertex vert approxview
    15.                 #pragma fragment frag
    16.                 #include "UnityCG.cginc"
    17.                 // NOTE(review): appdata is declared here but vert() below takes appdata_full, so this struct is unused in the listing
    18.                 struct appdata {
    19.                     fixed4 vertex : POSITION;
    20.                     fixed3 normal : NORMAL;
    21.                 };
    22.                 // Values passed from vert() to frag()
    23.                 struct v2f {
    24.                     fixed4 pos : SV_POSITION;
    25.                     fixed3 normalDir : TEXCOORD0; // normalized in vert(), normalized again in frag()
    26.                     fixed2 uv : TEXCOORD2; // lightmap UVs (see vert)
    27.                     fixed3 viewDir : TEXCOORD1; // camera-to-vertex vector, not normalized
    28.                     fixed2 uv2 : TEXCOORD3; // main texture UVs (see vert)
    29.                     fixed3 color : COLOR; // carries the rim factor computed per vertex
    30.                 };
    31.                
    32.                 uniform fixed4 _MainTex_ST;
    33.                 uniform sampler2D _MainTex;
    34.                 uniform samplerCUBE _Cube;      
    35.                 uniform half4 unity_LightmapST;
    36.                 uniform sampler2D unity_Lightmap;
    37.                 // Vertex stage: clip-space position, rim factor, view vector, normal and both UV sets
    38.                 v2f vert (appdata_full v) {
    39.                     v2f o;
    40.                     o.pos = mul (UNITY_MATRIX_MVP, v.vertex);              
    41.                     fixed3 viewDir = normalize(ObjSpaceViewDir(v.vertex)); // local view direction, used only for the rim factor on the next line
    42.                     fixed dotProduct = (1.1 - dot(viewDir, v.normal)); // rim factor: grows as the view direction turns away from the normal
    43.                     fixed4x4 modelMatrix = _Object2World; // matrix copied into a fixed4x4 local
    44.                     fixed4x4 modelMatrixInverse = _World2Object;  // matrix copied into a fixed4x4 local
    45.                     o.viewDir = fixed3(mul(modelMatrix, v.vertex) - fixed4(_WorldSpaceCameraPos, 1.0)); // transformed vertex position minus camera position; left unnormalized
    46.                     o.normalDir = normalize(fixed3(mul(fixed4(v.normal, 0.0), modelMatrixInverse))); // normal multiplied on the left of the inverse model matrix, then normalized
    47.                     o.uv = v.texcoord1.xy * unity_LightmapST.xy + unity_LightmapST.zw; // second UV set scaled (xy) and offset (zw) for the lightmap
    48.                     o.color = dotProduct; // scalar rim factor written to all three color channels
    49.                     o.uv2 = TRANSFORM_TEX(v.texcoord, _MainTex);                    
    50.                     return o;
    51.                 }
    52.                 // Fragment stage: lightmapped base color plus reflection scaled by texture alpha and the rim factor
    53.                 fixed4 frag(v2f i) : COLOR {
    54.                     fixed4 texcol = tex2D(_MainTex, i.uv2);
    55.                     fixed3 reflectedDir = reflect(i.viewDir, normalize(i.normalDir)); // reflection vector computed per fragment, including a normalize
    56.                     fixed4 refcol = texCUBE(_Cube, reflectedDir);
    57.                     refcol *= texcol.a; // texture alpha acts as reflection strength
    58.                     texcol.rgb *= DecodeLightmap(tex2D(unity_Lightmap, i.uv)); // base color multiplied by the decoded lightmap sample
    59.                     refcol.rgb *= i.color.rgb; // apply the per-vertex rim factor
    60.                     texcol += refcol; // adds all four channels, so the output alpha also receives refcol.a
    61.                     return texcol;
    62.                 }
    63.                
    64.             ENDCG
    65.         }
    66.     }
    67. }
    This one looks *almost* identical to the first one (the way the cubemap is placed is subtly different), but it performs worse (I got an extra 2ms on a scene where this shader was covering 1/3 of the screen).

    And this is why I'm here. I'm obviously out of my depth. I did a lot of looking up on the wiki and in CG examples to manage to write this shader; I understand the basics of the code and what does what, but I have no clue why it is performing worse. I may be doing something fundamentally wrong that I cannot understand right now. I even looked at the compiled GLES output and the shaders were similar. Actually, the second one used fewer temp vars, so it should be more efficient, right? The main difference was in how the cubemap was applied, which makes me think that this line is suspect:

    fixed3 reflectedDir = reflect(i.viewDir, normalize(i.normalDir));

    But I don't know, as I said I am clearly out of my depth. How can I optimize this shader (for iOS)?
     
  2. brn

    brn

    Joined:
    Feb 8, 2011
    Posts:
    320
    fixed3 reflectedDir = reflect(i.viewDir, normalize(i.normalDir));

    I'd start by moving the above line to the vertex shader. You're doing a hell of a lot of calculations in that one line.
    You will also save a texcoord by doing it. You are also normalizing a few things multiple times, which is something you want to avoid.

    You are also converting some matrices to fixed precision in the vertex shader. I would avoid doing that because it's probably costing you more than the calculation itself.

    Also, I'd pass the UVs from the vertex shader to the fragment shader as half2s. I've found fixed precision to be slower than halfs in the vertex shader.

    Sometimes depending on what needs to be converted keeping some things as floats in the vertex shader is also faster.

    I think you will find you get a significant speed increase once you have fixed those few things.

    Edit: Actually, looking at the shader, there are other approaches you could take, but they diverge from the idea of optimizing what you already have. There are some less correct but faster ways of achieving some of the effects.
     
    Last edited: Oct 11, 2012
  3. Martin-Kraus

    Martin-Kraus

    Joined:
    Feb 18, 2011
    Posts:
    617
    For shader optimization on iOS I would recommend to get the compiled GLSL shader from Unity and analyse it with PVRShaman from Imagination Technologies (http://www.imgtec.com/powervr/insider/powervr-pvrshaman.asp ), which shows how many GPU cycles each line approximately requires.
     
  4. William Squire

    William Squire

    Joined:
    Jun 29, 2013
    Posts:
    3