Search Unity

Texture Gaussian Blur shader

Discussion in 'Shaders' started by fra3point, Oct 20, 2016.

  1. fra3point

    fra3point

    Joined:
    Aug 20, 2012
    Posts:
    269
    Hi, shaders gurus!

    I'm working on a project named "Mesh2Bump", a small utility to convert a flat high-detailed 3D model into a bump map. For this purpose, I need to apply a small amount of blur on a runtime-generated texture to smooth the result.

    Since I don't like to work with the SetPixels functions because they are slow,
    I need a gaussian blur shader to be used in a Blit() operation to blur the input texture by a given amount.

    Could you help me?
     
  2. Gistix

    Gistix

    Joined:
    Jul 21, 2013
    Posts:
    29
    I suppose this project may have everything that you would need.
     
    fra3point likes this.
  3. fra3point

    fra3point

    Joined:
    Aug 20, 2012
    Posts:
    269
    That example was great, but it uses a kind of multi tap linear sampling.
    What I need is the same of photoshop's (or GIMP's) gaussian blur.
    It allows to specify a in pixels, starting from 1px.

    I think I should use a convolution algorhitm with a gaussian kernel, but I have no idea on how to use it inside a shader.

     
  4. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    There are a ton of examples across the internet of doing gaussian (or gaussian-like) blurs of many kinds. Down sampled multi-pass separable linear sampling used by that example is essentially the "best" from an optimization standpoint if you want to get large blurs quickly. With older hardware doing high quality dynamically sized blurs was difficult since you couldn't easily do dynamically sized loops, but with DX11 hardware it's pretty easy, so if you don't care about speed that much you can calculate the blur kernel weights and number of taps needed and set those on the material dynamically.

    If you absolutely need DX9 support you pretty much just make one shader that does a fixed number of "taps" that you scale the range on with a known maximum pixel range (usually the number of taps, though some people will go to 2x that) and run it multiple times for blurs larger. Unity's older MobileBlur.shader does just that.
     
    Alic and fra3point like this.
  5. fra3point

    fra3point

    Joined:
    Aug 20, 2012
    Posts:
    269
    @bgolus I don't need speed since this is an Editor system and it's executed only a few times, so dynamic loops won't be a big problem.

    As now I wrote this simple convolution shader. It just has a static-ish gaussian kernel with the relative offsets as 5x5 matrices.

    Code (csharp):
    1. Shader "Blur" {
    2.     Properties{
    3.         _MainTex("Base (RGB)", 2D) = "white" { }
    4.     }
    5.         SubShader{
    6.  
    7.         ZTest Always Cull Off ZWrite Off Fog{ Mode Off }
    8.  
    9.         Pass{
    10.         CGPROGRAM
    11.  
    12.     #pragma vertex vert
    13.     #pragma fragment frag
    14.     #include "UnityCG.cginc"
    15.  
    16.         sampler2D _MainTex;
    17.         float4 _MainTex_TexelSize;
    18.         float step_w;
    19.         float step_h;
    20.  
    21.     struct v2f {
    22.         float4  pos : SV_POSITION;
    23.         float2  uv : TEXCOORD0;
    24.     };
    25.  
    26.     float4 _MainTex_ST;
    27.     float4 _MainTex_ST_TexelSize;
    28.  
    29.    
    30.  
    31.  
    32.     v2f vert(appdata_base v) {
    33.         v2f o;
    34.         o.pos = mul(UNITY_MATRIX_MVP, v.vertex);
    35.         o.uv = TRANSFORM_TEX(v.texcoord, _MainTex);
    36.         return o;
    37.     }
    38.  
    39.     fixed4 frag(v2f i) : COLOR{
    40.         step_w = _MainTex_TexelSize.x;
    41.         step_h = _MainTex_TexelSize.y;
    42.  
    43.         float2 offset[25] = {
    44.             float2(-step_w*2.0, -step_h*2.0), float2(-step_w, -step_h*2.0),  float2(0.0, -step_h*2.0), float2(step_w, -step_h*2.0), float2(step_w*2.0, -step_h*2.0),
    45.             float2(-step_w*2.0, -step_h),     float2(-step_w, -step_h),      float2(0.0, -step_h),     float2(step_w, -step_h),     float2(step_w*2.0, -step_h),
    46.             float2(-step_w*2.0, 0.0),         float2(-step_w, 0.0),          float2(0.0, 0.0),         float2(step_w, 0.0),         float2(step_w*2.0, 0.0),
    47.             float2(-step_w*2.0, step_h),      float2(-step_w, step_h),       float2(0.0, step_h),      float2(step_w, step_h),      float2(step_w*2.0, step_h),
    48.             float2(-step_w*2.0, step_h*2.0),  float2(-step_w, step_h*2.0),   float2(0.0, step_h*2.0),  float2(step_w, step_h*20),   float2(step_w*2.0, step_h*2.0)
    49.         };
    50.  
    51.         float kernel[25] = {
    52.  
    53.             0.003765,    0.015019,    0.023792,    0.015019,    0.003765,
    54.             0.015019,    0.059912,    0.094907,    0.059912,    0.015019,
    55.             0.023792,    0.094907,    0.150342,    0.094907,    0.023792,
    56.             0.015019,    0.059912,    0.094907,    0.059912,    0.015019,
    57.             0.003765,    0.015019,    0.023792,    0.015019,    0.003765
    58.         };
    59.  
    60.         float4 sum = float4(0.0, 0.0, 0.0, 0.0);
    61.  
    62.         for (int j = 0; j < 25; j++) {
    63.             float4 tmp = tex2D(_MainTex, i.uv + offset[j]);
    64.             sum += tmp * kernel[j];
    65.         }
    66.  
    67.         return sum;
    68.     }
    69.    
    70.     ENDCG //Shader End
    71.     }
    72.  
    73.     }
    74.    
    75. }
    76.  


    It works well, but it does nothing but a little, not tweakable blur.
    I tried to use it in multiple Blit() calls to repeat the effect and get larger blurs, but I failed.
    It seems that using the same Render Texture for source and destination in a Blit doesn't work, so I use a temporary render texture to store the progressive result for each iteration. Here's the code:


    Code (CSharp):
    1.  
    2. RenderTexture Blur(RenderTexture source, int iterations) {
    3.         RenderTexture result = source; //result will store partial results (blur iterations)
    4.         Material mat = new Material(Shader.Find("Blur")); //create blur material
    5.         RenderTexture blit = RenderTexture.GetTemporary((int)resolution, (int)resolution); //get temp RT
    6.         for (int i = 0; i < iterations; i++) {
    7.             Graphics.SetRenderTarget(blit);
    8.             GL.Clear(true, true, Color.black); //avoid artifacts in temp RT by clearing it
    9.             Graphics.Blit(result, blit, mat); //PERFORM A BLUR ITERATION
    10.             result= blit; //overwrite partial result
    11.         }
    12.         RenderTexture.ReleaseTemporary(blit);
    13.         return result; //return the last partial result
    14. }
    15.  
    This code produces the same result as a single Blit() call, and I don't know why.
    While I try to implement dynamic sized kernels, can someone clear my mind about this issue?

    Thank you for your help! :)
     
  6. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    That line right there is the problem. Doing result = blit is setting the result to be the same texture as blit, not copying the data from blit texture to the result texture. From that point on every iteration is reading and writing to the same texture, which isn't allowed* and will result in the Blit() doing nothing.

    There's two main ways to handle this: Either use the "ping pong" approach where you swap the render textures back and forth and copy (with another blit) the result back to the result if you have an even number of iterations, or create a new temp buffer every iteration to write to and copy that (again, with another blit) to the output. The ping pong method is more efficient, but can make for confusing code. The new buffer every frame is fairly straightforward and isn't really that much slower. See the Blur.cs from Standard Assets's Image Effects "Image Effects/Blur/Blur" for an example of the later.

    * Using a RWTexture2D you can read and write to a texture in the same shader, but it's mainly for use with compute shaders, the built in Blit() doesn't support them, and doing anything but writing to the same single pixel you read from (unlike a blur which reads from many pixels and writes to one) has "undefined" results which basically means you get junk.
     
    Alic likes this.
  7. fra3point

    fra3point

    Joined:
    Aug 20, 2012
    Posts:
    269
    Thanks, @bgolus !!! The ping pong approach works well. I switched back to a 3x3 kernel and here's the final code for multiple Blit() iterations:

    Code (CSharp):
    1. RenderTexture Blur(RenderTexture source, int iterations) {
    2.         RenderTexture rt = source;
    3.         Material mat = new Material(Shader.Find("Blur"));
    4.         RenderTexture blit = RenderTexture.GetTemporary((int)resolution, (int)resolution);
    5.         for (int i = 0; i < iterations; i++) {
    6.             Graphics.SetRenderTarget(blit);
    7.             GL.Clear(true, true, Color.black);
    8.             Graphics.Blit(rt, blit, mat);
    9.             Graphics.SetRenderTarget(rt);
    10.             GL.Clear(true, true, Color.black);
    11.             Graphics.Blit(blit, rt, mat);
    12.         }
    13.         RenderTexture.ReleaseTemporary(blit);
    14.         return rt;
    15.     }
    By the way, I was reading at the Wikipedia page for Gaussian Blur, and my eyes focused on this lines:

    Applying multiple, successive gaussian blurs to an image has the same effect as applying a single, larger gaussian blur, whose radius is the square root of the sum of the squares of the blur radii that were actually applied. (I assume that radius = 2*std. deviation)

    Does it mean that using multiple iterations is the same of using a larger kernel?
     
    Cebbi likes this.
  8. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    Yes. That's why so many real time blurs use multiple iterations. There's a cost to doing each pass, but usually far less than the massive number of samples required to do larger blurs.
     
    Alic and fra3point like this.