Search Unity

  1. Megacity Metro Demo now available. Download now.
    Dismiss Notice
  2. Unity support for visionOS is now available. Learn more in our blog post.
    Dismiss Notice

(Solved)Load a 16bit .Wav file WITHOUT using WWW (at runtime)

Discussion in 'Scripting' started by SanderAKALego, Jul 6, 2014.

  1. SanderAKALego

    SanderAKALego

    Joined:
    Jul 17, 2010
    Posts:
    32
    SOLVED! (read next post)
    What I'm trying to do: Load a 16-bit .Wav file at runtime (I can't use WWW in this situation)
    The problem: When attempting to play the sound, all I get is noise

    Loading the bytes from the file into memory is no problem, it works as expected. However when converting the byte array to a Float array, all the float values returned are non valid values (They should be in the range of -1 to 1). This results in the clip sounding like noise.

    Here's a picture of the first 10 samples from the file loaded in script

    As you can see the samples are off the charts

    And heres the same file when it's assigned in the inspector

    So theres nothing wrong with the file itself.
    Here's the code I'm using
    Code (CSharp):
    1. using UnityEngine;
    2. using System.Collections;
    3. using System.IO;
    4. using System;
    5. public class SoundLoader {
    6.  
    7.     public static AudioClip LoadWav(string filePath)
    8.     {
    9.         Debug.Log("Loading wav file at " + Application.dataPath + "/" + filePath + ".wav");
    10.         //Define file path
    11.         string file = Application.dataPath + "/" + filePath + ".wav";
    12.         //Read all bytes of file
    13.         byte[] wav = File.ReadAllBytes(file);
    14.  
    15.         //Get channels (usually 1 or 2)
    16.         int channels = wav[22];
    17.  
    18.         // Get past all the other sub chunks to get to the data subchunk:
    19.         int pos = 12;   // First Subchunk ID from 12 to 16
    20.  
    21.         // Keep iterating until we find the data chunk
    22.         while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
    23.         {
    24.             pos += 4;
    25.             int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
    26.             pos += 4 + chunkSize;
    27.         }
    28.         pos += 8;
    29.  
    30.         // Pos is now positioned to start of actual sound data.
    31.         int samples = (wav.Length - pos) / 2;     // 2 bytes per sample (16 bit sound mono)
    32.         if (channels == 2) samples /= 2;        // 4 bytes per sample (16 bit stereo)
    33.  
    34.         byte[] audioData = new byte[samples];
    35.         Array.Copy(wav, pos, audioData, 0, samples);
    36.         float[] audioSamples = BytesToFloats(audioData);
    37.  
    38.         //Create clip
    39.         AudioClip clip = AudioClip.Create("clip", samples, channels, 44100, true, false);
    40.         clip.SetData(audioSamples, 0);
    41.         return clip;
    42.     }
    43.  
    44.     private static float[] BytesToFloats(byte[] array)
    45.     {
    46.         //1 float = 4 bytes, so divide by 4
    47.         float[] floatArr = new float[array.Length / 4];
    48.         for (int i = 0; i < floatArr.Length; i++)
    49.         {
    50.             if (BitConverter.IsLittleEndian)
    51.                Array.Reverse(array, i * 4, 4);
    52.             floatArr[i] = BitConverter.ToSingle(array, i * 4) / 0x80000000;        
    53.         }
    54.         return floatArr;
    55.     }
    56. }
    57.  
     
    Last edited: Jul 6, 2014
  2. SanderAKALego

    SanderAKALego

    Joined:
    Jul 17, 2010
    Posts:
    32
    I solved my problem!
    Turns out I was trying to read 4 byte floats when the samples actually were made up by 2 shorts! (short = 2 bytes)
    So by simply reading a single short i was able to get it working.Here's a webpage that explains the Wave format in great detail.
    For anyone with the same problem here's the complete code, you may use it as you want.
    Code (CSharp):
    1. using UnityEngine;
    2. using System.Collections;
    3. using System.IO;
    4. using System;
    5. public class SoundLoader {
    6.  
    7.     public static AudioClip LoadWav(string filePath)
    8.     {
    9.         Debug.Log("Loading wav file at " + Application.dataPath + "/" + filePath + ".wav");
    10.         //Define file path
    11.         string file = Application.dataPath + "/" + filePath + ".wav";
    12.         //Read all bytes of file
    13.         byte[] wav = File.ReadAllBytes(file);
    14.  
    15.         //Get channels (usually 1 or 2)
    16.         int channels = wav[22];
    17.  
    18.         // Get past all the other sub chunks to get to the data subchunk:
    19.         int pos = 12;   // First Subchunk ID from 12 to 16
    20.  
    21.         // Keep iterating until we find the data chunk
    22.         while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
    23.         {
    24.             pos += 4;
    25.             int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
    26.             pos += 4 + chunkSize;
    27.         }
    28.         pos += 8;
    29.  
    30.         // Pos is now positioned to start of actual sound data.
    31.         int samples = (wav.Length - pos) / 2;     // 2 bytes per sample (16 bit sound mono)
    32.         if (channels == 2) samples /= 2;        // 4 bytes per sample (16 bit stereo)
    33.  
    34.         //Create audio data array
    35.         byte[] audioData = new byte[samples];
    36.         //Copy over data chunk
    37.         Array.Copy(wav, pos, audioData, 0, samples);
    38.         //Get samples
    39.         float[] audioSamples = BytesToFloats(audioData);
    40.  
    41.         //Create clip
    42.         AudioClip clip = AudioClip.Create("clip", samples, channels, 44100, true, false);
    43.         clip.SetData(audioSamples, 0);
    44.         return clip;
    45.     }
    46.     /// <summary>
    47.     /// Takes a byte array and returns a float[] of samples
    48.     /// </summary>
    49.     /// <param name="array">The array of bytes from file</param>
    50.     private static float[] BytesToFloats(byte[] array)
    51.     {
    52.         //1 Short = 2 bytes, so divide by 2
    53.         float[] floatArray = new float[array.Length / 2];
    54.         //Iterate through and populate array
    55.         for (int i = 0; i < floatArray.Length; i++)
    56.         {
    57.             //Convert each set of 2 bytes into a short and scale that to be in the range of -1, 1
    58.             floatArray[i] = (BitConverter.ToInt16(array, i * 2) / (float)(short.MaxValue));
    59.         }
    60.         return floatArray;
    61.     }
    62. }
    63.  
     
    mgear likes this.
  3. gregzo

    gregzo

    Joined:
    Dec 17, 2011
    Posts:
    795
    Hi!

    Sorry to be a party pooper, but your implementation of a wav parser is both very much unoptimized and dangerous.
    If it works for you in a very specific use case, and you do not care about optimization, no problem. But I would strongly advise other readers not to base their own wav parsers on this.

    Parsing audio is quite difficult at first - I really don't mean to belittle your efforts. It just should be noted that the code posted here is not safe.

    Apoligies in advance, please do not take this personally!

    Gregzo
     
  4. Bypp

    Bypp

    Joined:
    Jun 4, 2014
    Posts:
    29
    While it is good that you point out that the code above might not be optimized/dangerous, it would be even better if you told us what is wrong with it. Why is it not optimized ? Why is it dangerous ?

    We might be able to make this better for everyone !
     
  5. gregzo

    gregzo

    Joined:
    Dec 17, 2011
    Posts:
    795
    Sure! Time is scarce, if there's demand I'll happily post some code.

    For now, quick summary:

    1) First, one should parse the header and extract all important information: how many channels, what sample rate, bit depth( the OP wouldn't have had this problem had he parsed bit depth correctly - wav files can be 8, 16, 24 32 bits... ), number of frames etc...

    2) A parser doesn't HAVE to support all formats, but should throw exceptions when it encounters formats it won't handle

    3) Async is best. Calling the OP's parser from the main thread will block everything for a potentially substantial amount of time if the file is large. .NET has a very handy BackgroundWorker class which handles threading issues and is very nice to work with even without much prior experience in threading.

    4) So many allocations! Using the FileStream class could reduce that a LOT, enabling loading of larger files without trashing memory.

    5) Performance: BitConverter is slow. Much faster to use Buffer.BlockCopy to copy from byte[] to Int16[], and then to do the conversion to float sample by sample.

    These are the most obvious points, IMO.

    Cheers,

    Gregzo
     
  6. Bypp

    Bypp

    Joined:
    Jun 4, 2014
    Posts:
    29
    Much better ! Thanks !
     
  7. SanderAKALego

    SanderAKALego

    Joined:
    Jul 17, 2010
    Posts:
    32
    @gregzo
    You really seem to know your stuff, where were you when I first created the thread?

    Anyway I optimised the code a little bit, using your suggestion. Also made it throw some excptions when bit depth didn't match and etc. I also adopted your suggestion about using a backgroundworker for async loading, which seems to work great.

    However I don't know what you mean about number of frames.

    And about allocations, I switched to using FileStream for reading bytes, however I don't quite see how it reduces number of allocations as I still need to read the entire file, if you could explain in detail what you mean that would be great.

    As it is now I think it's somewhat fast (It's certainly a lot faster then what it was). Loading a rather large file (101MB) takes about half a second, and half of that again comes from the AudioClip.Create function, so it's fast enough for my needs.

    One problem I'm having though is that the AudioClips that I'm loading are 4 times as big as their filesize, and I'm not quite sure where to look on how to fix that.

    Here's the code how it looks now.
    Code (CSharp):
    1. using UnityEngine;
    2. using System.Collections;
    3. using System.IO;
    4. using System;
    5. using System.ComponentModel;
    6. public class SoundLoader {
    7.  
    8.     public static BackgroundWorker bw = new BackgroundWorker();
    9.  
    10.     public static void LoadWavWorker(object sender, DoWorkEventArgs e)
    11.     {
    12.         //Cast values
    13.         AudioClipValues values = (AudioClipValues)e.Argument;
    14.  
    15.         //Debug.Log("Loading wav file at " + values.Filepath + ".wav");
    16.         //Define file path
    17.         string file = values.Filepath + ".wav";
    18.         FileStream fs = null;
    19.         //Create a small buffer to hold some temporary information
    20.         byte[] wav = new byte[44];
    21.         try
    22.         {
    23.             fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read);
    24.         }
    25.         catch (FileNotFoundException ie)
    26.         {
    27.             //Debug.Log(ie.Message + ie.StackTrace);
    28.             e.Cancel = true;
    29.         }
    30.         //Read header
    31.         fs.Read(wav, 0, 44);
    32.      
    33.         //Get number of channels
    34.         int channels = BitConverter.ToInt16(wav, 22);
    35.         if (channels > 2)
    36.         {
    37.             throw new NotSupportedException("File formats with more than 2 channels is not supported.");
    38.         }
    39.         //Report some progress
    40.         bw.ReportProgress(10);
    41.         //Get frequency
    42.         int frequency = BitConverter.ToInt32(wav, 24);
    43.  
    44.         //Get number of bits per sample
    45.         int bitsPerSample = BitConverter.ToInt16(wav, 34);
    46.         if (bitsPerSample != 16)
    47.         {
    48.             throw new NotSupportedException("Only 16 bit WAV files are supported, this file is: " + bitsPerSample + " bits.");
    49.         }
    50.         //Get size of subchunk2
    51.         Int32 chunkSize2 = BitConverter.ToInt32(wav, 40);
    52.  
    53.         //We no longer need wav array, clean it up
    54.         wav = null;
    55.  
    56.         // We can derive number of samples from chunkSize
    57.         int samples = 0;
    58.         if (bitsPerSample == 16)
    59.             samples = chunkSize2;
    60.         else
    61.             //Wavs with more than 2 channels are pretty rare so just throw an exception (or if num channels was less than 1 for some reason)
    62.             throw new NotSupportedException("This file format is not supported (too many channels or too few) Number of channels: " + channels);
    63.         //Report some progress
    64.         bw.ReportProgress(40);
    65.         //Create sampleblock
    66.         Int16[] audioData = new short[samples];
    67.         //Data block
    68.         byte[] block = new byte[samples];
    69.         //Read block
    70.         fs.Read(block, 0, samples);
    71.         //We are done with stream
    72.         //Clean it up
    73.         fs.Dispose();
    74.         fs.Close();
    75.         //Copy block over to audioData array
    76.         Buffer.BlockCopy(block, 0, audioData, 0, samples);
    77.         block = null;
    78.         //Convert int16 samples to floats samples
    79.         float[] audioSamples = Int16ToFloats(audioData);
    80.         //Report some progress
    81.         bw.ReportProgress(80);
    82.         //Get data object
    83.         AudioClipData data = values.data;
    84.         //Fill with data
    85.         data.Name = values.ClipName;
    86.         data.Samples = samples;
    87.         data.Channels = channels;
    88.         data.Frequency = frequency;
    89.         data.Sound3D = true;
    90.         data.Stream = false;
    91.         data.AudioSamples = audioSamples;
    92.         //Report some progress
    93.         bw.ReportProgress(100);    
    94.         //Return object
    95.         e.Result = data;
    96.     }
    97.     private static float[] Int16ToFloats(Int16[] array)
    98.     {
    99.         float[] floatArray = new float[array.Length];
    100.         for (int i = 0; i < floatArray.Length; i++)
    101.         {
    102.             floatArray[i] = ((float)array[i] / short.MaxValue);
    103.         }
    104.         return floatArray;
    105.     }
    106. }
    107. public class AudioClipData
    108. {
    109.     public string Name = "Clip";
    110.     public int Samples = 0;
    111.     public int Channels = 1;
    112.     public int Frequency = 44100;
    113.     public bool Sound3D = true;
    114.     public bool Stream = false;
    115.     public float[] AudioSamples;
    116. }
     
    Last edited: Jul 10, 2014
  8. gregzo

    gregzo

    Joined:
    Dec 17, 2011
    Posts:
    795
    Hi,

    Terminology:

    'Sample' refers to a single value, 'frame' to a single time unit. If the audio data is stereo, each frame will have 2 samples.

    One problem is that unity doesn't adopt this terminology, so the API is sometimes ambiguous. In AudioClip.Create, the length parameter should be a number of frames, not of samples.

    Your code:

    When you calculate the number of samples, you equate it to the number of bytes in the data chunk of the file. As each 16 bit sample needs 2 bytes, this is wrong.

    bytesPerSample = bitDepth / 8
    bytesPerFrame = BytesPerSample * nbOfChannels
    framesInFile = dataChunkLength / bytesPerFrame

    Other random observations:

    - you are calling Close after Dispose on FileStream. Always call Dispose last!
    - I had forgotten that you cannot call AudioClip.SetData from another thread than the main thread. This is very unfortunate, as it makes it much more complicated to reduce allocations. The idea is simple: work in small chunks in a while loop, so that you don't need to allocate huge arrays for data that is just transient. The lazy way would be to use a coroutine instead of threading, the complicated way would have the worker thread waiting on the main thread to set the converted data to the audioclip before resuming conversion on the next chunk.

    One question: why do you need to do this without the help of WWW? It's a super convoluted way to load an AudioClip, especially considering that AudioClips store data as Int16 behind the scenes.

    To summarize, you're converting Int16 to float, but when you call SetData, these floats get converted back to Int16.
     
    Last edited: Jul 11, 2014
  9. arutsuyo

    arutsuyo

    Joined:
    Jan 8, 2014
    Posts:
    2
    I'm currently working on the same problem, but working from a SQLite database, having to pull the blob as a byte[]. How would you use WWW to make it easier, I'm curious?
    So far my code is as follows: not nearly as complete as the one above as I just found this thread. I'll be doing adjustments, but any advice would be very much appreciated!


    Code (CSharp):
    1. byte [] wav = ( byte [] ) reader [ "Sound" ];
    2.             float[] stream = ConvertByteToFloatForWave( wav );
    3.             AudioClip clip = AudioClip.Create( ( string ) reader [ "Name" ], stream.Length, wav[22], 44100, false, false );
    4.             clip.SetData( stream, 0 );
    5.  
    6. private float [] ConvertByteToFloatForWave ( byte [] array )
    7.     {
    8.         float[] floatArr = new float [ array.Length / 4 ];
    9.         for ( int i = 0; i < floatArr.Length; i++ )
    10.         {
    11.             if(i == 0)
    12.             {
    13.                 if ( System.BitConverter.IsLittleEndian )
    14.                     System.Array.Reverse( array, i * 4, 4 );
    15.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    16.             }
    17.             else if(i == 1)
    18.             {
    19.                 if ( !System.BitConverter.IsLittleEndian )
    20.                     System.Array.Reverse( array, i * 4, 4 );
    21.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    22.             }
    23.             else if ( i == 2 || i ==3 )
    24.             {
    25.                 if ( System.BitConverter.IsLittleEndian )
    26.                     System.Array.Reverse( array, i * 4, 4 );
    27.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    28.             }
    29.             else if ( i >= 4 || i <= 10 )
    30.             {
    31.                 if ( !System.BitConverter.IsLittleEndian )
    32.                     System.Array.Reverse( array, i * 4, 4 );
    33.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    34.             }
    35.             else if ( i == 11 )
    36.             {
    37.                 if ( System.BitConverter.IsLittleEndian )
    38.                     System.Array.Reverse( array, i * 4, 4 );
    39.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    40.             }
    41.             else if ( i > 11 )
    42.             {
    43.                 if ( !System.BitConverter.IsLittleEndian )
    44.                     System.Array.Reverse( array, i * 4, 4 );
    45.                 floatArr [ i ] = System.BitConverter.ToSingle( array, i * 4 ) / 0x80000000;
    46.             }
    47.         }
    48.         return floatArr;
    49.     }
    My sound is only half working. I can hear it somewhat as it should be, but not complete.
     
  10. gregzo

    gregzo

    Joined:
    Dec 17, 2011
    Posts:
    795
    Mmm... All these checks in a potentially huge for loop, not pretty! And you're including the header in your float array, not nice.

    If your SQLite database can return the path to the audio files, just use WWW along with that. For now, your parsing code is... interesting!

    Cheers,

    Gregzo