AR Design
UBC EML collab with UBC SALA - visualizing IoT data in AR
MicrophoneReceiver.cs
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License. See LICENSE in the project root for license information.
3 
4 using System;
5 using System.Threading;
6 using UnityEngine;
7 using HoloToolkit.Unity;
8 
9 namespace HoloToolkit.Sharing.VoiceChat
10 {
14  [RequireComponent(typeof(AudioSource))]
15  public class MicrophoneReceiver : MonoBehaviour
16  {
// Extractors for the fields packed into the 32-bit header word that
// OnMessageReceived reads from each audio message.
// NOTE(review): constructor arguments appear to be (bit mask, shift) - confirm against BitManipulator.
17  private readonly BitManipulator versionExtractor = new BitManipulator(0x7, 0); // 3 bits, 0 shift
18  private readonly BitManipulator audioStreamCountExtractor = new BitManipulator(0x38, 3); // 3 bits, 3 shift
19  private readonly BitManipulator channelCountExtractor = new BitManipulator(0x1c0, 6); // 3 bits, 6 shift
20  private readonly BitManipulator sampleRateExtractor = new BitManipulator(0x600, 9); // 2 bits, 9 shift
21  private readonly BitManipulator sampleTypeExtractor = new BitManipulator(0x1800, 11); // 2 bits, 11 shift
22  private readonly BitManipulator sampleCountExtractor = new BitManipulator(0x7fe000, 13); // 10 bits, 13 shift
23  private readonly BitManipulator codecTypeExtractor = new BitManipulator(0x1800000, 23); // 2 bits, 23 shift
24  private readonly BitManipulator sequenceNumberExtractor = new BitManipulator(0x7C000000, 26); // 5 bits (mask covers bits 26-30), 26 shift
25 
// Optional anchor. When it is set, OnMessageReceived transforms the main
// camera's position into this transform's space before computing speaker offsets.
26  public Transform GlobalAnchorTransform;
27 
/// <summary>
/// Per-stream speaker data captured from the most recent audio message.
/// </summary>
public class ProminentSpeakerInfo
{
    /// <summary>HRTF source id read from the audio message for this stream.</summary>
    public uint SourceId;

    /// <summary>Average amplitude value read from the audio message for this stream.</summary>
    public float AverageAmplitude;

    /// <summary>Speaker offset computed in OnMessageReceived (x forward, y right, z up).</summary>
    public Vector3 HrtfPosition;
}
34 
// Number of ProminentSpeakerInfo slots allocated in Awake; AddProminentSpeaker
// ignores any speaker beyond this count.
38  public const int MaximumProminentSpeakers = 4;
39 
// Number of valid entries in prominentSpeakerList. Reset to 0 at the start of
// every received message and on disconnect.
43  private int prominentSpeakerCount;
44 
// Pre-allocated speaker slots, reused for every message.
48  private ProminentSpeakerInfo[] prominentSpeakerList;
49 
// Adapter registered for AudioSamples messages; null until TryConnect succeeds.
50  private NetworkConnectionAdapter listener;
51 
// Held while OnMessageReceived writes to, and OnAudioFilterRead reads from,
// the audio buffers.
52  private readonly Mutex audioDataMutex = new Mutex();
53 
// Scale constants used with DropOffMaximumMetres / PanMaximumMetres when
// computing the speaker offset in OnMessageReceived.
54  private const float KDropOffMaximum = 5f;
55  private const float KPanMaximum = 5f;
56 
// Divisors for the forward (drop-off) and right/up (pan) components of the
// speaker offset. NOTE(review): a value of 0 would divide by zero - confirm valid range.
57  public float DropOffMaximumMetres = 5.0f;
58  public float PanMaximumMetres = 5.0f;
59 
// Lower bound, multiplied by KDropOffMaximum, enforced on the magnitude of the
// computed speaker offset.
60  public float MinimumDistance = .01f;
61 
// Scratch buffer that each channel's sample bytes are read into from the message.
62  private byte[] networkPacketBufferBytes;
// Received audio bytes waiting to be consumed by OnAudioFilterRead.
63  private CircularBuffer circularBuffer;
64 
65  #region DebugVariables
// Debug capture of the audio delivered by OnAudioFilterRead while SaveTestClip is set.
// NOTE(review): capacity presumably means 3 seconds of 48 kHz stereo 4-byte samples - confirm.
66  private readonly CircularBuffer testCircularBuffer = new CircularBuffer(48000 * 2 * 4 * 3, true);
// Lazily created child AudioSource that plays back TestClip.
67  private AudioSource testSource;
// Clip built in Update from the captured debug audio.
68  public AudioClip TestClip;
// Set to true to capture audio; Update clears it after the clip has been created.
69  public bool SaveTestClip;
70  #endregion
71 
/// <summary>
/// Allocates the prominent-speaker slots and the buffers used for incoming audio.
/// </summary>
private void Awake()
{
    // Create every slot up front so AddProminentSpeaker can reuse the instances.
    prominentSpeakerList = new ProminentSpeakerInfo[MaximumProminentSpeakers];
    for (int slot = 0; slot < prominentSpeakerList.Length; slot++)
    {
        prominentSpeakerList[slot] = new ProminentSpeakerInfo();
    }

    int initialPacketBytes = 4 * MicrophoneTransmitter.AudioPacketSize;
    networkPacketBufferBytes = new byte[initialPacketBytes];

    circularBuffer = new CircularBuffer(48000 * 4);
}
83 
/// <summary>
/// Registers this receiver for audio sample messages on the server connection.
/// Called from Update every frame; does nothing once a listener is registered.
/// </summary>
private void TryConnect()
{
    if (listener != null)
    {
        // Already registered.
        return;
    }

    try
    {
        SharingStage sharingStage = SharingStage.Instance;
        if (!sharingStage || sharingStage.Manager == null)
        {
            // Sharing is not up yet; try again next frame.
            return;
        }

        NetworkConnection connection = sharingStage.Manager.GetServerConnection();
        if (connection == null)
        {
            // No server connection available yet; try again next frame.
            return;
        }

        NetworkConnectionAdapter adapter = new NetworkConnectionAdapter();
        adapter.ConnectedCallback += OnConnected;
        adapter.DisconnectedCallback += OnDisconnected;
        adapter.ConnectionFailedCallback += OnConnectedFailed;
        adapter.MessageReceivedCallback += OnMessageReceived;

        connection.AddListener((byte)MessageID.AudioSamples, adapter);

        // Publish the adapter only after registration succeeded. Assigning it
        // earlier would leave a non-null listener behind on failure and
        // registration would never be retried.
        listener = adapter;

        Debug.Log("SpeakerController Start called");
    }
    catch (Exception ex)
    {
        Debug.Log("Exception: " + ex);
    }
}
112 
/// <summary>
/// Detaches this component's handlers from the listener adapter, if one was created.
/// </summary>
private void OnDestroy()
{
    if (listener == null)
    {
        // TryConnect never registered an adapter; nothing to detach.
        return;
    }

    listener.MessageReceivedCallback -= OnMessageReceived;
    listener.ConnectionFailedCallback -= OnConnectedFailed;
    listener.DisconnectedCallback -= OnDisconnected;
    listener.ConnectedCallback -= OnConnected;
}
123 
/// <summary>
/// Connection callback: starts audio playback once the session server connection succeeds.
/// </summary>
/// <param name="connection">Connection that was established (unused).</param>
private void OnConnected(NetworkConnection connection)
{
    Profile.BeginRange("SpeakerController.OnConnected");
    try
    {
        InternalStartSpeaker();
        Debug.Log("SpeakerController: Connection to session server succeeded!");
    }
    finally
    {
        // Always close the profiling range, even if starting playback throws.
        Profile.EndRange();
    }
}
131 
/// <summary>
/// Disconnection callback: forgets the current prominent speakers and stops playback.
/// </summary>
/// <param name="connection">Connection that was lost (unused).</param>
private void OnDisconnected(NetworkConnection connection)
{
    // No speaker data remains valid once the session is gone.
    prominentSpeakerCount = 0;
    InternalStopSpeaker();

    Debug.Log("SpeakerController: Session server disconnected!");
}
140 
/// <summary>
/// Connection-failure callback: stops audio playback and logs the failure.
/// </summary>
/// <param name="connection">Connection that failed (unused).</param>
private void OnConnectedFailed(NetworkConnection connection)
{
    // Nothing can be received without a connection, so stop playback.
    InternalStopSpeaker();

    Debug.Log("SpeakerController: Connection to session server failed!");
}
146 
/// <summary>
/// Starts playback on this object's AudioSource.
/// </summary>
private void InternalStartSpeaker()
{
    AudioSource speaker = GetComponent<AudioSource>();
    speaker.Play();
}
154 
/// <summary>
/// Stops playback on this object's AudioSource.
/// </summary>
private void InternalStopSpeaker()
{
    AudioSource speaker = GetComponent<AudioSource>();
    speaker.Stop();
}
162 
/// <summary>
/// Per-frame work: keeps trying to register the network listener, attaches this
/// object to the "mixamorig:Head" object when one exists (enabling spatialized
/// audio), and turns captured debug audio into a playable test clip.
/// </summary>
private void Update()
{
    TryConnect();

    AudioSource speaker = GetComponent<AudioSource>();
    GameObject head = GameObject.Find("mixamorig:Head");
    bool hasHead = head != null;

    if (hasHead)
    {
        // Follow the remote head so the voice appears to come from it.
        Transform self = transform;
        self.parent = head.transform;
        self.localPosition = Vector3.zero;
        self.localRotation = Quaternion.identity;
    }

    // Spatialize only while attached to a head; otherwise play as plain 2D audio.
    speaker.spatialize = hasHead;
    speaker.spatialBlend = hasHead ? 1f : 0f;

    #region debuginfo
    bool captureComplete = testCircularBuffer.UsedCapacity == testCircularBuffer.TotalCapacity;
    if (!SaveTestClip || !captureComplete)
    {
        return;
    }

    // The debug buffer holds raw bytes; four bytes make one float sample.
    const int bytesPerSample = 4;
    float[] samples = new float[testCircularBuffer.UsedCapacity / bytesPerSample];
    testCircularBuffer.Read(samples, 0, samples.Length * bytesPerSample);
    testCircularBuffer.Reset();

    // Two channels, so the per-channel sample count is half the array length.
    TestClip = AudioClip.Create("testclip", samples.Length / 2, 2, 48000, false);
    TestClip.SetData(samples, 0);

    if (!testSource)
    {
        GameObject clipHost = new GameObject("testclip");
        clipHost.transform.parent = transform;
        testSource = clipHost.AddComponent<AudioSource>();
    }

    testSource.PlayClip(TestClip, true);
    SaveTestClip = false;
    #endregion
}
203 
/// <summary>
/// Handles an incoming audio message: decodes the packed header, records the
/// prominent-speaker data of every stream that carries an HRTF source id, and
/// queues each channel's sample bytes in the circular buffer for playback.
/// </summary>
/// <param name="connection">Connection the message arrived on (unused).</param>
/// <param name="message">Message the audio packet is read from.</param>
public void OnMessageReceived(NetworkConnection connection, NetworkInMessage message)
{
    // Unused byte headerSize
    message.ReadByte();

    // All header fields are packed into a single 32-bit word.
    Int32 pack = message.ReadInt32();

    // Unused int version
    versionExtractor.GetBitsValue(pack);
    int audioStreamCount = audioStreamCountExtractor.GetBitsValue(pack);
    int channelCount = channelCountExtractor.GetBitsValue(pack);
    int sampleRate = sampleRateExtractor.GetBitsValue(pack);
    int sampleType = sampleTypeExtractor.GetBitsValue(pack);

    // Sample type 1 is treated as 16-bit samples; everything else as 32-bit floats.
    int bytesPerSample = sizeof(float);
    if (sampleType == 1)
    {
        bytesPerSample = sizeof(Int16);
    }

    int sampleCount = sampleCountExtractor.GetBitsValue(pack);
    int codecType = codecTypeExtractor.GetBitsValue(pack);

    // Unused int sequenceNumber
    sequenceNumberExtractor.GetBitsValue(pack);

    if (sampleRate == 0)
    {
        // Unused int extendedSampleRate
        message.ReadInt32();
    }

    try
    {
        audioDataMutex.WaitOne();

        // The speaker list is rebuilt from scratch for every message.
        prominentSpeakerCount = 0;

        for (int i = 0; i < audioStreamCount; i++)
        {
            float averageAmplitude = message.ReadFloat();
            UInt32 hrtfSourceID = (UInt32)message.ReadInt32();
            Vector3 hrtfPosition = new Vector3();
            Vector3 hrtfDirection = new Vector3();
            if (hrtfSourceID != 0)
            {
                hrtfPosition.x = message.ReadFloat();
                hrtfPosition.y = message.ReadFloat();
                hrtfPosition.z = message.ReadFloat();

                // The direction is not used below, but it must still be read
                // to advance past it in the message.
                hrtfDirection.x = message.ReadFloat();
                hrtfDirection.y = message.ReadFloat();
                hrtfDirection.z = message.ReadFloat();

                Vector3 cameraPosRelativeToGlobalAnchor = Vector3.zero;
                Vector3 cameraDirectionRelativeToGlobalAnchor = Vector3.zero;

                if (GlobalAnchorTransform != null)
                {
                    Transform cameraTransform = CameraCache.Main.transform;

                    cameraPosRelativeToGlobalAnchor = MathUtils.TransformPointFromTo(
                        null,
                        GlobalAnchorTransform,
                        cameraTransform.position);

                    // Transform the camera's forward vector. The previous code
                    // passed the camera position here, which is a point rather
                    // than a direction and produced a meaningless "forward" axis.
                    cameraDirectionRelativeToGlobalAnchor = MathUtils.TransformDirectionFromTo(
                        null,
                        GlobalAnchorTransform,
                        cameraTransform.forward);
                }

                // NOTE(review): normalizing a position before subtracting it looks
                // suspicious; kept as in the original - confirm the intent.
                cameraPosRelativeToGlobalAnchor.Normalize();
                cameraDirectionRelativeToGlobalAnchor.Normalize();

                Vector3 soundVector = hrtfPosition - cameraPosRelativeToGlobalAnchor;
                soundVector.Normalize();

                // x is forward
                float fltx = (KDropOffMaximum / DropOffMaximumMetres) * Vector3.Dot(soundVector, cameraDirectionRelativeToGlobalAnchor);
                // y is right
                Vector3 myRight = Quaternion.Euler(0, 90, 0) * cameraDirectionRelativeToGlobalAnchor;
                float flty = -(KPanMaximum / PanMaximumMetres) * Vector3.Dot(soundVector, myRight);
                // z is up
                Vector3 myUp = Quaternion.Euler(90, 0, 0) * cameraDirectionRelativeToGlobalAnchor;
                float fltz = (KPanMaximum / PanMaximumMetres) * Vector3.Dot(soundVector, myUp);

                // Hacky distance check so we don't get too close to source.
                Vector3 flt = new Vector3(fltx, flty, fltz);
                if (flt.magnitude < (MinimumDistance * KDropOffMaximum))
                {
                    flt = flt.normalized * MinimumDistance * KDropOffMaximum;
                    fltx = flt.x;
                    flty = flt.y;
                    fltz = flt.z;
                }

                AddProminentSpeaker(hrtfSourceID, averageAmplitude, fltx, flty, fltz);
            }

            for (int j = 0; j < channelCount; j++)
            {
                // if uncompressed, size = sampleCount
                Int16 size = (Int16)sampleCount;
                if (codecType != 0)
                {
                    // if compressed, size is first 2 bytes, sampleCount should be number of bytes after decompression
                    size = message.ReadInt16();
                }

                // Grow the scratch buffer only when it is too small, so packets
                // of varying size do not force a new allocation each time.
                int totalBytes = size * bytesPerSample;
                if (networkPacketBufferBytes.Length < totalBytes)
                {
                    networkPacketBufferBytes = new byte[totalBytes];
                }
                message.ReadArray(networkPacketBufferBytes, (uint)(totalBytes));

                if (codecType != 0)
                {
                    // in place decompression please - should fill out the data buffer
                    // ...
                }

                if (hrtfSourceID > 0)
                {
                    // TODO hrtf processing here
                }

                // Queue only the bytes that belong to this packet; the buffer may be larger.
                circularBuffer.Write(networkPacketBufferBytes, 0, totalBytes);
            }
        }
    }
    catch (Exception e)
    {
        Debug.LogError(e.Message);
    }
    finally
    {
        audioDataMutex.ReleaseMutex();
    }
}
348 
/// <summary>
/// Audio filter callback: fills <paramref name="data"/> from the received-audio
/// buffer and, while SaveTestClip is set, copies the result into the debug buffer.
/// </summary>
/// <param name="data">Sample array to fill.</param>
/// <param name="numChannels">Channel count supplied by the caller (unused).</param>
private void OnAudioFilterRead(float[] data, int numChannels)
{
    try
    {
        audioDataMutex.WaitOne();

        // The circular buffers are byte-addressed; each float sample is four bytes.
        int requestedBytes = data.Length * sizeof(float);
        circularBuffer.Read(data, 0, requestedBytes);

        if (SaveTestClip)
        {
            testCircularBuffer.Write(data, 0, requestedBytes);
        }
    }
    catch (Exception e)
    {
        Debug.LogError(e.Message);
    }
    finally
    {
        audioDataMutex.ReleaseMutex();
    }
}
370 
/// <summary>
/// Stores a speaker in the next free slot of the prominent-speaker list.
/// Does nothing when all MaximumProminentSpeakers slots are already in use.
/// </summary>
/// <param name="sourceID">HRTF source id of the speaker.</param>
/// <param name="averageAmplitude">Average amplitude reported for the speaker.</param>
/// <param name="posX">X component of the speaker offset.</param>
/// <param name="posY">Y component of the speaker offset.</param>
/// <param name="posZ">Z component of the speaker offset.</param>
private void AddProminentSpeaker(UInt32 sourceID, float averageAmplitude, float posX, float posY, float posZ)
{
    if (prominentSpeakerCount >= MaximumProminentSpeakers)
    {
        // List is full; drop the extra speaker.
        return;
    }

    // Reuse the slot instance that Awake pre-allocated.
    ProminentSpeakerInfo slot = prominentSpeakerList[prominentSpeakerCount];
    prominentSpeakerCount++;

    slot.SourceId = sourceID;
    slot.AverageAmplitude = averageAmplitude;
    slot.HrtfPosition = new Vector3(posX, posY, posZ);
}
383 
385  {
386  return prominentSpeakerCount;
387  }
388 
390  {
391  if (index < prominentSpeakerCount)
392  {
393  return prominentSpeakerList[index];
394  }
395  return null;
396  }
397  }
398 }
Helper class for transmitting data over network.
System.Action< NetworkConnection > ConnectedCallback
virtual void ReadArray(byte[] data, uint arrayLength)
static Vector3 TransformDirectionFromTo(Transform from, Transform to, Vector3 dirInFrom)
Takes a direction in the coordinate space specified by the "from" transform and transforms it to be t...
Definition: MathUtils.cs:59
Transmits data from your microphone to other clients connected to a SessionServer. Requires any receiving client to be running the MicrophoneReceiver script.
System.Action< NetworkConnection > ConnectionFailedCallback
ProminentSpeakerInfo GetProminentSpeaker(int index)
The purpose of this class is to provide a cached reference to the main camera. Calling Camera...
Definition: CameraCache.cs:12
static T Instance
Returns the Singleton instance of the classes type. If no instance is found, then we search for an in...
Definition: Singleton.cs:26
void OnMessageReceived(NetworkConnection connection, NetworkInMessage message)
Now that we've gotten a message, examine it and dissect the audio data.
static Camera Main
Returns a cached reference to the main camera and uses Camera.main if it hasn't been cached yet...
Definition: CameraCache.cs:20
Math Utilities class.
Definition: MathUtils.cs:13
Allows users of NetworkConnection to register to receive event callbacks without having their classes...
static void EndRange()
Definition: Profile.cs:48
SharingManager Manager
Sharing manager used by the application.
Definition: SharingStage.cs:97
int Read(Array dst, int dstWritePosBytes, int byteCount)
The SharingStage is in charge of managing the core networking layer for the application.
Definition: SharingStage.cs:14
System.Action< NetworkConnection, NetworkInMessage > MessageReceivedCallback
System.Action< NetworkConnection > DisconnectedCallback
int Write(Array src, int srcReadPosBytes, int byteCount)
Receives and plays voice data transmitted through the session server. This data comes from other clie...
static void BeginRange(string name)
Definition: Profile.cs:43
static Vector3 TransformPointFromTo(Transform from, Transform to, Vector3 ptInFrom)
Takes a point in the coordinate space specified by the "from" transform and transforms it to be the c...
Definition: MathUtils.cs:44
Helper class for bit manipulation.
virtual void AddListener(byte messageType, NetworkConnectionListener newListener)