AR Design
UBC EML collab with UBC SALA - visualizing IoT data in AR
DictationInputManager.cs
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License. See LICENSE in the project root for license information.
3 
4 using System.Collections;
5 using UnityEngine;
6 
7 #if UNITY_WSA || UNITY_STANDALONE_WIN
8 using System.Text;
9 using UnityEngine.Windows.Speech;
10 #endif
11 
12 namespace HoloToolkit.Unity.InputModule
13 {
18  public class DictationInputManager : Singleton<DictationInputManager>, IInputSource
19  {
20 #if UNITY_WSA || UNITY_STANDALONE_WIN
/// <summary>
/// Recognizer that converts speech to text. Created in Awake and disposed in OnDestroy.
/// </summary>
private static DictationRecognizer dictationRecognizer;

/// <summary>
/// Audio clip returned by Microphone.Start for the current recording session.
/// </summary>
private static AudioClip dictationAudioClip;

/// <summary>
/// Device name handed to every Microphone call.
/// The empty string is used so that the default microphone is selected.
/// </summary>
private static readonly string DeviceName = string.Empty;

/// <summary>
/// Sampling rate used when recording; filled in Awake with the maximum
/// frequency reported by Microphone.GetDeviceCaps.
/// </summary>
private static int samplingRate;

/// <summary>
/// Text confirmed by the recognizer so far in the current session.
/// Reset to null when a session completes or errors out.
/// </summary>
private static StringBuilder textSoFar;

/// <summary>
/// Most recent string handed to the InputManager dictation events
/// (hypothesis, result, completion or error text).
/// </summary>
private static string dictationResult;

/// <summary>
/// True from the moment StartRecording is accepted until StopRecording is accepted.
/// </summary>
public static bool IsListening { get; private set; }

// True while a start or stop request is still in progress; blocks new start/stop requests.
private static bool isTransitioning;

// Set once LateUpdate has reported a failed recognizer, so the error is raised only once.
private static bool hasFailed;

// True when StartRecording pushed a modal input handler that the stop path must pop again.
private static bool hasListener;
57 #endif
58 
59  #region Unity Methods
60 
61 #if UNITY_WSA || UNITY_STANDALONE_WIN
/// <summary>
/// Creates the dictation recognizer, subscribes to its events and reads the
/// sampling rate that will later be used for microphone recording.
/// </summary>
protected override void Awake()
{
    base.Awake();

    dictationResult = string.Empty;

    var recognizer = new DictationRecognizer();
    dictationRecognizer = recognizer;

    recognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;
    recognizer.DictationResult += DictationRecognizer_DictationResult;
    recognizer.DictationComplete += DictationRecognizer_DictationComplete;
    recognizer.DictationError += DictationRecognizer_DictationError;

    // Only the maximum frequency of the default microphone is kept;
    // the minimum is required by the API signature but is not used.
    int unusedMinimumFrequency;
    Microphone.GetDeviceCaps(DeviceName, out unusedMinimumFrequency, out samplingRate);
}
78 
/// <summary>
/// Per-frame watchdog: stops dictation when the microphone has stopped on its own,
/// and reports a failed recognizer exactly once.
/// </summary>
private void LateUpdate()
{
    // The microphone can stop by itself (e.g. when its recording time runs out);
    // in that case the recognizer has to be stopped manually.
    bool microphoneWentSilent = IsListening && !Microphone.IsRecording(DeviceName);
    if (microphoneWentSilent && dictationRecognizer.Status == SpeechSystemStatus.Running)
    {
        StartCoroutine(StopRecording());
    }

    if (hasFailed)
    {
        // The failure has already been reported.
        return;
    }

    if (dictationRecognizer.Status == SpeechSystemStatus.Failed)
    {
        hasFailed = true;
        InputManager.Instance.RaiseDictationError(Instance, 0, "Dictation recognizer has failed!");
    }
}
93 
/// <summary>
/// Releases the dictation recognizer when this component is destroyed.
/// </summary>
protected override void OnDestroy()
{
    // The recognizer is only created in Awake; guard against it never having been created.
    if (dictationRecognizer != null)
    {
        // Detach the handlers subscribed in Awake so the disposed recognizer
        // no longer holds references to them.
        dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
        dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
        dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
        dictationRecognizer.DictationError -= DictationRecognizer_DictationError;

        dictationRecognizer.Dispose();
    }

    base.OnDestroy();
}
100 #endif
101 
102  #endregion // Unity Methods
103 
104 
/// <summary>
/// Starts the dictation recognizer and begins recording audio from the default microphone.
/// Intended to be run as a coroutine.
/// </summary>
/// <param name="listener">Optional GameObject pushed as a modal input handler for the duration of the session.</param>
/// <param name="initialSilenceTimeout">Value assigned to the recognizer's InitialSilenceTimeoutSeconds.</param>
/// <param name="autoSilenceTimeout">Value assigned to the recognizer's AutoSilenceTimeoutSeconds.</param>
/// <param name="recordingTime">Length, in seconds, passed to Microphone.Start.</param>
/// <returns>
/// The coroutine enumerator on supported platforms; null on platforms where dictation is unsupported.
/// </returns>
public static IEnumerator StartRecording(GameObject listener = null, float initialSilenceTimeout = 5f, float autoSilenceTimeout = 20f, int recordingTime = 10)
{
#if UNITY_WSA || UNITY_STANDALONE_WIN
    if (IsListening || isTransitioning)
    {
        Debug.LogWarning("Unable to start recording");
        yield break;
    }

    IsListening = true;
    isTransitioning = true;

    if (listener != null)
    {
        hasListener = true;
        InputManager.Instance.PushModalInputHandler(listener);
    }

    // The phrase recognition system is shut down before dictation starts.
    // Remember whether we did so, so it can be restored if dictation fails to start.
    bool phraseSystemWasRunning = PhraseRecognitionSystem.Status == SpeechSystemStatus.Running;
    if (phraseSystemWasRunning)
    {
        PhraseRecognitionSystem.Shutdown();
    }

    while (PhraseRecognitionSystem.Status == SpeechSystemStatus.Running)
    {
        yield return null;
    }

    dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
    dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;
    dictationRecognizer.Start();

    // Wait for the recognizer to leave the Stopped state.
    while (dictationRecognizer.Status == SpeechSystemStatus.Stopped)
    {
        yield return null;
    }

    // Checked after the wait so that a failure reported immediately by Start()
    // and a failure that only shows up while waiting are both caught.
    if (dictationRecognizer.Status == SpeechSystemStatus.Failed)
    {
        // Raise the error while the modal listener (if any) is still registered.
        InputManager.Instance.RaiseDictationError(Instance, 0, "Dictation recognizer failed to start!");

        // Roll back everything this call changed; otherwise IsListening and
        // isTransitioning stay set and every later start/stop request is rejected.
        if (hasListener)
        {
            InputManager.Instance.PopModalInputHandler();
            hasListener = false;
        }

        if (phraseSystemWasRunning)
        {
            PhraseRecognitionSystem.Restart();
        }

        IsListening = false;
        isTransitioning = false;
        yield break;
    }

    // Start recording from the microphone.
    dictationAudioClip = Microphone.Start(DeviceName, false, recordingTime, samplingRate);
    textSoFar = new StringBuilder();
    isTransitioning = false;
#else
    Debug.LogWarning("Unable to start recording! Dictation is unsupported for this platform.");
    return null;
#endif
}
165 
/// <summary>
/// Ends the recording session by delegating to the singleton instance's stop routine.
/// Intended to be run as a coroutine.
/// </summary>
/// <returns>The coroutine enumerator.</returns>
public static IEnumerator StopRecording()
{
    IEnumerator stopRoutine = Instance.StopRecordingInternal();
    yield return stopRoutine;
}
173 
/// <summary>
/// Instance-side implementation of StopRecording: ends microphone capture, stops the
/// recognizer if it is running, and kicks off the coroutine that finishes the shutdown.
/// </summary>
/// <returns>
/// The coroutine enumerator on supported platforms; null elsewhere.
/// </returns>
private IEnumerator StopRecordingInternal()
{
#if UNITY_WSA || UNITY_STANDALONE_WIN
    bool canStop = IsListening && !isTransitioning;
    if (!canStop)
    {
        Debug.LogWarning("Unable to stop recording");
        yield break;
    }

    // Mark the stop as in progress; FinishStopRecording clears isTransitioning again.
    isTransitioning = true;
    IsListening = false;

    // Undo the modal handler registration made by StartRecording, if there was one.
    if (hasListener)
    {
        InputManager.Instance.PopModalInputHandler();
        hasListener = false;
    }

    Microphone.End(DeviceName);

    bool recognizerRunning = dictationRecognizer.Status == SpeechSystemStatus.Running;
    if (recognizerRunning)
    {
        dictationRecognizer.Stop();
    }

    StartCoroutine(FinishStopRecording());
#else
    return null;
#endif
}
207 
/// <summary>
/// Waits until the dictation recognizer is no longer running, then restarts the
/// phrase recognition system and marks the stop transition as finished.
/// </summary>
/// <returns>
/// The coroutine enumerator on supported platforms; null elsewhere.
/// </returns>
private IEnumerator FinishStopRecording()
{
#if UNITY_WSA || UNITY_STANDALONE_WIN
    while (true)
    {
        if (dictationRecognizer.Status != SpeechSystemStatus.Running)
        {
            break;
        }

        // Still running: check again next frame.
        yield return null;
    }

    PhraseRecognitionSystem.Restart();
    isTransitioning = false;
#else
    return null;
#endif
}
222 
223  #region Dictation Recognizer Callbacks
224 #if UNITY_WSA || UNITY_STANDALONE_WIN
225 
/// <summary>
/// Handles a recognizer hypothesis: combines the text confirmed so far with the
/// tentative text and forwards it as a dictation hypothesis event.
/// </summary>
/// <param name="text">The currently hypothesized recognition.</param>
private static void DictationRecognizer_DictationHypothesis(string text)
{
    // textSoFar is null before StartRecording finishes and after a session completes
    // or errors out; treat that as "nothing confirmed yet" instead of throwing.
    string confirmedText = textSoFar != null ? textSoFar.ToString() : string.Empty;

    // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event.
    dictationResult = confirmedText + " " + text + "...";

    InputManager.Instance.RaiseDictationHypothesis(Instance, 0, dictationResult);
}
237 
/// <summary>
/// Handles a confirmed recognition: appends it to the accumulated text and
/// forwards the full text as a dictation result event.
/// </summary>
/// <param name="text">The text the recognizer settled on.</param>
/// <param name="confidence">Confidence reported by the recognizer (not used here).</param>
private static void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
{
    // textSoFar is null before StartRecording finishes and after a session completes
    // or errors out; start a fresh buffer rather than throwing.
    if (textSoFar == null)
    {
        textSoFar = new StringBuilder();
    }

    textSoFar.Append(text).Append(". ");

    dictationResult = textSoFar.ToString();

    InputManager.Instance.RaiseDictationResult(Instance, 0, dictationResult);
}
251 
/// <summary>
/// Handles the end of a dictation session: forwards the final text and audio clip
/// as a dictation complete event, then clears the per-session text state.
/// </summary>
/// <param name="cause">Reason the recognizer gave for completing.</param>
private static void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
{
    // A timeout means the user has been silent for too long: stop the microphone
    // and report a timeout message instead of the dictated text.
    bool timedOut = cause == DictationCompletionCause.TimeoutExceeded;
    if (timedOut)
    {
        Microphone.End(DeviceName);
        dictationResult = "Dictation has timed out. Please try again.";
    }

    InputManager.Instance.RaiseDictationComplete(Instance, 0, dictationResult, dictationAudioClip);

    dictationResult = string.Empty;
    textSoFar = null;
}
271 
/// <summary>
/// Handles a recognizer error: forwards the error text together with its HRESULT
/// as a dictation error event, then clears the per-session text state.
/// </summary>
/// <param name="error">Error description supplied by the recognizer.</param>
/// <param name="hresult">HRESULT code supplied by the recognizer.</param>
private static void DictationRecognizer_DictationError(string error, int hresult)
{
    string message = error + "\nHRESULT: " + hresult;
    dictationResult = message;

    InputManager.Instance.RaiseDictationError(Instance, 0, message);

    dictationResult = string.Empty;
    textSoFar = null;
}
285 #endif
286  #endregion // Dictation Recognizer Callbacks
287 
288  #region IInputSource Implementation
289 
/// <summary>
/// Reports the kind of this input source, which is always voice.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="sourceKind">Always set to InteractionSourceInfo.Voice.</param>
/// <returns>Always true.</returns>
public bool TryGetSourceKind(uint sourceId, out InteractionSourceInfo sourceKind)
{
    const bool kindIsKnown = true;
    sourceKind = InteractionSourceInfo.Voice;
    return kindIsKnown;
}
295 
/// <summary>
/// Returns whether this input source supports the specified input info type.
/// </summary>
/// <param name="sourceId">Identifier of the queried source.</param>
/// <param name="inputInfo">Input info flags to test for.</param>
/// <returns>True if any of the requested flags is among the supported ones.</returns>
public bool SupportsInputInfo(uint sourceId, SupportedInputInfo inputInfo)
{
    SupportedInputInfo supported = GetSupportedInputInfo(sourceId);
    SupportedInputInfo overlap = supported & inputInfo;
    return overlap != 0;
}
300 
/// <summary>
/// This source never provides a pointer position.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="position">Always set to Vector3.zero.</param>
/// <returns>Always false.</returns>
public bool TryGetPointerPosition(uint sourceId, out Vector3 position)
{
    const bool hasPosition = false;
    position = Vector3.zero;
    return hasPosition;
}
306 
/// <summary>
/// This source never provides a pointer rotation.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="rotation">Always set to Quaternion.identity.</param>
/// <returns>Always false.</returns>
public bool TryGetPointerRotation(uint sourceId, out Quaternion rotation)
{
    const bool hasRotation = false;
    rotation = Quaternion.identity;
    return hasRotation;
}
312 
/// <summary>
/// This source never provides a pointing ray.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="pointingRay">Always set to the default Ray value.</param>
/// <returns>Always false.</returns>
public bool TryGetPointingRay(uint sourceId, out Ray pointingRay)
{
    const bool hasRay = false;
    pointingRay = default(Ray);
    return hasRay;
}
318 
/// <summary>
/// This source never provides a grip position.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="position">Always set to Vector3.zero.</param>
/// <returns>Always false.</returns>
public bool TryGetGripPosition(uint sourceId, out Vector3 position)
{
    const bool hasPosition = false;
    position = Vector3.zero;
    return hasPosition;
}
324 
/// <summary>
/// This source never provides a grip rotation.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="rotation">Always set to Quaternion.identity.</param>
/// <returns>Always false.</returns>
public bool TryGetGripRotation(uint sourceId, out Quaternion rotation)
{
    const bool hasRotation = false;
    rotation = Quaternion.identity;
    return hasRotation;
}
330 
/// <summary>
/// Returns the input info that this input source can provide, which is none.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <returns>Always SupportedInputInfo.None.</returns>
public SupportedInputInfo GetSupportedInputInfo(uint sourceId)
{
    return SupportedInputInfo.None;
}
335 
/// <summary>
/// This source never provides thumbstick state.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="isPressed">Always set to false.</param>
/// <param name="position">Always set to Vector2.zero.</param>
/// <returns>Always false.</returns>
public bool TryGetThumbstick(uint sourceId, out bool isPressed, out Vector2 position)
{
    const bool hasThumbstick = false;
    position = Vector2.zero;
    isPressed = false;
    return hasThumbstick;
}
342 
/// <summary>
/// This source never provides touchpad state.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="isPressed">Always set to false.</param>
/// <param name="isTouched">Always set to false.</param>
/// <param name="position">Always set to Vector2.zero.</param>
/// <returns>Always false.</returns>
public bool TryGetTouchpad(uint sourceId, out bool isPressed, out bool isTouched, out Vector2 position)
{
    const bool hasTouchpad = false;
    isPressed = isTouched = false;
    position = Vector2.zero;
    return hasTouchpad;
}
350 
/// <summary>
/// This source never provides select state.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="isPressed">Always set to false.</param>
/// <param name="pressedAmount">Always set to 0.0.</param>
/// <returns>Always false.</returns>
public bool TryGetSelect(uint sourceId, out bool isPressed, out double pressedAmount)
{
    const bool hasSelect = false;
    pressedAmount = 0d;
    isPressed = false;
    return hasSelect;
}
357 
/// <summary>
/// This source never provides grasp state.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="isPressed">Always set to false.</param>
/// <returns>Always false.</returns>
public bool TryGetGrasp(uint sourceId, out bool isPressed)
{
    const bool hasGrasp = false;
    isPressed = false;
    return hasGrasp;
}
363 
/// <summary>
/// This source never provides menu button state.
/// </summary>
/// <param name="sourceId">Identifier of the queried source (not used).</param>
/// <param name="isPressed">Always set to false.</param>
/// <returns>Always false.</returns>
public bool TryGetMenu(uint sourceId, out bool isPressed)
{
    const bool hasMenu = false;
    isPressed = false;
    return hasMenu;
}
369 
370  #endregion // IInputSource Implementation
371  }
372 }
bool TryGetThumbstick(uint sourceId, out bool isPressed, out Vector2 position)
Singleton class that implements the DictationRecognizer to convert the user's speech to text...
bool TryGetPointerRotation(uint sourceId, out Quaternion rotation)
Returns the rotation of the input source, if available. Not all input sources support rotation inform...
bool TryGetSourceKind(uint sourceId, out InteractionSourceInfo sourceKind)
Input Manager is responsible for managing input sources and dispatching relevant events to the approp...
Definition: InputManager.cs:19
bool TryGetPointerPosition(uint sourceId, out Vector3 position)
Returns the position of the input source, if available. Not all input sources support positional info...
bool TryGetMenu(uint sourceId, out bool isPressed)
bool TryGetTouchpad(uint sourceId, out bool isPressed, out bool isTouched, out Vector2 position)
SupportedInputInfo
Flags used to indicate which input information is supported by an input source.
bool TryGetGripPosition(uint sourceId, out Vector3 position)
Returns the position of the input source, if available. Not all input sources support positional info...
bool SupportsInputInfo(uint sourceId, SupportedInputInfo inputInfo)
Returns whether the input source supports the specified input info type.
static T Instance
Returns the Singleton instance of the classes type. If no instance is found, then we search for an in...
Definition: Singleton.cs:26
static IEnumerator StopRecording()
Ends the recording session.
Interface for an input source. An input source can be anything that a user can use to interact with a...
Definition: IInputSource.cs:12
SupportedInputInfo GetSupportedInputInfo(uint sourceId)
Returns the input info that the input source can provide.
bool TryGetGripRotation(uint sourceId, out Quaternion rotation)
Returns the rotation of the input source, if available. Not all input sources support rotation inform...
bool TryGetGrasp(uint sourceId, out bool isPressed)
bool TryGetSelect(uint sourceId, out bool isPressed, out double pressedAmount)
static IEnumerator StartRecording(GameObject listener=null, float initialSilenceTimeout=5f, float autoSilenceTimeout=20f, int recordingTime=10)
Turns on the dictation recognizer and begins recording audio from the default microphone.
bool TryGetPointingRay(uint sourceId, out Ray pointingRay)
Returns the pointing ray of the input source, if available. Not all input sources support pointing in...
Singleton behaviour class, used for components that should only have one instance.
Definition: Singleton.cs:14