testsAndMisc-archive/horatio/horatio_app/lib/services/speech_service.dart

195 lines
6.1 KiB
Dart

import 'dart:io';
import 'package:flutter/foundation.dart';
import 'package:flutter/services.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import 'package:record/record.dart';
import 'package:speech_to_text/speech_recognition_result.dart';
import 'package:speech_to_text/speech_to_text.dart';
/// Function type used to launch an external process.
///
/// Mirrors the two required positional parameters of [Process.run] so a
/// substitute implementation can be injected in place of the real one.
typedef ProcessRunner = Future<ProcessResult> Function(
  String executable,
  List<String> arguments,
);
/// Wraps speech recognition for all platforms.
///
/// On platforms with native speech_to_text support (Android, iOS, macOS, web),
/// uses the [SpeechToText] plugin directly for live transcription.
///
/// On Linux desktop, records audio via [AudioRecorder] and transcribes with
/// the Whisper CLI (`whisper`) which must be installed separately.
class SpeechService {
  /// Creates a [SpeechService].
  ///
  /// All parameters are optional and intended for testing only:
  ///
  /// * [speech] replaces the native speech_to_text plugin instance.
  /// * [recorder] replaces the recorder used in Whisper mode; when omitted a
  ///   recorder is created lazily on the first recording.
  /// * [processRunner] replaces [Process.run] for launching `which`/`whisper`.
  /// * [overrideIsLinux] forces (or suppresses) the Linux/Whisper code path
  ///   instead of consulting [Platform.isLinux].
  /// * [tempDirProvider] supplies the directory that receives the temporary
  ///   recording.
  SpeechService({
    @visibleForTesting SpeechToText? speech,
    @visibleForTesting AudioRecorder? recorder,
    @visibleForTesting ProcessRunner? processRunner,
    @visibleForTesting bool? overrideIsLinux,
    @visibleForTesting Future<Directory> Function()? tempDirProvider,
  })  : _speech = speech ?? SpeechToText(),
        _recorder = recorder,
        _processRunner = processRunner ?? Process.run,
        _overrideIsLinux = overrideIsLinux,
        _tempDirProvider = tempDirProvider ?? getTemporaryDirectory;

  /// Command name used when Whisper is resolved through `PATH`.
  static const _defaultWhisperCommand = 'whisper';

  final SpeechToText _speech;
  AudioRecorder? _recorder;
  final ProcessRunner _processRunner;
  final bool? _overrideIsLinux;
  final Future<Directory> Function() _tempDirProvider;

  bool _initialised = false;
  bool _usesWhisper = false;
  String _whisperCommand = _defaultWhisperCommand;
  bool _isRecording = false;
  String? _recordingPath;

  /// Whether speech recognition is available on this device.
  ///
  /// Reflects the outcome of the most recent [initialise] call.
  bool get isAvailable => _initialised;

  /// Whether the engine is currently listening.
  bool get isListening => _isRecording || _speech.isListening;

  /// Whether this service uses Whisper CLI (batch mode) rather than
  /// live streaming transcription.
  bool get usesWhisper => _usesWhisper;

  /// Human-readable setup instructions for the current platform.
  static String get setupInstructions => Platform.isLinux
      ? 'Install Whisper: pipx install openai-whisper\n'
          'Then restart the app.'
      : 'Speech recognition is not available on this platform.';

  /// Initialises the speech engine. Returns `true` if available.
  ///
  /// On Linux, looks for the `whisper` CLI tool. Microphone permission is
  /// not checked here; it is checked when a recording is started.
  /// On other platforms, tries the native speech_to_text plugin and treats a
  /// [MissingPluginException] as "not available".
  Future<bool> initialise() async {
    if (_overrideIsLinux ?? Platform.isLinux) {
      return _initialised = await _initLinux();
    }
    try {
      _initialised = await _speech.initialize();
    } on MissingPluginException {
      _initialised = false;
    }
    return _initialised;
  }

  /// Locates the Whisper CLI and remembers the command to invoke.
  ///
  /// Returns `true` and switches the service to Whisper mode when a usable
  /// executable is found, `false` otherwise.
  Future<bool> _initLinux() async {
    // Check that whisper CLI is available via PATH.
    try {
      final result = await _processRunner('which', [_defaultWhisperCommand]);
      if (result.exitCode == 0) {
        // stdout is only a String when the runner decodes it; if it is not,
        // or `which` printed nothing, the bare command name still resolves
        // through PATH because `which` reported success.
        final output = result.stdout;
        final resolved = output is String ? output.trim() : '';
        _whisperCommand = resolved.isEmpty ? _defaultWhisperCommand : resolved;
        _usesWhisper = true;
        return true;
      }
    } on ProcessException {
      // Process system itself is broken — no point trying fallback paths
      // because _processRunner won't be able to run whisper either.
      return false;
    }

    // Fallback: check common pipx / system installation paths by running
    // them directly. VS Code and other GUI launchers may not include
    // ~/.local/bin in PATH.
    final home = Platform.environment['HOME'];
    final candidates = [
      // Without HOME the per-user path would degrade to `/.local/bin/...`.
      if (home != null && home.isNotEmpty) '$home/.local/bin/whisper',
      '/usr/local/bin/whisper',
    ];
    for (final candidate in candidates) {
      try {
        final probe = await _processRunner(candidate, ['--help']);
        if (probe.exitCode == 0) {
          _whisperCommand = candidate;
          _usesWhisper = true;
          return true;
        }
      } on ProcessException {
        // Candidate does not exist or is not executable; try the next one.
        continue;
      }
    }
    return false;
  }

  /// Starts listening. Calls [onResult] with partial/final transcriptions.
  ///
  /// Does nothing unless [initialise] has succeeded. In Whisper mode this
  /// only starts an audio recording and [onResult] is never invoked; the
  /// transcription is returned by [stopListening] instead.
  Future<void> startListening({
    required void Function(SpeechRecognitionResult result) onResult,
  }) async {
    if (!_initialised) return;
    if (_usesWhisper) {
      await _startRecording();
    } else {
      await _speech.listen(onResult: onResult);
    }
  }

  /// Stops listening and returns transcribed text (Whisper) or empty string.
  ///
  /// For native speech_to_text, the result is already delivered via the
  /// `onResult` callback given to [startListening]. For Whisper, call this
  /// and use the returned text.
  Future<String> stopListening() async {
    if (_usesWhisper && _isRecording) {
      return _stopAndTranscribe();
    }
    await _speech.stop();
    return '';
  }

  /// Begins recording microphone audio to a WAV file in the temp directory.
  ///
  /// Returns without recording when microphone permission is not granted.
  Future<void> _startRecording() async {
    final recorder = _recorder ??= AudioRecorder();
    if (!await recorder.hasPermission()) return;

    final dir = await _tempDirProvider();
    final path = p.join(dir.path, 'horatio_recording.wav');
    await recorder.start(
      const RecordConfig(encoder: AudioEncoder.wav),
      path: path,
    );
    _recordingPath = path;
    _isRecording = true;
  }

  /// Stops the active recording and runs it through the Whisper CLI.
  ///
  /// Returns the trimmed transcript, or an empty string when there is no
  /// recording, Whisper fails, or the transcript cannot be read. The
  /// temporary audio and transcript files are removed in every case.
  Future<String> _stopAndTranscribe() async {
    try {
      await _recorder?.stop();
    } finally {
      // Never stay stuck in the "recording" state if stopping fails.
      _isRecording = false;
    }

    final path = _recordingPath;
    if (path == null || !File(path).existsSync()) return '';

    final audioFile = File(path);
    final txtFile = File('${p.withoutExtension(path)}.txt');
    try {
      final result = await _processRunner(_whisperCommand, [
        path,
        '--model',
        'base',
        '--output_format',
        'txt',
        // Whisper writes its output into the current working directory
        // unless told otherwise; pin it next to the audio file so the
        // transcript can be found at [txtFile].
        '--output_dir',
        p.dirname(path),
        '--language',
        'en',
      ]);
      if (result.exitCode != 0 || !txtFile.existsSync()) return '';
      return txtFile.readAsStringSync().trim();
    } on ProcessException {
      // Whisper could not be launched.
      return '';
    } on FileSystemException {
      // Transcript exists but could not be read.
      return '';
    } finally {
      // Clean up temp files on success and failure alike.
      _deleteQuietly(audioFile);
      _deleteQuietly(txtFile);
    }
  }

  /// Deletes [file] if it exists, ignoring file-system errors.
  static void _deleteQuietly(File file) {
    try {
      if (file.existsSync()) file.deleteSync();
    } on FileSystemException {
      // Best-effort cleanup of a temp file; nothing useful to do on failure.
    }
  }

  /// Releases resources.
  ///
  /// Stops the native engine and disposes the recorder if one was created.
  Future<void> dispose() async {
    await _speech.stop();
    await _recorder?.dispose();
    _isRecording = false;
  }
}