Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.SourceDataLine;
import javax.sound.sampled.TargetDataLine;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioFileFormat;
import com.google.genai.types.ReplicatedVoiceConfig;
import com.google.genai.types.VoiceConsentSignature;

/** Example of using the live module for a streaming audio conversation. */
public final class LiveAudioConversationAsync {
Expand All @@ -93,14 +103,15 @@ public final class LiveAudioConversationAsync {
private static TargetDataLine microphoneLine;
private static SourceDataLine speakerLine;
private static AsyncSession session;
private static ExecutorService micExecutor = Executors.newSingleThreadExecutor();
private static final ExecutorService micExecutor = Executors.newSingleThreadExecutor();
private static String promptString = null;
private static final ByteArrayOutputStream audioResponseBytes = new ByteArrayOutputStream();

/** Creates the parameters for sending an audio chunk. */
public static LiveSendRealtimeInputParameters createAudioContent(byte[] audioData) {

if (audioData == null) {
System.err.println("Error: Audio is null");
return null;
throw new IllegalArgumentException("Audio data cannot be null");
}

return LiveSendRealtimeInputParameters.builder()
Expand Down Expand Up @@ -155,31 +166,95 @@ public static void main(String[] args) throws LineUnavailableException {
System.out.println("Using Gemini Developer API");
}

String getModelFromArgs = null;
String voiceSamplePath = null;
String voiceConsentPath = null;
String voiceSignature = null;
promptString = null;

for (String arg : args) {
if (arg.startsWith("--model=")) {
getModelFromArgs = arg.substring("--model=".length());
} else if (arg.startsWith("--voice-sample=")) {
voiceSamplePath = arg.substring("--voice-sample=".length());
} else if (arg.startsWith("--voice-consent=")) {
voiceConsentPath = arg.substring("--voice-consent=".length());
} else if (arg.startsWith("--voice-signature=")) {
voiceSignature = arg.substring("--voice-signature=".length());
} else if (arg.startsWith("--prompt=")) {
promptString = arg.substring("--prompt=".length());
} else if (!arg.startsWith("--") && getModelFromArgs == null) {
getModelFromArgs = arg;
}
}

byte[] voiceSampleAudio = null;
byte[] consentAudio = null;

if (voiceSamplePath != null) {
try {
voiceSampleAudio = Files.readAllBytes(Paths.get(voiceSamplePath));
} catch (IOException e) {
throw new IllegalStateException("Failed to read voice sample", e);
}
if (voiceConsentPath == null && voiceSignature == null) {
throw new IllegalArgumentException(
"Either --voice-consent or --voice-signature must be provided when --voice-sample is"
+ " used.");
}
}
if (voiceConsentPath != null) {
try {
consentAudio = Files.readAllBytes(Paths.get(voiceConsentPath));
} catch (IOException e) {
throw new IllegalStateException("Failed to read voice consent", e);
}
}

final String modelId;
if (args.length != 0) {
modelId = args[0];
if (getModelFromArgs != null) {
modelId = getModelFromArgs;
} else if (client.vertexAI()) {
modelId = Constants.GEMINI_LIVE_MODEL_NAME;
} else {
modelId = Constants.GEMINI_LIVE_MODEL_NAME_PREVIEW;
}

// --- Audio Line Setup ---
microphoneLine = getMicrophoneLine();
speakerLine = getSpeakerLine();
if (promptString == null) {
microphoneLine = getMicrophoneLine();
speakerLine = getSpeakerLine();
}

// --- Live API Config for Audio ---
// Choice of ["Aoede", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Zephyr"]
String voiceName = "Aoede";

VoiceConfig.Builder voiceConfigBuilder = VoiceConfig.builder();
if (voiceSampleAudio != null) {
ReplicatedVoiceConfig.Builder repBuilder =
ReplicatedVoiceConfig.builder()
.mimeType("audio/wav")
.voiceSampleAudio(voiceSampleAudio);
if (consentAudio != null) {
repBuilder.consentAudio(consentAudio);
}
if (voiceSignature != null) {
repBuilder.voiceConsentSignature(
VoiceConsentSignature.builder().signature(voiceSignature));
}
voiceConfigBuilder.replicatedVoiceConfig(repBuilder);
} else {
voiceConfigBuilder.prebuiltVoiceConfig(
PrebuiltVoiceConfig.builder().voiceName(voiceName));
}

LiveConnectConfig config =
LiveConnectConfig.builder()
.responseModalities(Modality.Known.AUDIO)
.speechConfig(
SpeechConfig.builder()
.voiceConfig(
VoiceConfig.builder()
.prebuiltVoiceConfig(
PrebuiltVoiceConfig.builder().voiceName(voiceName)))
.voiceConfig(voiceConfigBuilder)
.languageCode("en-US"))
.realtimeInputConfig(
RealtimeInputConfig.builder()
Expand Down Expand Up @@ -215,8 +290,11 @@ public static void main(String[] args) throws LineUnavailableException {
System.out.println("Closing API session...");
session.close().get(5, TimeUnit.SECONDS); // Wait with timeout
System.out.println("API session closed.");
} catch (Exception e) {
} catch (ExecutionException | java.util.concurrent.TimeoutException e) {
System.err.println("Error closing API session: " + e.getMessage());
} catch (InterruptedException e) {
System.err.println("Interrupted while closing API session");
Thread.currentThread().interrupt();
}
}
// Close audio lines
Expand All @@ -232,19 +310,33 @@ public static void main(String[] args) throws LineUnavailableException {
session = client.async.live.connect(modelId, config).get();
System.out.println("Connected.");

// --- Start Audio Lines ---
microphoneLine.start();
speakerLine.start();
System.out.println("Microphone and speakers started. Speak now (Press Ctrl+C to exit)...");
if (session.setupComplete() != null && session.setupComplete().voiceConsentSignature().isPresent()) {
System.out.println(
"\n=== Voice Consent Signature Received ===\n"
+ session.setupComplete().voiceConsentSignature().get().signature().orElse("")
+ "\n========================================\n");
}

// --- Start Receiving Audio Responses ---
CompletableFuture<Void> receiveFuture =
session.receive(LiveAudioConversationAsync::handleAudioResponse);
System.err.println("Receive stream started."); // Add this line

// --- Start Sending Microphone Audio ---
CompletableFuture<Void> sendFuture =
CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor);
System.err.println("Receive stream started.");

CompletableFuture<Void> sendFuture;
if (promptString == null) {
// --- Start Audio Lines ---
microphoneLine.start();
speakerLine.start();
System.out.println("Microphone and speakers started. Speak now (Press Ctrl+C to exit)...");

// --- Start Sending Microphone Audio ---
sendFuture = CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor);
} else {
System.out.println("Sending prompt: " + promptString);
session.sendRealtimeInput(
LiveSendRealtimeInputParameters.builder().text(promptString).build()).get();
sendFuture = CompletableFuture.completedFuture(null);
}

// Keep the main thread alive. Wait for sending or receiving to finish (or
// error).
Expand Down Expand Up @@ -313,13 +405,19 @@ public static void handleAudioResponse(LiveServerMessage message) {
content -> {
// Handle interruptions from Gemini.
if (content.interrupted().orElse(false)) {
speakerLine.flush();
if (speakerLine != null && speakerLine.isOpen()) {
speakerLine.flush();
}
return; // Skip processing the rest of this message's audio.
}

// Handle Model turn completion.
if (content.turnComplete().orElse(false)) {
// The turn is over, no more audio will be sent for this turn.
if (promptString != null) {
saveWavFile();
System.out.println("Response received, exiting.");
System.exit(0);
}
return;
}

Expand All @@ -334,15 +432,45 @@ public static void handleAudioResponse(LiveServerMessage message) {
if (speakerLine != null && speakerLine.isOpen()) {
// Write audio data to the speaker
speakerLine.write(audioBytes, 0, audioBytes.length);
} else {
System.out.println(
"Received audio response chunk: " + audioBytes.length + " bytes.");
}
try {
audioResponseBytes.write(audioBytes);
} catch (IOException e) {
System.err.println("Failed to accumulate audio bytes: " + e.getMessage());
}
});

// If this is the last message of a generation, drain the buffer.
if (content.generationComplete().orElse(false)) {
speakerLine.drain();
if (speakerLine != null && speakerLine.isOpen()) {
speakerLine.drain();
}
}
});
}

private static void saveWavFile() {
byte[] audioData = audioResponseBytes.toByteArray();
if (audioData.length == 0) {
System.out.println("No audio data received to save.");
return;
}
try {
AudioInputStream ais = new AudioInputStream(
new ByteArrayInputStream(audioData),
SPEAKER_AUDIO_FORMAT,
audioData.length / SPEAKER_AUDIO_FORMAT.getFrameSize()
);
File outputFile = new File("output.wav");
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, outputFile);
System.out.println("Saved audio response to " + outputFile.getAbsolutePath());
} catch (IOException e) {
System.err.println("Failed to save WAV file: " + e.getMessage());
}
}

private LiveAudioConversationAsync() {}
}
5 changes: 4 additions & 1 deletion src/main/java/com/google/genai/AsyncLive.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.google.genai.types.LiveConnectConfig;
import com.google.genai.types.LiveConnectParameters;
import com.google.genai.types.LiveServerMessage;
import com.google.genai.types.LiveServerSetupComplete;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -283,11 +284,13 @@ private void handleIncomingMessage(String message) {
try {
LiveServerMessage initialResponse = LiveServerMessage.fromJson(message);
if (initialResponse.setupComplete().isPresent()) {
LiveServerSetupComplete setupComplete = initialResponse.setupComplete().get();
sessionFuture.complete(
new AsyncSession(
apiClient,
this,
initialResponse.setupComplete().get().sessionId().orElse(null)));
setupComplete.sessionId().orElse(null),
setupComplete));
} else {
sessionFuture.completeExceptionally(
new GenAiIOException(
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/com/google/genai/AsyncSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.genai.types.LiveClientContent;
import com.google.genai.types.LiveClientMessage;
import com.google.genai.types.LiveClientToolResponse;
import com.google.genai.types.LiveServerSetupComplete;
import com.google.genai.types.LiveSendClientContentParameters;
import com.google.genai.types.LiveSendRealtimeInputParameters;
import com.google.genai.types.LiveSendToolResponseParameters;
Expand All @@ -40,11 +41,17 @@ public final class AsyncSession {

private final AsyncLive.GenAiWebSocketClient websocket;
final String sessionId;
private final LiveServerSetupComplete setupComplete;

AsyncSession(ApiClient apiClient, AsyncLive.GenAiWebSocketClient websocket, String sessionId) {
AsyncSession(
ApiClient apiClient,
AsyncLive.GenAiWebSocketClient websocket,
String sessionId,
LiveServerSetupComplete setupComplete) {
this.apiClient = apiClient;
this.websocket = websocket;
this.sessionId = sessionId;
this.setupComplete = setupComplete;
}

/**
Expand Down Expand Up @@ -145,4 +152,8 @@ public CompletableFuture<Void> close() {
public String sessionId() {
return sessionId;
}

public LiveServerSetupComplete setupComplete() {
return setupComplete;
}
}
Loading
Loading