diff --git a/genai/snippets/pom.xml b/genai/snippets/pom.xml
index a27c899bc68..d6f397b1f77 100644
--- a/genai/snippets/pom.xml
+++ b/genai/snippets/pom.xml
@@ -58,6 +58,11 @@
       <artifactId>google-cloud-storage</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.openai</groupId>
+      <artifactId>openai-java</artifactId>
+      <version>4.6.1</version>
+    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
diff --git a/genai/snippets/resources/hello_gemini_are_you_there.wav b/genai/snippets/resources/hello_gemini_are_you_there.wav
new file mode 100644
index 00000000000..ef60adee2aa
Binary files /dev/null and b/genai/snippets/resources/hello_gemini_are_you_there.wav differ
diff --git a/genai/snippets/src/main/java/genai/live/LiveAudioWithTxt.java b/genai/snippets/src/main/java/genai/live/LiveAudioWithTxt.java
new file mode 100644
index 00000000000..1c62f1fed4b
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveAudioWithTxt.java
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_audio_with_txt]
+
+import static com.google.genai.types.Modality.Known.AUDIO;
+
+import com.google.genai.AsyncSession;
+import com.google.genai.Client;
+import com.google.genai.types.Blob;
+import com.google.genai.types.Content;
+import com.google.genai.types.LiveConnectConfig;
+import com.google.genai.types.LiveSendClientContentParameters;
+import com.google.genai.types.LiveServerContent;
+import com.google.genai.types.LiveServerMessage;
+import com.google.genai.types.Part;
+import com.google.genai.types.PrebuiltVoiceConfig;
+import com.google.genai.types.SpeechConfig;
+import com.google.genai.types.VoiceConfig;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.CompletableFuture;
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+
+public class LiveAudioWithTxt {
+
+  public static void main(String[] args) {
+    // TODO(developer): Replace these variables before running the sample.
+    String modelId = "gemini-2.0-flash-live-preview-04-09";
+    generateContent(modelId);
+  }
+
+  // Shows how to get voice responses from text input.
+  // Blocks until the model finishes its turn, then saves the collected audio to a WAV file.
+  public static void generateContent(String modelId) {
+    // Client Initialization. Once created, it can be reused for multiple requests.
+    try (Client client = Client.builder().location("us-central1").vertexAI(true).build()) {
+
+      LiveConnectConfig liveConnectConfig =
+          LiveConnectConfig.builder()
+              .responseModalities(AUDIO)
+              .speechConfig(
+                  SpeechConfig.builder()
+                      .voiceConfig(
+                          VoiceConfig.builder()
+                              .prebuiltVoiceConfig(
+                                  PrebuiltVoiceConfig.builder().voiceName("Aoede").build())
+                              .build())
+                      .build())
+              .build();
+
+      // Connects to the live server.
+      CompletableFuture<AsyncSession> sessionFuture =
+          client.async.live.connect(modelId, liveConnectConfig);
+
+      // Sends content and receives response from the live server.
+      sessionFuture
+          .thenCompose(
+              session -> {
+                // A future that completes when the model signals the end of its turn.
+                CompletableFuture<Void> turnComplete = new CompletableFuture<>();
+                // A buffer to collect all incoming audio chunks.
+                ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();
+                // Starts receiving messages from the live session.
+                session.receive(
+                    message -> handleLiveServerMessage(message, turnComplete, audioBuffer));
+                // Sends content to the live session and waits for the turn to complete.
+                return sendContent(session)
+                    .thenCompose(unused -> turnComplete)
+                    .thenAccept(
+                        unused -> {
+                          byte[] audio = audioBuffer.toByteArray();
+                          if (audio.length > 0) {
+                            saveAudioToFile(audio);
+                          }
+                        })
+                    .thenCompose(unused -> session.close());
+              })
+          .join();
+      // Example response:
+      // > Hello? Gemini, are you there?
+      // Successfully saved audio to...
+    }
+  }
+
+  // Sends a single user text turn to the live session and marks the turn as complete.
+  private static CompletableFuture<Void> sendContent(AsyncSession session) {
+    String textInput = "Hello? Gemini, are you there?";
+    System.out.printf("> %s\n", textInput);
+    return session.sendClientContent(
+        LiveSendClientContentParameters.builder()
+            .turns(Content.builder().role("user").parts(Part.fromText(textInput)).build())
+            .turnComplete(true)
+            .build());
+  }
+
+  // Writes the inline data response to the audio buffer and signals
+  // `turnComplete` when the model is done generating the response.
+  private static void handleLiveServerMessage(
+      LiveServerMessage message,
+      CompletableFuture<Void> turnComplete,
+      ByteArrayOutputStream audioBuffer) {
+    message
+        .serverContent()
+        .flatMap(LiveServerContent::modelTurn)
+        .flatMap(Content::parts)
+        .ifPresent(
+            parts ->
+                parts.forEach(
+                    part ->
+                        // When an audio blob is present, write its data to the buffer.
+                        // The (byte[], int, int) overload does not declare IOException.
+                        part.inlineData()
+                            .flatMap(Blob::data)
+                            .ifPresent(data -> audioBuffer.write(data, 0, data.length))));
+
+    // Checks if the model's turn is over.
+    if (message.serverContent().flatMap(LiveServerContent::turnComplete).orElse(false)) {
+      turnComplete.complete(null);
+    }
+  }
+
+  // Wraps the raw audio bytes in a WAV container and writes them to
+  // resources/output/output_audio.wav. I/O failures are reported on stderr.
+  private static void saveAudioToFile(byte[] audioData) {
+    // Defines the audio format: 24 kHz, 16-bit, mono, signed, little-endian.
+    AudioFormat format = new AudioFormat(24000, 16, 1, true, false);
+    Path outputPath = Paths.get("resources/output/output_audio.wav");
+    // Creates an AudioInputStream from the raw audio data and the format.
+    try (AudioInputStream audioStream =
+        new AudioInputStream(
+            new ByteArrayInputStream(audioData),
+            format,
+            audioData.length / format.getFrameSize())) {
+      // Makes sure the output directory exists before writing the file.
+      Path outputDir = outputPath.getParent();
+      if (outputDir != null) {
+        Files.createDirectories(outputDir);
+      }
+      AudioSystem.write(audioStream, AudioFileFormat.Type.WAVE, outputPath.toFile());
+      System.out.println("Successfully saved audio to: " + outputPath.toAbsolutePath());
+    } catch (IOException e) {
+      System.err.println("Error saving audio file: " + e.getMessage());
+    }
+  }
+}
+// [END googlegenaisdk_live_audio_with_txt]
diff --git a/genai/snippets/src/main/java/genai/live/LiveConversationAudioWithAudio.java b/genai/snippets/src/main/java/genai/live/LiveConversationAudioWithAudio.java
new file mode 100644
index 00000000000..c7714eea611
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveConversationAudioWithAudio.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_conversation_audio_with_audio]
+
+import static com.google.genai.types.Modality.Known.AUDIO;
+
+import com.google.genai.AsyncSession;
+import com.google.genai.Client;
+import com.google.genai.types.AudioTranscriptionConfig;
+import com.google.genai.types.Blob;
+import com.google.genai.types.Content;
+import com.google.genai.types.HttpOptions;
+import com.google.genai.types.LiveConnectConfig;
+import com.google.genai.types.LiveSendRealtimeInputParameters;
+import com.google.genai.types.LiveServerMessage;
+import com.google.genai.types.Transcription;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.CompletableFuture;
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+
+public class LiveConversationAudioWithAudio {
+
+  public static void main(String[] args) throws IOException {
+    // TODO(developer): Replace these variables before running the sample.
+    String modelId = "gemini-live-2.5-flash-preview-native-audio-09-2025";
+    generateContent(modelId);
+  }
+
+  // Shows how to get an audio response from an audio input.
+  // Throws IOException when the local input audio file cannot be read.
+  public static void generateContent(String modelId) throws IOException {
+    // Client Initialization. Once created, it can be reused for multiple requests.
+    try (Client client =
+        Client.builder()
+            .location("us-central1")
+            .vertexAI(true)
+            .httpOptions(HttpOptions.builder().apiVersion("v1beta1").build())
+            .build()) {
+
+      // Reads the local audio file.
+      byte[] audioBytes = Files.readAllBytes(Paths.get("resources/hello_gemini_are_you_there.wav"));
+
+      LiveConnectConfig liveConnectConfig =
+          LiveConnectConfig.builder()
+              // Set Model responses to be in Audio.
+              .responseModalities(AUDIO)
+              // To generate transcript for input audio.
+              .inputAudioTranscription(AudioTranscriptionConfig.builder().build())
+              // To generate transcript for output audio.
+              .outputAudioTranscription(AudioTranscriptionConfig.builder().build())
+              .build();
+
+      // Connects to the live server.
+      CompletableFuture<AsyncSession> sessionFuture =
+          client.async.live.connect(modelId, liveConnectConfig);
+
+      // Sends content and receives response from the live server.
+      sessionFuture
+          .thenCompose(
+              session -> {
+                // A future that completes when the model signals the end of its turn.
+                CompletableFuture<Void> turnComplete = new CompletableFuture<>();
+                // A buffer to collect all incoming audio chunks.
+                ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();
+                // Starts receiving messages from the live session.
+                session.receive(
+                    message -> handleLiveServerMessage(message, turnComplete, audioBuffer));
+                // Sends content to the live session and waits for the turn to complete.
+                return sendAudio(session, audioBytes)
+                    .thenCompose(unused -> turnComplete)
+                    .thenAccept(
+                        unused -> {
+                          byte[] audio = audioBuffer.toByteArray();
+                          if (audio.length > 0) {
+                            saveAudioToFile(audio);
+                          }
+                        })
+                    .thenCompose(unused -> session.close());
+              })
+          .join();
+      // Example output:
+      // Input transcription: Hello
+      // Input transcription: .
+      // Output transcription: Hello there!
+      // Output transcription: How can
+      // Output transcription: I help
+      // Output transcription: you today?
+      // Successfully saved audio to...
+    }
+  }
+
+  // Sends the audio bytes to the live session as realtime input.
+  // NOTE(review): the bytes come straight from a .wav file but are labelled as raw
+  // 16 kHz PCM; confirm the file's sample rate and that the WAV header is acceptable.
+  private static CompletableFuture<Void> sendAudio(AsyncSession session, byte[] audioBytes) {
+    return session.sendRealtimeInput(
+        LiveSendRealtimeInputParameters.builder()
+            .audio(Blob.builder().data(audioBytes).mimeType("audio/pcm;rate=16000").build())
+            .build());
+  }
+
+  // Prints the transcription and writes the inline data response to the audio buffer.
+  // Signals `turnComplete` when the model is done generating the response.
+  private static void handleLiveServerMessage(
+      LiveServerMessage message,
+      CompletableFuture<Void> turnComplete,
+      ByteArrayOutputStream audioBuffer) {
+
+    message
+        .serverContent()
+        .ifPresent(
+            serverContent -> {
+              serverContent
+                  .inputTranscription()
+                  .flatMap(Transcription::text)
+                  .ifPresent(text -> System.out.println("Input transcription: " + text));
+
+              serverContent
+                  .outputTranscription()
+                  .flatMap(Transcription::text)
+                  .ifPresent(text -> System.out.println("Output transcription: " + text));
+
+              serverContent
+                  .modelTurn()
+                  .flatMap(Content::parts)
+                  .ifPresent(
+                      parts ->
+                          parts.forEach(
+                              part ->
+                                  // When an audio blob is present, write its data to the buffer.
+                                  // The (byte[], int, int) overload does not declare IOException.
+                                  part.inlineData()
+                                      .flatMap(Blob::data)
+                                      .ifPresent(
+                                          data -> audioBuffer.write(data, 0, data.length))));
+
+              // Checks if the model's turn is over.
+              if (serverContent.turnComplete().orElse(false)) {
+                turnComplete.complete(null);
+              }
+            });
+  }
+
+  // Wraps the raw audio bytes in a WAV container and writes them to
+  // resources/output/example_model_response.wav. I/O failures are reported on stderr.
+  private static void saveAudioToFile(byte[] audioData) {
+    // Defines the audio format: 24 kHz, 16-bit, mono, signed, little-endian.
+    AudioFormat format = new AudioFormat(24000, 16, 1, true, false);
+    Path outputPath = Paths.get("resources/output/example_model_response.wav");
+    // Creates an AudioInputStream from the raw audio data and the format.
+    try (AudioInputStream audioStream =
+        new AudioInputStream(
+            new ByteArrayInputStream(audioData),
+            format,
+            audioData.length / format.getFrameSize())) {
+      // Makes sure the output directory exists before writing the file.
+      Path outputDir = outputPath.getParent();
+      if (outputDir != null) {
+        Files.createDirectories(outputDir);
+      }
+      AudioSystem.write(audioStream, AudioFileFormat.Type.WAVE, outputPath.toFile());
+      System.out.println("Successfully saved audio to: " + outputPath.toAbsolutePath());
+    } catch (IOException e) {
+      System.err.println("Error saving audio file: " + e.getMessage());
+    }
+  }
+}
+// [END googlegenaisdk_live_conversation_audio_with_audio]
diff --git a/genai/snippets/src/main/java/genai/live/LiveFuncCallWithTxt.java b/genai/snippets/src/main/java/genai/live/LiveFuncCallWithTxt.java
new file mode 100644
index 00000000000..75cf06d36dd
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveFuncCallWithTxt.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_func_call_with_txt]
+
+import static com.google.genai.types.Modality.Known.TEXT;
+
+import com.google.genai.AsyncSession;
+import com.google.genai.Client;
+import com.google.genai.types.Content;
+import com.google.genai.types.FunctionDeclaration;
+import com.google.genai.types.FunctionResponse;
+import com.google.genai.types.LiveConnectConfig;
+import com.google.genai.types.LiveSendClientContentParameters;
+import com.google.genai.types.LiveSendToolResponseParameters;
+import com.google.genai.types.LiveServerContent;
+import com.google.genai.types.LiveServerMessage;
+import com.google.genai.types.LiveServerToolCall;
+import com.google.genai.types.Part;
+import com.google.genai.types.Tool;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+
+public class LiveFuncCallWithTxt {
+
+  public static void main(String[] args) {
+    // TODO(developer): Replace these variables before running the sample
+    String modelId = "gemini-2.0-flash-live-preview-04-09";
+    generateContent(modelId);
+  }
+
+  // Shows how to use function calling with the Live API.
+  // Blocks until the model finishes its turn or sending a tool response fails.
+  public static void generateContent(String modelId) {
+    // Client Initialization. Once created, it can be reused for multiple requests.
+    try (Client client = Client.builder().location("us-central1").vertexAI(true).build()) {
+
+      // Function definitions.
+      List<FunctionDeclaration> functionDeclarations =
+          List.of(
+              FunctionDeclaration.builder().name("turn_on_the_lights").build(),
+              FunctionDeclaration.builder().name("turn_off_the_lights").build());
+
+      LiveConnectConfig liveConnectConfig =
+          LiveConnectConfig.builder()
+              .responseModalities(TEXT)
+              .tools(Tool.builder().functionDeclarations(functionDeclarations).build())
+              .build();
+
+      // Connects to the live server.
+      CompletableFuture<AsyncSession> sessionFuture =
+          client.async.live.connect(modelId, liveConnectConfig);
+
+      // Sends content and receives response from the live session.
+      sessionFuture
+          .thenCompose(
+              session -> {
+                // A future that completes when the model signals the end of its turn.
+                CompletableFuture<Void> turnComplete = new CompletableFuture<>();
+                // Starts receiving messages from the live session.
+                session.receive(message -> handleFunctionCall(message, turnComplete, session));
+                // Sends content to the live session and waits for the turn to complete.
+                return sendContent(session)
+                    .thenCompose(unused -> turnComplete)
+                    .thenCompose(unused -> session.close());
+              })
+          .join();
+      // Example response:
+      // > Turn off the lights please
+      // Function name: turn_off_the_lights
+      // Optional[{result=ok}]
+    }
+  }
+
+  // Sends a single user text turn to the live session and marks the turn as complete.
+  private static CompletableFuture<Void> sendContent(AsyncSession session) {
+    String textInput = "Turn off the lights please";
+    System.out.printf("> %s\n", textInput);
+    return session.sendClientContent(
+        LiveSendClientContentParameters.builder()
+            .turns(Content.builder().role("user").parts(Part.fromText(textInput)).build())
+            .turnComplete(true)
+            .build());
+  }
+
+  // Answers every named function call in the message with a canned {"result": "ok"}
+  // response and signals `turnComplete` when the model is done generating the response.
+  private static void handleFunctionCall(
+      LiveServerMessage message, CompletableFuture<Void> turnComplete, AsyncSession session) {
+    message
+        .toolCall()
+        .flatMap(LiveServerToolCall::functionCalls)
+        .ifPresent(
+            functionCalls -> {
+              List<FunctionResponse> functionResponses = new ArrayList<>();
+              functionCalls.forEach(
+                  functionCall ->
+                      functionCall
+                          .name()
+                          .ifPresent(
+                              functionName -> {
+                                System.out.println("Function name: " + functionName);
+                                FunctionResponse functionResponse =
+                                    FunctionResponse.builder()
+                                        .name(functionName)
+                                        .response(Map.of("result", "ok"))
+                                        .build();
+                                functionResponses.add(functionResponse);
+                                System.out.println(functionResponse.response());
+                              }));
+              // Send the results of all executed functions back to the model.
+              // If the send fails, fail `turnComplete` so the caller's join() surfaces
+              // the error instead of waiting for a turn that may never complete.
+              session
+                  .sendToolResponse(
+                      LiveSendToolResponseParameters.builder()
+                          .functionResponses(functionResponses)
+                          .build())
+                  .exceptionally(
+                      error -> {
+                        turnComplete.completeExceptionally(error);
+                        return null;
+                      });
+            });
+
+    // Checks if the model's turn is over.
+    if (message.serverContent().flatMap(LiveServerContent::turnComplete).orElse(false)) {
+      turnComplete.complete(null);
+    }
+  }
+}
+// [END googlegenaisdk_live_func_call_with_txt]
diff --git a/genai/snippets/src/main/java/genai/live/LiveGroundRagEngineWithTxt.java b/genai/snippets/src/main/java/genai/live/LiveGroundRagEngineWithTxt.java
new file mode 100644
index 00000000000..608793955de
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveGroundRagEngineWithTxt.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_ground_ragengine_with_txt]
+
+import static com.google.genai.types.Modality.Known.TEXT;
+
+import com.google.genai.AsyncSession;
+import com.google.genai.Client;
+import com.google.genai.types.Content;
+import com.google.genai.types.LiveConnectConfig;
+import com.google.genai.types.LiveSendClientContentParameters;
+import com.google.genai.types.LiveServerContent;
+import com.google.genai.types.LiveServerMessage;
+import com.google.genai.types.Part;
+import com.google.genai.types.Retrieval;
+import com.google.genai.types.Tool;
+import com.google.genai.types.VertexRagStore;
+import com.google.genai.types.VertexRagStoreRagResource;
+import java.util.concurrent.CompletableFuture;
+
+public class LiveGroundRagEngineWithTxt {
+
+  public static void main(String[] args) {
+    // TODO(developer): Replace these variables before running the sample
+    String modelId = "gemini-2.0-flash-live-preview-04-09";
+    String ragCorpus = "projects/{project}/locations/{location}/ragCorpora/{rag_corpus}";
+    generateContent(modelId, ragCorpus);
+  }
+
+  // Shows how to use Vertex AI RAG Engine for grounding and the Live API.
+  // Returns the concatenated text of the model's response for the turn.
+  public static String generateContent(String modelId, String ragCorpus) {
+    // Client Initialization. Once created, it can be reused for multiple requests.
+    try (Client client = Client.builder().location("us-central1").vertexAI(true).build()) {
+
+      // Sets the Vertex RAG Store for grounding
+      VertexRagStore vertexRagStore =
+          VertexRagStore.builder()
+              .ragResources(VertexRagStoreRagResource.builder().ragCorpus(ragCorpus).build())
+              .storeContext(true)
+              .build();
+
+      LiveConnectConfig liveConnectConfig =
+          LiveConnectConfig.builder()
+              // Uses the typed modality constant, like the other Live samples.
+              .responseModalities(TEXT)
+              .tools(
+                  Tool.builder()
+                      .retrieval(Retrieval.builder().vertexRagStore(vertexRagStore).build())
+                      .build())
+              .build();
+
+      // Connects to the live server.
+      CompletableFuture<AsyncSession> sessionFuture =
+          client.async.live.connect(modelId, liveConnectConfig);
+
+      // Sends content and receives response from the live session.
+      CompletableFuture<String> responseFuture =
+          sessionFuture.thenCompose(
+              session -> {
+                // A future that completes when the model signals the end of its turn.
+                CompletableFuture<Void> turnComplete = new CompletableFuture<>();
+                // A variable to concatenate the text responses from model.
+                StringBuilder serverResponse = new StringBuilder();
+                // Starts receiving messages from the live session.
+                session.receive(
+                    message -> handleLiveServerMessage(message, turnComplete, serverResponse));
+                // Sends content to the live session and waits for the turn to complete.
+                return sendContent(session)
+                    .thenCompose(unused -> turnComplete)
+                    .thenCompose(
+                        unused -> session.close().thenApply(result -> serverResponse.toString()));
+              });
+
+      String response = responseFuture.join();
+      System.out.println(response);
+      // Example response:
+      // > What are the newest gemini model?
+      // The newest Gemini model was launched in December 2023.
+      // It is a multimodal model that understands and combines different
+      // types of information like text, code, audio, images, and video.
+      return response;
+    }
+  }
+
+  // Sends a single user text turn to the live session and marks the turn as complete.
+  private static CompletableFuture<Void> sendContent(AsyncSession session) {
+    String textInput = "What are the newest gemini model?";
+    System.out.printf("> %s\n", textInput);
+    return session.sendClientContent(
+        LiveSendClientContentParameters.builder()
+            .turns(Content.builder().role("user").parts(Part.fromText(textInput)).build())
+            .turnComplete(true)
+            .build());
+  }
+
+  // Concatenates the response messages from the model and signals
+  // `turnComplete` when the model is done generating the response.
+  private static void handleLiveServerMessage(
+      LiveServerMessage message,
+      CompletableFuture<Void> turnComplete,
+      StringBuilder serverResponse) {
+    message
+        .serverContent()
+        .flatMap(LiveServerContent::modelTurn)
+        .flatMap(Content::parts)
+        .ifPresent(parts -> parts.forEach(part -> part.text().ifPresent(serverResponse::append)));
+    // Checks if the model's turn is over.
+    if (message.serverContent().flatMap(LiveServerContent::turnComplete).orElse(false)) {
+      turnComplete.complete(null);
+    }
+  }
+}
+// [END googlegenaisdk_live_ground_ragengine_with_txt]
diff --git a/genai/snippets/src/main/java/genai/live/LiveStructuredOutputWithTxt.java b/genai/snippets/src/main/java/genai/live/LiveStructuredOutputWithTxt.java
new file mode 100644
index 00000000000..0b73644da8d
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveStructuredOutputWithTxt.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_structured_output_with_txt]
+
+import com.google.auth.oauth2.GoogleCredentials;
+import com.openai.client.OpenAIClient;
+import com.openai.client.okhttp.OpenAIOkHttpClient;
+import com.openai.models.chat.completions.ChatCompletionCreateParams;
+import com.openai.models.chat.completions.StructuredChatCompletion;
+import java.io.IOException;
+import java.security.GeneralSecurityException;
+import java.util.List;
+import java.util.Optional;
+
+public class LiveStructuredOutputWithTxt {
+
+  // Shape of the structured response requested from the model.
+  public static class CalendarEvent {
+    public String name;
+    public String date;
+    public List<String> participants;
+
+    @Override
+    public String toString() {
+      return "name=" + name + " date=" + date + " participants=" + participants;
+    }
+  }
+
+  public static void main(String[] args) throws GeneralSecurityException, IOException {
+    // TODO(developer): Replace these variables before running the sample
+    String projectId = "your-project-id";
+    String location = "us-central1";
+    // If you are calling a Gemini model, set the endpointId variable to use openapi.
+    // If you are calling a self-deployed model from Model Garden, set the endpointId variable
+    // and set the client's base URL to use your endpoint.
+    String endpointId = "openapi";
+    generateContent(projectId, location, endpointId);
+  }
+
+  // Shows how to use structured output using the OpenAI client.
+  // Returns the parsed event from the first choice, or an empty Optional when the
+  // response has no choices or the first choice has no content.
+  public static Optional<CalendarEvent> generateContent(
+      String projectId, String location, String endpointId)
+      throws GeneralSecurityException, IOException {
+
+    // Programmatically get an access token for authentication.
+    GoogleCredentials credential =
+        GoogleCredentials.getApplicationDefault()
+            .createScoped(List.of("https://www.googleapis.com/auth/cloud-platform"));
+
+    // The access token is fetched once here and passed to the client as its API key.
+    OpenAIClient client =
+        OpenAIOkHttpClient.builder()
+            .baseUrl(
+                String.format(
+                    "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/endpoints/%s",
+                    location, projectId, location, endpointId))
+            .apiKey(credential.refreshAccessToken().getTokenValue())
+            .build();
+
+    // Creates and sends the chat completion request.
+    StructuredChatCompletion<CalendarEvent> chatCompletion =
+        client
+            .chat()
+            .completions()
+            .create(
+                ChatCompletionCreateParams.builder()
+                    .model("google/gemini-2.0-flash-001")
+                    .addSystemMessage("Extract the event information.")
+                    .addUserMessage("Alice and Bob are going to a science fair on Friday.")
+                    .responseFormat(CalendarEvent.class)
+                    .build());
+
+    Optional<CalendarEvent> calendarEvent =
+        chatCompletion.choices().stream().findFirst().flatMap(choice -> choice.message().content());
+
+    calendarEvent.ifPresent(System.out::println);
+    // System message: Extract the event information.
+    // User message: Alice and Bob are going to a science fair on Friday.
+    // output message: name=science fair date=Friday participants=[Alice, Bob]
+    return calendarEvent;
+  }
+}
+// [END googlegenaisdk_live_structured_output_with_txt]
diff --git a/genai/snippets/src/main/java/genai/live/LiveTxtWithAudio.java b/genai/snippets/src/main/java/genai/live/LiveTxtWithAudio.java
new file mode 100644
index 00000000000..2f7cc6d679b
--- /dev/null
+++ b/genai/snippets/src/main/java/genai/live/LiveTxtWithAudio.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package genai.live;
+
+// [START googlegenaisdk_live_txt_with_audio]
+
+import static com.google.genai.types.Modality.Known.TEXT;
+
+import com.google.genai.AsyncSession;
+import com.google.genai.Client;
+import com.google.genai.types.Blob;
+import com.google.genai.types.Content;
+import com.google.genai.types.LiveConnectConfig;
+import com.google.genai.types.LiveSendRealtimeInputParameters;
+import com.google.genai.types.LiveServerContent;
+import com.google.genai.types.LiveServerMessage;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.concurrent.CompletableFuture;
+
+public class LiveTxtWithAudio {
+
+  public static void main(String[] args) throws IOException {
+    // TODO(developer): Replace these variables before running the sample.
+    String modelId = "gemini-2.0-flash-live-preview-04-09";
+    generateContent(modelId);
+  }
+
+  // Shows how to get text responses from audio input.
+  // Returns the concatenated text of the model's response for the turn.
+  // Throws IOException when the input audio cannot be downloaded.
+  public static String generateContent(String modelId) throws IOException {
+    // Client Initialization. Once created, it can be reused for multiple requests.
+    try (Client client = Client.builder().location("us-central1").vertexAI(true).build()) {
+
+      String audioUrl = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav";
+      byte[] audioBytes = downloadAudioFile(audioUrl);
+      System.out.printf("> Answer to this audio url %s\n", audioUrl);
+
+      // Connects to the live server.
+      CompletableFuture<AsyncSession> sessionFuture =
+          client.async.live.connect(
+              modelId, LiveConnectConfig.builder().responseModalities(TEXT).build());
+
+      // Sends content and receives response from the live session.
+      CompletableFuture<String> responseFuture =
+          sessionFuture.thenCompose(
+              session -> {
+                // A future that completes when the model signals the end of its turn.
+                CompletableFuture<Void> turnComplete = new CompletableFuture<>();
+                // A variable to concatenate the text response from the model.
+                StringBuilder serverResponse = new StringBuilder();
+                // Starts receiving messages from the live session.
+                session.receive(
+                    message -> handleLiveServerMessage(message, turnComplete, serverResponse));
+                // Sends content to the live session and waits for the turn to complete.
+                return sendContent(session, audioBytes)
+                    .thenCompose(unused -> turnComplete)
+                    .thenCompose(
+                        unused -> session.close().thenApply(result -> serverResponse.toString()));
+              });
+
+      String response = responseFuture.join();
+      System.out.println(response);
+      // Example response:
+      // > Answer to this audio url
+      // https://storage.googleapis.com/generativeai-downloads/data/16000.wav
+      //
+      // Yeah, I can hear you loud and clear. What's on your mind?
+      return response;
+    }
+  }
+
+  // Downloads the audio file and returns its content as a byte array.
+  private static byte[] downloadAudioFile(String audioUrl) throws IOException {
+    URL url = new URL(audioUrl);
+    try (InputStream in = url.openStream();
+        ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+      // Copies the whole stream; replaces the hand-written buffer loop.
+      in.transferTo(out);
+      return out.toByteArray();
+    }
+  }
+
+  // Sends the audio bytes to the live session as realtime input.
+  private static CompletableFuture<Void> sendContent(AsyncSession session, byte[] audioBytes) {
+    return session.sendRealtimeInput(
+        LiveSendRealtimeInputParameters.builder()
+            .media(Blob.builder().data(audioBytes).mimeType("audio/pcm;rate=16000").build())
+            .build());
+  }
+
+  // Concatenates the response messages from the model and signals
+  // `turnComplete` when the model is done generating the response.
+  private static void handleLiveServerMessage(
+      LiveServerMessage message,
+      CompletableFuture<Void> turnComplete,
+      StringBuilder serverResponse) {
+    message
+        .serverContent()
+        .flatMap(LiveServerContent::modelTurn)
+        .flatMap(Content::parts)
+        .ifPresent(parts -> parts.forEach(part -> part.text().ifPresent(serverResponse::append)));
+
+    // Checks if the model's turn is over.
+    if (message.serverContent().flatMap(LiveServerContent::turnComplete).orElse(false)) {
+      turnComplete.complete(null);
+    }
+  }
+}
+// [END googlegenaisdk_live_txt_with_audio]
diff --git a/genai/snippets/src/test/java/genai/live/LiveIT.java b/genai/snippets/src/test/java/genai/live/LiveIT.java index cc8226eb233..06c3934c7dc 100644 --- a/genai/snippets/src/test/java/genai/live/LiveIT.java +++ b/genai/snippets/src/test/java/genai/live/LiveIT.java @@ -18,15 +18,40 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.RETURNS_SELF; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import com.google.genai.AsyncLive; +import com.google.genai.AsyncSession; +import com.google.genai.Client; +import com.google.genai.types.Content; +import com.google.genai.types.LiveConnectConfig; +import com.google.genai.types.LiveServerContent; +import
com.google.genai.types.LiveServerMessage; +import com.google.genai.types.Part; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.PrintStream; +import java.lang.reflect.Field; +import java.nio.file.Paths; +import java.security.GeneralSecurityException; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import org.mockito.MockedStatic; @RunWith(JUnit4.class) public class LiveIT { @@ -34,6 +59,8 @@ public class LiveIT { private static final String GEMINI_FLASH_LIVE_PREVIEW = "gemini-2.0-flash-live-preview-04-09"; private static final String GEMINI_FLASH_LIVE_PREVIEW_NATIVE_AUDIO = "gemini-live-2.5-flash-preview-native-audio"; + private static final String GEMINI_FLASH_LIVE_PREVIEW_NATIVE_AUDIO_09_2025 = + "gemini-live-2.5-flash-preview-native-audio-09-2025"; private ByteArrayOutputStream bout; private PrintStream out; @@ -62,18 +89,131 @@ public void tearDown() { bout.reset(); } + @Test + public void testLiveAudioWithTxt() { + LiveAudioWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW); + String output = bout.toString(); + assertThat(output).contains("> Hello? 
Gemini, are you there?"); + assertThat(output).contains("Successfully saved audio to: "); + } + @Test public void testLiveCodeExecWithTxt() { String response = LiveCodeExecWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW); assertThat(response).isNotEmpty(); } + @Test + public void testLiveConversationAudioWithAudio() throws IOException { + LiveConversationAudioWithAudio.generateContent(GEMINI_FLASH_LIVE_PREVIEW_NATIVE_AUDIO_09_2025); + String output = bout.toString(); + assertThat(output).contains("Input transcription:"); + assertThat(output).contains("Output transcription:"); + assertThat(output).contains("Successfully saved audio to:"); + } + + @Test + public void testLiveFuncCallWithTxt() { + LiveFuncCallWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW); + String output = bout.toString(); + assertThat(output).contains("> Turn off the lights please"); + assertThat(output).contains("Function name: turn_off_the_lights"); + assertThat(output).contains("result=ok"); + } + @Test public void testLiveGroundGoogSearchWithTxt() { String response = LiveGroundGoogSearchWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW); assertThat(response).isNotEmpty(); } + @Test + public void testLiveGroundRagEngineWithTxt() throws NoSuchFieldException, IllegalAccessException { + + Client.Builder mockedBuilder = mock(Client.Builder.class, RETURNS_SELF); + Client mockedClient = mock(Client.class); + Client.Async mockedAsync = mock(Client.Async.class); + AsyncLive mockedLive = mock(AsyncLive.class); + AsyncSession mockedSession = mock(AsyncSession.class); + + try (MockedStatic mockedStatic = mockStatic(Client.class)) { + mockedStatic.when(Client::builder).thenReturn(mockedBuilder); + when(mockedBuilder.build()).thenReturn(mockedClient); + + // Using reflection because async and live are final fields and cannot be mocked. 
+ Field asyncField = Client.class.getDeclaredField("async"); + asyncField.setAccessible(true); + asyncField.set(mockedClient, mockedAsync); + + Field liveField = Client.Async.class.getDeclaredField("live"); + liveField.setAccessible(true); + liveField.set(mockedAsync, mockedLive); + + when(mockedClient.async.live.connect(anyString(), any(LiveConnectConfig.class))) + .thenReturn(CompletableFuture.completedFuture(mockedSession)); + + when(mockedSession.sendClientContent(any())) + .thenReturn(CompletableFuture.completedFuture(null)); + + when(mockedSession.close()).thenReturn(CompletableFuture.completedFuture(null)); + + // Simulates the server's behavior + doAnswer( + invocation -> { + LiveServerMessage textMessage = mock(LiveServerMessage.class); + LiveServerContent textServerContent = mock(LiveServerContent.class); + Content textContent = mock(Content.class); + Part textPart = mock(Part.class); + + // Sends a text message. + when(textMessage.serverContent()).thenReturn(Optional.of(textServerContent)); + when(textServerContent.modelTurn()).thenReturn(Optional.of(textContent)); + when(textContent.parts()).thenReturn(Optional.of(List.of(textPart))); + when(textPart.text()).thenReturn(Optional.of("The newest model is Gemini.")); + // The turn is not complete yet in this message. + when(textServerContent.turnComplete()).thenReturn(Optional.of(false)); + + // Gets the message handler. + Consumer messageHandler = invocation.getArgument(0); + // Sends the message to the message handler. + messageHandler.accept(textMessage); + + // Simulates server sending the final "turn complete" message. + LiveServerMessage completeMessage = mock(LiveServerMessage.class); + LiveServerContent completeServerContent = mock(LiveServerContent.class); + + when(completeMessage.serverContent()) + .thenReturn(Optional.of(completeServerContent)); + when(completeServerContent.modelTurn()).thenReturn(Optional.empty()); + // The turn is complete. 
+ when(completeServerContent.turnComplete()).thenReturn(Optional.of(true)); + messageHandler.accept(completeMessage); + return null; + }) + .when(mockedSession) + .receive(any()); + + String response = + LiveGroundRagEngineWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW, "test-rag-corpus"); + + assertThat(response).contains("The newest model is Gemini"); + verify(mockedSession).close(); + } + } + + @Test + public void testLiveStructuredOutputWithTxt() throws GeneralSecurityException, IOException { + Optional response = + LiveStructuredOutputWithTxt.generateContent( + System.getenv("GOOGLE_CLOUD_PROJECT"), "us-central1", "openapi"); + assertThat(response).isPresent(); + assertThat(response.get().name).isNotEmpty(); + assertThat(response.get().date).isNotEmpty(); + assertThat(response.get().participants).isNotEmpty(); + String output = bout.toString(); + assertThat(output).contains("name=science fair date=Friday participants=[Alice, Bob]"); + } + @Test public void testLiveTranscribeWithAudio() { String response = @@ -81,6 +221,12 @@ public void testLiveTranscribeWithAudio() { assertThat(response).isNotEmpty(); } + @Test + public void testLiveTxtWithAudio() throws IOException { + String response = LiveTxtWithAudio.generateContent(GEMINI_FLASH_LIVE_PREVIEW); + assertThat(response).isNotEmpty(); + } + @Test public void testLiveWithTxt() { String response = LiveWithTxt.generateContent(GEMINI_FLASH_LIVE_PREVIEW);