Skip to content

Commit 2e51d4d

Browse files
Google APIs authored and copybara-github committed
feat: add data foundry service synthetic data generation API for v1
PiperOrigin-RevId: 796923069
1 parent d869249 commit 2e51d4d

File tree

3 files changed

+147
-0
lines changed

3 files changed

+147
-0
lines changed

google/cloud/aiplatform/v1/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ proto_library(
4040
"content.proto",
4141
"context.proto",
4242
"custom_job.proto",
43+
"data_foundry_service.proto",
4344
"data_item.proto",
4445
"data_labeling_job.proto",
4546
"dataset.proto",
@@ -229,6 +230,7 @@ java_gapic_library(
229230
java_gapic_test(
230231
name = "aiplatform_java_gapic_test_suite",
231232
test_classes = [
233+
"com.google.cloud.aiplatform.v1.DataFoundryServiceClientTest",
232234
"com.google.cloud.aiplatform.v1.DatasetServiceClientTest",
233235
"com.google.cloud.aiplatform.v1.DeploymentResourcePoolServiceClientTest",
234236
"com.google.cloud.aiplatform.v1.EndpointServiceClientTest",

google/cloud/aiplatform/v1/aiplatform_v1.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ name: aiplatform.googleapis.com
44
title: Vertex AI API
55

66
apis:
7+
- name: google.cloud.aiplatform.v1.DataFoundryService
78
- name: google.cloud.aiplatform.v1.DatasetService
89
- name: google.cloud.aiplatform.v1.DeploymentResourcePoolService
910
- name: google.cloud.aiplatform.v1.EndpointService
@@ -739,6 +740,10 @@ http:
739740

740741
authentication:
741742
rules:
743+
- selector: google.cloud.aiplatform.v1.DataFoundryService.GenerateSyntheticData
744+
oauth:
745+
canonical_scopes: |-
746+
https://www.googleapis.com/auth/cloud-platform
742747
- selector: 'google.cloud.aiplatform.v1.DatasetService.*'
743748
oauth:
744749
canonical_scopes: |-
google/cloud/aiplatform/v1/data_foundry_service.proto

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.cloud.aiplatform.v1;
18+
19+
import "google/api/annotations.proto";
20+
import "google/api/client.proto";
21+
import "google/api/field_behavior.proto";
22+
import "google/api/resource.proto";
23+
import "google/cloud/aiplatform/v1/content.proto";
24+
25+
// Per-language code generation settings: each option below sets the
// namespace, package, or outer class name used by one target language's
// generated code for this file.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
26+
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
27+
option java_multiple_files = true;
28+
option java_outer_classname = "DataFoundryServiceProto";
29+
option java_package = "com.google.cloud.aiplatform.v1";
30+
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
31+
option ruby_package = "Google::Cloud::AIPlatform::V1";
32+
33+
// Generates and prepares datasets for Gen AI evaluation.
service DataFoundryService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Produces synthetic data according to the configuration supplied in the
  // request.
  rpc GenerateSyntheticData(GenerateSyntheticDataRequest)
      returns (GenerateSyntheticDataResponse) {
    // `location` is bound from the URL path; `body: "*"` maps every other
    // request field to the HTTP request body.
    option (google.api.http) = {
      post: "/v1/{location=projects/*/locations/*}:generateSyntheticData"
      body: "*"
    };
  }
}
48+
49+
// Input to the DataFoundryService.GenerateSyntheticData RPC.
message GenerateSyntheticDataRequest {
  // Selects how the synthetic data is generated.
  oneof strategy {
    // Drive generation from a high-level description of the task.
    TaskDescriptionStrategy task_description = 3;
  }

  // Required. Resource name of the Location in which the job runs.
  // Format: `projects/{project}/locations/{location}`
  string location = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. How many synthetic examples to produce.
  // Because this API is stateless, only a small count is allowed.
  int32 count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Schema of the desired output, expressed as a list of field
  // specifications.
  repeated OutputFieldSpec output_field_specs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Few-shot examples that steer the style and format of the
  // model's output.
  repeated SyntheticExample examples = 5
      [(google.api.field_behavior) = OPTIONAL];
}
79+
80+
// One named field inside a SyntheticExample.
message SyntheticField {
  // Optional. Name of the field.
  string field_name = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Content held by the field.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];
}
88+
89+
// One synthetic example, made up of several fields. The same message serves
// as a few-shot example in the request and as a generated example in the
// response.
message SyntheticExample {
  // Required. The fields that make up this example.
  repeated SyntheticField fields = 1 [(google.api.field_behavior) = REQUIRED];
}
96+
97+
// Specification of one field in the generated output.
message OutputFieldSpec {
  // Data type that an output field can carry.
  enum FieldType {
    // No field type was specified.
    FIELD_TYPE_UNSPECIFIED = 0;

    // Arbitrary content.
    CONTENT = 1;

    // Text.
    TEXT = 2;

    // Image.
    IMAGE = 3;

    // Audio.
    AUDIO = 4;
  }

  // Required. Name of the output field.
  string field_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Recommended even though it is not required: extra,
  // field-specific instructions that tell the LLM how to generate the content
  // of this one output field. The LLM can sometimes infer the content from
  // the field name alone, but explicit guidance is preferred.
  string guidance = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Data type of the field. Defaults to CONTENT if not set.
  FieldType field_type = 3 [(google.api.field_behavior) = OPTIONAL];
}
129+
130+
// Generation strategy driven by a high-level description of the task.
message TaskDescriptionStrategy {
  // Required. High-level description of the synthetic data to generate.
  string task_description = 1 [(google.api.field_behavior) = REQUIRED];
}
135+
136+
// Response carrying the generated data.
message GenerateSyntheticDataResponse {
  // The synthetic examples that were generated.
  repeated SyntheticExample synthetic_examples = 1;
}

0 commit comments

Comments
 (0)