Experiments
Use experiments to quickly tweak parameters in your generative AI pipeline and then run your pipeline using these changed parameters.
Experiments is currently in early access and may undergo significant changes.
If you want to get started immediately, you can copy this script here. Continue reading for step by step instructions.
Local setup​
Gentrace needs a connection to your environment to run the experiments with modified parameters through your pipeline.
If you are running your experiments against a local version of your pipeline, you can open a websocket to Gentrace from your local machine. In your testing script, using the TypeScript or Python SDK:
- JavaScript
- Python
gentrace/experiments/MyEmailPipelineGentraceExperiment.ts (TypeScript)
import { init, listen } from '@gentrace/core';

// Authenticate with Gentrace, then open the websocket that
// receives experiment test jobs.
init({
  apiKey: process.env.GENTRACE_API_KEY ?? '',
});

listen();
python
import os

import gentrace
from gentrace.providers.test_job_runner import listen

# Authenticate with Gentrace. Fix: the original snippet called
# os.getenv() without importing os.
gentrace.init(
    api_key=os.getenv("GENTRACE_API_KEY"),
)

# Open the websocket that receives experiment test jobs.
listen()
To set up a server environment instead, read our guide below.
Register interactions​
Now that you have your environment configured, you can register the interactions
from your pipeline that you would like to test. Wrap all of your pipeline
functionality that you want to experiment on in a call to defineInteraction (define_interaction in Python).
Here is an example for a pipeline that writes an email based on the provided instructions.
- JavaScript
- Python
gentrace/experiments/MyEmailPipelineGentraceExperiment.ts (TypeScript)
import {init ,defineInteraction ,listen ,templateParameter } from '@gentrace/core';importOpenAI from 'openai';import {z } from 'zod';Âinit ({apiKey :process .env .GENTRACE_API_KEY ?? '',});Âconstopenai = newOpenAI ({apiKey :process .env .OPENAI_KEY ?? "",});ÂconstwriteEmail =defineInteraction ({name : 'Write email',fn : async ({fromName ,fromEmail ,toEmail ,instructions }) => {// TODO: Call your pipeline code hereconstcompletion = awaitopenai .chat .completions .create ({model : 'gpt-4o-mini',messages : [{role : 'user',content :`Write an email to ${toEmail } from (${fromName }) ${fromEmail } according to these instructions: ${instructions }`,},],});return {body :completion .choices [0].message .content ,};},// Optional - validates input typesinputType :z .object ({fromName :z .string (),fromEmail :z .string ().toEmail :z .string ().instructions :z .string (),}),});Âlisten ();
python
import os
from typing import Any, Dict

from openai import AsyncOpenAI
from pydantic import BaseModel, EmailStr

import gentrace
from gentrace.providers.test_job_runner import (
    define_interaction,
    listen,
    template_parameter,
)

gentrace.init(
    api_key=os.getenv("GENTRACE_API_KEY"),
)

# Initialize OpenAI client. Fix: the interaction awaits the completion
# call, so the async client is required (the sync OpenAI client's
# .create() is not awaitable).
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_KEY"),
)

# Define parameters — a Mustache template whose text can be tweaked
# per-experiment from the Gentrace UI.
write_email_prompt_parameter = template_parameter({
    "name": "Write email prompt",
    "defaultValue": "Write an email to {{toEmail}} from ({{fromName}}) {{fromEmail}} according to these instructions: {{instructions}}",
    "variables": [
        {"name": "fromName", "example": "John Doe"},
        {
            "name": "instructions",
            "example": {"subject": "Hello", "body": "Write a short email"},
        },
    ],
})


# Input validation model using Pydantic
class WriteEmailInput(BaseModel):
    fromName: str
    fromEmail: EmailStr
    toEmail: EmailStr
    instructions: str


async def write_email(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Write an email with the LLM using the (possibly overridden) prompt template."""
    completion = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": write_email_prompt_parameter.render({
                    "fromName": inputs["fromName"],
                    "fromEmail": inputs["fromEmail"],
                    "toEmail": inputs["toEmail"],
                    "instructions": inputs["instructions"],
                }),
            }
        ],
    )
    return {
        "body": completion.choices[0].message.content,
    }


# Define write_email interaction
write_email_interaction = define_interaction({
    "name": "Write email",
    "fn": write_email,
    "parameters": [write_email_prompt_parameter],
    "inputType": WriteEmailInput,
})

if __name__ == "__main__":
    # Start listening for test jobs
    listen()
Parameters​
Before tweaking the parameters you use in your pipeline, you must first specify
what those parameters are. Four types of parameters are supported: string, number, enum (a list of strings) or template (a Mustache template).
In the example from above, you have your environment configured and your interaction registered, so you can now add your parameters. Here we add a template parameter so you can tweak the prompt for the pipeline:
- JavaScript
- Python
gentrace/experiments/MyEmailPipelineGentraceExperiment.ts (TypeScript)
import {init ,defineInteraction ,listen ,templateParameter } from '@gentrace/core';importOpenAI from 'openai';import {z } from 'zod';Âinit ({apiKey :process .env .GENTRACE_API_KEY ?? '',});Âconstopenai = newOpenAI ({apiKey :process .env .OPENAI_KEY ?? "",});ÂconstwriteEmailPromptParameter =templateParameter ({name : 'Write email prompt',defaultValue :'Write an email to {{toEmail}} from ({{fromName}}) {{fromEmail}} according to these instructions: {{instructions}}',});ÂconstwriteEmail =defineInteraction ({name : 'Write email',fn : async ({fromName ,fromEmail ,toEmail ,instructions }) => {// TODO: Call your pipeline code hereconstcompletion = awaitopenai .chat .completions .create ({model : 'gpt-4o-mini',messages : [{role : 'user',content :writeEmailPromptParameter .render ({fromName ,fromEmail ,toEmail ,instructions ,}),},],});return {body :completion .choices [0].message .content ,};},parameters : [writeEmailPromptParameter ],// Optional - validates input typesinputType :z .object ({fromName :z .string (),fromEmail :z .string ().toEmail :z .string ().instructions :z .string (),}),});Âlisten ();
python
import os
from typing import Any, Dict

from openai import AsyncOpenAI
from pydantic import BaseModel, EmailStr

import gentrace
from gentrace.providers.test_job_runner import (
    define_interaction,
    listen,
    template_parameter,
)

gentrace.init(
    api_key=os.getenv("GENTRACE_API_KEY"),
)

# Fix: the interaction awaits the completion call, so the async client
# is required (the sync OpenAI client's .create() is not awaitable).
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_KEY"),
)

# Define parameters
write_email_prompt_parameter = template_parameter({
    "name": "Write email prompt",
    "defaultValue": "Write an email to {{toEmail}} from ({{fromName}}) {{fromEmail}} according to these instructions: {{instructions}}",
    "variables": [
        {"name": "fromName", "example": "John Doe"},
        {
            "name": "instructions",
            "example": {"subject": "Hello", "body": "Write a short email"},
        },
    ],
})


# Input validation model using Pydantic
class WriteEmailInput(BaseModel):
    fromName: str
    fromEmail: EmailStr
    toEmail: EmailStr
    instructions: str


async def write_email(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Write an email with the LLM using the (possibly overridden) prompt template."""
    completion = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                # Fix: the original indexed the parameter like a dict
                # (write_email_prompt_parameter["render"]), inconsistent
                # with the attribute call used everywhere else.
                "content": write_email_prompt_parameter.render({
                    "fromName": inputs["fromName"],
                    "fromEmail": inputs["fromEmail"],
                    "toEmail": inputs["toEmail"],
                    "instructions": inputs["instructions"],
                }),
            }
        ],
    )
    return {
        "body": completion.choices[0].message.content,
    }


# Define write_email interaction
write_email_interaction = define_interaction({
    "name": "Write email",
    "fn": write_email,
    "parameters": [write_email_prompt_parameter],
    "inputType": WriteEmailInput,
})

if __name__ == "__main__":
    # Start listening for test jobs
    listen()
Below are a few other examples for different types of parameters you may want to configure.
Here is an example for an enum parameter that you could use to test different models for your pipeline.
- JavaScript
- Python
typescript
import {defineInteraction ,enumParameter } from '@gentrace/core';import {z } from 'zod';ÂconstmodelParameter =enumParameter ({name : "AI Model",defaultValue : "GPT-4o",options : ["GPT-4o", "GPT-4o-mini", "claude-3.5-sonnet", "gemini-1.5-pro-002"],});ÂconstchooseModel =defineInteraction ({name : "Choose model",fn : async ({query }) => {// Call your pipeline code here, using the model providedreturn `I will use the model ${modelParameter .getValue ()}.`;},inputType :z .object ({query :z .string (),}),parameters : [modelParameter ],});Â
python
from gentrace.providers.test_job_runner import define_interaction, enum_parameter
from pydantic import BaseModel

# Enum parameter: experiments pick one of a closed set of model names.
model_parameter = enum_parameter({
    "name": "AI Model",
    "defaultValue": "GPT-4o",
    "options": ["GPT-4o", "GPT-4o-mini", "claude-3.5-sonnet", "gemini-1.5-pro-002"],
})


class ChooseModelInput(BaseModel):
    query: str


async def choose_model(inputs: dict) -> str:
    """Choose model interaction function."""
    model = model_parameter.get_value()
    return f"I will use the model {model}."


choose_model_interaction = define_interaction({
    "name": "Choose model",
    "fn": choose_model,
    "inputType": ChooseModelInput,
    "parameters": [model_parameter],
})
Here is an example of a numeric parameter for a pipeline that randomly guesses the date of an event.
- JavaScript
- Python
typescript
import {defineInteraction ,numericParameter } from '@gentrace/core';import {z } from 'zod';ÂconstrandomYearParameter =numericParameter ({name : 'Random component of year',defaultValue : 3,});ÂconstguessTheYear =defineInteraction ({name : 'Guess the year',fn : async ({query }) => {// Call you pipeline code herereturnMath .floor (Math .random () *randomYearParameter .getValue ()) + 2022;},parameters : [randomYearParameter ],// Optional - validates input typesinputType :z .object ({query :z .string (),}),});
python
import random

from gentrace.providers.test_job_runner import (
    define_interaction,
    numeric_parameter,
)
from pydantic import BaseModel

# Numeric parameter: experiments can change the size of the random
# component added to the base year.
random_year_parameter = numeric_parameter({
    "name": "Random component of year",
    "defaultValue": 3,
})


class GuessYearInput(BaseModel):
    query: str


async def guess_the_year(inputs: dict) -> int:
    """Guess the year interaction function."""
    random_component = random_year_parameter.get_value()
    return int(2022 + (random_component * random.random()))


guess_year_interaction = define_interaction({
    "name": "Guess the year",
    "fn": guess_the_year,
    "inputType": GuessYearInput,
    "parameters": [random_year_parameter],
})
Interactions without parameters​
You can also define interactions without providing any parameters if you would like to test local code variations, or experiment with different datasets.
- JavaScript
- Python
typescript
import {defineInteraction } from '@gentrace/core';ÂconsttestInteraction =defineInteraction ({name : 'YOUR_FEATURE',fn : async ({blah }) => {// Insert calling your pipeline code herereturn {response : 'YOUR_RESPONSE',};},});
python
from gentrace.providers.test_job_runner import define_interaction

# An interaction with no parameters — useful for testing local code
# variations or different datasets.
test_interaction = define_interaction({
    "name": "YOUR_FEATURE",
    "fn": lambda inputs: {"response": "YOUR_RESPONSE"},
})
Run experiments​
Once you have your environment set up and your parameters and interactions registered, you can now run your experiments. To create a new experiment, navigate to your Pipeline > Test Results. Then click on the "New experiment" button in the top right corner.
You can choose which environment, dataset and interaction you would like to test. Then you can modify the parameters to override the defaults as you see fit.
Input validation​
If you have provided input validation with your interaction, you will see validation fail for datasets that do not match your expected input type.
New result​
Once you run your experiment you will see a new result in your result grid view.
If you view the metadata for this new experiment result, you can also start a new experiment based off of the parameters used in that result directly from the sidebar.
Server setup​
To run experiments against a production or staging version of your pipeline, or if you would rather use a webhook over a websocket, you can register a webhook environment with Gentrace.
Create the environment in Settings > Environments. You must give your environment a unique name and add the webhook url that will be listening for incoming traffic.
Then within your code, set up the webhook at the path you specified.
- JavaScript
- Python
typescript
import {init ,handleWebhook } from "@gentrace/core";importexpress from "express";Âinit ({apiKey :process .env .GENTRACE_API_KEY ?? "",});Â// Create Express appconstapp =express ();Âapp .post ("/", async (req :express .Request ,res :express .Response ) => {// Get the body of the request as a JSON objectconstbody =req .body ;awaithandleWebhook (body , (responseBody ) => {res .status (200).json (responseBody );});});Â// Start the serverconstPORT =process .env .PORT || 443;app .listen (PORT , () => {console .log (`Server listening on port ${PORT }`);});
python
import os
import json
import hmac
import hashlib

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request

from gentrace.providers.test_job_runner import handle_webhook

# Load environment variables
load_dotenv()

# Initialize FastAPI app
app = FastAPI()


async def verify_signature(request: Request):
    """Verify the webhook signature.

    Recomputes the SHA256 HMAC of the raw request body with the shared
    webhook secret and compares it against the x-gentrace-signature header.
    """
    webhook_secret = os.getenv("GENTRACE_WEBHOOK_SECRET")
    if not webhook_secret:
        raise HTTPException(status_code=500, detail="Webhook secret not configured")

    signature = request.headers.get("x-gentrace-signature")
    if not signature:
        raise HTTPException(status_code=401, detail="No signature provided")

    # The HMAC must be computed over the raw body, not the parsed JSON.
    body = await request.body()
    calculated_signature = f"sha256={hmac.new(webhook_secret.encode(), body, hashlib.sha256).hexdigest()}"
    if not hmac.compare_digest(signature, calculated_signature):
        raise HTTPException(status_code=401, detail="Invalid signature")


@app.post("/")
async def webhook_handler(request: Request):
    """Handle incoming webhook requests."""
    # Verify the signature
    await verify_signature(request)
    # Get request body
    body = await request.json()
    # Handle the webhook and return response
    response = await handle_webhook(body)
    return response


def start_server():
    """Start the FastAPI server."""
    import uvicorn

    port = int(os.getenv("PORT", "443"))
    uvicorn.run(app, host="0.0.0.0", port=port)


if __name__ == "__main__":
    start_server()
Securing your webhook​
Our webhook requests are secured through content hashing with a signature. We
calculate the SHA256 HMAC signature from the body and include that signature in
the x-gentrace-signature header.
To verify the webhook, calculate the signature from the raw request body (not the JSON parsed body) using the webhook secret available on your environment settings.
Once you have created the environment, you can copy the webhook secret from the environments settings page.
Securely store your secret and use it in your webhook verification code:
- JavaScript
- Python
typescript
importexpress from "express";import {createHmac } from "crypto";Â// Create Express appconstapp =express ();Â// Verify the signature of the requestapp .use (express .json ({verify : (req : any,res :express .Response ,buf ,next ) => {req .rawBody =buf ;constwebhookSecret =process .env .GENTRACE_WEBHOOK_SECRET ?? "";Â// Get the signature from the headerconstsignature =req .header ("x-gentrace-signature");Âif (!signature ) {throw newError ("No signature provided");}ÂconstcalculatedSignature = `sha256=${createHmac ("sha256",webhookSecret ).update (req .rawBody ).digest ("hex")}`;Â// Verify the signatureif (signature !==calculatedSignature ) {throw newError ("Invalid signature");}},}),);Â// Add error handling middlewareapp .use ((err :Error ,req :express .Request ,res :express .Response ,next :express .NextFunction ,) => {res .status (401).send ("Unauthorized");},);
python
import os
import hmac
import hashlib

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from gentrace.providers.test_job_runner import handle_webhook

# Initialize FastAPI app
app = FastAPI()


# Verify the signature of every incoming request.
@app.middleware("http")
async def verify_signature(request: Request, call_next):
    """Reject requests whose x-gentrace-signature HMAC does not match."""
    webhook_secret = os.getenv("GENTRACE_WEBHOOK_SECRET", "")

    # Get the signature from the header
    signature = request.headers.get("x-gentrace-signature")
    if not signature:
        # Fix: HTTPException raised inside an HTTP middleware is not
        # handled by FastAPI's exception handlers (it surfaces as a 500),
        # so return the 401 response directly.
        return JSONResponse(status_code=401, content={"detail": "No signature provided"})

    # Get raw body — the HMAC is computed over the unparsed bytes.
    body = await request.body()

    # Calculate signature
    calculated_signature = f"sha256={hmac.new(webhook_secret.encode(), body, hashlib.sha256).hexdigest()}"

    # Verify the signature
    if not hmac.compare_digest(signature, calculated_signature):
        return JSONResponse(status_code=401, content={"detail": "Invalid signature"})

    response = await call_next(request)
    return response


@app.post("/")
async def webhook_handler(request: Request):
    """Handle incoming webhook requests."""
    body = await request.json()
    response = await handle_webhook(body)
    return response


def start_server():
    """Start the FastAPI server."""
    import uvicorn

    port = int(os.getenv("PORT", "443"))
    uvicorn.run(app, host="0.0.0.0", port=port)


if __name__ == "__main__":
    start_server()