Results with select cases
You may not want to create a result containing the runs for every test case. Instead, you may want to create a result with only a subset of test cases.
For example, if your pipeline degrades on Japanese language queries, you may want to create a result with those test cases only.
- TypeScript
- Python
submitTestResult() / submit_test_result()
View documentation for simple submission of test results here. You can use this function in a couple of ways to accomplish the same goal.
Filter TestCase list
typescript
import { getTestCases, init, submitTestResult } from "@gentrace/core";
import { aiPipeline } from "../pipelines"; // TODO: replace with your own pipeline

init({
  apiKey: process.env.GENTRACE_API_KEY ?? "",
});

const PIPELINE_SLUG = "guess-the-year";

// Keep only the test cases whose name starts with "Japanese".
const japaneseCases = (await getTestCases(PIPELINE_SLUG)).filter((tc) =>
  tc.name.startsWith("Japanese"),
);

// One outputs object per selected test case, pushed in the same order as
// `japaneseCases` so the two arrays line up index by index.
const outputsList: Record<string, any>[] = [];

for (const testCase of japaneseCases) {
  const outputs = aiPipeline(testCase);
  outputsList.push(outputs);
}

// Submit the accumulated list. `outputs` is scoped to the loop body and is
// not visible here, so the list must be passed instead.
await submitTestResult(PIPELINE_SLUG, japaneseCases, outputsList);
python
import os

import gentrace

from app.pipelines import create_ai_output  # TODO: replace with your own pipeline

gentrace.init(
    api_key=os.getenv("GENTRACE_API_KEY"),
)

PIPELINE_SLUG = "example-pipeline"

# Fetch every test case for the pipeline, then keep those whose name
# begins with "Japanese".
all_cases = gentrace.get_test_cases(pipeline_slug=PIPELINE_SLUG)
cases = [tc for tc in all_cases if tc.get("name").startswith("Japanese")]

# Run the pipeline on each selected case's inputs; one output dict per case,
# in the same order as `cases`.
outputs_list = [{"value": create_ai_output(tc["inputs"])} for tc in cases]

# Creates result only for Japanese cases
result = gentrace.submit_test_result(
    PIPELINE_SLUG,
    test_cases=cases,
    outputs_list=outputs_list,
)

print("Test result ID:", result["resultId"])
Pull specific TestCase
typescript
import { getTestCase, init, submitTestResult } from "@gentrace/core";
import { aiPipeline } from "../pipelines"; // TODO: replace with your own pipeline

init({ apiKey: process.env.GENTRACE_API_KEY });

const PIPELINE_SLUG = "guess-the-year";
const TEST_CASE_ID = "38485a80-4291-4f33-8797-36d4e9f9ad3f";

// Pull the one test case by its ID and run the pipeline on it.
const failingCase = await getTestCase(TEST_CASE_ID);
const caseOutputs: Record<string, any> = aiPipeline(failingCase);

// Creates result only for this failing case: a one-element list of cases
// paired with a one-element list of outputs.
const selectedCases = [failingCase];
const selectedOutputs = [caseOutputs];

await submitTestResult(PIPELINE_SLUG, selectedCases, selectedOutputs);
python
import os

import gentrace

from app.pipelines import create_ai_output  # TODO: replace with your own pipeline

gentrace.init(api_key=os.getenv("GENTRACE_API_KEY"))

PIPELINE_SLUG = "example-pipeline"
TEST_CASE_ID = "38485a80-4291-4f33-8797-36d4e9f9ad3f"

# Pull the one test case by its ID and run the pipeline on its inputs.
failing_case = gentrace.get_test_case(TEST_CASE_ID)
case_inputs = failing_case.get("inputs")
case_outputs = create_ai_output(case_inputs)

# Creates result only for this failing case
result = gentrace.submit_test_result(
    PIPELINE_SLUG,
    test_cases=[failing_case],
    outputs_list=[case_outputs],
)

print("Test result ID:", result["resultId"])
runTest() / run_test()
View documentation for the callback submission of test results here. You can use a filter function to specify the cases you want to run.
typescript
await runTest(
  PIPELINE_SLUG,
  // Callback: starts a pipeline run, measures one step fed with the test
  // case's inputs, submits the run, and hands back the outputs and the runner.
  async (testCase) => {
    const pipelineRun = pipeline.start();

    const measuredOutputs = await pipelineRun.measure(
      (inputs) => ({
        yourOutputKey: "Your output value",
      }),
      [testCase.inputs],
    );

    await pipelineRun.submit();

    return [measuredOutputs, pipelineRun];
  },
  // Filter: selects test cases whose name starts with the given prefix.
  (testCase) => testCase.name.startsWith("Japanese test case:"),
);
python
import os

import gentrace

PIPELINE_SLUG = "guess-the-year"

gentrace.init(
    api_key=os.getenv("GENTRACE_API_KEY"),
)

pipeline = gentrace.Pipeline(
    PIPELINE_SLUG,
    openai_config={
        "api_key": os.getenv("OPENAI_KEY"),
    },
)


def create_embedding_callback(test_case):
    """Start a pipeline run, create one embedding through it, and return ``[output, runner]``.

    The ``test_case`` argument is accepted but not read; the embedding input
    is the fixed string "sample text".
    """
    runner = pipeline.start()
    output = runner.get_openai().embeddings.create(
        input="sample text",
        model="text-similarity-davinci-001",
    )
    return [output, runner]


result = gentrace.run_test(
    PIPELINE_SLUG,
    create_embedding_callback,
    # TODO: define your filter function here
    case_filter=lambda test_case: test_case.get("name").startswith("Japanese test case:"),
)

print("Result: ", result)
The specified callback will only fire for test cases that match the filter function.
If you want to run the callback only on a single test case, you can write a modified filter function that only returns `true` for that test case.
typescript
await runTest(
  PIPELINE_SLUG,
  // Callback: starts a pipeline run, measures one step fed with the test
  // case's inputs, submits the run, and hands back the outputs and the runner.
  async (testCase) => {
    const pipelineRun = pipeline.start();

    const measuredOutputs = await pipelineRun.measure(
      (inputs) => ({
        yourOutputKey: "Your output value",
      }),
      [testCase.inputs],
    );

    await pipelineRun.submit();

    return [measuredOutputs, pipelineRun];
  },
  // Filter: true only for the test case with this exact ID.
  (testCase) => testCase.id === "38485a80-4291-4f33-8797-36d4e9f9ad3f",
);
python
# Filter is true only for the test case whose "id" equals this exact value.
result = gentrace.run_test(
    PIPELINE_SLUG,
    create_embedding_callback,
    case_filter=lambda test_case: test_case.get("id") == "38485a80-4291-4f33-8797-36d4e9f9ad3f",
)

print("Result: ", result)