How to add a thinking budget to Google Gemini 2.5 Flash

You can set a “thinking budget” to let Google Gemini 2.5 Flash spend extra time and tokens reasoning before it provides an answer. This usually produces a better answer than running with thinking disabled, and is especially handy for tasks that require some reasoning.

The Python code below sets a thinking budget while extracting senior care status from VAERS reports. The budget (`thinking_budget` in the Python SDK, `thinkingBudget` in the REST API) must be an integer in the range 0 to 24576.

import json
import os
import time

import pandas as pd
from dotenv import load_dotenv
from google import genai
from google.genai import types
from pydantic import BaseModel, Field

# Let python-dotenv populate the process environment first, then look the
# Gemini API key up there; the value is None when the variable is not set.
load_dotenv()
gemini_api_key = os.environ.get('GEMINI_API_KEY')


class PatientInfo(BaseModel):
    """Model for extracting the patient's senior care status from VAERS data."""
    # NOTE: pydantic copies the class docstring into the generated JSON
    # schema's "description", so it should describe the task accurately.

    # Verdict: whether the report indicates the patient was in senior care.
    patient_in_senior_care: bool = Field(description="Was the patient in senior care?")
    # Justification for the verdict: the model's reasoning or a verbatim quote
    # taken from the report's symptom text.
    patient_in_senior_care_explanation: str = Field(
        description="Either An explanation for the inference, or a verbatim sentence from the symptom_text which provides a citation")


def _save_results(path, results):
    """Write ``results`` to ``path`` as indented JSON, replacing the file atomically.

    The data is first written to a sibling temporary file and then moved over
    the target with ``os.replace``. If the process is interrupted mid-write,
    the previous results file stays intact instead of being left truncated
    (a truncated file would fail to load on the next run and every earlier
    result would be silently discarded and overwritten).

    Args:
        path: Destination JSON file. Its parent directory is created if needed.
        results: JSON-serialisable mapping of VAERS id -> result record.
    """
    directory = os.path.dirname(path)
    if directory:
        # Create the output directory up front so a paid API call is never
        # wasted because the result could not be written.
        os.makedirs(directory, exist_ok=True)
    tmp_path = f'{path}.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(results, f, indent=2)
    # Same directory => same filesystem, so the replace is a single rename.
    os.replace(tmp_path, path)


client = genai.Client(api_key=gemini_api_key)

df: pd.DataFrame = pd.read_csv('../csv/llm/foreign_deaths_pfizer_100.csv')
# Normalise the header so columns can be addressed in upper case below.
df.columns = [x.upper() for x in df.columns]

model_name = 'gemini-2.5-flash-preview-05-20'
experiment = 'patient_in_senior_care'
file_name = f'../jsonvds/{experiment}/{experiment}_{model_name}.json'
# Thinking budget passed to the model; 24576 is the top of the 0-24576 range
# quoted in the accompanying article.
thinking_budget = 24576

# Resume support: load results from an earlier run, if there are any.
curr_json = {}
try:
    with open(file_name, 'r') as f:
        curr_json = json.load(f)
except (OSError, ValueError) as e:
    # Missing/unreadable file (OSError) or invalid JSON (JSONDecodeError is a
    # ValueError): report it and start with an empty result set.
    print(e)

# Live view of the result keys: ids saved during this run are included too.
processed_ids = curr_json.keys()
print(processed_ids)
num_rows = 100
for index, row in df.head(num_rows).iterrows():
    symptom_text = row['SYMPTOM_TEXT']
    # JSON object keys are strings, so compare (and store) the id as a string.
    vaers_id = str(row['VAERS_ID'])
    if vaers_id in processed_ids:
        print(f'Skipping {vaers_id}')
        continue
    print(f'Processing {index} = {vaers_id}')
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments.
    start_time = time.perf_counter()
    prompt = symptom_text
    response = client.models.generate_content(
        model=model_name,
        contents=prompt,
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=thinking_budget),
            system_instruction="You are a VAERS expert, and your goal is to read the symptom_text and provide the output in the specified schema",
            # Ask for JSON that conforms to the PatientInfo schema.
            response_mime_type='application/json',
            response_schema=PatientInfo,
        )
    )
    elapsed = time.perf_counter() - start_time
    # Round-trip through JSON text to get a plain, serialisable dict.
    response_json = json.loads(response.model_dump_json())
    curr_json[vaers_id] = {
        "response": response_json,
        "parsed": response_json['parsed'],
        "duration": elapsed,
        "prompt": prompt
    }
    # Checkpoint after every row so an interrupted run loses at most one call.
    _save_results(file_name, curr_json)

# Final write: also produces the file when every row was skipped.
_save_results(file_name, curr_json)

Leave a Reply Cancel reply