Pydantic Schema
This is the Pydantic schema used in the course.
from enum import Enum
from typing import Optional, List, Union, TypeVar, Generic
from pydantic import BaseModel, Field, field_validator
# Enums
class LanguageLevel(Enum):
NO_SPEECH = "no_speech"
BABBLING_SINGLE_WORDS = "babbling_single_words"
TWO_WORD_PHRASES = "two_word_phrases"
SENTENCES_NAMING = "sentences_naming"
class SocialCommunicationLevel(Enum):
NO_ENGAGEMENT = "no_engagement"
BASIC_ENGAGEMENT = "basic_engagement"
RECIPROCAL_INTERACTION = "reciprocal_interaction"
COMPLEX_INTERACTION = "complex_interaction"
class AdaptiveBehaviorLevel(Enum):
NO_INDEPENDENCE = "no_independence"
BASIC_SELF_HELP = "basic_self_help"
MODERATE_INDEPENDENCE = "moderate_independence"
AGE_APPROPRIATE = "age_appropriate"
class MilestoneType(Enum):
LANGUAGE = "language"
SOCIAL_COMMUNICATION = "social_communication"
ADAPTIVE_BEHAVIOR = "adaptive_behavior"
class MilestoneName(Enum):
BABBLING = "babbling"
SINGLE_WORDS = "single_words"
TWO_WORD_PHRASES = "two_word_phrases"
NAMING_OBJECTS = "naming_objects"
EYE_CONTACT = "eye_contact"
RESPONDING_TO_NAME = "responding_to_name"
POINTING = "pointing"
WAVING = "waving"
CLAPPING = "clapping"
JOINT_ATTENTION = "joint_attention"
TURN_TAKING = "turn_taking"
FEEDING_WITH_HELP = "feeding_with_help"
USING_SPOON = "using_spoon"
DANCING = "dancing"
DRESSING_WITH_HELP = "dressing_with_help"
SELF_FEEDING = "self_feeding"
SITTING_WITHOUT_SUPPORT = "sitting_without_support"
WALKING = "walking"
PRETEND_PLAY = "pretend_play"
class RepetitiveBehaviorType(Enum):
NONE = "none"
SPINNING = "spinning"
FLAPPING = "flapping"
STARING = "staring"
ZONING_OUT = "zoning_out"
HEAD_BANGING = "head_banging"
BITING = "biting"
class ASDSeverity(Enum):
LEVEL_1 = 1
LEVEL_2 = 2
LEVEL_3 = 3
class HeavyMetalType(Enum):
MERCURY = "mercury"
LEAD = "lead"
ARSENIC = "arsenic"
CADMIUM = "cadmium"
THALLIUM = "thallium"
ANTIMONY = "antimony"
CHROMIUM = "chromium"
SELENIUM = "selenium"
MANGANESE = "manganese"
COBALT = "cobalt"
VANADIUM = "vanadium"
MOLYBDENUM = "molybdenum"
TUNGSTEN = "tungsten"
URANIUM = "uranium"
PLUTONIUM = "plutonium"
BERYLLIUM = "beryllium"
BISMUTH = "bismuth"
GALLIUM = "gallium"
INDIUM = "indium"
TELLURIUM = "tellurium"
class OtherMetalType(Enum):
ALUMINUM = "aluminum"
NICKEL = "nickel"
IRON = "iron"
COPPER = "copper"
ZINC = "zinc"
MAGNESIUM = "magnesium"
CALCIUM = "calcium"
POTASSIUM = "potassium"
SODIUM = "sodium"
LITHIUM = "lithium"
STRONTIUM = "strontium"
BARIUM = "barium"
CESIUM = "cesium"
RUBIDIUM = "rubidium"
SILVER = "silver"
GOLD = "gold"
PLATINUM = "platinum"
PALLADIUM = "palladium"
RHODIUM = "rhodium"
RUTHENIUM = "ruthenium"
OSMIUM = "osmium"
IRIDIUM = "iridium"
TITANIUM = "titanium"
ZIRCONIUM = "zirconium"
HAFNIUM = "hafnium"
NIOBIUM = "niobium"
TANTALUM = "tantalum"
SCANDIUM = "scandium"
YTTRIUM = "yttrium"
LANTHANUM = "lanthanum"
CERIUM = "cerium"
PRASEODYMIUM = "praseodymium"
NEODYMIUM = "neodymium"
PROMETHIUM = "promethium"
SAMARIUM = "samarium"
EUROPIUM = "europium"
GADOLINIUM = "gadolinium"
TERBIUM = "terbium"
DYSPROSIUM = "dysprosium"
HOLMIUM = "holmium"
ERBIUM = "erbium"
THULIUM = "thulium"
YTTERBIUM = "ytterbium"
LUTETIUM = "lutetium"
ACTINIUM = "actinium"
THORIUM = "thorium"
PROTACTINIUM = "protactinium"
NEPTUNIUM = "neptunium"
AMERICIUM = "americium"
CURIUM = "curium"
BERKELIUM = "berkelium"
CALIFORNIUM = "californium"
EINSTEINIUM = "einsteinium"
FERMIUM = "fermium"
MENDELEVIUM = "mendelevium"
NOBELIUM = "nobelium"
LAWRENCIUM = "lawrencium"
class InterventionType(Enum):
SPEECH_THERAPY = "speech_therapy"
OCCUPATIONAL_THERAPY = "occupational_therapy"
ABA = "aba"
DETOX = "detox"
SOCIAL_SKILLS = "social_skills"
OTHER = "other"
class ComorbidityType(Enum):
ADHD = "adhd"
COLITIS = "colitis"
ALLERGIES = "allergies"
ASTHMA = "asthma"
ECZEMA = "eczema"
SEIZURE_DISORDER = "seizure_disorder"
OCD = "ocd"
ANXIETY = "anxiety"
OTHER = "other"
class ExposureType(Enum):
MATERNAL_AMALGAM = "maternal_amalgam"
POWER_PLANT = "power_plant"
OTHER_MERCURY = "other_mercury"
UNKNOWN = "unknown"
class CitableMonthYear(BaseModel):
year: int = Field(..., description='Non-negative integer value for year')
month: Optional[int] = Field(None, description='Non-negative Integer value for month')
citations: List[int] = Field(default_factory=list, description="Citations for the year, month values")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
# Citable Models
class CitableInt(BaseModel):
value: Optional[int] = Field(None, ge=0, description="Non-negative integer value or None")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the value")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
class CitableFloat(BaseModel):
value: float = Field(..., ge=0, description="Floating point value which allows decimals")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the value")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
class CitableBool(BaseModel):
value: bool = Field(..., description="Boolean value")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the value")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
class CitableStr(BaseModel):
value: Optional[str] = Field(None, description="String value or None")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the value")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
EnumT = TypeVar("EnumT", bound=Enum)
class CitableEnum(BaseModel, Generic[EnumT]):
value: EnumT = Field(..., description="Enum value")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the enum value")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
class CitableSymptom(BaseModel):
"""Base class for symptoms that tracks both occurrence and ongoing status"""
value: bool = Field(..., description="Whether the symptom was observed")
ongoing: bool = Field(..., description="Whether the symptom is still ongoing/present")
days_to_symptom_onset: Optional[int] = Field(None, description="Days from vaccination to symptom onset")
citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the symptom")
explanation: Optional[str] = Field(default=None,
description="Sentence fragment if sufficient, else an explanation for why this value was extracted")
@field_validator("citations", mode="before")
@classmethod
def validate_citations(cls, v):
if v is None:
return []
return v
# Other Models
class HeavyMetalTest(BaseModel):
metal_type: Union[HeavyMetalType, str]
value: float
unit: str
is_elevated: bool
citations: List[int]
class DevelopmentalMilestone(BaseModel):
milestone_name: CitableEnum[MilestoneName] = Field(description="Milestone name with citations")
milestone_type: Union[MilestoneType, str]
milestone_age_in_months: CitableInt = Field(description="Age in months when milestone was achieved")
milestone_has_regressed: CitableBool = Field(description="Whether this milestone has regressed")
regression_start_date_in_months: CitableInt = Field(description="Age in months when regression started")
progressed_after_regression: CitableBool = Field(description="Whether milestone progressed after regression")
@classmethod
def validate_milestone_type(cls, values):
milestone_name = values.get("milestone_name")
milestone_type = values.get("milestone_type")
if milestone_name and milestone_type:
# Only validate if both are enum values, otherwise allow string values
if isinstance(milestone_name, CitableEnum) and isinstance(milestone_type, MilestoneType):
expected_type = MILESTONE_TYPE_MAPPING.get(milestone_name.value.value)
if expected_type != milestone_type:
raise ValueError(
f"Milestone {milestone_name.value} should have milestone_type {expected_type}, not {milestone_type}"
)
return values
class DevelopmentalScore(BaseModel):
milestones: List[DevelopmentalMilestone] = Field(default_factory=list,
description="List of achieved developmental milestones")
@property
def language_score(self) -> int:
achieved = sum(1 for m in self.milestones if m.milestone_type == MilestoneType.LANGUAGE)
return min(achieved, 3)
@property
def social_communication_score(self) -> int:
achieved = sum(1 for m in self.milestones if m.milestone_type == MilestoneType.SOCIAL_COMMUNICATION)
return min(achieved // 2, 3)
@property
def adaptive_behavior_score(self) -> int:
achieved = sum(1 for m in self.milestones if m.milestone_type == MilestoneType.ADAPTIVE_BEHAVIOR)
return min(achieved, 3)
@property
def total_score(self) -> int:
return (
self.language_score +
self.social_communication_score +
self.adaptive_behavior_score
)
class RegressionRecord(BaseModel):
pre_regression_language_level: CitableEnum[LanguageLevel] = Field(
description="Language level before regression, calculated from milestones")
pre_regression_social_communication_level: CitableEnum[SocialCommunicationLevel] = Field(
description="Social communication level before regression, calculated from milestones")
pre_regression_adaptive_behavior_level: CitableEnum[AdaptiveBehaviorLevel] = Field(
description="Adaptive behavior level before regression, calculated from milestones")
post_regression_language_level: CitableEnum[LanguageLevel] = Field(
description="Language level after regression, calculated from milestones")
post_regression_social_communication_level: CitableEnum[SocialCommunicationLevel] = Field(
description="Social communication level after regression, calculated from milestones")
post_regression_adaptive_behavior_level: CitableEnum[AdaptiveBehaviorLevel] = Field(
description="Adaptive behavior level after regression, calculated from milestones")
has_language_loss: CitableBool = Field(description="Whether language loss occurred, with citations")
has_social_loss: CitableBool = Field(description="Whether social communication loss occurred, with citations")
has_adaptive_loss: CitableBool = Field(description="Whether adaptive behavior loss occurred, with citations")
has_repetitive_behaviors: CitableBool = Field(
description="Whether repetitive behaviors were observed, with citations")
class Config:
use_enum_values = True
@classmethod
def __pydantic_init_subclass__(cls):
super().__pydantic_init_subclass__()
cls.__pydantic_validator__.validate_python = cls._validate_levels
@classmethod
def _validate_levels(cls, values):
# Since pre_regression_milestones is removed, we can't calculate pre-regression levels
# Set default values for pre-regression levels
values["pre_regression_language_level"] = CitableEnum[LanguageLevel](value=LanguageLevel.NO_SPEECH, citations=[])
values["pre_regression_social_communication_level"] = CitableEnum[SocialCommunicationLevel](value=SocialCommunicationLevel.NO_ENGAGEMENT, citations=[])
values["pre_regression_adaptive_behavior_level"] = CitableEnum[AdaptiveBehaviorLevel](value=AdaptiveBehaviorLevel.NO_INDEPENDENCE, citations=[])
# Calculate post-regression levels from developmental milestones
post_milestones = [m for m in values.get("developmental_milestones", []) if not m.milestone_has_regressed.value]
post_score = DevelopmentalScore(milestones=post_milestones)
# Map integer scores to string enum values
language_level_mapping = {
0: LanguageLevel.NO_SPEECH,
1: LanguageLevel.BABBLING_SINGLE_WORDS,
2: LanguageLevel.TWO_WORD_PHRASES,
3: LanguageLevel.SENTENCES_NAMING
}
social_level_mapping = {
0: SocialCommunicationLevel.NO_ENGAGEMENT,
1: SocialCommunicationLevel.BASIC_ENGAGEMENT,
2: SocialCommunicationLevel.RECIPROCAL_INTERACTION,
3: SocialCommunicationLevel.COMPLEX_INTERACTION
}
adaptive_level_mapping = {
0: AdaptiveBehaviorLevel.NO_INDEPENDENCE,
1: AdaptiveBehaviorLevel.BASIC_SELF_HELP,
2: AdaptiveBehaviorLevel.MODERATE_INDEPENDENCE,
3: AdaptiveBehaviorLevel.AGE_APPROPRIATE
}
# Pre-regression levels are already set above with default values
values["post_regression_language_level"] = CitableEnum[LanguageLevel](value=language_level_mapping.get(post_score.language_score,
LanguageLevel.NO_SPEECH), citations=[])
values["post_regression_social_communication_level"] = CitableEnum[SocialCommunicationLevel](value=social_level_mapping.get(
post_score.social_communication_score, SocialCommunicationLevel.NO_ENGAGEMENT), citations=[])
values["post_regression_adaptive_behavior_level"] = CitableEnum[AdaptiveBehaviorLevel](value=adaptive_level_mapping.get(
post_score.adaptive_behavior_score, AdaptiveBehaviorLevel.NO_INDEPENDENCE), citations=[])
return values
class TimelineRecord(BaseModel):
date_of_vaccination: Optional[Union[CitableMonthYear, None]] = Field(default=None,
description='Month and year of vaccination, with citations')
regression_onset_date: Optional[Union[CitableMonthYear, None]] = Field(default=None,
description="Month and year of regression onset")
diagnosis_date: Optional[Union[CitableMonthYear, None]] = Field(default=None,
description="Month and year of diagnosis")
age_at_vaccination_months: Optional[Union[CitableInt, CitableFloat, None]] = Field(default=None,
description="Age at vaccination in months, with citations")
age_at_diagnosis_months: Optional[Union[CitableInt, None]] = Field(default=None,
description="Age at diagnosis in months, with citations")
class Config:
use_enum_values = True
@field_validator("age_at_vaccination_months", mode="before")
@classmethod
def validate_age_at_vaccination_months(cls, v):
if v is None:
return v
if isinstance(v, dict) and "value" in v:
# Convert years to months if the value is less than 10 (likely years)
if isinstance(v["value"], (int, float)) and v["value"] < 10:
v["value"] = v["value"] * 12
return v
@field_validator("date_of_vaccination", mode="after")
@classmethod
def validate_date_of_vaccination_after(cls, v, info):
# Handle case where date_of_vaccination is a string instead of CitableMonthYear
if isinstance(v, str):
# Try to parse the date string
try:
from datetime import datetime
date_obj = datetime.strptime(v, "%Y-%m-%d")
return CitableMonthYear(year=date_obj.year, month=date_obj.month, citations=[],
explanation=f"Vaccination date: {v}")
except ValueError:
# If parsing fails, return None
return None
return v
@field_validator("regression_onset_date", mode="before")
@classmethod
def validate_regression_onset_date(cls, v):
if v is None:
return None
return v
@field_validator("diagnosis_date", mode="after")
@classmethod
def validate_diagnosis_date_after(cls, v, info):
# Handle case where diagnosis_date is a string instead of CitableMonthYear
if isinstance(v, str):
# Try to parse the date string
try:
from datetime import datetime
date_obj = datetime.strptime(v, "%Y-%m-%d")
return CitableMonthYear(year=date_obj.year, month=date_obj.month, citations=[],
explanation=f"Diagnosis date: {v}")
except ValueError:
# If parsing fails, return None
return None
return v
@field_validator("age_at_vaccination_months", mode="before")
@classmethod
def validate_age_at_vaccination_months(cls, v):
if v is None:
return v
if isinstance(v, dict) and "value" in v:
# Convert years to months if the value is less than 10 (likely years)
if isinstance(v["value"], (int, float)) and v["value"] < 10:
v["value"] = v["value"] * 12
return v
@field_validator("age_at_diagnosis_months", mode="before")
@classmethod
def validate_age_at_diagnosis_months(cls, v):
if v is None:
return None
if isinstance(v, dict) and v.get("value") is None:
return None
return v
class EarlyVaccinationSymptoms(BaseModel):
"""Symptoms observed within the first 30 days of vaccination based on VAERS data analysis"""
# Top symptoms from VAERS analysis
had_autism_features: CitableSymptom = Field(
description="Whether autism/autistic features/ASD/PDD/Asperger's syndrome were observed within 30 days of vaccination, with citations and ongoing status")
had_fever: CitableSymptom = Field(
description="Whether fever/high fever/low-grade fever was observed within 30 days of vaccination, with citations and ongoing status")
had_speech_loss: CitableSymptom = Field(
description="Whether loss of speech/stopped talking/language delay/expressive aphasia/non-verbal was observed within 30 days of vaccination, with citations and ongoing status")
had_irritability: CitableSymptom = Field(
description="Whether irritability/cranky/fussy behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_eye_contact_loss: CitableSymptom = Field(
description="Whether loss of eye contact/fleeting eye contact was observed within 30 days of vaccination, with citations and ongoing status")
had_diarrhea: CitableSymptom = Field(
description="Whether diarrhea/chronic diarrhea/loose stools were observed within 30 days of vaccination, with citations and ongoing status")
had_seizures: CitableSymptom = Field(
description="Whether seizures/infantile spasms/febrile seizures/grand mal seizures were observed within 30 days of vaccination, with citations and ongoing status")
had_lethargy: CitableSymptom = Field(
description="Whether lethargy/listless/tired/sleepy behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_screaming: CitableSymptom = Field(
description="Whether screaming/high-pitched screaming/shrieking cry/unconsolable crying was observed within 30 days of vaccination, with citations and ongoing status")
had_behavioral_changes: CitableSymptom = Field(
description="Whether behavioral changes/personality change/strange behavior/misbehaved/violent behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_rash: CitableSymptom = Field(
description="Whether rash/red spots/red bumps/morbilliform rash/measles-like rash was observed within 30 days of vaccination, with citations and ongoing status")
had_head_banging: CitableSymptom = Field(
description="Whether head banging/self-mutilating behavior/self-injurious behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_social_withdrawal: CitableSymptom = Field(
description="Whether withdrawal/social withdrawal/became distant/aloof behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_injection_site_reaction: CitableSymptom = Field(
description="Whether swelling/redness/lump at injection site was observed within 30 days of vaccination, with citations and ongoing status")
had_balance_loss: CitableSymptom = Field(
description="Whether loss of balance/unsteady on feet was observed within 30 days of vaccination, with citations and ongoing status")
had_appetite_loss: CitableSymptom = Field(
description="Whether appetite decreased/loss of appetite/picky eater/refused to eat was observed within 30 days of vaccination, with citations and ongoing status")
had_staring_spells: CitableSymptom = Field(
description="Whether staring spells/staring off into space/blank staring was observed within 30 days of vaccination, with citations and ongoing status")
had_hand_flapping: CitableSymptom = Field(
description="Whether hand flapping/repetitive motor mannerisms/stimming behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_toe_walking: CitableSymptom = Field(
description="Whether toe walking was observed within 30 days of vaccination, with citations and ongoing status")
had_gastrointestinal_problems: CitableSymptom = Field(
description="Whether gastrointestinal problems/digestive problems/stomach problems were observed within 30 days of vaccination, with citations and ongoing status")
had_hyperactivity: CitableSymptom = Field(
description="Whether hyperactivity/overactive behavior was observed within 30 days of vaccination, with citations and ongoing status")
had_ear_infections: CitableSymptom = Field(
description="Whether ear infections/otitis media was observed within 30 days of vaccination, with citations and ongoing status")
had_vomiting: CitableSymptom = Field(
description="Whether vomiting/projectile vomiting was observed within 30 days of vaccination, with citations and ongoing status")
had_sleep_problems: CitableSymptom = Field(
description="Whether sleep problems/disturbed sleep/insomnia/no sleep was observed within 30 days of vaccination, with citations and ongoing status")
had_encephalopathy: CitableSymptom = Field(
description="Whether encephalopathy/encephalatic condition was observed within 30 days of vaccination, with citations and ongoing status")
class Config:
use_enum_values = True
@field_validator("had_ear_infections", mode="before")
@classmethod
def validate_had_ear_infections(cls, v):
if isinstance(v, dict) and "extremely" in v:
# Fix the field name from "extremely" to "explanation"
v["explanation"] = v.pop("extremely")
return v
class DiagnosisRecord(BaseModel):
asd_severity: CitableEnum[ASDSeverity] = Field(description="ASD severity level, with citations")
diagnosis_name: List[CitableStr] = Field(default_factory=list,
description="Specific diagnosis (e.g., autism, PDD-NOS), each with its own citations and explanations")
diagnosing_professional: Optional[List[CitableStr]] = Field(default=None,
description="Professional who made the diagnosis, each with its own citations and explanations")
@field_validator("diagnosis_name", mode="before")
@classmethod
def validate_diagnosis_name(cls, v):
if v is None:
return []
if isinstance(v, list):
# Filter out any None values and ensure all items are CitableStr
filtered_v = []
for item in v:
if item is not None:
if isinstance(item, CitableStr):
filtered_v.append(item)
elif isinstance(item, dict):
# Handle dict format
if item.get("value") is not None:
filtered_v.append(CitableStr(**item))
elif isinstance(item, str):
# Handle string format
filtered_v.append(CitableStr(value=item, citations=[]))
return filtered_v
elif isinstance(v, str):
# Handle case where diagnosis_name is a string instead of list
return [CitableStr(value=v, citations=[])]
return v
@field_validator("diagnosing_professional", mode="before")
@classmethod
def validate_diagnosing_professional(cls, v):
if v is None:
return None
return v
@field_validator("diagnosing_professional", mode="after")
@classmethod
def validate_diagnosing_professional_after(cls, v, info):
# Handle case where diagnosing_professional is a string and citations are in separate field
if isinstance(v, str):
# Look for separate citations field in the data
data = info.data
citations = data.get("diagnosing_professional_citations", [])
return [CitableStr(value=v, citations=citations)]
return v
class Config:
use_enum_values = True
class InterventionRecord(BaseModel):
interventions: List[CitableEnum[InterventionType]] = Field(
default_factory=list,
description="List of interventions, each with its own citations and explanations")
@field_validator("interventions", mode="before")
@classmethod
def validate_interventions(cls, v):
if v is None:
return []
if isinstance(v, list):
# Filter out any None values and ensure all items are CitableEnum
filtered_v = []
for item in v:
if item is not None:
if isinstance(item, CitableEnum):
filtered_v.append(item)
elif isinstance(item, dict):
# Handle dict format
if item.get("value") is not None:
filtered_v.append(CitableEnum[InterventionType](**item))
elif isinstance(item, str):
# Handle string format
filtered_v.append(CitableEnum[InterventionType](value=item, citations=[]))
return filtered_v
return v
is_intervention_ongoing: CitableBool = Field(description="Whether any interventions are ongoing, with citations")
has_recovery: CitableBool = Field(description="Whether recovery occurred, with citations")
class Config:
use_enum_values = True
class BirthRecord(BaseModel):
was_normal_pre_vaccination: Optional[CitableBool] = Field(default=None,
description="Whether child was developmentally normal before vaccination, with citations")
apgar_score: Optional[List[CitableStr]] = Field(default=None,
description="APGAR score at birth, if reported, each with its own citations and explanations")
@field_validator("was_normal_pre_vaccination", mode="before")
@classmethod
def validate_was_normal_pre_vaccination(cls, v):
if v is None:
return None
return v
@field_validator("apgar_score", mode="before")
@classmethod
def validate_apgar_score(cls, v):
if v is None:
return None
return v
@field_validator("apgar_score", mode="after")
@classmethod
def validate_apgar_score_after(cls, v, info):
# Handle case where apgar_score is a string and citations are in separate field
if isinstance(v, str):
# Look for separate citations field in the data
data = info.data
citations = data.get("apgar_score_citations", [])
return [CitableStr(value=v, citations=citations)]
return v
class Config:
use_enum_values = True
class BehaviorRecord(BaseModel):
repetitive_behaviors: List[CitableStr] = Field(default_factory=list,
description="List of observed repetitive behaviors, each with its own citations and explanations")
class Config:
use_enum_values = True
@field_validator("repetitive_behaviors", mode="before")
@classmethod
def validate_repetitive_behaviors(cls, v):
if v is None:
return []
if isinstance(v, list):
# Filter out any None values and ensure all items are CitableStr
filtered_v = []
for item in v:
if item is not None:
if isinstance(item, CitableStr):
filtered_v.append(item)
elif isinstance(item, dict):
# Handle dict format
if item.get("value") is not None:
filtered_v.append(CitableStr(**item))
elif isinstance(item, str):
# Handle string format
filtered_v.append(CitableStr(value=item, citations=[]))
return filtered_v
if isinstance(v, dict):
# Handle dictionary format from LLM (legacy CitableList format)
value = v.get("value", [])
citations = v.get("citations", [])
# Convert to list of CitableStr objects
return [CitableStr(value=item, citations=citations) for item in value]
raise ValueError(f"Invalid type for repetitive_behaviors: {type(v)}")
@field_validator("repetitive_behaviors", mode="after")
@classmethod
def validate_repetitive_behaviors_after(cls, v, info):
# Handle case where repetitive_behaviors is a list of strings and citations are in separate field
if isinstance(v, list) and all(isinstance(item, str) for item in v):
# Look for separate citations field in the data
data = info.data
citations = data.get("repetitive_behaviors_citations", [])
explanation = data.get("repetitive_behaviors_explanation", "")
# If we have citations, distribute them among the behaviors
if citations:
# Create CitableStr objects with citations and explanation
return [CitableStr(value=item, citations=citations, explanation=explanation) for item in v]
else:
# Create CitableStr objects without citations
return [CitableStr(value=item, citations=[], explanation=explanation) for item in v]
return v
class VAERSReport(BaseModel):
timeline: TimelineRecord = Field(description="Timeline-related data including dates and ages")
developmental_milestones: List[DevelopmentalMilestone] = Field(...,
description="List of developmental milestones")
regression_record: RegressionRecord = Field(
description="Regression-related data including pre/post levels and loss indicators")
early_vaccination_symptoms: Optional[EarlyVaccinationSymptoms] = Field(default=None,
description="Symptoms observed within the first 30 days of vaccination based on VAERS data analysis")
diagnosis_record: DiagnosisRecord = Field(description="Diagnosis-related data including severity and professional")
intervention_record: InterventionRecord = Field(description="Intervention-related data including recovery status")
birth_record: BirthRecord = Field(description="Birth and pre-vaccination developmental status")
behavior_record: BehaviorRecord = Field(description="Repetitive behavior data with explanations and citations")
heavy_metal_tests: Optional[List[HeavyMetalTest]] = Field(default=None, description="Heavy metal test results")
comorbidities: List[CitableEnum[ComorbidityType]] = Field(
default_factory=list,
description="List of co-occurring conditions, each with its own citations and explanations")
environmental_exposures: List[CitableEnum[ExposureType]] = Field(
default_factory=list,
description="List of environmental exposures, each with its own citations and explanations")
@field_validator("comorbidities", mode="before")
@classmethod
def validate_comorbidities(cls, v):
if v is None:
return []
if isinstance(v, list):
# Filter out any None values and ensure all items are CitableEnum
filtered_v = []
for item in v:
if item is not None:
if isinstance(item, CitableEnum):
filtered_v.append(item)
elif isinstance(item, dict):
# Handle dict format
if item.get("value") is not None:
filtered_v.append(CitableEnum[ComorbidityType](**item))
elif isinstance(item, str):
# Handle string format
filtered_v.append(CitableEnum[ComorbidityType](value=item, citations=[]))
return filtered_v
return v
@field_validator("environmental_exposures", mode="before")
@classmethod
def validate_environmental_exposures(cls, v):
if v is None:
return []
if isinstance(v, list):
# Filter out any None values and ensure all items are CitableEnum
filtered_v = []
for item in v:
if item is not None:
if isinstance(item, CitableEnum):
filtered_v.append(item)
elif isinstance(item, dict):
# Handle dict format
if item.get("value") is not None:
filtered_v.append(CitableEnum[ExposureType](**item))
elif isinstance(item, str):
# Handle string format
filtered_v.append(CitableEnum[ExposureType](value=item, citations=[]))
return filtered_v
return v
legal_matter: CitableBool = Field(
description="Whether the report is related to a lawsuit or other legal matter, with citations")
diagnosis_reporting_delay_reason: Optional[CitableStr] = Field(default=None,
description="Reason stated for delay between diagnosis and reporting, with citations")
@field_validator("legal_matter", mode="before")
@classmethod
def validate_legal_matter(cls, v):
if v is None:
# Provide default if missing
return CitableBool(value=False, citations=[], explanation="No legal matter information provided")
return v
@field_validator("diagnosis_reporting_delay_reason", mode="before")
@classmethod
def validate_diagnosis_reporting_delay_reason(cls, v):
if v is None:
return None
if isinstance(v, dict) and v.get("value") is None:
return None
return v
class Config:
use_enum_values = True
@classmethod
def __pydantic_init_subclass__(cls):
super().__pydantic_init_subclass__()
cls.__pydantic_validator__.validate_python = cls._validate_regression_record
@classmethod
def _validate_regression_record(cls, values):
# The RegressionRecord will handle its own validation and level calculation
# We just need to ensure it gets the milestone data it needs
regression_record = values.get("regression_record", {})
# Add milestone data to regression record for validation
regression_record["developmental_milestones"] = values.get("developmental_milestones", [])
# Create the RegressionRecord - it will handle its own validation
values["regression_record"] = RegressionRecord(**regression_record)
return values
@property
def dmsg(self) -> int:
# Since pre_regression_milestones is removed, we can't calculate pre_score
# Return 0 as default or implement alternative logic if needed
return 0
# MILESTONE_TYPE_MAPPING
MILESTONE_TYPE_MAPPING = {
# Language milestones
MilestoneName.BABBLING.value: MilestoneType.LANGUAGE,
MilestoneName.SINGLE_WORDS.value: MilestoneType.LANGUAGE,
MilestoneName.TWO_WORD_PHRASES.value: MilestoneType.LANGUAGE,
MilestoneName.NAMING_OBJECTS.value: MilestoneType.LANGUAGE,
# Social communication milestones
MilestoneName.EYE_CONTACT.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.RESPONDING_TO_NAME.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.POINTING.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.WAVING.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.CLAPPING.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.JOINT_ATTENTION.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.TURN_TAKING.value: MilestoneType.SOCIAL_COMMUNICATION,
MilestoneName.PRETEND_PLAY.value: MilestoneType.SOCIAL_COMMUNICATION,
# Adaptive behavior milestones
MilestoneName.FEEDING_WITH_HELP.value: MilestoneType.ADAPTIVE_BEHAVIOR,
MilestoneName.USING_SPOON.value: MilestoneType.ADAPTIVE_BEHAVIOR,
MilestoneName.DANCING.value: MilestoneType.ADAPTIVE_BEHAVIOR,
MilestoneName.DRESSING_WITH_HELP.value: MilestoneType.ADAPTIVE_BEHAVIOR,
MilestoneName.SELF_FEEDING.value: MilestoneType.ADAPTIVE_BEHAVIOR,
# Gross motor milestones
MilestoneName.SITTING_WITHOUT_SUPPORT.value: "gross_motor",
MilestoneName.WALKING.value: "gross_motor",
}
As you can see, this is a very complex schema which is used to extract structured data from a VAERS report.
The complexity of this schema acts as a very good test for the quality of an LLM.