Pydantic Schema

This is the Pydantic schema used in the course.

from enum import Enum
from typing import Optional, List, Union, TypeVar, Generic

from pydantic import BaseModel, ConfigDict, Field, field_validator


# Enums
# Language ability levels. Members are declared in the same order in which
# RegressionRecord._validate_levels maps language scores 0-3 onto them.
class LanguageLevel(Enum):
    NO_SPEECH = "no_speech"
    BABBLING_SINGLE_WORDS = "babbling_single_words"
    TWO_WORD_PHRASES = "two_word_phrases"
    SENTENCES_NAMING = "sentences_naming"


# Social-communication levels. Members are declared in the same order in which
# RegressionRecord._validate_levels maps social-communication scores 0-3 onto
# them.
class SocialCommunicationLevel(Enum):
    NO_ENGAGEMENT = "no_engagement"
    BASIC_ENGAGEMENT = "basic_engagement"
    RECIPROCAL_INTERACTION = "reciprocal_interaction"
    COMPLEX_INTERACTION = "complex_interaction"


# Adaptive-behavior levels. Members are declared in the same order in which
# RegressionRecord._validate_levels maps adaptive-behavior scores 0-3 onto them.
class AdaptiveBehaviorLevel(Enum):
    NO_INDEPENDENCE = "no_independence"
    BASIC_SELF_HELP = "basic_self_help"
    MODERATE_INDEPENDENCE = "moderate_independence"
    AGE_APPROPRIATE = "age_appropriate"


# Developmental domains a milestone can belong to. DevelopmentalScore counts
# milestones per member of this enum.
class MilestoneType(Enum):
    LANGUAGE = "language"
    SOCIAL_COMMUNICATION = "social_communication"
    ADAPTIVE_BEHAVIOR = "adaptive_behavior"


# Names of individual developmental milestones. Used as the value type of
# DevelopmentalMilestone.milestone_name (wrapped in CitableEnum).
class MilestoneName(Enum):
    BABBLING = "babbling"
    SINGLE_WORDS = "single_words"
    TWO_WORD_PHRASES = "two_word_phrases"
    NAMING_OBJECTS = "naming_objects"
    EYE_CONTACT = "eye_contact"
    RESPONDING_TO_NAME = "responding_to_name"
    POINTING = "pointing"
    WAVING = "waving"
    CLAPPING = "clapping"
    JOINT_ATTENTION = "joint_attention"
    TURN_TAKING = "turn_taking"
    FEEDING_WITH_HELP = "feeding_with_help"
    USING_SPOON = "using_spoon"
    DANCING = "dancing"
    DRESSING_WITH_HELP = "dressing_with_help"
    SELF_FEEDING = "self_feeding"
    SITTING_WITHOUT_SUPPORT = "sitting_without_support"
    WALKING = "walking"
    PRETEND_PLAY = "pretend_play"


# Kinds of repetitive behavior.
# NOTE(review): no model in the visible part of this file references this enum
# (BehaviorRecord stores free-text CitableStr entries) - confirm it is still
# needed.
class RepetitiveBehaviorType(Enum):
    NONE = "none"
    SPINNING = "spinning"
    FLAPPING = "flapping"
    STARING = "staring"
    ZONING_OUT = "zoning_out"
    HEAD_BANGING = "head_banging"
    BITING = "biting"


# ASD severity levels used by DiagnosisRecord.asd_severity. Unlike the other
# enums in this file the member values are integers (1-3), not strings.
class ASDSeverity(Enum):
    LEVEL_1 = 1
    LEVEL_2 = 2
    LEVEL_3 = 3


# Metal names accepted by HeavyMetalTest.metal_type (that field additionally
# accepts arbitrary strings).
class HeavyMetalType(Enum):
    MERCURY = "mercury"
    LEAD = "lead"
    ARSENIC = "arsenic"
    CADMIUM = "cadmium"
    THALLIUM = "thallium"
    ANTIMONY = "antimony"
    CHROMIUM = "chromium"
    SELENIUM = "selenium"
    MANGANESE = "manganese"
    COBALT = "cobalt"
    VANADIUM = "vanadium"
    MOLYBDENUM = "molybdenum"
    TUNGSTEN = "tungsten"
    URANIUM = "uranium"
    PLUTONIUM = "plutonium"
    BERYLLIUM = "beryllium"
    BISMUTH = "bismuth"
    GALLIUM = "gallium"
    INDIUM = "indium"
    TELLURIUM = "tellurium"


# Additional metal names kept separate from HeavyMetalType.
# NOTE(review): not referenced by any model in the visible part of this file -
# confirm where (or whether) it is used.
class OtherMetalType(Enum):
    ALUMINUM = "aluminum"
    NICKEL = "nickel"
    IRON = "iron"
    COPPER = "copper"
    ZINC = "zinc"
    MAGNESIUM = "magnesium"
    CALCIUM = "calcium"
    POTASSIUM = "potassium"
    SODIUM = "sodium"
    LITHIUM = "lithium"
    STRONTIUM = "strontium"
    BARIUM = "barium"
    CESIUM = "cesium"
    RUBIDIUM = "rubidium"
    SILVER = "silver"
    GOLD = "gold"
    PLATINUM = "platinum"
    PALLADIUM = "palladium"
    RHODIUM = "rhodium"
    RUTHENIUM = "ruthenium"
    OSMIUM = "osmium"
    IRIDIUM = "iridium"
    TITANIUM = "titanium"
    ZIRCONIUM = "zirconium"
    HAFNIUM = "hafnium"
    NIOBIUM = "niobium"
    TANTALUM = "tantalum"
    SCANDIUM = "scandium"
    YTTRIUM = "yttrium"
    LANTHANUM = "lanthanum"
    CERIUM = "cerium"
    PRASEODYMIUM = "praseodymium"
    NEODYMIUM = "neodymium"
    PROMETHIUM = "promethium"
    SAMARIUM = "samarium"
    EUROPIUM = "europium"
    GADOLINIUM = "gadolinium"
    TERBIUM = "terbium"
    DYSPROSIUM = "dysprosium"
    HOLMIUM = "holmium"
    ERBIUM = "erbium"
    THULIUM = "thulium"
    YTTERBIUM = "ytterbium"
    LUTETIUM = "lutetium"
    ACTINIUM = "actinium"
    THORIUM = "thorium"
    PROTACTINIUM = "protactinium"
    NEPTUNIUM = "neptunium"
    AMERICIUM = "americium"
    CURIUM = "curium"
    BERKELIUM = "berkelium"
    CALIFORNIUM = "californium"
    EINSTEINIUM = "einsteinium"
    FERMIUM = "fermium"
    MENDELEVIUM = "mendelevium"
    NOBELIUM = "nobelium"
    LAWRENCIUM = "lawrencium"


# Intervention categories used by InterventionRecord.interventions (wrapped in
# CitableEnum). OTHER is the catch-all member.
class InterventionType(Enum):
    SPEECH_THERAPY = "speech_therapy"
    OCCUPATIONAL_THERAPY = "occupational_therapy"
    ABA = "aba"
    DETOX = "detox"
    SOCIAL_SKILLS = "social_skills"
    OTHER = "other"


# Co-occurring conditions used by VAERSReport.comorbidities (wrapped in
# CitableEnum). OTHER is the catch-all member.
class ComorbidityType(Enum):
    ADHD = "adhd"
    COLITIS = "colitis"
    ALLERGIES = "allergies"
    ASTHMA = "asthma"
    ECZEMA = "eczema"
    SEIZURE_DISORDER = "seizure_disorder"
    OCD = "ocd"
    ANXIETY = "anxiety"
    OTHER = "other"


# Environmental exposure categories used by VAERSReport.environmental_exposures
# (wrapped in CitableEnum).
class ExposureType(Enum):
    MATERNAL_AMALGAM = "maternal_amalgam"
    POWER_PLANT = "power_plant"
    OTHER_MERCURY = "other_mercury"
    UNKNOWN = "unknown"


# A (year, optional month) pair together with the citation ids and the
# free-text rationale that back it.
class CitableMonthYear(BaseModel):
    # ge=0 enforces the "non-negative" contract stated in the description,
    # matching how CitableInt constrains its value.
    year: int = Field(..., ge=0, description='Non-negative integer value for year')
    # Optional; when present it must be non-negative and cannot exceed 12.
    month: Optional[int] = Field(None, ge=0, le=12, description='Non-negative Integer value for month')
    citations: List[int] = Field(default_factory=list, description="Citations for the year, month values")
    explanation: Optional[str] = Field(default=None,
                                       description="Sentence fragment if sufficient, else an explanation for why this value was extracted")

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Treat an explicit ``citations=None`` as "no citations" (empty list)."""
        return [] if v is None else v


# Citable Models
# Optional non-negative integer plus the citations and rationale behind it.
class CitableInt(BaseModel):
    value: Optional[int] = Field(
        None,
        ge=0,
        description="Non-negative integer value or None",
    )
    citations: Optional[List[int]] = Field(
        default_factory=list,
        description="Citations for the value",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Sentence fragment if sufficient, else an explanation for why this value was extracted",
    )

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        return [] if v is None else v


# Required non-negative float plus the citations and rationale behind it.
class CitableFloat(BaseModel):
    value: float = Field(..., ge=0, description="Floating point value which allows decimals")
    citations: Optional[List[int]] = Field(default_factory=list, description="Citations for the value")
    explanation: Optional[str] = Field(default=None,
                                       description="Sentence fragment if sufficient, else an explanation for why this value was extracted")

    # Same normalisation as the sibling Citable* models, so that
    # ``citations`` is a list here as well when the input supplies None.
    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        if v is None:
            return []
        return v


# Required boolean plus the citations and rationale behind it.
class CitableBool(BaseModel):
    value: bool = Field(
        ...,
        description="Boolean value",
    )
    citations: Optional[List[int]] = Field(
        default_factory=list,
        description="Citations for the value",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Sentence fragment if sufficient, else an explanation for why this value was extracted",
    )

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        return [] if v is None else v


# Optional string plus the citations and rationale behind it.
class CitableStr(BaseModel):
    value: Optional[str] = Field(
        None,
        description="String value or None",
    )
    citations: Optional[List[int]] = Field(
        default_factory=list,
        description="Citations for the value",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Sentence fragment if sufficient, else an explanation for why this value was extracted",
    )

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        return [] if v is None else v


# Type variable bounded by Enum; it parametrises CitableEnum below.
EnumT = TypeVar("EnumT", bound=Enum)


# Generic wrapper pairing one enum member with its citations and rationale,
# e.g. CitableEnum[LanguageLevel].
class CitableEnum(BaseModel, Generic[EnumT]):
    value: EnumT = Field(
        ...,
        description="Enum value",
    )
    citations: Optional[List[int]] = Field(
        default_factory=list,
        description="Citations for the enum value",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Sentence fragment if sufficient, else an explanation for why this value was extracted",
    )

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        return [] if v is None else v


class CitableSymptom(BaseModel):
    """Base class for symptoms that tracks both occurrence and ongoing status"""
    value: bool = Field(
        ...,
        description="Whether the symptom was observed",
    )
    ongoing: bool = Field(
        ...,
        description="Whether the symptom is still ongoing/present",
    )
    # No lower bound is declared for this field.
    days_to_symptom_onset: Optional[int] = Field(
        None,
        description="Days from vaccination to symptom onset",
    )
    citations: Optional[List[int]] = Field(
        default_factory=list,
        description="Citations for the symptom",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Sentence fragment if sufficient, else an explanation for why this value was extracted",
    )

    @field_validator("citations", mode="before")
    @classmethod
    def validate_citations(cls, v):
        """Replace an explicit ``None`` for ``citations`` with an empty list."""
        return [] if v is None else v


# Other Models
# One heavy-metal test result.
# NOTE(review): unlike the Citable* models, `citations` is required here and
# there is no None-to-[] normalisation - confirm that this is intended.
class HeavyMetalTest(BaseModel):
    # Either a known HeavyMetalType member or a free-text metal name.
    metal_type: Union[HeavyMetalType, str]
    # Presumably the measured amount expressed in `unit` - TODO confirm.
    value: float
    unit: str
    is_elevated: bool
    citations: List[int]


# A single developmental milestone together with its regression history.
# Every cited attribute is wrapped in a Citable* model.
class DevelopmentalMilestone(BaseModel):
    milestone_name: CitableEnum[MilestoneName] = Field(
        description="Milestone name with citations",
    )
    # Either a MilestoneType member or a free-text category.
    milestone_type: Union[MilestoneType, str]
    milestone_age_in_months: CitableInt = Field(
        description="Age in months when milestone was achieved",
    )
    milestone_has_regressed: CitableBool = Field(
        description="Whether this milestone has regressed",
    )
    regression_start_date_in_months: CitableInt = Field(
        description="Age in months when regression started",
    )
    progressed_after_regression: CitableBool = Field(
        description="Whether milestone progressed after regression",
    )

    # NOTE(review): this classmethod carries no pydantic validator decorator, so
    # nothing in the visible code invokes it during validation. It also relies
    # on MILESTONE_TYPE_MAPPING, which is not defined in the visible part of
    # the file - confirm both before depending on this check.
    @classmethod
    def validate_milestone_type(cls, values):
        """Check an enum ``milestone_type`` against the mapping entry for the name.

        ``values`` must offer ``.get`` for ``"milestone_name"`` and
        ``"milestone_type"``. The comparison only happens when the name is a
        ``CitableEnum`` and the type is a ``MilestoneType``; in every other
        case ``values`` is handed back untouched.

        Raises:
            ValueError: if the mapped type differs from ``milestone_type``.
        """
        milestone_name = values.get("milestone_name")
        milestone_type = values.get("milestone_type")

        # Nothing to compare when either side is missing or falsy.
        if not (milestone_name and milestone_type):
            return values

        # Free-text (non-enum) inputs are deliberately left unchecked.
        if not isinstance(milestone_name, CitableEnum) or not isinstance(milestone_type, MilestoneType):
            return values

        expected_type = MILESTONE_TYPE_MAPPING.get(milestone_name.value.value)
        if expected_type != milestone_type:
            raise ValueError(
                f"Milestone {milestone_name.value} should have milestone_type {expected_type}, not {milestone_type}"
            )
        return values


# Turns a list of achieved milestones into per-domain scores, each capped at 3,
# and their sum.
class DevelopmentalScore(BaseModel):
    milestones: List[DevelopmentalMilestone] = Field(default_factory=list,
                                                     description="List of achieved developmental milestones")

    def _count_of_type(self, wanted: MilestoneType) -> int:
        """Count milestones whose ``milestone_type`` denotes *wanted*.

        ``DevelopmentalMilestone.milestone_type`` is declared as
        ``Union[MilestoneType, str]``, so it may hold either the enum member or
        its raw string value (e.g. ``"language"``). Both spellings are counted;
        comparing against the enum member alone would silently skip every
        string-typed milestone.
        """
        return sum(
            1
            for m in self.milestones
            if m.milestone_type == wanted or m.milestone_type == wanted.value
        )

    @property
    def language_score(self) -> int:
        """Number of language milestones, capped at 3."""
        return min(self._count_of_type(MilestoneType.LANGUAGE), 3)

    @property
    def social_communication_score(self) -> int:
        """Half the number of social-communication milestones (floored), capped at 3.

        NOTE(review): this is the only domain that halves the count - confirm
        the weighting is intentional.
        """
        return min(self._count_of_type(MilestoneType.SOCIAL_COMMUNICATION) // 2, 3)

    @property
    def adaptive_behavior_score(self) -> int:
        """Number of adaptive-behavior milestones, capped at 3."""
        return min(self._count_of_type(MilestoneType.ADAPTIVE_BEHAVIOR), 3)

    @property
    def total_score(self) -> int:
        """Sum of the three per-domain scores (therefore between 0 and 9)."""
        return (
                self.language_score +
                self.social_communication_score +
                self.adaptive_behavior_score
        )


# Pre/post-regression levels per developmental domain plus loss indicators.
class RegressionRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    pre_regression_language_level: CitableEnum[LanguageLevel] = Field(
        description="Language level before regression, calculated from milestones")
    pre_regression_social_communication_level: CitableEnum[SocialCommunicationLevel] = Field(
        description="Social communication level before regression, calculated from milestones")
    pre_regression_adaptive_behavior_level: CitableEnum[AdaptiveBehaviorLevel] = Field(
        description="Adaptive behavior level before regression, calculated from milestones")
    post_regression_language_level: CitableEnum[LanguageLevel] = Field(
        description="Language level after regression, calculated from milestones")
    post_regression_social_communication_level: CitableEnum[SocialCommunicationLevel] = Field(
        description="Social communication level after regression, calculated from milestones")
    post_regression_adaptive_behavior_level: CitableEnum[AdaptiveBehaviorLevel] = Field(
        description="Adaptive behavior level after regression, calculated from milestones")
    has_language_loss: CitableBool = Field(description="Whether language loss occurred, with citations")
    has_social_loss: CitableBool = Field(description="Whether social communication loss occurred, with citations")
    has_adaptive_loss: CitableBool = Field(description="Whether adaptive behavior loss occurred, with citations")
    has_repetitive_behaviors: CitableBool = Field(
        description="Whether repetitive behaviors were observed, with citations")

    # This helper is deliberately NOT installed as the model's validator: it
    # overwrites the pre-regression levels with fixed defaults and reads
    # "developmental_milestones", which is not a field of this model. Patching
    # it over the pydantic-core validator (whose call signature and return type
    # differ) would break validation, so it has to be called explicitly.
    @classmethod
    def _validate_levels(cls, values):
        """Fill the six level entries of *values* in place and return it.

        Args:
            values: mutable mapping. ``values["developmental_milestones"]``,
                when present, is expected to hold milestone objects exposing
                ``milestone_has_regressed.value`` (assumed to be
                ``DevelopmentalMilestone`` instances - TODO confirm with the
                caller). A missing key or ``None`` counts as no milestones.

        Returns:
            The same mapping, with every ``pre_regression_*_level`` set to the
            lowest level and every ``post_regression_*_level`` derived from the
            milestones that have not regressed. All citations are empty.
        """
        # Pre-regression levels cannot be derived here, so they default to the
        # lowest level of each scale.
        values["pre_regression_language_level"] = CitableEnum[LanguageLevel](
            value=LanguageLevel.NO_SPEECH, citations=[])
        values["pre_regression_social_communication_level"] = CitableEnum[SocialCommunicationLevel](
            value=SocialCommunicationLevel.NO_ENGAGEMENT, citations=[])
        values["pre_regression_adaptive_behavior_level"] = CitableEnum[AdaptiveBehaviorLevel](
            value=AdaptiveBehaviorLevel.NO_INDEPENDENCE, citations=[])

        # `or []` also covers an explicit None, which a plain .get() default
        # would let through and then fail to iterate.
        milestones = values.get("developmental_milestones") or []
        retained = [m for m in milestones if not m.milestone_has_regressed.value]
        post_score = DevelopmentalScore(milestones=retained)

        # Score (0-3) -> level for each domain.
        language_levels = {
            0: LanguageLevel.NO_SPEECH,
            1: LanguageLevel.BABBLING_SINGLE_WORDS,
            2: LanguageLevel.TWO_WORD_PHRASES,
            3: LanguageLevel.SENTENCES_NAMING,
        }
        social_levels = {
            0: SocialCommunicationLevel.NO_ENGAGEMENT,
            1: SocialCommunicationLevel.BASIC_ENGAGEMENT,
            2: SocialCommunicationLevel.RECIPROCAL_INTERACTION,
            3: SocialCommunicationLevel.COMPLEX_INTERACTION,
        }
        adaptive_levels = {
            0: AdaptiveBehaviorLevel.NO_INDEPENDENCE,
            1: AdaptiveBehaviorLevel.BASIC_SELF_HELP,
            2: AdaptiveBehaviorLevel.MODERATE_INDEPENDENCE,
            3: AdaptiveBehaviorLevel.AGE_APPROPRIATE,
        }

        values["post_regression_language_level"] = CitableEnum[LanguageLevel](
            value=language_levels.get(post_score.language_score, LanguageLevel.NO_SPEECH),
            citations=[])
        values["post_regression_social_communication_level"] = CitableEnum[SocialCommunicationLevel](
            value=social_levels.get(post_score.social_communication_score,
                                    SocialCommunicationLevel.NO_ENGAGEMENT),
            citations=[])
        values["post_regression_adaptive_behavior_level"] = CitableEnum[AdaptiveBehaviorLevel](
            value=adaptive_levels.get(post_score.adaptive_behavior_score,
                                      AdaptiveBehaviorLevel.NO_INDEPENDENCE),
            citations=[])

        return values


# Dates and ages describing when vaccination, regression onset and diagnosis
# happened. Every field is optional.
class TimelineRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    date_of_vaccination: Optional[CitableMonthYear] = Field(default=None,
                                                            description='Month and year of vaccination, with citations')
    regression_onset_date: Optional[CitableMonthYear] = Field(default=None,
                                                              description="Month and year of regression onset")
    diagnosis_date: Optional[CitableMonthYear] = Field(default=None,
                                                       description="Month and year of diagnosis")
    age_at_vaccination_months: Optional[Union[CitableInt, CitableFloat]] = Field(default=None,
                                                                                 description="Age at vaccination in months, with citations")
    age_at_diagnosis_months: Optional[CitableInt] = Field(default=None,
                                                          description="Age at diagnosis in months, with citations")

    @staticmethod
    def _month_year_from_iso_date(v, label):
        """Convert a ``YYYY-MM-DD`` string into a ``CitableMonthYear``.

        Non-string input is returned unchanged so normal field validation can
        handle it. A string that does not match ``%Y-%m-%d`` yields ``None``
        (best effort: the field is simply left unset). *label* prefixes the
        generated explanation text.
        """
        if not isinstance(v, str):
            return v
        from datetime import datetime
        try:
            parsed = datetime.strptime(v, "%Y-%m-%d")
        except ValueError:
            return None
        return CitableMonthYear(year=parsed.year, month=parsed.month, citations=[],
                                explanation=f"{label}: {v}")

    @field_validator("age_at_vaccination_months", mode="before")
    @classmethod
    def validate_age_at_vaccination_months(cls, v):
        """Rescale a dict payload whose numeric ``value`` is below 10 by a factor of 12.

        NOTE(review): the heuristic assumes any value below 10 was given in
        years. It therefore also rewrites genuine ages of 0-9 months (e.g. 2
        becomes 24) - confirm this is acceptable for the data being ingested.
        The input dict is copied, never modified in place.
        """
        if isinstance(v, dict) and "value" in v:
            raw = v["value"]
            if isinstance(raw, (int, float)) and raw < 10:
                return {**v, "value": raw * 12}
        return v

    # Runs in "before" mode: once the field has been validated the value can
    # only be a CitableMonthYear or None, so a string could never be seen in
    # "after" mode. The method name is kept for compatibility.
    @field_validator("date_of_vaccination", mode="before")
    @classmethod
    def validate_date_of_vaccination_after(cls, v, info):
        """Accept a plain ``YYYY-MM-DD`` string for ``date_of_vaccination``."""
        return cls._month_year_from_iso_date(v, "Vaccination date")

    # "before" mode for the same reason as date_of_vaccination above.
    @field_validator("diagnosis_date", mode="before")
    @classmethod
    def validate_diagnosis_date_after(cls, v, info):
        """Accept a plain ``YYYY-MM-DD`` string for ``diagnosis_date``."""
        return cls._month_year_from_iso_date(v, "Diagnosis date")

    @field_validator("age_at_diagnosis_months", mode="before")
    @classmethod
    def validate_age_at_diagnosis_months(cls, v):
        """Collapse a dict payload whose ``value`` is missing or None to ``None``."""
        if isinstance(v, dict) and v.get("value") is None:
            return None
        return v





class EarlyVaccinationSymptoms(BaseModel):
    """Symptoms observed within the first 30 days of vaccination based on VAERS data analysis"""

    model_config = ConfigDict(use_enum_values=True)

    # Top symptoms from VAERS analysis
    had_autism_features: CitableSymptom = Field(
        description="Whether autism/autistic features/ASD/PDD/Asperger's syndrome were observed within 30 days of vaccination, with citations and ongoing status")
    had_fever: CitableSymptom = Field(
        description="Whether fever/high fever/low-grade fever was observed within 30 days of vaccination, with citations and ongoing status")
    had_speech_loss: CitableSymptom = Field(
        description="Whether loss of speech/stopped talking/language delay/expressive aphasia/non-verbal was observed within 30 days of vaccination, with citations and ongoing status")
    had_irritability: CitableSymptom = Field(
        description="Whether irritability/cranky/fussy behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_eye_contact_loss: CitableSymptom = Field(
        description="Whether loss of eye contact/fleeting eye contact was observed within 30 days of vaccination, with citations and ongoing status")
    had_diarrhea: CitableSymptom = Field(
        description="Whether diarrhea/chronic diarrhea/loose stools were observed within 30 days of vaccination, with citations and ongoing status")
    had_seizures: CitableSymptom = Field(
        description="Whether seizures/infantile spasms/febrile seizures/grand mal seizures were observed within 30 days of vaccination, with citations and ongoing status")
    had_lethargy: CitableSymptom = Field(
        description="Whether lethargy/listless/tired/sleepy behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_screaming: CitableSymptom = Field(
        description="Whether screaming/high-pitched screaming/shrieking cry/unconsolable crying was observed within 30 days of vaccination, with citations and ongoing status")
    had_behavioral_changes: CitableSymptom = Field(
        description="Whether behavioral changes/personality change/strange behavior/misbehaved/violent behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_rash: CitableSymptom = Field(
        description="Whether rash/red spots/red bumps/morbilliform rash/measles-like rash was observed within 30 days of vaccination, with citations and ongoing status")
    had_head_banging: CitableSymptom = Field(
        description="Whether head banging/self-mutilating behavior/self-injurious behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_social_withdrawal: CitableSymptom = Field(
        description="Whether withdrawal/social withdrawal/became distant/aloof behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_injection_site_reaction: CitableSymptom = Field(
        description="Whether swelling/redness/lump at injection site was observed within 30 days of vaccination, with citations and ongoing status")
    had_balance_loss: CitableSymptom = Field(
        description="Whether loss of balance/unsteady on feet was observed within 30 days of vaccination, with citations and ongoing status")
    had_appetite_loss: CitableSymptom = Field(
        description="Whether appetite decreased/loss of appetite/picky eater/refused to eat was observed within 30 days of vaccination, with citations and ongoing status")
    had_staring_spells: CitableSymptom = Field(
        description="Whether staring spells/staring off into space/blank staring was observed within 30 days of vaccination, with citations and ongoing status")
    had_hand_flapping: CitableSymptom = Field(
        description="Whether hand flapping/repetitive motor mannerisms/stimming behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_toe_walking: CitableSymptom = Field(
        description="Whether toe walking was observed within 30 days of vaccination, with citations and ongoing status")
    had_gastrointestinal_problems: CitableSymptom = Field(
        description="Whether gastrointestinal problems/digestive problems/stomach problems were observed within 30 days of vaccination, with citations and ongoing status")
    had_hyperactivity: CitableSymptom = Field(
        description="Whether hyperactivity/overactive behavior was observed within 30 days of vaccination, with citations and ongoing status")
    had_ear_infections: CitableSymptom = Field(
        description="Whether ear infections/otitis media was observed within 30 days of vaccination, with citations and ongoing status")
    had_vomiting: CitableSymptom = Field(
        description="Whether vomiting/projectile vomiting was observed within 30 days of vaccination, with citations and ongoing status")
    had_sleep_problems: CitableSymptom = Field(
        description="Whether sleep problems/disturbed sleep/insomnia/no sleep was observed within 30 days of vaccination, with citations and ongoing status")
    had_encephalopathy: CitableSymptom = Field(
        description="Whether encephalopathy/encephalatic condition was observed within 30 days of vaccination, with citations and ongoing status")

    @field_validator("had_ear_infections", mode="before")
    @classmethod
    def validate_had_ear_infections(cls, v):
        """Rename a misspelled ``"extremely"`` key to ``"explanation"``.

        Only dict payloads containing the key ``"extremely"`` are touched; its
        value replaces any existing ``"explanation"`` entry. The rename is done
        on a shallow copy so the caller's dict is left unmodified.
        """
        if isinstance(v, dict) and "extremely" in v:
            repaired = dict(v)
            repaired["explanation"] = repaired.pop("extremely")
            return repaired
        return v


# Diagnosis details: severity, diagnosis name(s) and diagnosing professional(s).
class DiagnosisRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    asd_severity: CitableEnum[ASDSeverity] = Field(description="ASD severity level, with citations")
    diagnosis_name: List[CitableStr] = Field(default_factory=list,
                                             description="Specific diagnosis (e.g., autism, PDD-NOS), each with its own citations and explanations")
    diagnosing_professional: Optional[List[CitableStr]] = Field(default=None,
                                                                description="Professional who made the diagnosis, each with its own citations and explanations")

    @field_validator("diagnosis_name", mode="before")
    @classmethod
    def validate_diagnosis_name(cls, v):
        """Coerce loosely shaped input into a list of ``CitableStr``.

        * ``None`` becomes an empty list.
        * A bare string becomes a one-element list with empty citations.
        * In a list, ``CitableStr`` items are kept, dicts with a non-None
          ``"value"`` are converted, strings are wrapped, and every other item
          (including ``None``) is dropped.
        * Any other input is returned unchanged for regular validation.
        """
        if v is None:
            return []
        if isinstance(v, str):
            return [CitableStr(value=v, citations=[])]
        if not isinstance(v, list):
            return v

        cleaned = []
        for item in v:
            if isinstance(item, CitableStr):
                cleaned.append(item)
            elif isinstance(item, dict):
                # Dicts without a usable value carry no diagnosis; skip them.
                if item.get("value") is not None:
                    cleaned.append(CitableStr(**item))
            elif isinstance(item, str):
                cleaned.append(CitableStr(value=item, citations=[]))
        return cleaned

    # Must run in "before" mode: after field validation the value is already a
    # list of CitableStr (or None), so a raw string would never reach an
    # "after" validator. Keys that are not model fields (such as a separate
    # "..._citations" entry) are not exposed to field validators, hence the
    # wrapped string gets empty citations.
    @field_validator("diagnosing_professional", mode="before")
    @classmethod
    def validate_diagnosing_professional(cls, v):
        """Wrap a bare string in a one-element ``CitableStr`` list; pass the rest through."""
        if isinstance(v, str):
            return [CitableStr(value=v, citations=[])]
        return v


# Interventions received, whether any is ongoing, and whether recovery occurred.
class InterventionRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    interventions: List[CitableEnum[InterventionType]] = Field(
        default_factory=list,
        description="List of interventions, each with its own citations and explanations")
    is_intervention_ongoing: CitableBool = Field(description="Whether any interventions are ongoing, with citations")
    has_recovery: CitableBool = Field(description="Whether recovery occurred, with citations")

    @field_validator("interventions", mode="before")
    @classmethod
    def validate_interventions(cls, v):
        """Coerce loosely shaped input into a list of ``CitableEnum[InterventionType]``.

        * ``None`` becomes an empty list.
        * A bare string becomes a one-element list with empty citations (same
          treatment ``DiagnosisRecord.validate_diagnosis_name`` gives strings).
        * In a list, ``CitableEnum`` items are kept, dicts with a non-None
          ``"value"`` are converted, strings are wrapped, and every other item
          (including ``None``) is dropped.
        * Any other input is returned unchanged for regular validation.

        A string or dict value that is not a valid ``InterventionType`` makes
        the ``CitableEnum`` construction fail, which surfaces as a validation
        error for this field.
        """
        if v is None:
            return []
        if isinstance(v, str):
            return [CitableEnum[InterventionType](value=v, citations=[])]
        if not isinstance(v, list):
            return v

        cleaned = []
        for item in v:
            if isinstance(item, CitableEnum):
                cleaned.append(item)
            elif isinstance(item, dict):
                # Dicts without a usable value carry no intervention; skip them.
                if item.get("value") is not None:
                    cleaned.append(CitableEnum[InterventionType](**item))
            elif isinstance(item, str):
                cleaned.append(CitableEnum[InterventionType](value=item, citations=[]))
        return cleaned


# Birth data and pre-vaccination developmental status. Both fields are optional.
class BirthRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    was_normal_pre_vaccination: Optional[CitableBool] = Field(default=None,
                                                              description="Whether child was developmentally normal before vaccination, with citations")
    apgar_score: Optional[List[CitableStr]] = Field(default=None,
                                                    description="APGAR score at birth, if reported, each with its own citations and explanations")

    # Must run in "before" mode: after field validation the value is already a
    # list of CitableStr (or None), so a raw string would never reach an
    # "after" validator. Keys that are not model fields (such as a separate
    # "..._citations" entry) are not exposed to field validators, hence the
    # wrapped string gets empty citations.
    @field_validator("apgar_score", mode="before")
    @classmethod
    def validate_apgar_score(cls, v):
        """Wrap a bare string in a one-element ``CitableStr`` list; pass the rest through."""
        if isinstance(v, str):
            return [CitableStr(value=v, citations=[])]
        return v


# Observed repetitive behaviors, each stored as free text with its citations.
class BehaviorRecord(BaseModel):
    model_config = ConfigDict(use_enum_values=True)

    repetitive_behaviors: List[CitableStr] = Field(default_factory=list,
                                                   description="List of observed repetitive behaviors, each with its own citations and explanations")

    @field_validator("repetitive_behaviors", mode="before")
    @classmethod
    def validate_repetitive_behaviors(cls, v):
        """Coerce loosely shaped input into a list of ``CitableStr``.

        * ``None`` becomes an empty list.
        * In a list, ``CitableStr`` items are kept, dicts with a non-None
          ``"value"`` are converted, strings are wrapped with empty citations,
          and every other item (including ``None``) is dropped.
        * A dict is read as the legacy shape ``{"value": [...], "citations":
          [...]}``: every non-None entry of ``value`` becomes a ``CitableStr``
          sharing the dict's citations. A missing/None ``value`` yields an
          empty list and a single string ``value`` counts as one behavior
          rather than being split into characters.

        Raises:
            ValueError: for any other input type (including a bare string).
        """
        if v is None:
            return []

        if isinstance(v, list):
            cleaned = []
            for item in v:
                if isinstance(item, CitableStr):
                    cleaned.append(item)
                elif isinstance(item, dict):
                    # Dicts without a usable value carry no behavior; skip them.
                    if item.get("value") is not None:
                        cleaned.append(CitableStr(**item))
                elif isinstance(item, str):
                    cleaned.append(CitableStr(value=item, citations=[]))
            return cleaned

        if isinstance(v, dict):
            behaviors = v.get("value")
            if behaviors is None:
                behaviors = []
            elif isinstance(behaviors, str):
                behaviors = [behaviors]
            citations = v.get("citations") or []
            return [
                CitableStr(value=item, citations=citations)
                for item in behaviors
                if item is not None
            ]

        raise ValueError(f"Invalid type for repetitive_behaviors: {type(v)}")


class VAERSReport(BaseModel):
    """Top-level structured record extracted from a single VAERS report.

    Groups the per-topic sub-records (timeline, milestones, regression,
    diagnosis, intervention, birth, behavior) together with optional test
    results and citable lists of comorbidities and environmental exposures.
    The ``before`` validators below tolerate loosely shaped input: ``None``
    instead of a list, ``None`` entries inside a list, and bare strings or
    dicts in place of ``CitableEnum`` objects.
    """

    # Pydantic v2 configuration (replaces the deprecated nested ``class Config``).
    model_config = {"use_enum_values": True}

    timeline: TimelineRecord = Field(description="Timeline-related data including dates and ages")
    developmental_milestones: List[DevelopmentalMilestone] = Field(
        ...,
        description="List of developmental milestones")
    regression_record: RegressionRecord = Field(
        description="Regression-related data including pre/post levels and loss indicators")
    early_vaccination_symptoms: Optional[EarlyVaccinationSymptoms] = Field(
        default=None,
        description="Symptoms observed within the first 30 days of vaccination based on VAERS data analysis")
    diagnosis_record: DiagnosisRecord = Field(description="Diagnosis-related data including severity and professional")
    intervention_record: InterventionRecord = Field(description="Intervention-related data including recovery status")
    birth_record: BirthRecord = Field(description="Birth and pre-vaccination developmental status")
    behavior_record: BehaviorRecord = Field(description="Repetitive behavior data with explanations and citations")
    heavy_metal_tests: Optional[List[HeavyMetalTest]] = Field(default=None, description="Heavy metal test results")
    comorbidities: List[CitableEnum[ComorbidityType]] = Field(
        default_factory=list,
        description="List of co-occurring conditions, each with its own citations and explanations")
    environmental_exposures: List[CitableEnum[ExposureType]] = Field(
        default_factory=list,
        description="List of environmental exposures, each with its own citations and explanations")
    legal_matter: CitableBool = Field(
        description="Whether the report is related to a lawsuit or other legal matter, with citations")
    diagnosis_reporting_delay_reason: Optional[CitableStr] = Field(
        default=None,
        description="Reason stated for delay between diagnosis and reporting, with citations")

    @classmethod
    def _coerce_citable_enum_list(cls, v, enum_type):
        """Normalize raw input for a ``List[CitableEnum[enum_type]]`` field.

        Args:
            v: Raw field value as supplied by the caller.
            enum_type: Enum class used to parametrize ``CitableEnum``.

        Returns:
            ``[]`` when ``v`` is ``None``; ``v`` unchanged when it is not a
            list; otherwise a new list in which
            * existing ``CitableEnum`` instances are kept as-is,
            * dicts with a non-``None`` ``"value"`` are expanded into
              ``CitableEnum[enum_type](**item)``,
            * strings and bare enum members are wrapped with empty citations,
            * everything else (``None`` entries, dicts without a value, other
              types) is dropped.
        """
        if v is None:
            return []
        if not isinstance(v, list):
            return v

        wrapper = CitableEnum[enum_type]
        coerced = []
        for item in v:
            if isinstance(item, CitableEnum):
                coerced.append(item)
            elif isinstance(item, dict):
                # A dict without a usable value carries no information; skip it.
                if item.get("value") is not None:
                    coerced.append(wrapper(**item))
            elif isinstance(item, (str, Enum)):
                # Bare enum members are accepted alongside strings so they are
                # not silently discarded.
                coerced.append(wrapper(value=item, citations=[]))
        return coerced

    @field_validator("comorbidities", mode="before")
    @classmethod
    def validate_comorbidities(cls, v):
        """Coerce raw ``comorbidities`` input into ``CitableEnum[ComorbidityType]`` items."""
        return cls._coerce_citable_enum_list(v, ComorbidityType)

    @field_validator("environmental_exposures", mode="before")
    @classmethod
    def validate_environmental_exposures(cls, v):
        """Coerce raw ``environmental_exposures`` input into ``CitableEnum[ExposureType]`` items."""
        return cls._coerce_citable_enum_list(v, ExposureType)

    @field_validator("legal_matter", mode="before")
    @classmethod
    def validate_legal_matter(cls, v):
        """Replace an explicit ``None`` with a ``False`` placeholder value.

        NOTE(review): the field declares no default, so an omitted key is
        presumably still reported as missing -- confirm whether that is intended.
        """
        if v is None:
            return CitableBool(value=False, citations=[], explanation="No legal matter information provided")
        return v

    @field_validator("diagnosis_reporting_delay_reason", mode="before")
    @classmethod
    def validate_diagnosis_reporting_delay_reason(cls, v):
        """Treat ``None`` and dicts whose ``"value"`` is ``None`` as "not provided"."""
        if v is None:
            return None
        if isinstance(v, dict) and v.get("value") is None:
            return None
        return v

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs):
        """Subclass hook that tries to install ``_validate_regression_record``.

        Accepts and forwards ``**kwargs`` so that class keyword arguments on a
        subclass do not raise ``TypeError``.

        NOTE(review): per pydantic's documentation this hook runs for
        subclasses only, so it presumably never affects ``VAERSReport``
        itself. The assignment below targets an attribute of the compiled
        core validator and may raise ``AttributeError``; the replacement
        callable also does not follow the ``validate_python`` contract.
        A ``model_validator(mode="before")`` is likely the intended
        mechanism -- confirm before relying on this.
        """
        super().__pydantic_init_subclass__(**kwargs)
        cls.__pydantic_validator__.validate_python = cls._validate_regression_record

    @classmethod
    def _validate_regression_record(cls, values):
        """Build ``values["regression_record"]`` with the milestone list injected.

        Copies ``values["developmental_milestones"]`` (default ``[]``) into
        the raw regression-record mapping, constructs a ``RegressionRecord``
        from it, stores the result back into ``values`` and returns
        ``values``. The mapping taken from ``values`` is mutated in place.
        """
        # The RegressionRecord will handle its own validation and level calculation
        # We just need to ensure it gets the milestone data it needs
        regression_record = values.get("regression_record", {})

        # Add milestone data to regression record for validation
        regression_record["developmental_milestones"] = values.get("developmental_milestones", [])

        # Create the RegressionRecord - it will handle its own validation
        values["regression_record"] = RegressionRecord(**regression_record)
        return values

    @property
    def dmsg(self) -> int:
        """Placeholder score; always ``0``."""
        # Since pre_regression_milestones is removed, we can't calculate pre_score
        # Return 0 as default or implement alternative logic if needed
        return 0


# Lookup table from a milestone's string value to its category.
# Language, social-communication and adaptive-behavior milestones map to
# MilestoneType members; the two gross-motor milestones map to the plain
# string "gross_motor".
MILESTONE_TYPE_MAPPING = {
    milestone.value: category
    for category, milestones in (
        # Language milestones
        (MilestoneType.LANGUAGE, (
            MilestoneName.BABBLING,
            MilestoneName.SINGLE_WORDS,
            MilestoneName.TWO_WORD_PHRASES,
            MilestoneName.NAMING_OBJECTS,
        )),
        # Social communication milestones
        (MilestoneType.SOCIAL_COMMUNICATION, (
            MilestoneName.EYE_CONTACT,
            MilestoneName.RESPONDING_TO_NAME,
            MilestoneName.POINTING,
            MilestoneName.WAVING,
            MilestoneName.CLAPPING,
            MilestoneName.JOINT_ATTENTION,
            MilestoneName.TURN_TAKING,
            MilestoneName.PRETEND_PLAY,
        )),
        # Adaptive behavior milestones
        (MilestoneType.ADAPTIVE_BEHAVIOR, (
            MilestoneName.FEEDING_WITH_HELP,
            MilestoneName.USING_SPOON,
            MilestoneName.DANCING,
            MilestoneName.DRESSING_WITH_HELP,
            MilestoneName.SELF_FEEDING,
        )),
        # Gross motor milestones
        ("gross_motor", (
            MilestoneName.SITTING_WITHOUT_SUPPORT,
            MilestoneName.WALKING,
        )),
    )
    for milestone in milestones
}

As you can see, this is a very complex schema which is used to extract structured data from a VAERS report.

The complexity of this schema acts as a very good test for the quality of an LLM.