{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "name": "AstraNL Cross-Provider Quality v1",
  "description": "Vendor-neutral cross-provider AI quality benchmark from AstraNL production decomposer-brain pipeline. Real production traces, ground-truth graded, eval-bug-corrected 2026-05-15.",
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "version": "1.0.0",
  "datePublished": "2026-05-15",
  "publisher": {
    "@type": "Organization",
    "name": "AstraNL",
    "identifier": "KvK 88449335",
    "url": "https://astranl.com"
  },
  "distribution": [
    {
      "@type": "DataDownload",
      "encodingFormat": "application/json",
      "contentUrl": "https://astranl.com/datasets/cross-provider-quality-v1/runs.json"
    },
    {
      "@type": "DataDownload",
      "encodingFormat": "application/x-ndjson",
      "contentUrl": "https://astranl.com/datasets/cross-provider-quality-v1/runs.jsonl"
    }
  ],
  "measurementTechnique": "Production decomposer-brain dispatch to multiple LLM providers, ground-truth comparison after markdown-fence normalization",
  "variableMeasured": [
    "classify_sentiment",
    "detect_language",
    "extract_emails"
  ],
  "keywords": [
    "LLM eval",
    "cross-provider benchmark",
    "AstraNL",
    "coordination protocol",
    "CC-BY"
  ],
  "totalRuns": 972,
  "providers": [
    "anthropic",
    "gemini",
    "openai"
  ],
  "accuracy_by_class_and_provider": {
    "detect_language": {
      "anthropic": {
        "sample_size": 113,
        "accuracy_pct": 100.0,
        "avg_latency_ms": 761
      },
      "gemini": {
        "sample_size": 113,
        "accuracy_pct": 100.0,
        "avg_latency_ms": 832
      },
      "openai": {
        "sample_size": 113,
        "accuracy_pct": 100.0,
        "avg_latency_ms": 832
      }
    },
    "classify_sentiment": {
      "anthropic": {
        "sample_size": 161,
        "accuracy_pct": 98.76,
        "avg_latency_ms": 850
      },
      "gemini": {
        "sample_size": 161,
        "accuracy_pct": 95.65,
        "avg_latency_ms": 1533
      },
      "openai": {
        "sample_size": 161,
        "accuracy_pct": 100.0,
        "avg_latency_ms": 1024
      }
    },
    "extract_emails": {
      "anthropic": {
        "sample_size": 150,
        "accuracy_pct": 100.0,
        "avg_latency_ms": 858
      }
    }
  },
  "corrections": {
    "date": "2026-05-15",
    "cycle": 895,
    "note": "eval harness markdown-fence stripping; 150 anthropic extract_emails false-negatives flipped to correct=1"
  }
}