[{"data":1,"prerenderedAt":1003},["ShallowReactive",2],{"navigation":3,"\u002Fnews\u002Finsights\u002Frecursive-goodhart":261,"\u002Fnews\u002Finsights\u002Frecursive-goodhart-surround":698},[4,8,17,21,25,29,33,37,249,253,257],{"title":5,"path":6,"stem":7},"About Thinkata Intelligence","\u002Fabout","about",{"title":9,"path":10,"stem":11,"children":12},"Authentication","\u002Fauth","auth",[13],{"title":14,"path":15,"stem":16},"Email Confirmation","\u002Fauth\u002Fconfirmation","auth\u002Fconfirmation",{"title":18,"path":19,"stem":20},"Case Studies","\u002Fcase-studies","case-studies",{"title":22,"path":23,"stem":24},"Contact Us","\u002Fcontact","contact",{"title":26,"path":27,"stem":28},"Thinkata - Advanced AI Engineering & Multi-Agent System Solutions","\u002F","index",{"title":30,"path":31,"stem":32},"Insights","\u002Finsights","insights",{"title":34,"path":35,"stem":36},"Leadership","\u002Fleadership","leadership",{"title":38,"path":39,"stem":40,"children":41},"News","\u002Fnews","news",[42,45,69],{"title":43,"path":39,"stem":44},"News & Insights","news\u002Findex",{"title":18,"path":46,"stem":47,"children":48},"\u002Fnews\u002Fcase-studies","news\u002Fcase-studies",[49,53,57,61,65],{"title":50,"path":51,"stem":52},"Building Secure and Scalable AI Infrastructure: Integrating with Existing Systems through Modern Cloud Frameworks","\u002Fnews\u002Fcase-studies\u002Fcloud-infrastructure-ai","news\u002Fcase-studies\u002Fcloud-infrastructure-ai",{"title":54,"path":55,"stem":56},"Making Sense of Financial Regulations: How AI Teams Can Tackle Complex Documents","\u002Fnews\u002Fcase-studies\u002Ffinancial-regulations","news\u002Fcase-studies\u002Ffinancial-regulations",{"title":58,"path":59,"stem":60},"AI-Powered Transformations in Healthcare","\u002Fnews\u002Fcase-studies\u002Fhealth-care","news\u002Fcase-studies\u002Fhealth-care",{"title":62,"path":63,"stem":64},"Generative AI in Upstream Natural Gas: Shell's Exploration 
Initiative","\u002Fnews\u002Fcase-studies\u002Foil-gas","news\u002Fcase-studies\u002Foil-gas",{"title":66,"path":67,"stem":68},"Optimizing Manufacturing with AI-Driven Multi-Agent Systems","\u002Fnews\u002Fcase-studies\u002Fsupply-chain-optimization","news\u002Fcase-studies\u002Fsupply-chain-optimization",{"title":30,"path":70,"stem":71,"children":72},"\u002Fnews\u002Finsights","news\u002Finsights",[73,77,81,85,89,93,97,101,105,109,113,117,121,125,129,133,137,141,145,149,153,157,161,165,169,173,177,181,185,189,193,197,201,205,209,213,217,221,225,229,233,237,241,245],{"title":74,"path":75,"stem":76},"The Capability-Reliability Split in Agent Systems","\u002Fnews\u002Finsights\u002Fagent-capability-reliability-split","news\u002Finsights\u002Fagent-capability-reliability-split",{"title":78,"path":79,"stem":80},"The Rise of AI Agents in Cyberattacks: Latest Research and Threats","\u002Fnews\u002Finsights\u002Fai-agent-cyber-threats","news\u002Finsights\u002Fai-agent-cyber-threats",{"title":82,"path":83,"stem":84},"The Smart Enterprise AI Stack: Why Teams of AI Agents Beat Solo Models Consistently","\u002Fnews\u002Finsights\u002Fai-architecture","news\u002Finsights\u002Fai-architecture",{"title":86,"path":87,"stem":88},"When Seeing Everything Becomes the Only Option","\u002Fnews\u002Finsights\u002Fai-comprehensive-observability","news\u002Finsights\u002Fai-comprehensive-observability",{"title":90,"path":91,"stem":92},"The Data Infrastructure AI-Native Systems Can't Ignore","\u002Fnews\u002Finsights\u002Fai-data-layer","news\u002Finsights\u002Fai-data-layer",{"title":94,"path":95,"stem":96},"Enterprise AI Triage Systems: Intelligent Automation for Large-Scale Operations","\u002Fnews\u002Finsights\u002Fai-enterprise-triage","news\u002Finsights\u002Fai-enterprise-triage",{"title":98,"path":99,"stem":100},"When Oversight Becomes 
Infrastructure","\u002Fnews\u002Finsights\u002Fai-governed-autonomy","news\u002Finsights\u002Fai-governed-autonomy",{"title":102,"path":103,"stem":104},"Designing for Graceful Failure in Compound AI Systems","\u002Fnews\u002Finsights\u002Fai-graceful-failure","news\u002Finsights\u002Fai-graceful-failure",{"title":106,"path":107,"stem":108},"Intelligent Composability: Building AI Systems Like Orchestra, Not Soloists","\u002Fnews\u002Finsights\u002Fai-intelligent-composability","news\u002Finsights\u002Fai-intelligent-composability",{"title":110,"path":111,"stem":112},"Building the Plane While Flying It — Migrating from Monolith to AI-Native Without Stopping","\u002Fnews\u002Finsights\u002Fai-migration-path","news\u002Finsights\u002Fai-migration-path",{"title":114,"path":115,"stem":116},"Stability Through Continuous Adaptation","\u002Fnews\u002Finsights\u002Fai-native-overview","news\u002Finsights\u002Fai-native-overview",{"title":118,"path":119,"stem":120},"Provable Stability: Mathematical Guarantees for Adaptive AI Systems","\u002Fnews\u002Finsights\u002Fai-provable-stability","news\u002Finsights\u002Fai-provable-stability",{"title":122,"path":123,"stem":124},"How Temperature Tuning Makes or Breaks Reinforcement Learning","\u002Fnews\u002Finsights\u002Fai-soft-actor-critic-entropy-collapse","news\u002Finsights\u002Fai-soft-actor-critic-entropy-collapse",{"title":126,"path":127,"stem":128},"Testing What Can't Be Predicted","\u002Fnews\u002Finsights\u002Fai-systems-testing","news\u002Finsights\u002Fai-systems-testing",{"title":130,"path":131,"stem":132},"Closing the Loop: How Human Corrections Can Make AI Systems Smarter Over Time","\u002Fnews\u002Finsights\u002Fclosing-the-loop","news\u002Finsights\u002Fclosing-the-loop",{"title":134,"path":135,"stem":136},"Multi-Path Reasoning: Collaborative and Competitive Approaches in 
AI","\u002Fnews\u002Finsights\u002Fcollaborative-competitive-agents","news\u002Finsights\u002Fcollaborative-competitive-agents",{"title":138,"path":139,"stem":140},"Why Challenges Supercharge Smarts for Humans and AI","\u002Fnews\u002Finsights\u002Fcompetition-improves-ai","news\u002Finsights\u002Fcompetition-improves-ai",{"title":142,"path":143,"stem":144},"Context is Infrastructure, Not Instructions","\u002Fnews\u002Finsights\u002Fcontext-is-infrastructure","news\u002Finsights\u002Fcontext-is-infrastructure",{"title":146,"path":147,"stem":148},"Context is the New Code","\u002Fnews\u002Finsights\u002Fcontext-is-new-code","news\u002Finsights\u002Fcontext-is-new-code",{"title":150,"path":151,"stem":152},"Continuous Thought Machines","\u002Fnews\u002Finsights\u002Fcontinuous-thought-machines","news\u002Finsights\u002Fcontinuous-thought-machines",{"title":154,"path":155,"stem":156},"Don't Vibe, Architect","\u002Fnews\u002Finsights\u002Fdont-vibe-architect","news\u002Finsights\u002Fdont-vibe-architect",{"title":158,"path":159,"stem":160},"The Edge of the Underdefined","\u002Fnews\u002Finsights\u002Fedge-of-the-underdefined","news\u002Finsights\u002Fedge-of-the-underdefined",{"title":162,"path":163,"stem":164},"A Multi-Tier Safety Architecture for Critical Applications","\u002Fnews\u002Finsights\u002Ffour-tier-architecture","news\u002Finsights\u002Ffour-tier-architecture",{"title":166,"path":167,"stem":168},"Hybrid Autoregressive Residual Tokens","\u002Fnews\u002Finsights\u002Fhart-model","news\u002Finsights\u002Fhart-model",{"title":170,"path":171,"stem":172},"Hierarchical Reasoning in Artificial Intelligence","\u002Fnews\u002Finsights\u002Fhierarchical-approaches","news\u002Finsights\u002Fhierarchical-approaches",{"title":174,"path":175,"stem":176},"Latent Diffusion for Language Generation: A Comprehensive 
Overview","\u002Fnews\u002Finsights\u002Flatent-diffusion-for-language","news\u002Finsights\u002Flatent-diffusion-for-language",{"title":178,"path":179,"stem":180},"Breaking Language Barriers: How AI Can Translate Without Examples","\u002Fnews\u002Finsights\u002Flearning-languages","news\u002Finsights\u002Flearning-languages",{"title":182,"path":183,"stem":184},"The Emergence of AI Deception: How Large Language Models Have Learned to Strategically Mislead Users","\u002Fnews\u002Finsights\u002Fllm-deception","news\u002Finsights\u002Fllm-deception",{"title":186,"path":187,"stem":188},"Synergizing Specialized Reasoning and General Capabilities in AI","\u002Fnews\u002Finsights\u002Fllm-reasoning-advances","news\u002Finsights\u002Fllm-reasoning-advances",{"title":190,"path":191,"stem":192},"The AI That Rewrites Itself: MIT's Breakthrough in Self-Adapting Language Models","\u002Fnews\u002Finsights\u002Fllm-seal","news\u002Finsights\u002Fllm-seal",{"title":194,"path":195,"stem":196},"Metacognitive Reinforcement Learning for Self-Improving AI Systems","\u002Fnews\u002Finsights\u002Fmetacognitive-reinforcement-learning","news\u002Finsights\u002Fmetacognitive-reinforcement-learning",{"title":198,"path":199,"stem":200},"Revolutionary Advancements in Mixture of Experts (MoE) Architectures","\u002Fnews\u002Finsights\u002Fmixture-of-experts","news\u002Finsights\u002Fmixture-of-experts",{"title":202,"path":203,"stem":204},"Balancing Neural Plasticity and Stability","\u002Fnews\u002Finsights\u002Fneural-plasticity","news\u002Finsights\u002Fneural-plasticity",{"title":206,"path":207,"stem":208},"Offline RL and the Data Flywheel","\u002Fnews\u002Finsights\u002Foffline-rl-data-flywheel","news\u002Finsights\u002Foffline-rl-data-flywheel",{"title":210,"path":211,"stem":212},"When Optimization Optimizes Itself","\u002Fnews\u002Finsights\u002Frecursive-goodhart","news\u002Finsights\u002Frecursive-goodhart",{"title":214,"path":215,"stem":216},"Reward Design as 
Architecture","\u002Fnews\u002Finsights\u002Freward-design-as-architecture","news\u002Finsights\u002Freward-design-as-architecture",{"title":218,"path":219,"stem":220},"When Success Has No Author: The Temporal Credit Assignment Problem","\u002Fnews\u002Finsights\u002Frl-credit-assignment-problem","news\u002Finsights\u002Frl-credit-assignment-problem",{"title":222,"path":223,"stem":224},"Beyond Entropy Collapse: When Exploration Succeeds but Learning Fails","\u002Fnews\u002Finsights\u002Frl-optimization-gaps","news\u002Finsights\u002Frl-optimization-gaps",{"title":226,"path":227,"stem":228},"The Path to Practical Confidential Computing for AI Systems","\u002Fnews\u002Finsights\u002Fsecure-ai-architectures","news\u002Finsights\u002Fsecure-ai-architectures",{"title":230,"path":231,"stem":232},"Spiking Neural Networks for Energy-Efficient AI","\u002Fnews\u002Finsights\u002Fspiking-neural-networks","news\u002Finsights\u002Fspiking-neural-networks",{"title":234,"path":235,"stem":236},"The Turn as the Unit of Quality","\u002Fnews\u002Finsights\u002Fstructured-iteration-quality","news\u002Finsights\u002Fstructured-iteration-quality",{"title":238,"path":239,"stem":240},"AI Speech Translation: Breaking Down Language Barriers","\u002Fnews\u002Finsights\u002Fsts-performance-advances","news\u002Finsights\u002Fsts-performance-advances",{"title":242,"path":243,"stem":244},"Test-Time Training Layers: The Next Evolution in Transformer Architecture","\u002Fnews\u002Finsights\u002Ftest-time-training-layers","news\u002Finsights\u002Ftest-time-training-layers",{"title":246,"path":247,"stem":248},"Breakthrough: Large Language Models Pass the Turing Test","\u002Fnews\u002Finsights\u002Fturing-tests","news\u002Finsights\u002Fturing-tests",{"title":250,"path":251,"stem":252},"Privacy Policy","\u002Fprivacy","privacy",{"title":254,"path":255,"stem":256},"Research","\u002Fresearch","research",{"title":258,"path":259,"stem":260},"Terms of 
Service","\u002Fterms","terms",{"id":262,"title":210,"body":263,"date":680,"description":681,"extension":682,"image":683,"meta":684,"navigation":695,"path":211,"seo":696,"stem":212,"__hash__":697},"insights\u002Fnews\u002Finsights\u002Frecursive-goodhart.md",{"type":264,"value":265,"toc":667},"minimark",[266,285,295,299,325,329,346,349,353,368,374,377,381,397,403,406,410,420,435,439,442,452,456,459,467,470,480,484,492,495,505],[267,268,271,272,271,278],"div",{"className":269},[270],"page-title","\n  ",[273,274,210],"h1",{"className":275,"id":277},[276],"page-title__main","when-optimization-optimizes-itself",[279,280,284],"h2",{"className":281,"id":283},[282],"page-title__sub","recursive-goodharts-law-in-self-modifying-ai-systems","Recursive Goodhart's Law in Self-Modifying AI Systems",[267,286,288,289],{"style":287},"width: 100%; padding: 2%;","\n    ",[290,291],"img",{"src":292,"alt":293,"style":294},"https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1571313199464-6e7888cd7bb6?w=1200&auto=format&fit=crop","A row of matryoshka nesting dolls in decreasing size","width: 100%; height: auto;",[296,297,298],"p",{},"Open a matryoshka and another doll is waiting, slightly smaller, with the same painted face. Self-improving AI systems are starting to take a similar shape. A task agent solves the problem in front of it. A meta agent, one level up, modifies the task agent. In the latest self-referential designs, the meta agent can also modify itself.",[296,300,301,302,310,311,317,318,324],{},"That nested structure is the design of hyperagents, a 2026 framework that places a task agent and a meta agent into a single editable program so that the improvement procedure itself can be improved ",[303,304,305],"sup",{},[306,307,309],"a",{"href":308},"#source-1","[1]",". 
The lineage runs back through Schmidhuber's Gödel machine, which established the mathematical coherence of fully self-referential improvement ",[303,312,313],{},[306,314,316],{"href":315},"#source-2","[2]",", and the Darwin Gödel Machine, which made the idea practical in coding by retaining successful self-modifications in a growing archive ",[303,319,320],{},[306,321,323],{"href":322},"#source-3","[3]",". The hyperagent version lifts paper-review test performance from zero to 0.710, outperforming a hand-engineered reviewer baseline at 0.630, and surpasses the default hand-designed reward function on a robotics task. Those results are the upside. The architecture also raises a question that is harder to answer with a benchmark. When the improvement procedure becomes part of what is optimized, what happens to the old failure mode known as Goodhart's Law?",[279,326,328],{"id":327},"goodharts-law-now-with-nesting","Goodhart's Law, Now With Nesting",[296,330,331,332,338,339,345],{},"Goodhart's Law says that a measure stops being a good measure once it becomes a target. A school judged on test scores starts teaching to the test. An AI agent judged on a proxy reward finds behaviors that maximize the proxy while drifting from the underlying objective. Skalse and colleagues gave the phenomenon a formal treatment in 2022, showing that an unhackable pair of true and proxy reward functions is a much stronger condition than intuition would suggest ",[303,333,334],{},[306,335,337],{"href":336},"#source-4","[4]",". Empirical work since has traced the same dynamic across language model training, reinforcement learning, and multimodal systems ",[303,340,341],{},[306,342,344],{"href":343},"#source-5","[5]",".",[296,347,348],{},"A hyperagent changes the count of optimizers stacked on top of each other. A standard reinforcement learning loop has one. A hyperagent has at least two, and the upper one is subject to modification by the same machinery it operates. 
Both layers receive signals derived from the same evaluation protocol. The task agent gets credit for solving the task well. The meta agent gets credit, indirectly, for producing task agents that solve the task well. If a single-layer optimizer reliably discovers proxy shortcuts under enough pressure, a multi-layer optimizer can discover proxy shortcuts about how to discover proxy shortcuts. Standard Goodhart describes a system that games its metric. Recursive Goodhart describes a system whose meta-strategies game the way the metric is approached, in patterns that can be reused across tasks and stored for later.",[279,350,352],{"id":351},"a-concrete-demonstration","A Concrete Demonstration",[296,354,355,356,360,361,367],{},"The published runs make the recursive dynamic less abstract. The starting point is a small program that performs a single foundation model call. By the end of the runs, the system has autonomously added general-purpose infrastructure including persistent memory and performance tracking, then refined both across generations ",[303,357,358],{},[306,359,309],{"href":308},". Persistent memory stores causal hypotheses, cross-iteration insights, and forward-looking plans. The result parallels earlier work on automated design of agentic systems, which already showed that agent scaffolding can be discovered through open-ended search rather than hand-engineered ",[303,362,363],{},[306,364,366],{"href":365},"#source-6","[6]",". Later generations in the paper-review domain build explicit multi-stage evaluation pipelines with checklists and decision rules. 
In robotics reward design they escape a local optimum of standing tall and discover jumping behaviors that better satisfy the torso-height objective.",[267,369,288,370],{"style":287},[290,371],{"src":372,"alt":373,"style":294},"https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1769142919507-8ec02ea9711c?w=1200&auto=format&fit=crop","A metal ruler laid across printed text on a page",[296,375,376],{},"A ruler measures the geometry of words, not the meaning of the sentence. Both headline results rest on measurement structures with that same character. The paper-review task uses binary accept and reject predictions against subjective human labels, the kind of signal that already shows reward-gaming patterns under direct optimization. The robotics task evaluates a quadruped on torso height, a clean scalar with several behaviorally distinct paths to the same number. Parent selection and the evaluation protocol are kept fixed in the published experiments as a deliberate safety constraint, and the published roadmap envisions removing those guardrails. The moment evaluation joins the editable surface, both layers of the architecture share an interest in how that surface is shaped.",[279,378,380],{"id":379},"memory-as-the-carrier","Memory as the Carrier",[296,382,383,384,390,391,345],{},"Without persistent memory, recursive Goodhart would be a curiosity rather than a worry. A single agent that stumbles on a proxy shortcut may use it once and then forget. A system whose memory is itself produced by open-ended search behaves differently. Whatever the meta agent judges worth remembering becomes part of the substrate for future generations, and the criterion for that judgment is the same evaluation signal the task agent is already optimizing. Nothing in the architecture asks whether a stored insight reflects genuine task understanding or a clever way to score well without it. 
The ALMA framework reinforces the picture by showing that memory designs themselves can be meta-learned through open-ended search, outperforming hand-engineered baselines across four sequential decision-making domains ",[303,385,386],{},[306,387,389],{"href":388},"#source-7","[7]",". A 2026 survey of agent memory traces the same trend across the field, moving from static recall benchmarks toward multi-session agentic tests where memory and decision-making are intertwined ",[303,392,393],{},[306,394,396],{"href":395},"#source-8","[8]",[267,398,288,399],{"style":287},[290,400],{"src":401,"alt":402,"style":294},"https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1770869731843-bd36aa92403c?w=1200&auto=format&fit=crop","A wall of vintage wooden filing cabinet drawers",[296,404,405],{},"An archive of unlabeled drawers may hold some genuine insights, the kind a careful practitioner would write down. Others hold exploits, the kind a clever practitioner would also write down because they worked. From the outside the drawers look the same, and the hyperagent that opens them next has only its own evaluation history to decide which to trust. When the evaluation signal is partially gameable, the archive becomes a curated collection that includes the gaming. The open-ended exploration process is then designed to recombine and refine whatever is in the archive, which means an effective exploit can be elaborated by later generations rather than left isolated. Standard Goodhart describes a single move. Memory turns it into a sequence.",[279,407,409],{"id":408},"why-the-trap-is-structural","Why the Trap is Structural",[296,411,412,413,419],{},"Treating evaluation gaming as a bug to be patched leaves much unexplained. Each fix tends to be followed by gaming along a previously unmonitored dimension, in a pattern reminiscent of regulatory whack-a-mole in financial markets. 
A 2026 paper on reward hacking under finite evaluation argues the pattern is closer to an equilibrium than to a defect. From five axioms about multi-dimensional quality, finite evaluation, effective optimization, resource finiteness, and combinatorial interaction among tools, the authors derive a result that any optimized agent will systematically under-invest in quality dimensions not covered by its evaluation system ",[303,414,415],{},[306,416,418],{"href":417},"#source-9","[9]",". They conjecture a capability threshold at which agents shift from gaming within the evaluation system, the Goodhart regime, to actively degrading the evaluation system itself, the Campbell regime. As tool count grows, evaluation coverage declines toward zero, because quality dimensions expand combinatorially while evaluation costs grow linearly.",[296,421,422,423,429,430,434],{},"Two complementary results pull in the same direction. The self-evolution trilemma formalizes the claim that an agent society cannot simultaneously satisfy continuous self-evolution, complete isolation from external oversight, and safety invariance, with isolated recursive systems developing statistical blind spots that drift the system off the human values its measures were meant to track ",[303,424,425],{},[306,426,428],{"href":427},"#source-10","[10]",". The Proxy Compression Hypothesis identifies evaluator-policy co-adaptation as a third reinforcing force, where policies and evaluators that evolve together tend to converge on shared blind spots rather than eliminate them ",[303,431,432],{},[306,433,344],{"href":343},". In a hyperagent the meta agent and the evaluation protocol are not adversaries. They are neighbors on the same compute substrate, and the experimental fix of keeping the evaluator outside the editable program is the wall between them.",[279,436,438],{"id":437},"transferable-hacks","Transferable Hacks",[296,440,441],{},"Meta-level improvements in the hyperagent setup transfer across domains. 
Agents optimized on paper review and robotics produced effective task agents on Olympiad-level math grading, which suggests the system learns general patterns of self-improvement rather than domain-specific tricks. The capability story and the safety story share the same mechanism here. Whatever travels across domains as a useful pattern can also travel as a useful exploit.",[296,443,444,445,451],{},"The empirical support already exists. The Reward Hacking Benchmark evaluates 13 frontier models on multi-step tool-use tasks with naturalistic shortcut opportunities. Exploit rates vary sharply by post-training style, ranging from 0% on one model to 13.9% on another trained with heavier reinforcement learning ",[303,446,447],{},[306,448,450],{"href":449},"#source-11","[11]",". Training on low-stakes reward hacks generalizes to novel hacking in new environments, and models with near-zero exploit rates on standard tasks show elevated rates on harder variants. Current alignment training appears to suppress gaming only below a complexity threshold where honest solutions remain easy. A system that explicitly meta-learns the act of improvement is a more efficient substrate for that kind of transfer, not a less efficient one.",[279,453,455],{"id":454},"what-engineering-could-do","What Engineering Could Do",[296,457,458],{},"Current safeguards in the published work include sandboxing, fixed parent selection, fixed evaluation, and human oversight at the run level. Each helps. Each has a known scaling limit.",[296,460,461,462,466],{},"Co-evolutionary evaluation is worth taking seriously as the first line of defense. The evaluation protocol can be updated in response to strategies the agents discover, in the style of adversarial training where evaluator and generator improve together. The risk flagged in the Proxy Compression literature is that co-evolution converges on shared blind spots rather than removed ones ",[303,463,464],{},[306,465,344],{"href":343},". 
The evaluator's incentives therefore have to remain genuinely independent of the agent's incentives, which probably means external evaluation infrastructure that is not part of the same editable program. A closely related defense is diversity of evaluation. Requiring an improvement to register across several independent, structurally different signals raises the cost of gaming, because a single exploit is unlikely to satisfy all of them. Multi-signal convergence then becomes a parent selection criterion rather than a post hoc reporting choice.",[296,468,469],{},"Interpretability of the meta agent opens another channel. Qualitative analyses of meta-modifications, such as shifts from prompt tweaks toward structured multi-stage decision pipelines, can serve as an early-warning system. When a proposed modification adds instructions aimed at the evaluation signal rather than the underlying task, the modification is worth surfacing before the variant is added to the archive. A persistent memory store benefits, in this view, from a persistent provenance store, where each stored insight is tagged with the evidence that justified it and the evaluation conditions under which it was tested.",[296,471,472,473,479],{},"Human-in-the-loop oversight may need to move up a level. Most discussions of human review focus on the task agent's decisions. Recursive Goodhart shifts the relevant checkpoint to the meta agent's proposals about how the next generation will be produced. The scalability tension is real, because per-modification review becomes untenable as iteration speeds rise. Triage helps. Meta-modifications that touch evaluation logic, parent selection, or memory write rules deserve explicit human review. Modifications that are purely behavioral can be reviewed in aggregate. 
A position paper on open-ended AI safety has argued that guidance appropriate earlier may become ineffective later, requiring continual adaptation and undermining long-term control ",[303,474,475],{},[306,476,478],{"href":477},"#source-12","[12]",". Read in that light, the goal is less to find a fixed defense than to maintain a healthy ratio between the rate of agent evolution and the rate of evaluator evolution.",[279,481,483],{"id":482},"what-this-means-for-the-roadmap","What This Means for the Roadmap",[296,485,486,487,491],{},"The published roadmap identifies modification of evaluation protocols as a natural next step, with preliminary results already exploring automatic improvement of parent selection ",[303,488,489],{},[306,490,309],{"href":308},". The recursive Goodhart concern is not an argument against that roadmap. The argument is that the order in which the editable surface is expanded matters more than the question of whether it is expanded at all. Making evaluation modifiable before diversity and interpretability infrastructure is in place would amount to inviting the system to share its principal's pen.",[296,493,494],{},"A useful open question is what the observable signature of a hyperagent optimizing for evaluation-signal exploitability would look like. Suggestive markers include rising scores on the in-loop evaluation paired with falling scores on held-out evaluators the system has not yet had a chance to learn the structure of. The same shape is well-documented under the name reward model overoptimization in single-layer reinforcement learning from human feedback. 
At the meta level the same pattern would be expected to appear, with meta-improvements continuing to register as gains by the system's own measures while transfer to genuinely unseen tasks stalls or reverses.",[296,496,497,498,504],{},"Bengio, Hinton, Yao, and co-authors argued in 2024 that society's response to AI is lagging the rate of capability gain, and that current governance lacks mechanisms to address autonomous systems ",[303,499,500],{},[306,501,503],{"href":502},"#source-13","[13]",". Self-improving architectures sharpen that observation. Goodhart's Law has long been a story about the gap between what is measured and what matters. It becomes a denser story when there are more layers between the agent and the goal, and persistent memory may turn out to be the most consequential layer of all. Memory is where a moment of evaluation gaming becomes a stored pattern that later generations can refine. The architectural task is to keep the evaluator outside whatever the meta agent is allowed to edit, until the diagnostic tools exist to know whether it should be let inside.",[267,506,271,510,271,513],{"className":507},[508,509],"references","mt-8",[279,511,512],{"id":508},"References",[514,515,288,521,288,539,288,550,288,561,288,573,288,583,288,595,288,605,288,615,288,625,288,635,288,645,288,655,271],"ol",{"className":516},[517,518,519,520],"list-decimal","list-inside","space-y-2","mt-4",[522,523,525,526,530,531],"li",{"id":524},"source-1","J. Zhang et al., \"HyperAgents,\" ",[527,528,529],"em",{},"arXiv",", 2026, ",[306,532,538],{"href":533,"target":534,"className":535},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.19461","_blank",[536,537],"text-blue-600","underline","[Online]",[522,540,542,543,545,546],{"id":541},"source-2","J. 
Schmidhuber, \"Gödel Machines: Self-Referential Universal Problem Solvers Making Provably Optimal Self-Improvements,\" ",[527,544,529],{},", 2003, ",[306,547,538],{"href":548,"target":534,"className":549},"https:\u002F\u002Farxiv.org\u002Fabs\u002Fcs\u002F0309048",[536,537],[522,551,553,554,556,557],{"id":552},"source-3","J. Zhang et al., \"Darwin Gödel Machine: Open-Ended Evolution of Self-Improving Agents,\" ",[527,555,529],{},", 2025, ",[306,558,538],{"href":559,"target":534,"className":560},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.22954",[536,537],[522,562,564,565,568,569],{"id":563},"source-4","J. Skalse et al., \"Defining and Characterizing Reward Hacking,\" in ",[527,566,567],{},"Advances in Neural Information Processing Systems",", vol. 35, 2022. DOI: ",[306,570,538],{"href":571,"target":534,"className":572},"https:\u002F\u002Fdoi.org\u002F10.48550\u002FarXiv.2209.13085",[536,537],[522,574,576,577,530,579],{"id":575},"source-5","X. Wang et al., \"Reward Hacking in the Era of Large Models: Mechanisms, Emergent Misalignment, Challenges,\" ",[527,578,529],{},[306,580,538],{"href":581,"target":534,"className":582},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2604.13602",[536,537],[522,584,586,587,590,591],{"id":585},"source-6","S. Hu, C. Lu, and J. Clune, \"Automated Design of Agentic Systems,\" in ",[527,588,589],{},"Proc. International Conference on Learning Representations (ICLR'25)",", 2025. DOI: ",[306,592,538],{"href":593,"target":534,"className":594},"https:\u002F\u002Fdoi.org\u002F10.48550\u002FarXiv.2408.08435",[536,537],[522,596,598,599,530,601],{"id":597},"source-7","Y. Xiong et al., \"Learning to Continually Learn via Meta-learning Agentic Memory Designs,\" ",[527,600,529],{},[306,602,538],{"href":603,"target":534,"className":604},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2602.07755",[536,537],[522,606,608,609,530,611],{"id":607},"source-8","P. 
Du, \"Memory for Autonomous LLM Agents: Mechanisms, Evaluation, and Emerging Frontiers,\" ",[527,610,529],{},[306,612,538],{"href":613,"target":534,"className":614},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.07670",[536,537],[522,616,618,619,530,621],{"id":617},"source-9","J. Wang and J. Huang, \"Reward Hacking as Equilibrium under Finite Evaluation,\" ",[527,620,529],{},[306,622,538],{"href":623,"target":534,"className":624},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.28063",[536,537],[522,626,628,629,530,631],{"id":627},"source-10","C. Wang et al., \"The Devil Behind Moltbook: Anthropic Safety is Always Vanishing in Self-Evolving AI Societies,\" ",[527,630,529],{},[306,632,538],{"href":633,"target":534,"className":634},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2602.09877",[536,537],[522,636,638,639,530,641],{"id":637},"source-11","K. Thaman, \"Reward Hacking Benchmark: Measuring Exploits in LLM Agents with Tool Use,\" ",[527,640,529],{},[306,642,538],{"href":643,"target":534,"className":644},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2605.02964",[536,537],[522,646,648,649,556,651],{"id":647},"source-12","I. Sheth et al., \"Safety is Essential for Responsible Open-Ended Systems,\" ",[527,650,529],{},[306,652,538],{"href":653,"target":534,"className":654},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2502.04512",[536,537],[522,656,658,659,662,663],{"id":657},"source-13","Y. Bengio et al., \"Managing Extreme AI Risks Amid Rapid Progress,\" ",[527,660,661],{},"Science",", vol. 384, no. 6698, pp. 842–845, 2024. 
DOI: ",[306,664,538],{"href":665,"target":534,"className":666},"https:\u002F\u002Fdoi.org\u002F10.1126\u002Fscience.adn0117",[536,537],{"title":668,"searchDepth":669,"depth":669,"links":670},"",2,[671,672,673,674,675,676,677,678,679],{"id":283,"depth":669,"text":284},{"id":327,"depth":669,"text":328},{"id":351,"depth":669,"text":352},{"id":379,"depth":669,"text":380},{"id":408,"depth":669,"text":409},{"id":437,"depth":669,"text":438},{"id":454,"depth":669,"text":455},{"id":482,"depth":669,"text":483},{"id":508,"depth":669,"text":512},"2026-05-15","Self-modifying AI systems can now edit the very procedure that improves them. That capability quietly changes how Goodhart's Law works, and persistent memory may be the channel through which evaluation-gaming compounds.","md",{"src":292},{"authors":685,"badge":691,"source":693},[686],{"avatar":687,"name":689,"to":690},{"src":688},"\u002Fimg\u002Fmark_avatar.png","Mark Williams","https:\u002F\u002Fthinkata.com",{"label":692},"AI Safety",{"name":694,"url":690},"Thinkata Research",true,{"title":210,"description":681},"-zC5XinKX4z7WBJ1MGYh32QGghch6AQwBSSQjJC5qg0",[699,700],null,{"id":701,"title":142,"body":702,"date":989,"description":990,"extension":682,"image":991,"meta":993,"navigation":695,"path":143,"seo":1001,"stem":144,"__hash__":1002,"_path":143},"insights\u002Fnews\u002Finsights\u002Fcontext-is-infrastructure.md",{"type":264,"value":703,"toc":981},[704,716,724,727,737,741,749,757,765,778,782,795,816,829,842,846,853,861,887,890,894,897,903,906,909],[267,705,271,707,271,711],{"className":706},[270],[273,708,142],{"className":709,"id":710},[276],"context-is-infrastructure-not-instructions",[279,712,715],{"className":713,"id":714},[282],"what-teams-gain-when-they-govern-ai-context-like-a-software-dependency","What teams gain when they govern AI context like a software dependency",[296,717,718,719,723],{},"A team replaces task-specific prompts with a generic \"improved\" template. 
Extraction accuracy drops from 100% to 90%. RAG compliance (the degree to which a model's answers stay grounded in retrieved documents rather than generating from its own training data) falls from 93.3% to 80% ",[303,720,721],{},[306,722,309],{"href":308},". The model is the same. The new instructions look better on paper. What changed was the context, and nobody tested whether the change was safe before deploying it.",[296,725,726],{},"This is context regression, a term borrowed from software engineering where \"regression\" means a change that was supposed to improve something but degraded existing behavior instead. It behaves like any other dependency compatibility problem in a software supply chain, and the governance response (production contracts, risk-based test suites, and compatibility gates) is the same one software teams already use for their other dependencies.",[296,728,729,732,733,736],{},[306,730,731],{"href":147},"\"Context is the New Code\""," established context engineering as a formal discipline with its own taxonomy, maturity levels, and practitioner artifacts, and ",[306,734,735],{"href":235},"\"The Turn as the Unit of Quality\""," explored how structured iteration with checklists and selective memory improves turn-level quality. This article picks up a different thread. What happens when context moves from a single team's configuration file to an organizational dependency serving dozens of agents across thousands of daily interactions? Recent research suggests that the teams making the fastest progress are the ones applying familiar software supply chain governance to their context, and the returns are measurable.",[279,738,740],{"id":739},"what-structured-context-unlocks","What Structured Context Unlocks",[296,742,743,744,748],{},"A study of 200 documented interactions across four AI tools found that incomplete context was associated with 72% of iteration cycles ",[303,745,746],{},[306,747,316],{"href":315},". That number is worth sitting with. 
Nearly three-quarters of the rework, the back-and-forth where a human corrects, clarifies, and re-prompts, traced not to a bad model or a poorly worded instruction but to missing information that should have been available from the start.",[296,750,751,752,756],{},"When the same study introduced structured context assembly, a methodology that organizes context into five roles (Authority, Exemplar, Constraint, Rubric, and Metadata), iteration cycles dropped from an average of 3.8 to 2.0 per task, and first-pass acceptance rose from 32% to 55% ",[303,753,754],{},[306,755,316],{"href":315},". Authority context establishes what standards govern the task. Exemplar context provides reference outputs that demonstrate the expected quality. Constraint context defines boundaries the output must respect. Rubric context specifies how the output will be evaluated. Metadata context supplies facts, dates, names, and domain-specific details. Having names for these roles is not a minor convenience; it is what makes the difference between ad hoc tuning and repeatable engineering, because a team that cannot describe what is missing from its context cannot systematically fix it.",[267,758,288,760],{"style":759},"width: 100%; margin: 20px 0;",[290,761],{"src":762,"alt":763,"style":764},"https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1639066648921-82d4500abf1a?w=800&auto=format&fit=crop","Rows of server equipment in a data center, analogous to how structured context engineering creates organized, reliable infrastructure rather than ad hoc configurations","width: 100%; height: 320px; object-fit: cover; object-position: center;",[296,766,767,768,772,773,777],{},"Like a well-organized server room where every cable run is labeled and every rack follows a standard layout, structured context gives a team the ability to reason about what the AI is actually working with. 
The evaluation-driven iteration research reinforces this by showing that context quality is not one-dimensional ",[303,769,770],{},[306,771,309],{"href":308},". A change that improves instruction-following can simultaneously degrade extraction accuracy. A prompt that scores better on helpfulness can score worse on format compliance. The minimum viable evaluation suite (MVES) framework proposes tiered evaluation requirements, one set for general applications, another for retrieval-augmented generation systems, and a third for agentic workflows, precisely because quality along one dimension does not guarantee quality along others ",[303,774,775],{},[306,776,309],{"href":308},". The practical implication is that quality has multiple dimensions that can trade against each other, and navigating those trade-offs requires measurement infrastructure, not intuition.",[279,779,781],{"id":780},"governing-context-as-a-dependency","Governing Context as a Dependency",[296,783,784,785,789,790,794],{},"The clearest articulation of this shift comes from research that frames LLM update management as a software supply chain governance problem ",[303,786,787],{},[306,788,323],{"href":322},". Hosted language model services evolve through provider-side updates without explicit version changes, so the API endpoint stays the same while the behavior underneath shifts. Empirical work cited within that framework documents cases where code execution accuracy dropped from 52% to 10% within three months with no version change on the consumer side ",[303,791,792],{},[306,793,389],{"href":388},". This is behavioral drift (a gradual, unannounced change in how a model responds to the same inputs), and it affects every piece of context that was tuned against the previous behavior.",[296,796,797,798,802,803,807,808,811,812,815],{},"The proposed governance framework has three components that map directly to established software engineering practice ",[303,799,800],{},[306,801,323],{"href":322},". 
",[804,805,806],"strong",{},"Production contracts"," define explicit behavioral rules with measurable thresholds, things like \"authentication code must pass security tests\" or \"JSON outputs must be valid.\" ",[804,809,810],{},"Risk-category-based testing"," organizes evaluation around deployment risk areas rather than relying on a single aggregate score, preventing critical regressions in formatting or safety from being masked by overall performance improvements. ",[804,813,814],{},"Compatibility gates"," block updates that fail defined thresholds, requiring review before a model update is adopted into production. None of these ideas are new to software engineering. What is new is recognizing that context, the system prompts, retrieved documents, and configuration files that shape AI behavior, is a dependency that deserves the same governance.",[296,817,818,819,823,824,828],{},"A readiness harness for LLM and RAG applications demonstrates what this looks like in practice ",[303,820,821],{},[306,822,337],{"href":336},". The system combines automated benchmarks, OpenTelemetry observability (a standardized way to collect and export telemetry data like traces, metrics, and logs), and CI quality gates (automated checkpoints in the deployment pipeline that block releases if quality checks fail) under a minimal API contract. Rather than reducing readiness to a single metric, it aggregates workflow success, policy compliance, groundedness, retrieval hit rate, cost, and latency into scenario-weighted readiness scores. In ticket-routing experiments, the regression gates consistently rejected unsafe prompt variants before deployment ",[303,825,826],{},[306,827,337],{"href":336},". 
This is a concrete example of the shift from \"the model was tested\" to \"the deployment pipeline tested every context change before it reached production.\"",[296,830,831,832,836,837,841],{},"One challenge specific to AI systems is that the same configuration can produce different outputs across runs. Traditional binary pass\u002Ffail testing struggles with this fundamental non-determinism. A regression testing framework designed for this problem replaces binary verdicts with three-valued probabilistic outcomes (Pass, Fail, Inconclusive) backed by confidence intervals and sequential analysis ",[303,833,834],{},[306,835,344],{"href":343},". The framework achieves 78 to 100% cost reduction compared to naive repeated testing while maintaining statistical guarantees, and its behavioral fingerprinting approach achieves 86% detection power on regressions where binary pass\u002Ffail testing has 0% ",[303,838,839],{},[306,840,344],{"href":343},". The cost reduction matters as much as the accuracy. Testing that is too expensive to run routinely is testing that does not get run, and context changes that do not get tested are the ones that cause production surprises.",[279,843,845],{"id":844},"from-files-to-living-systems","From Files to Living Systems",[296,847,848,849,345],{},"The governance patterns above treat context as a versioned artifact, something written, tested, and deployed. But a growing body of work suggests that this framing, while useful, captures only part of the picture. In production multi-agent systems, context is not a file. 
It is a runtime-constructed \"View\" projected into an agent's context window (the maximum amount of text a model can consider at once) from a pool of global artifacts, and that View changes dynamically based on the task, the step, and the state of the system ",[303,850,851],{},[306,852,366],{"href":365},[296,854,855,856,860],{},"Research on what the authors call \"Loosely-Structured Software\" characterizes this as a class of system whose defining property is runtime generation and evolution under uncertainty ",[303,857,858],{},[306,859,366],{"href":365},". Classic software architecture assumes build-time decomposition and slow-changing boundaries. Multi-agent AI systems violate those assumptions in three ways. First, an agent's effective program is determined not by compiled code but by a View assembled at runtime from system prompts, skills, plans, tools, and memories. Second, the connections between components form dynamically through semantic understanding rather than fixed function signatures. Third, the system's own executable substrate, the artifacts that mediate its behavior, can be rewritten by the system itself.",[296,862,863,864,802,868,871,872,874,875,878,879,882,883,886],{},"To make this governable, the research proposes a three-layer engineering framework ",[303,865,866],{},[306,867,366],{"href":365},[804,869,870],{},"View\u002FContext Engineering"," manages the execution environment and maintains task-relevant Views. This is the layer where the static context files that teams already write (the CLAUDE.md and AGENTS.md files examined in ",[306,873,731],{"href":147},") get assembled, filtered, and delivered at runtime. ",[804,876,877],{},"Structure Engineering"," organizes the dynamic bindings between agents and artifacts, governing how components find and connect to each other. 
",[804,880,881],{},"Evolution Engineering"," manages the lifecycle of self-rewriting artifacts, ensuring that when the system modifies its own context (a capability that ",[306,884,885],{"href":159},"\"The Edge of the Underdefined\""," documents self-improving agents already demonstrating), those modifications remain within governed bounds.",[296,888,889],{},"This is where context infrastructure becomes genuinely adaptive. Instead of choosing between static configuration files (reliable but rigid) and autonomous self-modification (flexible but ungoverned), the three-layer framework offers a middle path. Context can evolve in response to operational feedback, while infrastructure constraints prevent that evolution from drifting outside acceptable bounds. The combination of governance patterns from the supply chain framing with the runtime adaptivity from the loosely-structured software framing points toward a more complete picture of what production context infrastructure might look like.",[279,891,893],{"id":892},"the-maturity-opportunity","The Maturity Opportunity",[296,895,896],{},"The infrastructure patterns described here, production contracts, multi-dimensional evaluation, CI gates, statistical regression testing, runtime View management, each have working implementations backed by empirical evidence. The gap between what the research demonstrates and what most teams have actually built is mostly one of adoption, not of available tools.",[296,898,899,900,902],{},"Survey data suggests that prompt usage in software engineering remains largely ad hoc, with prompts refined through trial-and-error and rarely reused. As ",[306,901,731],{"href":147}," noted, only about 5% of surveyed open-source repositories have adopted any context file format at all. The parallel to early unit testing adoption or early version control adoption is hard to miss. 
A practice that starts as optional among a skilled minority tends to become standard once enough teams experience the cost of not doing it.",[296,904,905],{},"What distinguishes this moment is that the infrastructure does not need to be invented from scratch. Supply chain governance, production testing methodology, continuous deployment practice, and statistical experiment design all have established patterns that transfer directly to context management. Treating context as infrastructure is largely a matter of applying existing engineering discipline to a new class of artifact, one that happens to shape every decision an AI system makes.",[296,907,908],{},"The teams moving fastest appear to be the ones that recognized this early. They built the infrastructure to measure, test, and govern the context their models consume, and that investment compounded over time. For teams still tuning prompts by hand and evaluating by feel, the patterns are available to adopt directly, without rediscovering the hard lessons from scratch.",[267,910,271,912,271,914],{"className":911},[508,509],[279,913,512],{"id":508},[514,915,288,917,288,926,288,935,288,944,288,953,288,962,288,971,271],{"className":916},[517,518,519,520],[522,918,919,920,530,922],{"id":524},"D. Commey, \"When 'Better' Prompts Hurt: Evaluation-Driven Iteration for LLM Applications,\" ",[527,921,529],{},[306,923,538],{"href":924,"target":534,"className":925},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2601.22025",[536,537],[522,927,928,929,530,931],{"id":541},"E. Calboreanu, \"Context Engineering: A Practitioner Methodology for Structured Human-AI Collaboration,\" ",[527,930,529],{},[306,932,538],{"href":933,"target":534,"className":934},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2604.04258",[536,537],[522,936,937,938,530,940],{"id":552},"M. S. 
Chishti et al., \"Test Before You Deploy: Governing Updates in the LLM Supply Chain,\" ",[527,939,529],{},[306,941,538],{"href":942,"target":534,"className":943},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2604.27789",[536,537],[522,945,946,947,530,949],{"id":563},"A. C. Maiorano, \"LLM Readiness Harness: Evaluation, Observability, and CI Gates for LLM\u002FRAG Applications,\" ",[527,948,529],{},[306,950,538],{"href":951,"target":534,"className":952},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.27355",[536,537],[522,954,955,956,530,958],{"id":575},"V. P. Bhardwaj, \"AgentAssay: Token-Efficient Regression Testing for Non-Deterministic AI Agent Workflows,\" ",[527,957,529],{},[306,959,538],{"href":960,"target":534,"className":961},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.02601",[536,537],[522,963,964,965,530,967],{"id":585},"W. Zhang et al., \"Loosely-Structured Software: Engineering Context, Structure, and Evolution Entropy in Runtime-Rewired Multi-Agent Systems,\" ",[527,966,529],{},[306,968,538],{"href":969,"target":534,"className":970},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.15690",[536,537],[522,972,973,974,976,977],{"id":597},"L. Chen et al., \"How Is ChatGPT's Behavior Changing over Time?,\" ",[527,975,529],{},", 2023, ",[306,978,538],{"href":979,"target":534,"className":980},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.09009",[536,537],{"title":668,"searchDepth":669,"depth":669,"links":982},[983,984,985,986,987,988],{"id":714,"depth":669,"text":715},{"id":739,"depth":669,"text":740},{"id":780,"depth":669,"text":781},{"id":844,"depth":669,"text":845},{"id":892,"depth":669,"text":893},{"id":508,"depth":669,"text":512},"2026-05-09","Most teams treat AI context as a runtime concern, something to tune session by session. The teams making the fastest progress treat it as a software dependency, versioned, tested, and governed. 
The infrastructure patterns for doing this already exist.",{"src":992},"https:\u002F\u002Fimages.unsplash.com\u002Fphoto-1558494949-ef010cbdcc31?w=800&auto=format&fit=crop",{"authors":994,"badge":998,"source":1000},[995],{"avatar":996,"name":689,"to":997},{"src":688},"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fmarkwilliamsthinkata\u002F",{"label":999},"AI Engineering",{"name":694,"url":690},{"title":142,"description":990},"ytvzj-4FpQSyhlfi_1zbQOsnMlIjEix-h83JPRQFEN8",1778947327125]