+
+
+
+
+
+
\ No newline at end of file
diff --git a/api/index.html b/api/index.html
new file mode 100644
index 0000000..e7ed126
--- /dev/null
+++ b/api/index.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/api/search.js b/api/search.js
new file mode 100644
index 0000000..7b58c46
--- /dev/null
+++ b/api/search.js
@@ -0,0 +1,46 @@
+window.pdocSearch = (function(){
+/** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof 
e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return 
this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 
0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance 
fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();oApplication package.
\n"}}, "docInfo": {"app": {"qualname": 0, "fullname": 1, "annotation": 0, "default_value": 0, "signature": 0, "bases": 0, "doc": 5}}, "length": 1, "save": true}, "index": {"qualname": {"root": {"docs": {}, "df": 0}}, "fullname": {"root": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "p": {"docs": {"app": {"tf": 1}}, "df": 1}}}}}, "annotation": {"root": {"docs": {}, "df": 0}}, "default_value": {"root": {"docs": {}, "df": 0}}, "signature": {"root": {"docs": {}, "df": 0}}, "bases": {"root": {"docs": {}, "df": 0}}, "doc": {"root": {"docs": {"app": {"tf": 1.7320508075688772}}, "df": 1, "a": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "p": {"docs": {}, "df": 0, "l": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "t": {"docs": {}, "df": 0, "i": {"docs": {}, "df": 0, "o": {"docs": {}, "df": 0, "n": {"docs": {"app": {"tf": 1}}, "df": 1}}}}}}}}}}}, "p": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "c": {"docs": {}, "df": 0, "k": {"docs": {}, "df": 0, "a": {"docs": {}, "df": 0, "g": {"docs": {}, "df": 0, "e": {"docs": {"app": {"tf": 1}}, "df": 1}}}}}}}}}}, "pipeline": ["trimmer"], "_isPrebuiltIndex": true};
+
+ // mirrored in build-search-index.js (part 1)
+ // Also split on HTML tags. This is a cheap heuristic, but good enough.
+ elasticlunr.tokenizer.setSeperator(/[\s\-.;&_'"=,()]+|<[^>]*>/);
+
+ let searchIndex;
+ if (docs._isPrebuiltIndex) {
+ console.info("using precompiled search index");
+ searchIndex = elasticlunr.Index.load(docs);
+ } else {
+ console.time("building search index");
+ // mirrored in build-search-index.js (part 2)
+ searchIndex = elasticlunr(function () {
+ this.pipeline.remove(elasticlunr.stemmer);
+ this.pipeline.remove(elasticlunr.stopWordFilter);
+ this.addField("qualname");
+ this.addField("fullname");
+ this.addField("annotation");
+ this.addField("default_value");
+ this.addField("signature");
+ this.addField("bases");
+ this.addField("doc");
+ this.setRef("fullname");
+ });
+ for (let doc of docs) {
+ searchIndex.addDoc(doc);
+ }
+ console.timeEnd("building search index");
+ }
+
+ return (term) => searchIndex.search(term, {
+ fields: {
+ qualname: {boost: 4},
+ fullname: {boost: 2},
+ annotation: {boost: 2},
+ default_value: {boost: 2},
+ signature: {boost: 2},
+ bases: {boost: 2},
+ doc: {boost: 1},
+ },
+ expand: true
+ });
+})();
\ No newline at end of file
diff --git a/assets/banner.svg b/assets/banner.svg
new file mode 100644
index 0000000..01cc920
--- /dev/null
+++ b/assets/banner.svg
@@ -0,0 +1,123 @@
+
diff --git a/assets/logo.svg b/assets/logo.svg
new file mode 100644
index 0000000..be5bf7f
--- /dev/null
+++ b/assets/logo.svg
@@ -0,0 +1,61 @@
+
diff --git a/branding.md b/branding.md
new file mode 100644
index 0000000..a2e8758
--- /dev/null
+++ b/branding.md
@@ -0,0 +1,51 @@
+# Branding — Python Project Template
+
+> *From zero to hero — production-ready Python, without the ceremony.*
+
+Agents read this file before generating release names, C4 diagrams, README banners, or any document with visual or copy identity. All fields are optional; absent or blank fields fall back to defaults (adjective-animal release names, Mermaid default colours, no wording constraints).
+
+---
+
+## Identity
+
+- **Project name:** Python Project Template
+- **Tagline:** From zero to hero — production-ready Python, without the ceremony.
+- **Mission:** Eliminate boilerplate so engineers ship features, not setup.
+- **Vision:** The standard starting point for any serious Python project — the bedrock every Python engineer reaches for first.
+- **Tone of voice:** Direct, precise, minimal. The Greeks did not decorate the Parthenon with apologies. Neither do we.
+
+## Visual
+
+The palette is drawn from classical marble, parchment, and gold — materials that have carried ideas for millennia. Every colour choice serves legibility first; decoration is secondary.
+
+- **Background/parchment:** `#faf7f2` → `#ede8e0` — warm off-white, the surface on which ideas are set down
+- **Primary text:** `#5c3d1e` → `#3b2410` — deep warm brown, the ink that endures
+- **Accent/gold:** `#c9a84c` → `#e8c96a` — antique gold, used for borders and structural lines only — never body text
+- **Secondary/blue:** `#7baabf` → `#4a7a96` — Aegean steel blue, for labels and secondary hierarchy
+- **Stone/marble:** `#f0ece4` → `#c8c0b8` — the load-bearing colour; columns, shapes, structural chrome
+- **Logo:** `docs/assets/logo.svg`
+- **Banner:** `docs/assets/banner.svg`
+
+> Deep brown `#3b2410` on parchment `#faf7f2` achieves >10:1 contrast (WCAG AAA). Gold is decorative; it never carries meaning that must be read.
+
+## Release Naming
+
+- **Convention:** `adjective-greek-figure`
+- **Theme:** Greek antiquity — philosophers, heroes, gods, mythological figures. Every release name should read like an epithet: something a figure *earned* through their defining quality (e.g. "Resolute Athena", "Precise Pythagoras", "Luminous Hypatia").
+- **Rationale:** Ancient Greece is the origin of the intellectual tradition that underpins Western civilisation — democracy, systematic philosophy, formal logic, and scientific reasoning all trace their lineage to the Greek city-states. Plato and Aristotle invented political philosophy as a genre; Aristotle formalised logic and ethics; the Pythagoreans established that abstract reasoning could describe the physical world. This template stands on the same premise: rigorous method, applied from the beginning, produces something worth building on. The Greek figure in each release name is not decoration — it is a statement about what kind of work this is.
+- **Excluded words:** *(none)*
+
+## Wording
+
+Every word carries weight. The Greeks had a name for ornament that obscures meaning: *kenophonia* — empty noise.
+
+- **Avoid:** `easy`, `simple`, `just`, `quick`, `scaffold` — these words undermine engineer credibility or imply the work is trivial. A temple is not a scaffold.
+- **Prefer:** `minimal`, `precise`, `production-ready`, `zero-boilerplate`, `rigorous`, `from zero to hero`
+
+## Project Summary
+
+A Python project template with a production-ready AI-assisted delivery workflow.
+Ships with quality tooling (ruff, pyright, pytest, hypothesis), Gherkin-driven
+acceptance criteria, and five specialised AI agents covering scope through release.
+Built on the premise that rigorous method, applied from the beginning, produces
+something worth building on. Use this summary in banners, release notes, and document headers.
diff --git a/container.md b/container.md
new file mode 100644
index 0000000..6d8615e
--- /dev/null
+++ b/container.md
@@ -0,0 +1,22 @@
+# C4 — Container Diagram
+
+> Last updated: YYYY-MM-DD
+> Source: docs/adr/ADR-*.md
+
+```mermaid
+C4Container
+ title Container Diagram — <project name>
+
+ Person(actor1, "", "")
+
+ System_Boundary(sys, "") {
+ Container(container1, "", "", "")
+ Container(container2, "", "", "")
+ }
+
+ System_Ext(ext1, "", "")
+
+ Rel(actor1, container1, "")
+ Rel(container1, container2, "")
+ Rel(container1, ext1, "")
+```
diff --git a/context.md b/context.md
new file mode 100644
index 0000000..9c683d3
--- /dev/null
+++ b/context.md
@@ -0,0 +1,18 @@
+# C4 — System Context
+
+> Last updated: YYYY-MM-DD
+> Source: docs/domain-model.md, docs/glossary.md, docs/features/completed/
+
+```mermaid
+C4Context
+ title System Context — <project name>
+
+ Person(actor1, "", "")
+
+ System(system, "", "<3–5 word system description from discovery.md Scope>")
+
+ System_Ext(ext1, "", "")
+
+ Rel(actor1, system, "")
+ Rel(system, ext1, "")
+```
diff --git a/coverage/.gitignore b/coverage/.gitignore
new file mode 100644
index 0000000..ccccf14
--- /dev/null
+++ b/coverage/.gitignore
@@ -0,0 +1,2 @@
+# Created by coverage.py
+*
diff --git a/coverage/class_index.html b/coverage/class_index.html
new file mode 100644
index 0000000..32d2fe3
--- /dev/null
+++ b/coverage/class_index.html
@@ -0,0 +1,131 @@
+
+
+
+
+ Coverage report
+
+
+
+
+
+
+
+
+
+
+
diff --git a/discovery.md b/discovery.md
new file mode 100644
index 0000000..9b8a33f
--- /dev/null
+++ b/discovery.md
@@ -0,0 +1,21 @@
+# Discovery: <project name>
+
+---
+
+## Session: YYYY-MM-DD
+
+### Context
+<3–5 sentence synthesis: who the users are, what the product does, why it exists,
+success/failure conditions, and explicit out-of-scope boundaries.>
+(First session only. Omit this subsection in subsequent sessions.)
+
+### Feature List
+- `<feature-name>` — <one-line description>
+(Write "No changes" if no features were added or modified this session.)
+
+### Domain Model
+| Type | Name | Description | In Scope |
+|------|------|-------------|----------|
+| Noun | | | Yes |
+| Verb | | | Yes |
+(Write "No changes" if domain model was not updated this session.)
diff --git a/features/backlog/.gitkeep b/features/backlog/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/features/completed/.gitkeep b/features/completed/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/features/completed/display-version.feature b/features/completed/display-version.feature
new file mode 100644
index 0000000..0dfc3dd
--- /dev/null
+++ b/features/completed/display-version.feature
@@ -0,0 +1,60 @@
+Feature: Display version
+
+ Reads the application version from pyproject.toml at runtime and logs it at INFO
+ level. Log output is controlled by a verbosity parameter; the version is visible
+ at DEBUG and INFO but suppressed at WARNING and above. An invalid verbosity value
+ raises a descriptive error.
+
+ Status: COMPLETED
+
+ Rules (Business):
+ - Version is read from pyproject.toml at runtime using tomllib
+ - Log verbosity is controlled by a ValidVerbosity parameter passed to main()
+ - Valid verbosity levels are: DEBUG, INFO, WARNING, ERROR, CRITICAL
+ - An invalid verbosity value raises a ValueError with the invalid value and valid options
+ - The version string is logged at INFO level; visible at DEBUG and INFO, not at WARNING+
+
+ Constraints:
+ - No hardcoded __version__ constant — pyproject.toml is the single source of truth
+ - Entry point: app/__main__.py (main(verbosity) function)
+ - Version logic: app/version.py (version() function)
+
+ Rule: Version retrieval
+ As a software-engineer
+ I want to retrieve the application version programmatically
+ So that I can display or log it at runtime
+
+ @id:3f2a1b4c
+ Example: Version string is read from pyproject.toml
+ Given pyproject.toml exists with a version field
+ When version() is called
+ Then the returned string matches the version in pyproject.toml
+
+ @id:7a8b9c0d
+ Example: Version call emits an INFO log message
+ Given pyproject.toml exists with a version field
+ When version() is called
+ Then an INFO log message in the format "Version: <version>" is emitted
+
+ Rule: Verbosity control
+ As a software-engineer
+ I want to control log verbosity via a parameter
+ So that I can tune output for different environments
+
+ @id:a1b2c3d4
+ Example: Version appears in logs at DEBUG and INFO verbosity
+ Given a verbosity level of DEBUG or INFO is passed to main()
+ When main() is called
+ Then the version string appears in the log output
+
+ @id:b2c3d4e5
+ Example: Version is absent from logs at WARNING and above
+ Given a verbosity level of WARNING, ERROR, or CRITICAL is passed to main()
+ When main() is called
+ Then the version string does not appear in the log output
+
+ @id:e5f6a7b8
+ Example: Invalid verbosity raises a descriptive error
+ Given an invalid verbosity string is passed to main()
+ When main() is called
+ Then a ValueError is raised with the invalid value and valid options listed
diff --git a/features/in-progress/.gitkeep b/features/in-progress/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..10a2366
--- /dev/null
+++ b/index.html
@@ -0,0 +1,83 @@
+
+
+
+
+
+ Project Documentation
+
+
+
+
+
+
+
diff --git a/post-mortem/.gitkeep b/post-mortem/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/research/README.md b/research/README.md
new file mode 100644
index 0000000..5e60424
--- /dev/null
+++ b/research/README.md
@@ -0,0 +1,16 @@
+# Scientific Research — Index
+
+Theoretical and empirical foundations for the decisions made in this template, organized by domain.
+
+| File | Entries | Domain |
+|---|---|---|
+| `cognitive-science.md` | 1–10 | Pre-mortem, implementation intentions, commitment devices, System 2, adversarial collaboration, accountability, chunking, elaborative encoding, error feedback, prospective memory |
+| `testing.md` | 11–15, 51–54 | Observable behavior testing, test-behavior alignment, first-class tests, property-based testing, mutation testing, Canon TDD, GOOS outer/inner loop, Is TDD Dead, BDD origin |
+| `software-economics.md` | 16 | Cost of change curve (shift left) |
+| `requirements-elicitation.md` | 17–20, 28–30, 43–50 | INVEST, Example Mapping, declarative Gherkin, MoSCoW, active listening, Kipling 5Ws, BA framework, FDD, affinity mapping, Event Storming, CIT, cognitive interview, laddering, funnel technique, RE issues |
+| `domain-modeling.md` | 31, 63–68 | DDD bounded contexts, ubiquitous language, feature identification, DDD Reference, Fowler UL/BC bliki, Vernon IDDD, Verraes UL-not-glossary, Whirlpool |
+| `oop-design.md` | 32–35 | Object Calisthenics, Refactoring (Fowler), GoF Design Patterns, SOLID |
+| `refactoring-empirical.md` | 36–41 | QDIR smell prioritization, smells + architectural refactoring, SPIRIT tool, bad OOP engineering properties, CWC complexity metric, metric threshold unreliability |
+| `adr/ADR-*.md` | 42, 55–58 | Hexagonal Architecture, ADRs, 4+1 View Model, C4 model, information hiding |
+| `ai-agents.md` | 21–27 | Minimal-scope agent design, context isolation, on-demand skills, instruction conflict resolution failure, positional attention degradation, modular prompt de-duplication, three-file separation |
+| `documentation.md` | 59–62, 69–71 | Developer information needs, docs-as-code, Diátaxis documentation framework, blameless post-mortems, arc42 current-state template, Google design docs, RFC/technical spec pattern |
diff --git a/research/ai-agents.md b/research/ai-agents.md
new file mode 100644
index 0000000..02fa05d
--- /dev/null
+++ b/research/ai-agents.md
@@ -0,0 +1,202 @@
+# Scientific Research — AI Agent Design
+
+Foundations for the agent architecture, file structure, and context management decisions in this template.
+
+---
+
+### 21. Minimal-Scope Agent Design
+
+| | |
+|---|---|
+| **Source** | OpenAI. (2024). *Agent definitions*. OpenAI Agents SDK Documentation. https://platform.openai.com/docs/guides/agents/define-agents |
+| **Date** | 2024 |
+| **Alternative** | Anthropic. (2024). *Building effective agents*. Anthropic Engineering Blog. https://www.anthropic.com/engineering/building-effective-agents |
+| **Status** | Confirmed — corrects the belief that subagents should be "lean routing agents" |
+| **Core finding** | "Define the smallest agent that can own a clear task. Add more agents only when you need separate ownership, different instructions, different tool surfaces, or different approval policies." The split criterion is ownership boundary, not instruction volume. |
+| **Mechanism** | Multiple agents competing to own the same concern create authority conflicts and inconsistent tool access. The right unit is the smallest coherent domain that requires exclusive responsibility. |
+| **Where used** | Agent design in `.opencode/agents/*.md` — 5 agents, each owning a distinct domain (PO, system-architect, software-engineer, designer, setup). |
+
+---
+
+### 22. Context Isolation via Subagents
+
+| | |
+|---|---|
+| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices |
+| **Date** | 2025 |
+| **Status** | Confirmed — the primary reason subagents exist is context isolation, not routing |
+| **Core finding** | Subagents run in their own context windows and report back summaries, keeping the main conversation clean for implementation. Every file read in a subagent burns tokens in a child window, not the primary window. |
+| **Mechanism** | Context window is the primary performance constraint for LLM agents. Investigation tasks rapidly exhaust context if done inline. Delegating to a subagent quarantines that cost; the primary agent receives only the distilled result. A fresh context in the subagent also prevents anchoring bias from prior conversation state. |
+| **Where used** | OpenCode `task` tool usage in all agents; `explore` and `general` built-in subagents. |
+
+---
+
+### 23. On-Demand Skill Loading (Context Budget)
+
+| | |
+|---|---|
+| **Source** | Anthropic. (2025). *Best practices for Claude Code*. Anthropic Documentation. https://www.anthropic.com/engineering/claude-code-best-practices |
+| **Date** | 2025 |
+| **Alternative** | OpenCode. (2026). *Agent Skills*. OpenCode Documentation. https://opencode.ai/docs/skills/ |
+| **Status** | Confirmed (vendor guidance) — benefit on task completion quality extrapolated from RAG retrieval literature |
+| **Core finding** | "CLAUDE.md is loaded every session, so only include things that apply broadly. For domain knowledge or workflows only relevant sometimes, use skills instead. Claude loads them on demand without bloating every conversation." Bloated always-loaded files cause Claude to ignore critical instructions. |
+| **Mechanism** | Every token in an unconditionally-loaded file competes for attention against the task prompt. Long always-loaded files push important instructions beyond effective attention range, causing silent non-compliance. Skills are injected only when the task calls for them, preserving the primary context budget. |
+| **Where used** | `AGENTS.md` carries only shared project conventions and commands; all step-specific workflows live in `.opencode/skills/*.md` and are loaded via the `skill` tool only when the relevant step begins. |
+
+---
+
+### 24. Instruction Conflict Resolution Failure in LLMs
+
+| | |
+|---|---|
+| **Source** | Geng et al. (2025). Control Illusion: The Failure of Instruction Hierarchies in Large Language Models. AAAI-26. arXiv:2502.15851. https://arxiv.org/abs/2502.15851 |
+| **Date** | 2025 |
+| **Alternative** | Wallace et al. (2024). The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions. arXiv:2404.13208. |
+| **Status** | Confirmed — peer-reviewed (AAAI-26), replicated across 6 models |
+| **Core finding** | LLMs do not reliably prioritize system-prompt instructions over conflicting instructions from other sources. Resolution is inconsistent and biased by pretraining-derived priors, not by prompt structure or position. |
+| **Mechanism** | No structural separation between instruction sources enforces reliable priority at inference time. When the same directive appears in two locations with divergent content, the model selects between them based on statistical priors from pretraining. |
+| **Where used** | Justifies single source of truth in `AGENTS.md`: workflow details duplicated across agent files and skills that drift out of sync produce conflicting instructions the model cannot resolve reliably. |
+
+---
+
+### 25. Positional Attention Degradation in Long Contexts
+
+| | |
+|---|---|
+| **Source** | Liu et al. (2023). Lost in the Middle: How Language Models Use Long Contexts. *Transactions of the Association for Computational Linguistics*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172 |
+| **Date** | 2023 |
+| **Alternative** | McKinnon (2025). arXiv:2511.05850 — effect attenuated for simple retrieval in Gemini 2.5+; persists for multi-hop reasoning. |
+| **Status** | Confirmed with caveat — robust for multi-hop reasoning; attenuated for simple retrieval in frontier models (2025–2026) |
+| **Core finding** | Performance on tasks requiring retrieval from long contexts follows a U-shaped curve: highest when relevant content is at the beginning or end of the context, degraded when content falls in the middle. |
+| **Mechanism** | Transformer attention is not uniform across token positions. Content placed in the middle of a long context receives less attention weight regardless of its relevance. |
+| **Where used** | Supports keeping always-loaded files lean. Duplicated workflow detail in always-loaded files increases total context length, pushing other content into lower-attention positions. |
+
+---
+
+### 26. Modular Prompt De-duplication Reduces Interference
+
+| | |
+|---|---|
+| **Source** | Sharma & Henley (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055 |
+| **Date** | 2026 |
+| **Status** | Partially confirmed — single-agent reasoning benchmarks only; not tested on multi-file agent architectures |
+| **Core finding** | Structured prompts with explicit section de-duplication outperform both monolithic prompts and unstructured modular prompts. The mechanism cited is "reducing redundancy and interference between components." |
+| **Mechanism** | Redundant content across prompt sections creates competing attention targets. De-duplication concentrates relevant signal in one canonical location per concern. |
+| **Where used** | Supports the rule that skills and agent routing files contain no duplication of `AGENTS.md` content or of each other. |
+
+---
+
+### 27. Agent File Architecture — Three-File Separation
+
+| | |
+|---|---|
+| **Source** | Convergence of entries 23, 24, 25, 26. |
+| **Date** | — |
+| **Status** | Inferred — no direct A/B test of this architecture exists; supported by convergence of confirmed and partially confirmed findings above |
+| **Core finding** | Three distinct failure modes (instruction conflict on drift, positional attention degradation, redundancy interference) converge to produce a three-file split with defined content rules for each. |
+| **Mechanism** | Each file runs at a different time and serves a different purpose. Mixing concerns across files reintroduces the failure modes the split is designed to prevent. |
+| **Where used** | Structural rule for `AGENTS.md`, `.opencode/agents/*.md`, and `.opencode/skills/*.md`. |
+
+| File | Runs when | Contains | Does NOT contain |
+|---|---|---|---|
+| `AGENTS.md` | Every session, always loaded | Project conventions, shared commands, formats, standards | Step procedures, role-specific rules, path specs |
+| `.opencode/agents/*.md` | When that role is invoked | Role identity, step ownership, skill load instructions, tool permissions, escalation paths | Workflow details, principle lists, path specs, commit formats |
+| `.opencode/skills/*.md` | On demand, when that step begins | Full procedural instructions for that step, self-contained | Duplication of `AGENTS.md` content or other skills |
+
+---
+
+### 72. Actor Model — Message-Passing Ownership
+
+| | |
+|---|---|
+| **Source** | Hewitt, C., Bishop, P., & Steiger, R. (1973). *A universal modular actor formalism for artificial intelligence*. IJCAI. |
+| **Date** | 1973 |
+| **Status** | Confirmed — foundational for single-ownership agent design |
+| **Core finding** | Actors are computational entities that communicate exclusively via asynchronous message passing. Each actor has a single mailbox, processes messages sequentially, and can spawn child actors. No shared state, no direct method calls. |
+| **Mechanism** | The Actor Model eliminates race conditions by construction: an actor can only modify its own state. Message passing creates explicit handoff points where ownership transfers. This maps directly to AI agent design where each agent owns a distinct domain and communicates via structured handoffs (e.g., PO → SA → SE → SA → PO). |
+| **Where used** | Agent ownership boundaries in `.opencode/agents/*.md`; single-feature-at-a-time WIP limit in `FLOW.md`. |
+
+---
+
+### 73. CSP — Synchronous Communication and Deadlock Freedom
+
+| | |
+|---|---|
+| **Source** | Hoare, C. A. R. (1978). *Communicating sequential processes*. Communications of the ACM, 21(8), 666–677. |
+| **Date** | 1978 |
+| **Status** | Confirmed — formal basis for structured handoff protocols |
+| **Core finding** | Processes communicate via synchronous channels (rendezvous). A process that tries to send on a channel blocks until the receiver is ready. This explicit synchronization prevents the "lost update" problem. |
+| **Mechanism** | CSP's channel-based communication ensures that handoffs are atomic: either both parties are ready (handoff succeeds) or the sender waits (no partial state). Applied to AI workflow design: each step transition in `FLOW.md` is a rendezvous point where the outgoing agent commits state before the incoming agent reads it. |
+| **Where used** | Step transition protocol in `FLOW.md` — commit before handoff; session end protocol in `run-session/SKILL.md`. |
+
+---
+
+### 74. Session Types — Protocol Conformance by Construction
+
+| | |
+|---|---|
+| **Source** | Honda, K. (1993). *Types for dyadic interaction*. CONCUR '93. |
+| **Date** | 1993 |
+| **Status** | Confirmed — type-safe communication protocols |
+| **Core finding** | Session types statically verify that communicating parties follow a prescribed protocol. The type checker ensures send/receive sequences match, preventing protocol violations at compile time. |
+| **Mechanism** | Just as session types enforce "send A then receive B then send C", the `FLOW.md` state machine enforces "Step 1 → Step 2 → Step 3 → Step 4 → Step 5". Each state has a defined owner and valid transitions. The auto-detection rules act as a runtime type checker: if the filesystem state doesn't match the expected state, the protocol halts. |
+| **Where used** | `FLOW.md` state machine definition; `flow/SKILL.md` auto-detection rules. |
+
+---
+
+### 75. Statecharts — Hierarchical State Machines with History
+
+| | |
+|---|---|
+| **Source** | Harel, D. (1987). *Statecharts: A visual formalism for complex systems*. Science of Computer Programming, 8(3), 231–274. |
+| **Date** | 1987 |
+| **Status** | Confirmed — hierarchical states for workflow design |
+| **Core finding** | Statecharts extend finite state machines with hierarchy (nested states), orthogonality (parallel regions), and history (return to previous substate). This makes complex systems tractable without state explosion. |
+| **Mechanism** | The `FLOW.md` state machine uses hierarchical grouping: Step 3 contains substates [READY], [RED], [GREEN]. The history mechanism maps to interruption recovery: when resuming, auto-detection determines the exact substate without manual tracking. |
+| **Where used** | `FLOW.md` state design; `flow/SKILL.md` detection rules for interruption recovery. |
+
+---
+
+### 76. Design by Contract — Preconditions and Postconditions
+
+| | |
+|---|---|
+| **Source** | Meyer, B. (1986). *Eiffel: Programming for reusability and extendability*. SIGPLAN Notices, 22(2), 85–94. |
+| **Date** | 1986 |
+| **Status** | Confirmed — explicit contracts for step boundaries |
+| **Core finding** | Software components should specify contracts: preconditions (what must be true before calling), postconditions (what will be true after), and invariants (what remains true). Violations indicate bugs. |
+| **Mechanism** | Each `FLOW.md` state has preconditions (detect rules) and postconditions (success/failure transitions). The prerequisites table is a system-level precondition. When preconditions fail, the protocol halts rather than proceeding with invalid state. |
+| **Where used** | Prerequisites table in `FLOW.md`; per-step preconditions in `flow/SKILL.md`, `architect/SKILL.md`, `implement/SKILL.md`. |
+
+---
+
+### 77. Petri Nets — Places, Transitions, and Token Flow
+
+| | |
+|---|---|
+| **Source** | Petri, C. A. (1962). *Kommunikation mit Automaten*. PhD thesis, University of Bonn. |
+| **Date** | 1962 |
+| **Status** | Confirmed — formal model for concurrent workflow with resource constraints |
+| **Core finding** | Petri Nets model systems as places (conditions), transitions (events), and tokens (resources). A transition fires only when all input places have tokens. This naturally models capacity constraints and competition for resources. |
+| **Mechanism** | The WIP=1 constraint in `FLOW.md` is a Petri Net place with capacity 1: only one feature token can occupy the "in-progress" place at a time. The transition from [IDLE] to [STEP-1-DISCOVERY] requires the "in-progress" place to be empty (no token). This formalizes the single-feature constraint. |
+| **Where used** | WIP limit of 1 in `AGENTS.md` and `FLOW.md`; filesystem-enforced WIP via `docs/features/in-progress/` directory. |
+
+---
+
+## Bibliography
+
+1. Anthropic. (2024). Building effective agents. https://www.anthropic.com/engineering/building-effective-agents
+2. Anthropic. (2025). Best practices for Claude Code. https://www.anthropic.com/engineering/claude-code-best-practices
+3. Geng et al. (2025). Control Illusion. AAAI-26. arXiv:2502.15851. https://arxiv.org/abs/2502.15851
+4. Harel, D. (1987). Statecharts: A visual formalism for complex systems. *Science of Computer Programming*, 8(3), 231–274.
+5. Hewitt, C., Bishop, P., & Steiger, R. (1973). A universal modular actor formalism for artificial intelligence. *IJCAI*.
+6. Hoare, C. A. R. (1978). Communicating sequential processes. *Communications of the ACM*, 21(8), 666–677.
+7. Honda, K. (1993). Types for dyadic interaction. *CONCUR '93*.
+8. Liu, N. F. et al. (2023). Lost in the Middle. *TACL*. arXiv:2307.03172. https://arxiv.org/abs/2307.03172
+9. McKinnon, R. (2025). arXiv:2511.05850. https://arxiv.org/abs/2511.05850
+10. Meyer, B. (1986). Eiffel: Programming for reusability and extendability. *SIGPLAN Notices*, 22(2), 85–94.
+11. OpenAI. (2024). Agent definitions. https://platform.openai.com/docs/guides/agents/define-agents
+12. OpenCode. (2026). Agent Skills. https://opencode.ai/docs/skills/
+13. Petri, C. A. (1962). Kommunikation mit Automaten. PhD thesis, University of Bonn.
+14. Sharma, A., & Henley, A. (2026). Modular Prompt Optimization. arXiv:2601.04055. https://arxiv.org/abs/2601.04055
+15. Wallace, E. et al. (2024). The Instruction Hierarchy. arXiv:2404.13208. https://arxiv.org/abs/2404.13208
diff --git a/research/architecture.md b/research/architecture.md
new file mode 100644
index 0000000..9ccaf7c
--- /dev/null
+++ b/research/architecture.md
@@ -0,0 +1,156 @@
+# Scientific Research — Architecture
+
+Foundations for the architectural decisions and patterns used in this template.
+
+---
+
+### 42. Hexagonal Architecture — Ports and Adapters
+
+| | |
+|---|---|
+| **Source** | Cockburn, A. (2005). "Hexagonal Architecture." *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/ |
+| **Date** | 2005 |
+| **Alternative** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. (Chapter 7: "Ports and Adapters") |
+| **Status** | Confirmed — foundational; widely adopted as Clean Architecture, Onion Architecture |
+| **Core finding** | The application domain should have no knowledge of external systems (databases, filesystems, network, UI). All contact between the domain and the outside world passes through a **port** (an interface / Protocol) and an **adapter** (a concrete implementation of that port). The domain is independently testable without any infrastructure. The key structural rule: dependency arrows point inward — domain code never imports from adapters; adapters import from domain. |
+| **Mechanism** | Two distinct sides of any application: the "driving side" (actors who initiate action — tests, UI, CLI) and the "driven side" (actors the application drives — databases, filesystems, external services). Each driven-side dependency is hidden behind a port. Tests supply a test adapter; production supplies a real adapter. Substituting adapters requires no domain code changes. This is SOLID-D at the architectural layer. |
+| **Where used** | Step 2 (Architecture): if an external dependency is identified during domain analysis, assign it a Protocol. `ports/` and `adapters/` folders emerge when a concrete dependency is confirmed — do not pre-create them. The dependency-inversion principle (SOLID-D) is the goal; the folder names are convention, not law. |
+
+---
+
+### 55. Architecture Decision Records (ADRs)
+
+| | |
+|---|---|
+| **Source** | Nygard, M. T. (2011). "Documenting Architecture Decisions." *cognitect.com*. https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions |
+| **Date** | 2011 |
+| **Alternative** | Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf. (Chapter 6: "Architectural Decisions") |
+| **Status** | Confirmed — widely adopted industry standard; tooled by adr-tools, ADR Manager, Log4Brains |
+| **Core finding** | Architectural decisions should be recorded as short, immutable documents capturing: what was decided, why, and what alternatives were rejected. Without this record, decisions get re-litigated by every new developer (or AI agent) who encounters the codebase, producing rework and re-divergence. |
+| **Mechanism** | An ADR is written at decision time, never edited afterward. If the decision changes, a new ADR is written that supersedes the old one. The append-only record becomes a reliable audit trail. The constraint "one sentence per field" forces clarity — if you can't state the reason in one sentence, the decision is not yet understood. |
+| **Where used** | `docs/adr/ADR-YYYY-MM-DD-slug.md` (one file per decision). SA creates one file per non-obvious decision after Step 2. The `update-docs` skill reads ADRs as input for C4 diagram annotations. |
+
+---
+
+### 56. The 4+1 View Model of Architecture
+
+| | |
+|---|---|
+| **Source** | Kruchten, P. B. (1995). "The 4+1 View Model of Architecture." *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759 |
+| **Date** | 1995 |
+| **Alternative** | Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. |
+| **Status** | Confirmed — 3,000+ citations; foundational IEEE reference for architectural documentation |
+| **Core finding** | A single architectural diagram cannot communicate all relevant aspects of a system. Four distinct views are required: **Logical** (domain objects and relationships), **Process** (runtime behavior and concurrency), **Development** (module organisation and dependencies), **Physical** (deployment topology). A fifth **Scenarios** view (use cases) ties the four together by showing how each scenario exercises each view. |
+| **Mechanism** | Different stakeholders need different views: a developer needs the Development view; an operator needs the Physical view; a domain expert needs the Logical view. Conflating views into one diagram produces a cluttered diagram that satisfies nobody. The 4+1 model assigns each concern to its appropriate view and cross-validates them through scenarios. |
+| **Where used** | Theoretical foundation for the C4 model (entry 57). The `update-docs` skill generates C4 diagrams that map to: Context diagram (Scenarios view), Container diagram (Physical + Development views), Component diagram (Logical + Development views). |
+
+---
+
+### 57. The C4 Model for Software Architecture
+
+| | |
+|---|---|
+| **Source** | Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com |
+| **Date** | 2018 (ongoing) |
+| **Alternative** | Brown, S. (2023). "The C4 model for visualising software architecture." *InfoQ*. |
+| **Status** | Confirmed — widely adopted; tooled by Structurizr, PlantUML C4, Mermaid C4 |
+| **Core finding** | Software architecture can be communicated at four zoom levels: **Level 1 — System Context** (who uses the system and what external systems it talks to), **Level 2 — Container** (major runnable/deployable units), **Level 3 — Component** (major structural building blocks within a container), **Level 4 — Code** (classes, interfaces; usually auto-generated). Each level answers a specific question; mixing levels in one diagram creates confusion. |
+| **Mechanism** | C4 operationalises the 4+1 View Model (entry 56) into a lightweight notation that can be expressed in text (PlantUML, Mermaid) and version-controlled alongside code. The notation is deliberately constrained: boxes (people, systems, containers, components) and unidirectional arrows with labels. No UML formalism required. Context + Container diagrams cover >90% of communication needs for most teams. |
+| **Where used** | The `update-docs` skill generates and updates C4 diagrams in `docs/context.md` and `docs/container.md`. Context diagram (L1) always generated; Container (L2) generated when multiple containers are identified; Component (L3) generated on demand. Source files are Mermaid so they render in GitHub and are version-controlled. |
+
+---
+
+### 58. Information Hiding — Module Decomposition Criterion
+
+| | |
+|---|---|
+| **Source** | Parnas, D. L. (1972). "On the criteria to be used in decomposing systems into modules." *Communications of the ACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623 |
+| **Date** | 1972 |
+| **Alternative** | Parnas, D. L. (1974). "On a 'buzzword': Hierarchical structure." *Proc. IFIP Congress 74*, 336–339. |
+| **Status** | Confirmed — 4,000+ citations; foundational criterion for all modular decomposition in software engineering |
+| **Core finding** | The correct criterion for decomposing a system into modules is **information hiding**: each module hides a design decision that is likely to change. A module's interface reveals only what callers need; its implementation hides how. Decomposing by execution steps (procedure-based) creates tight coupling to implementation order; decomposing by change-prone decisions (information-hiding) allows each decision to be changed independently. |
+| **Mechanism** | Identify which decisions are most likely to change (data structures, algorithms, I/O formats, external service protocols). Each such decision becomes a module boundary. The module's public interface is defined to be change-stable; the implementation is change-free from the caller's perspective. This is the theoretical basis for SOLID-D (depend on abstractions), Hexagonal Architecture (hide external decisions behind ports), and DDD bounded contexts (hide language decisions behind context boundaries). |
+| **Where used** | Step 2 Architecture: bounded context check ("same word, different meaning across features? → module boundary") and external dep Protocol assignment both apply the information-hiding criterion. The `update-docs` skill uses module boundaries as container/component boundaries in `docs/container.md`. |
+
+---
+
+### 59. Architecture Tradeoff Analysis Method (ATAM)
+
+| | |
+|---|---|
+| **Source** | Kazman, R., Klein, M., & Clements, P. (2000). "ATAM: Method for Architecture Evaluation" (CMU/SEI-2000-TR-004). Software Engineering Institute, Carnegie Mellon University. https://resources.sei.cmu.edu/asset_files/TechnicalReport/2000_005_001_13706.pdf |
+| **Date** | 2000 (updated 2018) |
+| **Alternative** | Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley. (Chapters 21–23) |
+| **Status** | Confirmed — SEI standard; used by NASA, DoD, and Fortune 500 organizations |
+| **Core finding** | Architecture should be evaluated early through structured scenario analysis. ATAM discovers **trade-offs** and **sensitivity points** before implementation begins, when change cost is minimal. The method produces a risk-mitigation roadmap rather than a pass/fail verdict. |
+| **Mechanism** | Nine-step process: (1) present ATAM, (2) present business drivers, (3) present architecture, (4) identify architectural approaches, (5) generate quality-attribute utility tree, (6) analyze architectural approaches, (7) brainstorm and prioritize scenarios, (8) re-analyze with broader stakeholder input, (9) present results. Key output: a ranked list of **risk themes** with sensitivity points (architectural decisions that most affect quality attributes). |
+| **Where used** | Step 4 (Verify): the system-architect applies ATAM-style adversarial review — testing the implemented architecture against the quality-attribute scenarios identified in Step 2. The SA who designed the architecture reviews it, eliminating the context-loss problem of external reviewers. |
+
+---
+
+### 60. Conway's Law and the Inverse Conway Maneuver
+
+| | |
+|---|---|
+| **Source** | Conway, M. E. (1968). "How Do Committees Invent?" *Datamation*, 14(4), 28–31. https://www.melconway.com/Home/Committees_Paper.html |
+| **Date** | 1968 (dubbed "Conway's Law" by Brooks, 1975) |
+| **Alternative** | Fowler, M. (2022). "Conway's Law." *martinfowler.com*. https://martinfowler.com/bliki/ConwaysLaw.html |
+| **Status** | Confirmed — universally accepted; Brooks called it "the most important law in software engineering" |
+| **Core finding** | Any organization that designs a system will produce a design whose structure is a copy of the organization's communication structure. The **Inverse Conway Maneuver** deliberately alters team organization to encourage the desired software architecture — aligning Conway's Law with architectural intent rather than fighting it. |
+| **Mechanism** | Three responses to Conway's Law: (1) **Ignore** — architecture clashes with team structure, producing friction; (2) **Accept** — ensure architecture does not conflict with existing communication patterns; (3) **Inverse Conway** — restructure teams (and agent roles) to match the desired architecture. In AI-assisted development, this means the agent who designs a module should be the same agent who reviews it, preserving architectural intent through the build-and-review cycle. |
+| **Where used** | AGENTS.md role design: the system-architect → software-engineer → system-architect loop implements a closed communication path. The SA designs the module boundary; the SE builds within it; the SA verifies the boundary was respected. No external reviewer introduces misaligned mental models. |
+
+---
+
+### 61. The Architect as Decision-Maker
+
+| | |
+|---|---|
+| **Source** | Fowler, M. (2003). "Who Needs an Architect?" *IEEE Software*, 20(5), 11–13. https://martinfowler.com/ieeeSoftware/whoNeedsArchitect.pdf |
+| **Date** | 2003 |
+| **Alternative** | Martin, R. C. (2017). *Clean Architecture: A Craftsman's Guide to Software Structure and Design*. Prentice Hall. (Chapters 1–3) |
+| **Status** | Confirmed — IEEE standard reference; Martin's "Clean Architecture" extends to policy/detail separation |
+| **Core finding** | The architect's job is not to draw diagrams — it is to make **significant decisions** that are hard to change later. The architect is a facilitator who builds consensus around technical direction, not a dictator who issues edicts. The best architects are also programmers who understand implementation constraints firsthand. |
+| **Mechanism** | Fowler distinguishes four architect archetypes: (1) **Architect as decision-maker** — owns the hard-to-change choices; (2) **Architect as expert** — provides technical depth the team lacks; (3) **Architect as facilitator** — brings stakeholders to consensus; (4) **Architect as gatekeeper** — enforces standards. The template's system-architect role combines (1) and (4): making architectural decisions (ADRs) and enforcing them through adversarial review. Martin adds the **policy/detail** separation: the architect owns policy (business rules, interfaces); the developer owns detail (algorithms, data structures). |
+| **Where used** | `system-architect.md` agent definition: the SA owns `docs/domain-model.md`, `docs/system.md`, and `docs/adr/ADR-*.md` (policy layer). The SE owns the implementation code (detail layer). The SA reviews to ensure policy was not violated by detail decisions. |
+
+---
+
+### 62. Team Topologies and Cognitive Load
+
+| | |
+|---|---|
+| **Source** | Skelton, M., & Pais, M. (2019). *Team Topologies: Organizing Business and Technology Teams for Fast Flow*. IT Revolution Press. |
+| **Date** | 2019 |
+| **Alternative** | Narayan, S. (2015). *Agile IT Organization Design*. Addison-Wesley. |
+| **Status** | Confirmed — widely adopted in DevOps and platform engineering; 4.5+ star ratings across retailers |
+| **Core finding** | Team structure should minimize **cognitive load** — the total mental effort required to operate within a system. Cognitive load has three types: (1) **intrinsic** (fundamental complexity of the problem), (2) **extraneous** (unnecessary complexity from poor tooling/process), (3) **germane** (effort to build reusable abstractions). The goal is to maximize germane load (learning) while minimizing extraneous load (friction). |
+| **Mechanism** | Four team types: **Stream-aligned** (delivers customer value end-to-end), **Platform** (provides internal services), **Enabling** (helps stream teams adopt new capabilities), **Complicated-subsystem** (owns complex domain expertise). Three interaction modes: **Collaboration** (joint discovery), **X-as-a-Service** (clean handoff), **Facilitating** (temporary assistance). The SA→SE→SA loop is a **Collaboration** interaction between policy owner (SA) and detail owner (SE), with the SA providing **X-as-a-Service** interfaces (stubs, ADRs) that the SE consumes. |
+| **Where used** | AGENTS.md workflow design: the SA is a **complicated-subsystem** team (architectural expertise) and the SE is **stream-aligned** (feature delivery). The verify step is a **Collaboration** interaction where the SA reviews whether the SE respected the X-as-a-Service boundaries (stubs, protocols, ADRs). |
+
+---
+
+## Bibliography
+
+1. Bass, L., Clements, P., & Kazman, R. (2021). *Software Architecture in Practice* (4th ed.). Addison-Wesley.
+2. Brown, S. (2018). *The C4 Model for Software Architecture*. Leanpub. https://c4model.com
+3. Cockburn, A. (2005). Hexagonal Architecture. *alistair.cockburn.us*. https://alistair.cockburn.us/hexagonal-architecture/
+4. Conway, M. E. (1968). "How Do Committees Invent?" *Datamation*, 14(4), 28–31.
+5. Fowler, M. (2003). "Who Needs an Architect?" *IEEE Software*, 20(5), 11–13.
+6. Fowler, M. (2022). "Conway's Law." *martinfowler.com*. https://martinfowler.com/bliki/ConwaysLaw.html
+7. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley.
+8. Kazman, R., Klein, M., & Clements, P. (2000). "ATAM: Method for Architecture Evaluation" (CMU/SEI-2000-TR-004). SEI, CMU.
+9. Keeling, M. (2017). *Design It!: From Programmer to Software Architect*. Pragmatic Bookshelf.
+10. Kruchten, P. B. (1995). The 4+1 View Model of Architecture. *IEEE Software*, 12(6), 42–50. https://doi.org/10.1109/52.469759
+11. Martin, R. C. (2017). *Clean Architecture: A Craftsman's Guide to Software Structure and Design*. Prentice Hall.
+12. Nygard, M. T. (2011). Documenting Architecture Decisions. *cognitect.com*. https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions
+13. Parnas, D. L. (1972). On the criteria to be used in decomposing systems into modules. *CACM*, 15(12), 1053–1058. https://doi.org/10.1145/361598.361623
+14. Skelton, M., & Pais, M. (2019). *Team Topologies*. IT Revolution Press.
diff --git a/research/cognitive-science.md b/research/cognitive-science.md
new file mode 100644
index 0000000..2a1b94f
--- /dev/null
+++ b/research/cognitive-science.md
@@ -0,0 +1,150 @@
+# Scientific Research — Cognitive Science
+
+Mechanisms from cognitive and social psychology that justify workflow design decisions in this template.
+
+---
+
+### 1. Pre-mortem (Prospective Hindsight)
+
+| | |
+|---|---|
+| **Source** | Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press. |
+| **Date** | 1998 |
+| **Status** | Confirmed |
+| **Core finding** | Asking "imagine this failed — why?" catches 30% more issues than forward-looking review. |
+| **Mechanism** | Prospective hindsight shifts from prediction (weak) to explanation (strong). The brain is better at explaining past events than predicting future ones. By framing as "it already failed," you activate explanation mode. |
+| **Where used** | PO pre-mortem at scope, software-engineer pre-mortem before handoff. |
+
+---
+
+### 2. Implementation Intentions
+
+| | |
+|---|---|
+| **Source** | Gollwitzer, P. M. (1999). Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503. |
+| **Date** | 1999 |
+| **Status** | Confirmed |
+| **Core finding** | "If X then Y" plans are 2–3x more likely to execute than general intentions. |
+| **Mechanism** | If-then plans create automatic cue-response links in memory. The brain processes "if function > 20 lines then extract helper" as an action trigger, not a suggestion to consider. |
+| **Where used** | Refactor Self-Check Gates in `implementation/SKILL.md`, Code Quality checks in `verify/SKILL.md`. |
+
+---
+
+### 3. Commitment Devices
+
+| | |
+|---|---|
+| **Source** | Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness. |
+| **Date** | 2001 |
+| **Status** | Confirmed |
+| **Core finding** | Forcing an explicit micro-commitment (filling in a PASS/FAIL cell) creates resistance to reversals. A checkbox checked is harder to uncheck than a todo noted. |
+| **Mechanism** | Structured tables with PASS/FAIL cells create commitment-device effects. The act of marking "FAIL" requires justification, making silent passes psychologically costly. |
+| **Where used** | SOLID enforcement table, ObjCal enforcement table, Design Patterns table — all require explicit PASS/FAIL with evidence. |
+
+---
+
+### 4. System 2 Before System 1
+
+| | |
+|---|---|
+| **Source** | Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux. |
+| **Date** | 2011 |
+| **Status** | Confirmed |
+| **Core finding** | System 1 (fast, automatic) is vulnerable to anchoring and confirmation bias. System 2 (slow, deliberate) must be activated before System 1's judgments anchor. |
+| **Mechanism** | Running semantic review *before* automated commands prevents the "all green" dopamine hit from anchoring the reviewer's judgment. Doing hard cognitive work first protects against System 1 shortcuts. |
+| **Where used** | Verification order in `verify/SKILL.md`: semantic alignment check before commands. |
+
+---
+
+### 5. Adversarial Collaboration
+
+| | |
+|---|---|
+| **Source** | Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict between intuitions and logic? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275. |
+| **Date** | 2001 |
+| **Status** | Confirmed |
+| **Core finding** | Highest-quality thinking emerges when parties hold different hypotheses and are charged with finding flaws in each other's reasoning. |
+| **Mechanism** | Explicitly framing the reviewer as "your job is to break this feature" activates the adversarial collaboration mode. The reviewer seeks disconfirmation rather than confirmation. |
+| **Where used** | Adversarial mandate in `system-architect.md` and `verify/SKILL.md`. |
+
+---
+
+### 6. Accountability to Unknown Audience
+
+| | |
+|---|---|
+| **Source** | Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17, pp. 295–332). Academic Press. |
+| **Date** | 1983 |
+| **Status** | Confirmed |
+| **Core finding** | Accountability to an unknown audience with unknown views improves reasoning quality. The agent anticipates being audited and adjusts reasoning. |
+| **Mechanism** | The explicit report format (APPROVED/REJECTED with evidence) creates an accountability structure — the reviewer's reasoning will be read by the PO. |
+| **Where used** | Report format in `verify/SKILL.md`, structured evidence columns in all enforcement tables. |
+
+---
+
+### 7. Chunking and Cognitive Load Reduction
+
+| | |
+|---|---|
+| **Source** | Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97. |
+| **Date** | 1956 |
+| **Alternative** | Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285. |
+| **Status** | Confirmed |
+| **Core finding** | Structured tables reduce working memory load vs. narrative text. Chunking related items into table rows enables parallel processing. |
+| **Mechanism** | Replacing prose checklists with structured tables (rows × columns) allows the reviewer to process all items in a single pass. |
+| **Where used** | All enforcement tables in `verify/SKILL.md` and `system-architect.md`. |
+
+---
+
+### 8. Elaborative Encoding
+
+| | |
+|---|---|
+| **Source** | Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684. |
+| **Date** | 1972 |
+| **Status** | Confirmed |
+| **Core finding** | Deeper processing — explaining *why* a rule matters — leads to better retention and application than shallow processing. |
+| **Mechanism** | Adding a "Why it matters" column to enforcement tables forces the reviewer to process the rationale, not just scan the rule name. |
+| **Where used** | SOLID table, ObjCal table, Design Patterns table — all have "Why it matters" column. |
+
+---
+
+### 9. Error-Specific Feedback
+
+| | |
+|---|---|
+| **Source** | Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112. |
+| **Date** | 2007 |
+| **Status** | Confirmed |
+| **Core finding** | Feedback is most effective when it tells the agent exactly what went wrong and what the correct action is. "FAIL: function > 20 lines at file:47" is actionable; "Apply function length rules" is not. |
+| **Mechanism** | The evidence column in enforcement tables requires specific file:line references, turning vague rules into actionable directives. |
+| **Where used** | Evidence column in all enforcement tables. |
+
+---
+
+### 10. Prospective Memory Cues
+
+| | |
+|---|---|
+| **Source** | McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144. |
+| **Date** | 2000 |
+| **Status** | Confirmed |
+| **Core finding** | Memory for intended actions is better when cues are embedded at the point of action, not in a separate appendix. |
+| **Mechanism** | Placing if-then gates inline (in the REFACTOR section) rather than in a separate "reference" document increases adherence. The cue appears exactly when the developer is about to make the relevant decision. |
+| **Where used** | Refactor Self-Check Gates embedded inline in `refactor/SKILL.md`. |
+
+---
+
+## Bibliography
+
+1. Cialdini, R. B. (2001). *Influence: The Psychology of Persuasion* (rev. ed.). HarperBusiness.
+2. Craik, F. I. M., & Lockhart, R. S. (1972). Levels of processing: A framework for memory research. *Journal of Verbal Learning and Verbal Behavior*, 11(6), 671–684.
+3. Gollwitzer, P. M. (1999). Implementation intentions: Strong effects of simple plans. *American Psychologist*, 54(7), 493–503.
+4. Hattie, J., & Timperley, H. (2007). The power of feedback. *Review of Educational Research*, 77(1), 81–112.
+5. Kahneman, D. (2011). *Thinking, Fast and Slow*. Farrar, Straus and Giroux.
+6. Klein, G. (1998). *Sources of Power: How People Make Decisions*. MIT Press.
+7. McDaniel, M. A., & Einstein, G. O. (2000). Strategic and automatic processes in prospective memory retrieval. *Applied Cognitive Psychology*, 14(7), S127–S144.
+8. Mellers, B. A., Hertwig, R., & Kahneman, D. (2001). Do frequency representations eliminate conflict between intuitions and logic? An exercise in adversarial collaboration. *Psychological Science*, 12(4), 269–275.
+9. Miller, G. A. (1956). The magical number seven, plus or minus two. *Psychological Review*, 63(2), 81–97.
+10. Sweller, J. (1988). Cognitive load during problem solving. *Cognitive Science*, 12(2), 257–285.
+11. Tetlock, P. E. (1983). Accountability: A social determinant of judgment. In *Psychology of Learning and Motivation* (Vol. 17). Academic Press.
diff --git a/research/documentation.md b/research/documentation.md
new file mode 100644
index 0000000..aebde01
--- /dev/null
+++ b/research/documentation.md
@@ -0,0 +1,116 @@
+# Scientific Research — Documentation
+
+Foundations for living documentation, docs-as-code, information architecture, and post-mortem practices used in this template.
+
+---
+
+### 59. Information Needs in Collocated Software Development Teams
+
+| | |
+|---|---|
+| **Source** | Ko, A. J., DeLine, R., & Venolia, G. (2007). "Information Needs in Collocated Software Development Teams." *Proc. 29th International Conference on Software Engineering (ICSE 2007)*, pp. 344–353. IEEE. https://doi.org/10.1109/ICSE.2007.45 |
+| **Date** | 2007 |
+| **Alternative** | Dagenais, B., & Robillard, M. P. (2010). "Creating and evolving developer documentation." *Proc. FSE 2010*, pp. 127–136. ACM. |
+| **Status** | Confirmed — empirical study; 600+ citations |
+| **Core finding** | Developers spend 35–50% of their working time not writing code but searching for information — navigating code, reading past decisions, and understanding relationships between components. The most frequently sought information is: who wrote this, why was it written this way, and what does this module depend on. Direct questioning of teammates is the most common fallback when documentation is absent, creating serial bottlenecks. |
+| **Mechanism** | Information seeking is triggered by a task, not by curiosity. A developer encountering an unfamiliar component has a specific decision to make. When documentation is absent, the seek-ask-wait loop (find the right person, ask, wait for a response) dominates time. Persistent documentation (ADRs, architecture diagrams, glossary) short-circuits this loop by making the answer findable without a human intermediary. |
+| **Where used** | Justifies the full `update-docs` skill: C4 diagrams answer "what does this module depend on?"; the ADR record answers "why was it written this way?"; the living glossary answers "what does this term mean in this context?". Collectively these eliminate the three most frequent information needs identified by Ko et al. |
+
+---
+
+### 60. Software Engineering at Google — Documentation Chapter
+
+| | |
+|---|---|
+| **Source** | Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google: Lessons Learned from Programming Over Time*. O'Reilly. Chapter 10: "Documentation." https://abseil.io/resources/swe-book/html/ch10.html |
+| **Date** | 2020 |
+| **Alternative** | Fitzpatrick, B., & Collins-Sussman, B. (2012). *Team Geek*. O'Reilly. |
+| **Status** | Confirmed — large-scale industry evidence from a codebase with ~2 billion lines of code |
+| **Core finding** | Documentation that lives outside the code repository decays at a rate proportional to how often the code changes — because there is no mechanism that forces the doc to be updated when the code changes. Docs-as-code (documentation in the same repo, reviewed in the same PRs, tested in the same CI pipeline) dramatically reduces divergence because the cost of updating the doc is incurred at the same moment as the cost of the code change. |
+| **Mechanism** | Google's g3doc system co-locates docs with the code they describe. When a PR changes `payments/service.py`, the reviewer also sees `payments/README.md` in the diff and can flag staleness immediately. At scale, Google found that docs with no co-located tests or CI checks become stale within 3–6 months regardless of team discipline. |
+| **Where used** | Justifies co-locating `docs/` within the project repository. Living docs (`docs/context.md`, `docs/container.md`, `docs/glossary.md`) are updated in the same commits as the code they describe. The `update-docs` skill is the mechanism that enforces this — it runs after Step 5 to regenerate diagrams from the current state of the codebase and discovery docs. |
+
+---
+
+### 61. Diátaxis — A Systematic Framework for Technical Documentation
+
+| | |
+|---|---|
+| **Source** | Procida, D. (2021). "Diátaxis — A systematic approach to technical documentation." *diataxis.fr*. https://diataxis.fr |
+| **Date** | 2021 |
+| **Status** | Confirmed — adopted by Django, NumPy, Gatsby, Cloudflare, and the Python Software Foundation |
+| **Core finding** | Technical documentation fails because it conflates four fundamentally different needs into a single undifferentiated text. The four types are: **Tutorials** (learning-oriented; guides a beginner through a complete task), **How-to guides** (task-oriented; solves a specific problem for a practitioner), **Reference** (information-oriented; describes the system accurately and completely), **Explanation** (understanding-oriented; discusses concepts and decisions). Each type has a different audience mental state and requires a different writing mode. Mixing them degrades all four. |
+| **Mechanism** | The two axes of Diátaxis are: **practical ↔ theoretical** (tutorials and how-to guides are practical; reference and explanation are theoretical) and **acquiring ↔ applying** (tutorials and explanation are for acquiring knowledge; how-to guides and reference are for applying it). A document that tries to be both a tutorial and a reference simultaneously will be a poor tutorial (too much information) and a poor reference (not structured for lookup). |
+| **Where used** | Documentation structure in this template maps to Diátaxis: `README.md` = tutorial (getting started), `AGENTS.md` = reference (complete description of roles, skills, commands) and explanation (why the workflow exists), `docs/context.md` and `docs/container.md` = reference (system structure), post-mortems = explanation (why decisions were made). The `update-docs` skill produces reference-type documentation (C4 diagrams, glossary) — not tutorials. |
+
+---
+
+### 62. Blameless Post-Mortems and a Just Culture
+
+| | |
+|---|---|
+| **Source** | Allspaw, J. (2012). "Blameless PostMortems and a Just Culture." *code.etsy.com* (archived). https://www.etsy.com/codeascraft/blameless-postmortems/ |
+| **Date** | 2012 |
+| **Alternative** | Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate. |
+| **Status** | Confirmed — foundational DevOps/SRE practice; referenced in Google SRE Book (2016) |
+| **Core finding** | Post-mortems that assign blame produce less information and lower long-term system reliability than blameless post-mortems. When individuals believe they will be blamed, they withhold information about contributing factors, preventing the systemic causes from being identified and fixed. A blameless post-mortem treats the incident as a system failure, not an individual failure — asking "what conditions allowed this to happen?" not "who caused this?" |
+| **Mechanism** | Allspaw's model separates two questions: (1) what happened? (factual, blameless) and (2) what changes would prevent recurrence? (systemic). The post-mortem document records both. The output is not an individual's performance review but a list of system changes — process improvements, documentation gaps, tooling additions. Etsy's incident rate fell after adopting blameless post-mortems because engineers began reporting near-misses that they previously concealed. |
+| **Where used** | `docs/post-mortem/` directory. Post-mortems in this template follow the blameless model: they report workflow gaps found, not who made the mistake. The output of each post-mortem is a list of improvements to skills, agents, or workflow documentation. The `update-docs` skill is one such improvement — it emerged from the discovery that architecture and glossary documentation were falling behind the codebase. |
+
+---
+
+### 69. arc42 — Architecture Documentation Template
+
+| | |
+|---|---|
+| **Source** | Starke, G., & Hruschka, P. (2022). *arc42 — Pragmatic, practical and proven: Template for documentation of software and system architecture*. https://arc42.org |
+| **Date** | 2005 (first release); 2022 (current edition) |
+| **Alternative** | Rozanski, N., & Woods, E. (2011). *Software Systems Architecture: Working with Stakeholders Using Viewpoints and Perspectives* (2nd ed.). Addison-Wesley. |
+| **Status** | Confirmed — ISO 25010-aligned; widely adopted in European enterprise software; open-source; used by Siemens, Deutsche Telekom, and others |
+| **Core finding** | Architecture documentation fails when it conflates two distinct audiences: those who need to understand the system now (operators, new developers, AI agents) and those who need to trace historical decisions (auditors, architects). arc42 separates these explicitly: Section 1 (Introduction and Goals) and Section 4 (Solution Strategy) describe the current state — what the system does and the key decisions governing it — while Section 9 (Architectural Decisions) is the append-only ADR log. Both sections exist simultaneously but serve different readers. |
+| **Mechanism** | arc42 provides 12 numbered sections with defined scope for each. The critical separation: current-state sections (1, 4, 5, 6) are rewritten when the system changes; historical sections (9) are append-only. This prevents the common failure mode of treating all architecture documentation as a changelog, which makes it unusable as a reference for onboarding. |
+| **Where used** | Justifies the `docs/system.md` pattern: a rewritten current-state snapshot (equivalent to arc42 Sections 1 + 4) that the SA updates at Step 2, distinct from any append-only decision history. Git history provides the audit trail without requiring a separate ADR log file. |
+
+---
+
+### 70. Google Design Docs — Living Specification Pattern
+
+| | |
+|---|---|
+| **Source** | Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google*. O'Reilly. Chapter 10. https://abseil.io/resources/swe-book/html/ch10.html |
+| **Date** | 2020 |
+| **Alternative** | Ousterhout, J. (2018). *A Philosophy of Software Design*. Yaknyam Press. (Chapter 15: "Write the Comments First") |
+| **Status** | Confirmed — large-scale industry evidence; Google's design doc practice predates the book and is widely replicated at Stripe, Notion, Airbnb |
+| **Core finding** | A design doc (also called a technical spec or RFC) is written before implementation and kept current afterward. It is not append-only — it is a living snapshot that reflects how the system works now. Its sections are: goals, non-goals, current state, design decisions, and trade-offs. When the system changes significantly, the design doc is updated (not superseded) so that it remains the authoritative single reference for the system. Archived (not deleted) only when the system is entirely replaced. |
+| **Mechanism** | The design doc is the canonical answer to "what is this system and why does it work this way?" New team members read the design doc, not the git log. The document is kept current because the cost of updating it is low (it is co-located in the repo) and the cost of not updating it is high (onboarding failures, wrong decisions). Unlike ADRs, design docs answer the current state question directly rather than requiring the reader to replay a sequence of decisions. |
+| **Where used** | Justifies the rewrite-not-append model for `docs/system.md`. The SA rewrites `docs/system.md` at Step 2 to reflect the system after each feature — same lifecycle as a Google design doc. This entry extends entry 60 (docs-as-code) with the specific design doc pattern. |
+
+---
+
+### 71. RFC / Technical Spec Pattern — Authoritative Living Reference
+
+| | |
+|---|---|
+| **Source** | Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google*. O'Reilly. (RFC culture at Google, Stripe, Notion, Airbnb). See also: Skelton, M., & Pais, M. (2019). *Team Topologies*. IT Revolution Press. (Chapter 7: "Team Interaction Modes") |
+| **Date** | 2020 |
+| **Alternative** | RFC 2119 (Bradner, 1997) for the formal RFC model; internal RFC practices at Stripe (public eng blog, 2021) and Notion (public eng blog, 2022) |
+| **Status** | Confirmed — widely adopted industry practice; independently replicated across large engineering organizations |
+| **Core finding** | A technical spec (RFC, design doc, system doc) is the authoritative description of how the system works now. It is a single document that answers: what is this, who uses it, how is it structured, what are the key constraints. It is not a changelog. When the system changes, the spec is updated in place so it always reflects current reality. When a system is retired, the spec is archived (moved, not deleted) so the record is preserved. The spec is kept current because it is the primary onboarding artifact — the first document a new engineer reads. |
+| **Mechanism** | The pattern's authority comes from its singularity: there is exactly one canonical reference. Multiple documents (a design doc here, an ADR log there, a wiki page somewhere else) create the "which one is correct?" problem that degrades onboarding speed. A single rewritten document with git history for audit purposes gives onboarding speed and audit capability simultaneously. |
+| **Where used** | Confirms the single-document model for `docs/system.md`. One file, always current, SA rewrites it at Step 2. Git history provides the full change record without requiring a separate append-only log. Entries 69, 70, and 71 together form the evidence base for `docs/system.md` replacing the ADR-log format of `docs/architecture.md`. |
+
+---
+
+## Bibliography
+
+1. Allspaw, J. (2012). Blameless PostMortems and a Just Culture. *code.etsy.com*. https://www.etsy.com/codeascraft/blameless-postmortems/
+2. Bradner, S. (1997). Key words for use in RFCs to Indicate Requirement Levels. *RFC 2119*. IETF. https://www.rfc-editor.org/rfc/rfc2119
+3. Dagenais, B., & Robillard, M. P. (2010). Creating and evolving developer documentation. *Proc. FSE 2010*, pp. 127–136. ACM.
+4. Dekker, S. (2006). *The Field Guide to Understanding Human Error*. Ashgate.
+5. Ko, A. J., DeLine, R., & Venolia, G. (2007). Information Needs in Collocated Software Development Teams. *Proc. ICSE 2007*, pp. 344–353. https://doi.org/10.1109/ICSE.2007.45
+6. Ousterhout, J. (2018). *A Philosophy of Software Design*. Yaknyam Press.
+7. Procida, D. (2021). Diátaxis — A systematic approach to technical documentation. *diataxis.fr*. https://diataxis.fr
+8. Rozanski, N., & Woods, E. (2011). *Software Systems Architecture: Working with Stakeholders Using Viewpoints and Perspectives* (2nd ed.). Addison-Wesley.
+9. Skelton, M., & Pais, M. (2019). *Team Topologies*. IT Revolution Press.
+10. Starke, G., & Hruschka, P. (2022). arc42 — Pragmatic, practical and proven. https://arc42.org
+11. Winters, T., Manshreck, T., & Wright, H. (2020). *Software Engineering at Google*. O'Reilly. Chapter 10. https://abseil.io/resources/swe-book/html/ch10.html
diff --git a/research/domain-modeling.md b/research/domain-modeling.md
new file mode 100644
index 0000000..2b550e7
--- /dev/null
+++ b/research/domain-modeling.md
@@ -0,0 +1,115 @@
+# Scientific Research — Domain Modeling
+
+Foundations for bounded context identification, ubiquitous language, and feature decomposition used in this template.
+
+---
+
+### 31. Domain-Driven Design — Bounded Contexts and Feature Identification
+
+| | |
+|---|---|
+| **Source** | Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley. |
+| **Date** | 2003 |
+| **Alternative** | Context Mapper (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad |
+| **Status** | Confirmed — foundational DDD literature |
+| **Core finding** | A Bounded Context is a boundary within which a particular ubiquitous language is consistent. Features are identified by grouping related user stories that share the same language. The decomposition criterion is "single responsibility per context" + "consistency of language." |
+| **Mechanism** | In DDD: (1) Extract ubiquitous language from requirements → (2) Group by language consistency → (3) Each group is a candidate bounded context → (4) Each bounded context maps to a feature. Context Mapper automates this: User Stories → Subdomains (via noun/verb extraction) → Bounded Contexts of type FEATURE. |
+| **Where used** | Stage 1 Discovery: after session synthesis, verify each feature has consistent language. Noun/verb extraction from discovery answers produces candidate entities, formalized by the SA in `docs/domain-model.md` at Step 2. The `Rules (Business):` section in `.feature` files captures the ubiquitous language rules that govern each feature. |
+
+---
+
+### 63. DDD Reference — Pattern Summaries (CC-BY)
+
+| | |
+|---|---|
+| **Source** | Evans, E. (2015). *DDD Reference: Definitions and Pattern Summaries*. domainlanguage.com. https://www.domainlanguage.com/ddd/reference/ |
+| **Date** | 2015 |
+| **Alternative** | Evans, E. (2003). *Domain-Driven Design*. Addison-Wesley. (full book; entry #31) |
+| **Status** | Confirmed — freely available CC-BY canonical summary; maintained by Evans personally |
+| **Core finding** | The open-access pattern summary of all DDD patterns from the 2003 book. More precisely citable than the book for specific pattern definitions. Key patterns: Ubiquitous Language ("Use the model as the backbone of a language. Commit the team to exercising that language relentlessly in all communication within the team and in the code."), Bounded Context, Context Map, Domain Events, Aggregates, Repositories. |
+| **Mechanism** | Each pattern is described with: intent, prescription, and "therefore" consequences. The Ubiquitous Language pattern prescribes: use the same terms in diagrams, writing, and especially speech. Refactor the code when the language changes. Resolve confusion over terms in conversation, the way confusion over ordinary words is resolved — by agreement and precision. |
+| **Where used** | Primary reference for `docs/domain-model.md` structure and the ubiquitous language practice. `update-docs` skill glossary entries derive from this: terms must match code identifiers (Evans' "use the same language in code" prescription). `docs/research/domain-modeling.md`. |
+| **Note** | Supersedes entry #31 as the citable source for specific pattern quotes. Entry #31 remains as the book reference. Use this entry when citing a specific Evans pattern definition. |
+
+---
+
+### 64. UbiquitousLanguage — Fowler Bliki
+
+| | |
+|---|---|
+| **Source** | Fowler, M. (2006). "UbiquitousLanguage." *martinfowler.com*. https://martinfowler.com/bliki/UbiquitousLanguage.html |
+| **Date** | 2006 |
+| **Alternative** | Evans (2015) DDD Reference (entry #63) — the primary source Fowler summarises |
+| **Status** | Confirmed — widely cited secondary source; Fowler wrote the DDD foreword and is considered the authoritative secondary interpreter of Evans |
+| **Core finding** | The ubiquitous language is a practice, not a document. The glossary is a secondary artifact — a snapshot of the current state of the language. The language itself lives in conversation, in the code, and in all written communication. "By using the model-based language pervasively and not being satisfied until it flows, we approach a model that is complete and comprehensible." Domain experts must object to inadequate terms; developers must flag ambiguity. |
+| **Mechanism** | The key test of a ubiquitous language: can a domain expert read the domain layer code and recognize their domain? If the code uses different names than the glossary, the code must be refactored — not the glossary relaxed. The language evolves through experimentation with alternative expressions, followed by code refactoring to match the new model. |
+| **Where used** | `update-docs` skill — grounds the rule "verify each term matches the identifier used in the code's domain layer." `docs/glossary.md` — the glossary is explicitly secondary to the code. `docs/research/domain-modeling.md`. |
+
+---
+
+### 65. BoundedContext — Fowler Bliki
+
+| | |
+|---|---|
+| **Source** | Fowler, M. (2014). "BoundedContext." *martinfowler.com*. https://martinfowler.com/bliki/BoundedContext.html |
+| **Date** | 2014 |
+| **Alternative** | Evans (2015) DDD Reference (entry #63) — Fowler cites Evans directly |
+| **Status** | Confirmed — includes a direct Evans quote; the canonical accessible reference for Bounded Context as a design pattern |
+| **Core finding** | "Total unification of the domain model for a large system will not be feasible or cost-effective" (Evans, quoted directly). The same word can mean different things in different Bounded Contexts — this is not a defect but a reflection of domain reality. "You need a different model when the language changes." A Bounded Context is the boundary within which a particular ubiquitous language is internally consistent. Terms must be qualified by their context when a project has more than one bounded context. |
+| **Mechanism** | Fowler's electricity utility example: the word "meter" meant different things in billing, grid management, and customer service. Attempting to unify these into one definition created confusion. Each bounded context maintains its own model and its own language. Context Maps document the relationships and translation rules between bounded contexts. |
+| **Where used** | `update-docs` skill — `**Bounded context:**` field in `docs/glossary.md` entries is mandatory when the project has more than one bounded context (this is the Evans/Fowler requirement). `docs/research/domain-modeling.md`. |
+
+---
+
+### 66. Implementing Domain-Driven Design
+
+| | |
+|---|---|
+| **Source** | Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley. |
+| **Date** | 2013 |
+| **Alternative** | Evans (2003) DDD (entry #31) — Vernon explicitly builds on Evans |
+| **Status** | Confirmed — second most cited DDD book; ~5,000 citations |
+| **Core finding** | Three additions to Evans: (1) **Domain Events as first-class vocabulary** — past-tense verb phrases ("OrderPlaced," "VersionDisplayed") are part of the ubiquitous language and belong in the glossary as a distinct type. (2) **Context Maps as the organizing principle** for multi-context glossaries — each bounded context has its own language documentation; the Context Map shows translation rules between contexts. (3) **Documentation co-located with the code** — docs in the same repository decay at the same rate as the code, dramatically reducing divergence. |
+| **Mechanism** | Vernon's IDDD samples (github.com/VaughnVernon/IDDD_Samples) demonstrate all three in practice. The Product Owner / Business Analyst plays the domain-expert-representative role in glossary maintenance — validating semantic correctness — while developers own structural precision. Neither writes the glossary unilaterally. |
+| **Where used** | `update-docs` skill — `Domain Event` added as a distinct Type value in `docs/glossary.md` entries. Grounds the PO-owned glossary with SE input via the `Reason:` fields in `docs/adr/ADR-YYYY-MM-DD-<slug>.md`. `docs/research/domain-modeling.md`. |
+
+---
+
+### 67. Ubiquitous Language Is Not a Glossary — Verraes
+
+| | |
+|---|---|
+| **Source** | Verraes, M. (2013). "Ubiquitous Language Is Not a Glossary." *verraes.net*. https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/ |
+| **Date** | 2013 |
+| **Alternative** | Fowler (2006) UbiquitousLanguage (entry #64) — the same secondary-artifact point, less pointed |
+| **Status** | Confirmed — original URL is 404; widely documented through community discussion and practitioner secondary accounts; thesis is uncontested in the DDD community |
+| **Core finding** | A glossary is not a ubiquitous language. Teams that maintain a glossary but do not reflect its terms in the code have the *appearance* of a ubiquitous language without the substance. The glossary is a secondary artifact derived from the code and domain-expert conversations — not the reverse. The canonical source of truth is the domain layer code, not the glossary document. A glossary that diverges from the code is lying. |
+| **Mechanism** | The test: can a domain expert read the domain layer code and recognize their domain without a translator? If yes, the ubiquitous language exists. If the only evidence of the language is the glossary document, it does not exist. Consequence: every term added to the glossary must be verified against the corresponding code identifier. |
+| **Where used** | `update-docs` skill — grounds the checklist item "Verify each term matches the identifier used in the code's domain layer." Prevents the common failure mode of glossary-as-theatre. `docs/research/domain-modeling.md`. |
+
+---
+
+### 68. Whirlpool Process of Model Exploration — Evans
+
+| | |
+|---|---|
+| **Source** | Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/ |
+| **Date** | 2011 |
+| **Alternative** | Brandolini, A. (2013). *Introducing EventStorming*. Leanpub. — a later, more structured alternative to Whirlpool |
+| **Status** | Confirmed — freely available; Evans' own post-2003 process guidance |
+| **Core finding** | Model exploration is a cycle: Scenario Exploring → Harvesting Abstractions → Probing the Model → Challenging the Model → back to Scenario Exploring. New vocabulary crystallizes at the Harvesting Abstractions step — concrete scenarios surface candidate terms, which are then named, defined, and reflected in the code. The glossary grows at each Harvesting Abstractions step. |
+| **Mechanism** | The Whirlpool is not a development process — it fits within most iterative processes. It is a model-exploration subprocess triggered whenever the team encounters a poorly understood domain concept. The output of each cycle is a refined model expressed in clearer language, with updated code identifiers and glossary entries. |
+| **Where used** | `update-docs` skill — grounds the timing of glossary updates: after each completed feature (Step 5) corresponds to the Harvesting Abstractions step in the Whirlpool. Discovery sessions (Stage 1) correspond to Scenario Exploring. `docs/research/domain-modeling.md`. |
+
+---
+
+## Bibliography
+
+1. Context Mapper. (2025). Rapid Object-Oriented Analysis and Design. https://contextmapper.org/docs/rapid-ooad
+2. Evans, E. (2003). *Domain-Driven Design: Tackling Complexity in the Heart of Software*. Addison-Wesley.
+3. Evans, E. (2011). *Whirlpool Process of Model Exploration*. domainlanguage.com. https://www.domainlanguage.com/ddd/whirlpool/
+4. Evans, E. (2015). *DDD Reference: Definitions and Pattern Summaries* (CC-BY). domainlanguage.com. https://www.domainlanguage.com/ddd/reference/
+5. Fowler, M. (2006). UbiquitousLanguage. martinfowler.com. https://martinfowler.com/bliki/UbiquitousLanguage.html
+6. Fowler, M. (2014). BoundedContext. martinfowler.com. https://martinfowler.com/bliki/BoundedContext.html
+7. Vernon, V. (2013). *Implementing Domain-Driven Design*. Addison-Wesley.
+8. Verraes, M. (2013). Ubiquitous Language Is Not a Glossary. verraes.net (archived). https://web.archive.org/web/20131004/https://verraes.net/2013/04/ubiquitous-language-is-not-a-glossary/
diff --git a/research/oop-design.md b/research/oop-design.md
new file mode 100644
index 0000000..2c0ae9c
--- /dev/null
+++ b/research/oop-design.md
@@ -0,0 +1,80 @@
+# Scientific Research — OOP Design
+
+Foundations for object-oriented design principles used in this template.
+
+---
+
+### 32. Object Calisthenics — Nine Rules
+
+| | |
+|---|---|
+| **Source** | Bay, J. "Object Calisthenics." *The Thoughtworks Anthology* (PragProg, 2008). Original in IEEE Software/DevX, ~2005. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf |
+| **Date** | ~2005 |
+| **Status** | Practitioner synthesis |
+| **Core finding** | 9 rules to internalize OOP: (1) One level indentation per method, (2) No ELSE, (3) Wrap primitives/Strings, (4) First class collections, (5) One dot per line, (6) No abbreviations, (7) Classes ≤50 lines, (8) ≤2 instance variables, (9) No getters/setters. 7 of 9 enforce data encapsulation; 1 drives polymorphism; 1 drives naming. |
+| **Mechanism** | Restrictions force decomposition. When you cannot use getters, behavior must move into the object. When you cannot use ELSE, you use polymorphism. When classes must be ≤2 ivars, you discover missing abstractions. |
+| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`. |
+
+---
+
+### 33. Refactoring
+
+| | |
+|---|---|
+| **Source** | Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html |
+| **Date** | 1999, 2018 |
+| **Status** | Confirmed — foundational |
+| **Core finding** | Refactoring = behavior-preserving transformations. 68 catalogued refactorings, each small enough to do safely but cumulative effect significant. Code smells (duplicate code, long methods, feature envy) indicate refactoring opportunities. |
+| **Mechanism** | Small steps reduce risk. Each refactoring is reversible. Test suite validates behavior unchanged. |
+| **Where used** | `refactor/SKILL.md`: smell detection triggers refactoring; full protocol and catalogue entries. |
+
+---
+
+### 34. Design Patterns
+
+| | |
+|---|---|
+| **Source** | Gamma, E., Helm, R., Johnson, R., Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley. |
+| **Date** | 1995 |
+| **Status** | Confirmed — foundational |
+| **Core finding** | 23 patterns catalogued in 3 categories: Creational (5), Structural (7), Behavioral (11). Key principles: "Favor composition over inheritance," "Program to an interface, not an implementation." |
+| **Mechanism** | Patterns are recurring solutions to common problems. Named and catalogued so developers don't rediscover them. |
+| **Where used** | `design-patterns/SKILL.md`: full GoF catalogue with smell-triggered Python before/after examples. |
+
+---
+
+### 35. SOLID Principles
+
+| | |
+|---|---|
+| **Source** | Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. Acronym coined by Michael Feathers (2004). https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf |
+| **Date** | 2000 |
+| **Status** | Confirmed |
+| **Core finding** | S: One reason to change. O: Open extension, closed modification. L: Subtypes substitutable. I: No forced stub methods. D: Depend on abstractions, not concretes. |
+| **Mechanism** | Each principle targets a specific coupling failure mode. Together they produce low coupling, high cohesion. |
+| **Where used** | Refactor self-declaration checklist in `refactor/SKILL.md`: 5-row SOLID table with Python before/after examples. |
+
+---
+
+### 39. refactoring.guru — Code Smells, Refactoring Techniques, and Design Patterns
+
+| | |
+|---|---|
+| **Source** | Shvets, A. (2014–present). *Refactoring.Guru*. https://refactoring.guru |
+| **Date** | 2014–present (continuously updated) |
+| **Status** | Practitioner synthesis — widely used reference |
+| **Core finding** | Three interconnected catalogs: (1) **22 code smells** in 5 categories (Bloaters, OO Abusers, Change Preventers, Dispensables, Couplers); (2) **~70 refactoring techniques** in 6 categories (Composing Methods, Moving Features, Organizing Data, Simplifying Conditionals, Simplifying Method Calls, Dealing with Generalization); (3) **22 GoF design patterns** with visual diagrams and multi-language examples. The unique value is the **interconnected navigation**: each smell links to the techniques that address it, and techniques link to patterns they lead toward. |
+| **Mechanism** | Navigation chain: smell → techniques → patterns. Smell categories group related structural problems (e.g., Bloaters = classes/methods grown too large; Dispensables = code that can safely be removed; Couplers = excessive dependency between classes). Each technique has a before/after structure, prerequisites, and trade-offs. |
+| **Smell categories** | **Bloaters** (Long Method, Large Class, Primitive Obsession, Long Parameter List, Data Clumps); **OO Abusers** (Switch Statements, Temporary Field, Refused Bequest, Alternative Classes with Different Interfaces); **Change Preventers** (Divergent Change, Shotgun Surgery, Parallel Inheritance Hierarchies); **Dispensables** (Comments, Duplicate Code, Lazy Class, Data Class, Dead Code, Speculative Generality); **Couplers** (Feature Envy, Inappropriate Intimacy, Message Chains, Middle Man, Incomplete Library Class) |
+| **Technique categories** | Composing Methods, Moving Features Between Objects, Organizing Data, Simplifying Conditional Expressions, Simplifying Method Calls, Dealing with Generalization |
+| **Where used** | `refactor/SKILL.md`: expanded smell table with all 5 categories. `apply-patterns/SKILL.md`: cross-reference for GoF pattern selection. |
+
+---
+
+## Bibliography
+
+1. Bay, J. (~2005). "Object Calisthenics." *IEEE Software/DevX*. https://www.bennadel.com/resources/uploads/2012/objectcalisthenics.pdf
+2. Fowler, M. (1999/2018). *Refactoring: Improving the Design of Existing Code* (2nd ed.). Addison-Wesley. https://martinfowler.com/books/refactoring.html
+3. Gamma, E., Helm, R., Johnson, R., & Vlissides, J. (1995). *Design Patterns: Elements of Reusable Object-Oriented Software*. Addison-Wesley.
+4. Martin, R. C. (2000). "Principles of OOD." *ButUncleBob.com*. https://blog.interface-solv.com/wp-content/uploads/2020/07/Principles-Of-OOD.pdf
+5. Shvets, A. (2014–present). *Refactoring.Guru*. https://refactoring.guru
diff --git a/research/refactoring-empirical.md b/research/refactoring-empirical.md
new file mode 100644
index 0000000..61d666c
--- /dev/null
+++ b/research/refactoring-empirical.md
@@ -0,0 +1,100 @@
+# Scientific Research — Refactoring (Empirical)
+
+Empirical studies on code smells, refactoring prioritization, and OOP complexity used in this template.
+
+---
+
+### 36. QDIR — Bad-Smells + OO Metrics Prioritization
+
+| | |
+|---|---|
+| **Source** | Malhotra, R., Singh, P. (2020). "Exploiting bad-smells and object-oriented characteristics to prioritize classes for refactoring." *Int. J. Syst. Assur. Eng. Manag.* 11(Suppl 2), 133–144. Springer. |
+| **Date** | 2020 |
+| **URL** | https://doi.org/10.1007/s13198-020-01001-x |
+| **Status** | Confirmed — empirical |
+| **Core finding** | QDIR (Quality Depreciation Index Rule) combines bad-smell severity with OO metrics (LOC, WMC, CBO, RFC, DIT) to prioritize classes for refactoring. Validated on 8 open-source Java systems. |
+| **Mechanism** | Classes with high smell severity AND high OO metrics are prioritized. QDIR = weighted sum. |
+| **Where used** | Refactor prioritization: when smell detected, check OO metrics to prioritize. |
+
+---
+
+### 37. Smells + Architectural Refactoring
+
+| | |
+|---|---|
+| **Source** | Silva, C. et al. (2020). "When Are Smells Indicators of Architectural Refactoring Opportunities." *Proc. 28th Int. Conf. on Program Comprehension*. ACM. |
+| **Date** | 2020 |
+| **URL** | https://doi.org/10.1145/3387904.3389276 |
+| **Status** | Confirmed — empirical |
+| **Core finding** | Study of 50 projects, 52,667 refactored elements. 67.53% of smells co-occur. Smells that co-occur are indicators of architectural refactoring in 88.53% of cases. |
+| **Mechanism** | Single smells are often code-level; co-occurring smells indicate architectural problems. Pattern catalog for smells → specific architectural refactorings. |
+| **Where used** | Smell detection triggers architectural analysis when co-occurrence patterns detected. |
+
+---
+
+### 38. SPIRIT Tool — Code Smell Prioritization
+
+| | |
+|---|---|
+| **Source** | Vidal, S. A., Marcos, C., Díaz-Pace, J. A. (2014). "An Approach to Prioritize Code Smells for Refactoring." *Automated Software Engineering*, 23(3), 501–532. |
+| **Date** | 2014 |
+| **URL** | https://doi.org/10.1007/s10515-014-0175-x |
+| **Status** | Confirmed — tool |
+| **Core finding** | SPIRIT (Smart Identification of Refactoring opportunITies) prioritizes smells by 3 criteria: (1) component stability, (2) impact on modifiability scenarios, (3) smell relevance. Top-ranked smells correlate with expert developer judgment. |
+| **Mechanism** | Semi-automated ranking. Combines version history (stable vs. unstable), impact analysis, and smell type. |
+| **Where used** | Refactor prioritization: stability = has the class changed recently? Unstable + smelly = prioritize. |
+
+---
+
+### 39. Bad Engineering Properties of OOP
+
+| | |
+|---|---|
+| **Source** | Cardelli, L. (1996). "Bad Engineering Properties of Object-Oriented Languages." *ACM Computing Surveys*, 28(4), 150. |
+| **Date** | 1996 |
+| **URL** | https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/ |
+| **Status** | Confirmed — foundational critique |
+| **Core finding** | OOP has 5 "economy" problems: (1) Execution (virtual methods prevent inlining), (2) Compilation (no code/interface separation), (3) Small-scale dev (expressive type systems missing), (4) Large-scale dev (poor class extension/modification), (5) Language features (baroque complexity). |
+| **Mechanism** | OOP is not universally superior. Trade-offs exist. Knowing these helps avoid over-engineering. |
+| **Where used** | Anti-pre-pattern: know when OOP adds complexity vs. value. |
+
+---
+
+### 40. Code Complexity Model of OOP
+
+| | |
+|---|---|
+| **Source** | Aluthwaththage, J. H., Thathsarani, H. A. N. N. (2024). "A Novel OO-Based Code Complexity Metric." *Proc. Future Technologies Conference (FTC)*, 616–628. Springer/IEEE. |
+| **Date** | 2024 |
+| **URL** | https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39 |
+| **Alternative** | Misra et al. (2024). "A Suite of Object Oriented Cognitive Complexity Metrics." IEEE. |
+| **Status** | Partially confirmed — recent |
+| **Core finding** | CWC (Combined Weighted Complexity) measures OOP complexity at statement level, considering 8 factors: nesting depth, control types, compound conditions, try-catch, threads, pointers, references, dynamic memory. Addresses gap in existing metrics ignoring cognitive load. |
+| **Mechanism** | Granular complexity scoring. Higher scores indicate more cognitively demanding code. |
+| **Where used** | Complexity measurement: when function > 20 lines, consider CWC-style granular scoring. |
+
+---
+
+### 41. Metric Thresholds for Smell Detection
+
+| | |
+|---|---|
+| **Source** | Bigonha, M. A. S., et al. (2019). "The usefulness of software metric thresholds for detection of bad smells and fault prediction." *Information and Software Technology*, 115, 79–92. |
+| **Date** | 2019 |
+| **URL** | https://doi.org/10.1016/j.infsof.2019.08.005 |
+| **Alternative** | Catal et al. (2018). "Software metrics thresholds calculation techniques." *Info. Softw. Technol.* |
+| **Status** | Confirmed |
+| **Core finding** | Metric thresholds (e.g., LOC > 600) used for smell detection are unreliable. Study on 92 open-source systems found precision too low for practical use. Neither heuristic-based nor ML approaches achieve acceptable accuracy. |
+| **Mechanism** | Fixed thresholds are context-dependent. Thresholds should be project-specific, not universal. |
+| **Where used** | Anti-pre-pattern: do not rely on fixed thresholds. Use co-occurrence patterns (entry 37) instead. |
+
+---
+
+## Bibliography
+
+1. Aluthwaththage, J. H., & Thathsarani, H. A. N. N. (2024). A Novel OO-Based Code Complexity Metric. *Proc. Future Technologies Conference (FTC)*, 616–628. https://link.springer.com/chapter/10.1007/978-3-031-73125-9_39
+2. Bigonha, M. A. S., et al. (2019). The usefulness of software metric thresholds. *Information and Software Technology*, 115, 79–92. https://doi.org/10.1016/j.infsof.2019.08.005
+3. Cardelli, L. (1996). Bad Engineering Properties of Object-Oriented Languages. *ACM Computing Surveys*, 28(4), 150. https://www.microsoft.com/en-us/research/publication/bad-engineering-properties-of-object-oriented-languages/
+4. Malhotra, R., & Singh, P. (2020). Exploiting bad-smells and OO characteristics. *Int. J. Syst. Assur. Eng. Manag.*, 11(Suppl 2), 133–144. https://doi.org/10.1007/s13198-020-01001-x
+5. Silva, C. et al. (2020). When Are Smells Indicators of Architectural Refactoring Opportunities. *Proc. 28th ICPC*. https://doi.org/10.1145/3387904.3389276
+6. Vidal, S. A., Marcos, C., & Díaz-Pace, J. A. (2014). An Approach to Prioritize Code Smells. *Automated Software Engineering*, 23(3), 501–532. https://doi.org/10.1007/s10515-014-0175-x
diff --git a/research/requirements-elicitation.md b/research/requirements-elicitation.md
new file mode 100644
index 0000000..b272727
--- /dev/null
+++ b/research/requirements-elicitation.md
@@ -0,0 +1,246 @@
+# Scientific Research — Requirements Elicitation
+
+Foundations for the PO interview structure, Gherkin criteria, and feature discovery in this template.
+
+---
+
+### 17. INVEST Criteria for User Stories
+
+| | |
+|---|---|
+| **Source** | Wake, B. (2003). *INVEST in Good Stories, and SMART Tasks*. XP123.com. |
+| **Date** | 2003 |
+| **Alternative** | Cohn, M. (2004). *User Stories Applied: For Agile Software Development*. Addison-Wesley. |
+| **Status** | Confirmed |
+| **Core finding** | Stories that are Independent, Negotiable, Valuable, Estimable, Small, and Testable produce fewer downstream defects and smoother development cycles. |
+| **Mechanism** | INVEST serves as a quality gate before stories enter development. "Testable" forces the PO to express observable outcomes (directly enabling Given/When/Then). "Small" forces decomposition. "Independent" prevents hidden ordering dependencies. |
+| **Where used** | INVEST gate in Phase 3 of `scope/SKILL.md`. |
+
+---
+
+### 18. Example Mapping (Rules Layer)
+
+| | |
+|---|---|
+| **Source** | Wynne, M. (2015). *Introducing Example Mapping*. Cucumber Blog. https://cucumber.io/blog/bdd/example-mapping-introduction/ |
+| **Date** | 2015 |
+| **Status** | Confirmed |
+| **Core finding** | Inserting a "rules" layer between stories and examples prevents redundant or contradictory acceptance criteria. A story with many rules needs splitting; a story with many open questions is not ready for development. |
+| **Mechanism** | Four card types: Story (yellow), Rules (blue), Examples (green), Questions (red). The rules layer groups related examples under the business rule they illustrate. Red cards (unanswered questions) are a first-class signal to stop and investigate. |
+| **Where used** | `Rules (Business):` section in each `.feature` file. PO identifies business rules before writing Examples in Stage 2 Step B. |
+
+---
+
+### 19. Declarative Gherkin
+
+| | |
+|---|---|
+| **Source** | Cucumber Team. (2024). *Better Gherkin*. Cucumber Documentation. https://cucumber.io/docs/bdd/better-gherkin/ |
+| **Date** | 2024 |
+| **Status** | Confirmed |
+| **Core finding** | Declarative Gherkin ("When Bob logs in") produces specifications that survive UI changes. Imperative Gherkin ("When I click the Login button") couples specs to implementation details and breaks on every UI redesign. |
+| **Mechanism** | Declarative steps describe *what happens* at the business level. Imperative steps describe *how the user interacts with a specific UI*. AI agents are especially prone to writing imperative Gherkin because they mirror literal steps. |
+| **Where used** | Declarative vs. imperative table in Stage 2 Step B (Criteria) of `scope/SKILL.md`. |
+
+---
+
+### 20. MoSCoW Prioritization (Within-Story Triage)
+
+| | |
+|---|---|
+| **Source** | Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley (DSDM origin). |
+| **Date** | 1994 |
+| **Status** | Confirmed |
+| **Core finding** | Classifying requirements as Must/Should/Could/Won't forces explicit negotiation about what is essential vs. desired. When applied *within* a single story, it reveals bloated stories that should be split. |
+| **Mechanism** | DSDM mandates that Musts cannot exceed 60% of total effort. At the story level: if a story has 12 Examples and only 3 are Musts, the remaining 9 can be deferred. This prevents gold-plating and keeps stories small. |
+| **Where used** | MoSCoW triage in Stage 2 Step B (Criteria) of `scope/SKILL.md`. |
+
+---
+
+### 28. Active Listening — Paraphrase-Clarify-Summarize
+
+| | |
+|---|---|
+| **Source** | Rogers, C. R., & Farson, R. E. (1957). *Active Listening*. Industrial Relations Center, University of Chicago. |
+| **Date** | 1957 |
+| **Alternative** | McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231. |
+| **Status** | Confirmed — foundational clinical research; widely replicated |
+| **Core finding** | Active listening — paraphrasing what was heard in the listener's own words, asking clarifying questions, then summarizing the main points and intent — reduces misunderstanding, builds trust, and confirms mutual understanding before proceeding. |
+| **Mechanism** | Paraphrasing forces the listener to reconstruct the speaker's meaning, surfacing gaps immediately. Clarifying questions address residual ambiguity. Summarizing creates a shared record that both parties can confirm or correct. |
+| **Where used** | PO summarization protocol in `scope/SKILL.md`: after each interview round, PO produces a "Here is what I understood" block before proceeding. |
+
+---
+
+### 28a. Active Listening — Three-Level Structure
+
+| | |
+|---|---|
+| **Source** | Synthesis of: Nielsen (2010); Farrell (2017); Ambler (2002); Wynne (2015). |
+| **Date** | 2010–2015 |
+| **Status** | Synthesized rule of thumb — each component individually confirmed |
+| **Core finding** | Active listening in requirements interviews operates at three granularities: **Level 1** (per answer) — immediate paraphrase; **Level 2** (per topic cluster) — transition summary; **Level 3** (end of interview) — full synthesis serving four downstream purposes. |
+| **Level 3 — four uses** | 1. Accuracy gate (NN/G). 2. Scope crystallization (Ambler/FDD). 3. Input to domain modeling (Ambler/FDD). 4. Baseline trigger (Wynne/Cucumber). |
+| **Where used** | Stage 1 Discovery sessions in `scope/SKILL.md`. |
+
+---
+
+### 29. The Kipling Method — Five Ws and One H
+
+| | |
+|---|---|
+| **Source** | Kipling, R. (1902). *Just So Stories*. Macmillan. |
+| **Date** | 1902 |
+| **Alternative** | Hermagoras of Temnos (2nd century BCE) — seven circumstances of rhetoric. |
+| **Status** | Practitioner synthesis — journalism, business analysis, investigative methodology |
+| **Core finding** | The six interrogative questions (Who, What, When, Where, Why, How) form a complete framework for gathering all essential facts about any situation. Together they ensure completeness and prevent gaps. |
+| **Where used** | Stage 1 Discovery, General questions (first session): the initial seven questions are an adaptation of the 5W1H framework. |
+
+---
+
+### 30. BA Requirements Question Framework
+
+| | |
+|---|---|
+| **Source** | Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass. |
+| **Date** | 2025 |
+| **Status** | Practitioner synthesis — consolidated BA methodology, not peer-reviewed |
+| **Core finding** | Ten questions consistently make the most difference in requirements elicitation: (1) What problem are we solving? (2) What happens if we do nothing? (3) Who uses this? (4) What does success look like? (5) Walk me through how this works today. (6) Where does this usually break? (7) What decisions will this help? (8) What should definitely not happen? (9) What happens if input is wrong? (10) What assumptions are we making? |
+| **Where used** | Stage 1 Discovery, General questions: the "Success", "Failure", and "Out-of-scope" questions map to this framework. |
+
+---
+
+### 43. Feature-Driven Development — Domain Modeling to Feature List
+
+| | |
+|---|---|
+| **Source** | Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm |
+| **Date** | 2002 |
+| **Alternative** | Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall. |
+| **Status** | Confirmed |
+| **Core finding** | FDD requires domain modeling *before* feature naming. Features are expressed as "Action result object" triples. Features group into Feature Sets (shared domain object), which group into Subject Areas. |
+| **Mechanism** | Domain modeling extracts the vocabulary (nouns = candidate classes, verbs = candidate methods). Feature identification then asks: "what verbs act on each noun?" |
+| **Where used** | Stage 1 Discovery in `scope/SKILL.md`: after session synthesis, PO performs domain analysis (nouns/verbs → subject areas → FDD "Action object" feature names) for first session. |
+
+---
+
+### 44. Affinity Mapping / KJ Method — Bottom-Up Feature Identification
+
+| | |
+|---|---|
+| **Source** | Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/ |
+| **Date** | 2024 (method origin: Kawakita, J., 1960s) |
+| **Alternative** | Kawakita, J. (1967). *Abduction*. Chuokoronsha. |
+| **Status** | Confirmed |
+| **Core finding** | Affinity diagramming groups raw observations/requirements into clusters by bottom-up similarity — no categories are named until grouping is complete. This prevents confirmation bias from top-down pre-labelling. |
+| **Where used** | Stage 1 Discovery in `scope/SKILL.md` (alternative to FDD domain modeling): PO uses affinity mapping on interview answers to derive feature clusters. Best suited when working from interview transcripts solo. |
+
+---
+
+### 45. Event Storming — Domain Events to Functional Areas
+
+| | |
+|---|---|
+| **Source** | Brandolini, A. (2013–present). *EventStorming*. Leanpub / eventstorming.com. https://eventstorming.com |
+| **Date** | 2013 |
+| **Status** | Confirmed |
+| **Core finding** | Event Storming is a collaborative workshop where domain experts place past-tense domain events on a timeline. Sorting the events creates natural Functional Area clusters — these are candidate feature groups. The workshop also produces Ubiquitous Language, a Problem Inventory, and Actor roles. |
+| **Mechanism** | Temporal sequencing of domain events forces resolution of conflicting mental models across organisational silos. Clusters emerge from shared vocabulary and causal proximity. |
+| **Where used** | Optional alternative in Stage 1 Discovery in `scope/SKILL.md` for cross-silo discovery. |
+
+---
+
+### 46. Critical Incident Technique — Gap-Finding via Past Events
+
+| | |
+|---|---|
+| **Source** | Flanagan, J. C. (1954). "The critical incident technique." *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470 |
+| **Date** | 1954 |
+| **Alternative** | Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/ |
+| **Status** | Confirmed — foundational; ~200 follow-on empirical studies |
+| **Core finding** | Anchoring an interview on a specific past incident ("Tell me about a time when X broke down") breaks schema-based recall. Stakeholders describing actual past events report real workarounds, edge cases, and failure modes that never surface when asked "how does this usually work?" |
+| **Mechanism** | Direct questions elicit the stakeholder's mental schema — a sanitized, gap-free description of how things *should* work. Incidents bypass the schema because episodic memory is anchored to specific sensory and emotional detail. |
+| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. |
+
+---
+
+### 47. Cognitive Interview — Memory-Enhancing Elicitation Technique
+
+| | |
+|---|---|
+| **Source** | Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing: The Cognitive Interview*. Charles C. Thomas. |
+| **Date** | 1984 (original); 1987 (enhanced CI); 1992 (manual) |
+| **Alternative** | Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. *Expert Systems with Applications*, 10(1), 127–133. |
+| **Status** | Confirmed — meta-analysis: Köhnken et al. (1999), *Psychology, Crime & Law*, 5(1-2), 3–27. |
+| **Core finding** | The enhanced CI elicits ~35% more correct information than standard interviews with equal accuracy rates. |
+| **Mechanism** | Four retrieval mnemonics: (1) mental reinstatement of context; (2) report everything; (3) temporal reversal; (4) perspective change. Each mnemonic opens a different memory access route, collectively surfacing what direct questions cannot. |
+| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. |
+
+---
+
+### 48. Laddering / Means-End Chain — Surfacing Unstated Motivations
+
+| | |
+|---|---|
+| **Source** | Reynolds, T. J., & Gutman, J. (1988). "Laddering theory, method, analysis, and interpretation." *Journal of Advertising Research*, 28(1), 11–31. |
+| **Date** | 1988 |
+| **Status** | Confirmed — operationalised in IS research (Hunter & Beck 2000) |
+| **Core finding** | Repeatedly asking "Why is that important to you?" climbs a means-end chain from concrete attribute → functional consequence → psychosocial consequence → terminal value. The stakeholder's first answer is rarely the real constraint. |
+| **Mechanism** | The Gherkin "So that [benefit]" clause is structurally a single-rung means-end ladder. Full laddering reveals value conflicts between stakeholders whose surface requirements look identical but whose ladders diverge at the consequence level. |
+| **Where used** | Cross-cutting and per-feature questions (gap-finding) in Stage 1 Discovery in `scope/SKILL.md`. |
+
+---
+
+### 49. Funnel Technique — Question Ordering to Prevent Priming
+
+| | |
+|---|---|
+| **Source** | Rosala, M., & Moran, K. (2022). The Funnel Technique in Qualitative User Research. *Nielsen Norman Group*. https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/ |
+| **Date** | 2022 |
+| **Alternative** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. |
+| **Status** | Confirmed — standard NNG qualitative research protocol |
+| **Core finding** | Starting with broad open-ended questions before narrowing to specifics prevents the interviewer from priming the interviewee's responses. |
+| **Mechanism** | Priming bias is structural: any category name the interviewer introduces activates a schema that filters what the interviewee considers worth reporting. The funnel sequences questions so the interviewee's own categories emerge first. |
+| **Where used** | Within each Stage 1 Discovery session in `scope/SKILL.md`. |
+
+---
+
+### 50. Issues in Requirements Elicitation — Why Direct Questions Fail
+
+| | |
+|---|---|
+| **Source** | Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. Software Engineering Institute, Carnegie Mellon University. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm |
+| **Date** | 1992 |
+| **Alternative** | Sommerville, I., & Sawyer, P. (1997). *Requirements Engineering: A Good Practice Guide*. Wiley. |
+| **Status** | Confirmed — foundational SEI technical report |
+| **Core finding** | Stakeholders have three structural problems that make direct questioning insufficient: (1) they omit information that is "obvious" to them; (2) they have trouble communicating needs they have never had to articulate; (3) they may not know what they want until they see what they don't want. |
+| **Mechanism** | Expert knowledge is largely procedural and tacit. When asked "how does the system work?", experts describe what they believe happens, not what actually happens. Gap-finding techniques are required because they bypass the expert's mental schema. |
+| **Where used** | Theoretical justification for the 3-session interview structure and use of CIT, CI, and Laddering in `scope/SKILL.md`. |
+
+---
+
+## Bibliography
+
+1. Ambler, S. W. (2002). *Agile Modeling*. Wiley. https://www.agilemodeling.com/essays/fdd.htm
+2. Brandenburg, L. (2025). *Requirements Discovery Checklist Pack*. TechCanvass.
+3. Brandolini, A. (2013–present). *EventStorming*. https://eventstorming.com
+4. Christel, M. G., & Kang, K. C. (1992). *Issues in Requirements Elicitation*. CMU/SEI-92-TR-012. https://www.sei.cmu.edu/library/abstracts/reports/92tr012.cfm
+5. Clegg, D., & Barker, R. (1994). *Case Method Fast-Track: A RAD Approach*. Addison-Wesley.
+6. Cohn, M. (2004). *User Stories Applied*. Addison-Wesley.
+7. Cucumber Team. (2024). Better Gherkin. https://cucumber.io/docs/bdd/better-gherkin/
+8. Farrell, S. (2017). UX Research Cheat Sheet. *Nielsen Norman Group*. https://www.nngroup.com/articles/ux-research-cheat-sheet/
+9. Fisher, R. P., & Geiselman, R. E. (1992). *Memory-Enhancing Techniques for Investigative Interviewing*. Charles C. Thomas.
+10. Flanagan, J. C. (1954). The critical incident technique. *Psychological Bulletin*, 51(4), 327–357. https://doi.org/10.1037/h0061470
+11. Kawakita, J. (1967). *Abduction*. Chuokoronsha.
+12. Kipling, R. (1902). *Just So Stories*. Macmillan.
+13. Köhnken, G., Milne, R., Memon, A., & Bull, R. (1999). The cognitive interview: A meta-analysis. *Psychology, Crime & Law*, 5(1-2), 3–27.
+14. Krause, R., & Pernice, K. (2024). Affinity Diagramming. *Nielsen Norman Group*. https://www.nngroup.com/articles/affinity-diagram/
+15. McNaughton, D. et al. (2008). Learning to Listen. *Topics in Early Childhood Special Education*, 27(4), 223–231.
+16. Moody, W., Will, R. P., & Blanton, J. E. (1996). Enhancing knowledge elicitation using the cognitive interview. *Expert Systems with Applications*, 10(1), 127–133.
+17. Nielsen, J. (2010). *Interviewing Users*. Nielsen Norman Group. https://www.nngroup.com/articles/interviewing-users/
+18. Palmer, S. R., & Felsing, J. M. (2002). *A Practical Guide to Feature-Driven Development*. Prentice Hall.
+19. Reynolds, T. J., & Gutman, J. (1988). Laddering theory, method, analysis, and interpretation. *Journal of Advertising Research*, 28(1), 11–31.
+20. Rogers, C. R., & Farson, R. E. (1957). *Active Listening*. Industrial Relations Center, University of Chicago.
+21. Rosala, M. (2020). The Critical Incident Technique in UX. *Nielsen Norman Group*. https://www.nngroup.com/articles/critical-incident-technique/
+22. Rosala, M., & Moran, K. (2022). The Funnel Technique. *Nielsen Norman Group*. https://www.nngroup.com/articles/the-funnel-technique-in-qualitative-user-research/
+23. Wake, B. (2003). INVEST in Good Stories, and SMART Tasks. *XP123.com*.
+24. Wynne, M. (2015). Introducing Example Mapping. *Cucumber Blog*. https://cucumber.io/blog/bdd/example-mapping-introduction/
diff --git a/research/software-economics.md b/research/software-economics.md
new file mode 100644
index 0000000..becd695
--- /dev/null
+++ b/research/software-economics.md
@@ -0,0 +1,24 @@
+# Scientific Research — Software Economics
+
+Foundations for the shift-left, early defect detection, and workflow ordering decisions in this template.
+
+---
+
+### 16. Cost of Change Curve (Shift Left)
+
+| | |
+|---|---|
+| **Source** | Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall. |
+| **Date** | 1981 |
+| **Alternative** | Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477. |
+| **Status** | Confirmed |
+| **Core finding** | The cost to fix a defect multiplies by roughly 10x per SDLC phase: requirements (1x) → design (5x) → coding (10x) → testing (20x) → production (200x). A defect caught during requirements costs 200x less than the same defect found after release. |
+| **Mechanism** | Defects compound downstream: a wrong requirement becomes a wrong design, which becomes wrong code, which becomes wrong tests, all of which must be unwound. Catching errors at the source eliminates the entire cascade. This is the empirical foundation for "shift left" — investing earlier in quality always dominates fixing later. |
+| **Where used** | Justifies the multi-session PO elicitation model: every acceptance criterion clarified at scope prevents 10–200x rework downstream. Also justifies the adversarial pre-mortem at the end of each elicitation cycle, and the adversarial mandate in `verify/SKILL.md`. The entire 5-step pipeline is ordered to surface defects at the earliest (cheapest) phase. |
+
+---
+
+## Bibliography
+
+1. Boehm, B. W. (1981). *Software Engineering Economics*. Prentice-Hall.
+2. Boehm, B., & Papaccio, P. N. (1988). Understanding and controlling software costs. *IEEE Transactions on Software Engineering*, 14(10), 1462–1477.
diff --git a/research/testing.md b/research/testing.md
new file mode 100644
index 0000000..6ebdd87
--- /dev/null
+++ b/research/testing.md
@@ -0,0 +1,137 @@
+# Scientific Research — Testing
+
+Foundations for test design, TDD, BDD, and property-based testing used in this template.
+
+---
+
+### 11. Observable Behavior Testing
+
+| | |
+|---|---|
+| **Source** | Fowler, M. (2018). *The Practical Test Pyramid*. Thoughtworks. https://martinfowler.com/articles/practical-test-pyramid.html |
+| **Date** | 2018 |
+| **Status** | Confirmed |
+| **Core finding** | Tests should answer "if I enter X and Y, will the result be Z?" — not "will method A call class B first?" |
+| **Mechanism** | A test is behavioral if its assertion describes something a caller/user can observe without knowing the implementation. The test should still pass if you completely rewrite the internals. |
+| **Where used** | Contract test rule in `implementation/SKILL.md`: "Write every test as if you cannot see the production code." |
+
+---
+
+### 12. Test-Behavior Alignment
+
+| | |
+|---|---|
+| **Source** | Google Testing Blog (2013). *Testing on the Toilet: Test Behavior, Not Implementation*. |
+| **Date** | 2013 |
+| **Status** | Confirmed |
+| **Core finding** | Test setup may need to change if implementation changes, but the actual test shouldn't need to change if the code's user-facing behavior doesn't change. |
+| **Mechanism** | Tests that are tightly coupled to implementation break on refactoring and become a drag on design improvement. Behavioral tests survive internal rewrites. |
+| **Where used** | Contract test rule in `implement/SKILL.md`, system-architect verification check in `verify/SKILL.md`. |
+
+---
+
+### 13. Tests as First-Class Citizens
+
+| | |
+|---|---|
+| **Source** | Martin, R. C. (2017). *First-Class Tests*. Clean Coder Blog. |
+| **Date** | 2017 |
+| **Status** | Confirmed |
+| **Core finding** | Tests should be treated as first-class citizens of the system — not coupled to implementation. Bad tests are worse than no tests because they give false confidence. |
+| **Mechanism** | Tests written as "contract tests" — describing what the caller observes — remain stable through refactoring. Tests that verify implementation details are fragile and create maintenance burden. |
+| **Where used** | Contract test rule in `implement/SKILL.md`, verification check in `verify/SKILL.md`. |
+
+---
+
+### 14. Property-Based Testing (Invariant Discovery)
+
+| | |
+|---|---|
+| **Source** | MacIver, D. R. (2016). *What is Property Based Testing?* Hypothesis. https://hypothesis.works/articles/what-is-property-based-testing/ |
+| **Date** | 2016 |
+| **Status** | Confirmed |
+| **Core finding** | Property-based testing is "the construction of tests such that, when these tests are fuzzed, failures reveal problems that could not have been revealed by direct fuzzing." Property tests test *invariants* — things that must always be true about the contract. |
+| **Mechanism** | Meaningful property tests assert invariants: `assert Score(x).value >= 0` tests the contract. Tautological tests assert reconstruction: `assert Score(x).value == x` tests the implementation. |
+| **Where used** | Meaningful vs. Tautological table in `implementation/SKILL.md`. |
+
+---
+
+### 15. Mutation Testing (Test Quality Verification)
+
+| | |
+|---|---|
+| **Source** | King, K. N., & Offutt, A. J. (1991). "A Fortran Language System for Mutation-Based Software Testing" (the Mothra mutation system). *Software: Practice and Experience*, 21(7), 685–718. |
+| **Date** | 1991 |
+| **Alternative** | Mutation testing tools: Cosmic Ray, mutmut (Python) |
+| **Status** | Confirmed |
+| **Core finding** | A meaningful test fails when a mutation (small deliberate code change) is introduced. A tautological test passes even with mutations because it doesn't constrain the behavior. |
+| **Mechanism** | If a test survives every mutation of the production code without failing, it tests nothing. Only tests that fail on purposeful "damage" to the code are worth keeping. |
+| **Where used** | Implicitly encouraged: tests must describe contracts, not implementation, which is the theoretical complement to mutation testing. |
+
+---
+
+### 51. Canon TDD — Authoritative Red-Green-Refactor Definition
+
+| | |
+|---|---|
+| **Source** | Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. December 11, 2023. https://tidyfirst.substack.com/p/canon-tdd |
+| **Date** | 2023 |
+| **Alternative** | Fowler, M. (2023). "Test Driven Development." *martinfowler.com*. https://martinfowler.com/bliki/TestDrivenDevelopment.html |
+| **Status** | Confirmed — canonical source; explicitly authored to stop strawman critiques |
+| **Core finding** | The canonical TDD loop is: (1) write a list of test scenarios; (2) convert exactly one item into a runnable test; (3) make it pass; (4) optionally refactor; (5) repeat. Writing all test code before any implementation is an explicit anti-pattern. |
+| **Mechanism** | The interleaving of test-writing and implementation is not cosmetic — each test drives interface decisions at the moment they are cheapest to make. |
+| **Where used** | Justifies one-@id-at-a-time interleaved TDD in Step 3 of `implementation/SKILL.md`. |
+
+---
+
+### 52. GOOS — Outer/Inner TDD Loop
+
+| | |
+|---|---|
+| **Source** | Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley. |
+| **Date** | 2009 |
+| **Status** | Confirmed — canonical ATDD/BDD integration model |
+| **Core finding** | Acceptance tests and unit tests operate at two separate, nested timescales. The outer loop: write one failing acceptance test before any implementation. The inner loop: drive implementation with unit-level Red-Green-Refactor cycles until the acceptance test passes. |
+| **Mechanism** | The outer loop provides direction (what to build); the inner loop provides momentum (how to build it). The acceptance test stays red throughout all inner cycles and goes green only when the feature is complete. |
+| **Where used** | Justifies the two-level structure in Step 3: outer loop per `@id` acceptance test, inner loop per unit. |
+
+---
+
+### 53. Is TDD Dead? — Anti-Bureaucracy Evidence
+
+| | |
+|---|---|
+| **Source** | Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" Video series. *martinfowler.com*. https://martinfowler.com/articles/is-tdd-dead/ |
+| **Date** | 2014 |
+| **Status** | Confirmed — primary evidence for what TDD practitioners reject as overhead |
+| **Core finding** | Per-cycle human reviewer gates, per-cycle checklists, and tests with zero delta coverage are all explicitly identified as harmful overhead. The green bar is the quality gate — not a checklist. |
+| **Mechanism** | Administrative overhead added to TDD workflows increases the cost per cycle without increasing coverage or catching defects. The optimal TDD loop is as lean as it can be while remaining productive. |
+| **Where used** | Justifies removing per-test reviewer gates. Self-declaration moves to end-of-feature (once), preserving accountability at feature granularity without interrupting cycle momentum. |
+
+---
+
+### 54. Introducing BDD — Behaviour-Driven Development Origin
+
+| | |
+|---|---|
+| **Source** | North, D. (2006). "Introducing BDD." *Better Software Magazine*. https://dannorth.net/introducing-bdd/ |
+| **Date** | 2006 |
+| **Alternative** | Fowler, M. (2013). "Given When Then." *martinfowler.com*. https://martinfowler.com/bliki/GivenWhenThen.html |
+| **Status** | Confirmed — primary BDD source |
+| **Core finding** | BDD evolved directly from TDD to address persistent practitioner confusion. BDD reframes TDD vocabulary around observable behavior: scenarios instead of tests, Given-When-Then instead of Arrange-Act-Assert. |
+| **Mechanism** | "Given" captures preconditions (Arrange), "When" captures the triggering event (Act), "Then" captures the observable outcome (Assert). Translating to G/W/T shifts focus from implementation mechanics to user-observable behavior. |
+| **Where used** | Theoretical link between Gherkin `@id` Examples (Step 1 output) and the TDD inner loop (Step 3). |
+
+---
+
+## Bibliography
+
+1. Beck, K. (2023). "Canon TDD." *tidyfirst.substack.com*. https://tidyfirst.substack.com/p/canon-tdd
+2. Beck, K., Fowler, M., & Hansson, D. H. (2014). "Is TDD Dead?" *martinfowler.com*. https://martinfowler.com/articles/is-tdd-dead/
+3. Fowler, M. (2018). *The Practical Test Pyramid*. https://martinfowler.com/articles/practical-test-pyramid.html
+4. Freeman, S., & Pryce, N. (2009). *Growing Object-Oriented Software, Guided by Tests*. Addison-Wesley.
+5. Google Testing Blog. (2013). Testing on the Toilet: Test Behavior, Not Implementation.
+6. King, K. N., & Offutt, A. J. (1991). *A Fortran Language System for Mutation-Based Software Testing*. Software: Practice and Experience, 21(7).
+7. MacIver, D. R. (2016). What is Property Based Testing? *Hypothesis*. https://hypothesis.works/articles/what-is-property-based-testing/
+8. Martin, R. C. (2017). First-Class Tests. *Clean Coder Blog*.
+9. North, D. (2006). Introducing BDD. *Better Software Magazine*. https://dannorth.net/introducing-bdd/
diff --git a/research/version-control.md b/research/version-control.md
new file mode 100644
index 0000000..195e9cc
--- /dev/null
+++ b/research/version-control.md
@@ -0,0 +1,57 @@
+# Version Control & Branching Strategies
+
+## 63. Pro Git — Scott Chacon & Ben Straub
+
+**Source**: Chacon, S., & Straub, B. (2014). *Pro Git* (2nd ed.). Apress. Free online: https://git-scm.com/book
+
+**Key Insight**: Git's distributed model makes branching and merging cheap daily operations, not rare scary events. The book covers the full Git object model (blobs, trees, commits, refs), which explains why operations like `rebase` rewrite history while `revert` appends it — critical for our "no history rewrite" safety protocol.
+
+**Relevance**: Foundation for all Git operations in the project. The object model chapter explains why `git revert` is safe on shared branches while `rebase` is not.
+
+---
+
+## 64. A Successful Git Branching Model — Vincent Driessen
+
+**Source**: Driessen, V. (2010). A successful Git branching model. https://nvie.com/posts/a-successful-git-branching-model/
+
+**Key Insight**: The "git-flow" model defines `master`/`develop` as infinite-lifetime branches, with `feature/*`, `release/*`, and `hotfix/*` as short-lived supporting branches. The `--no-ff` merge is explicitly recommended to preserve feature boundaries in history, making whole-feature reverts possible.
+
+> "The `--no-ff` flag causes the merge to always create a new commit object, even if the merge could be performed with a fast-forward. This avoids losing information about the historical existence of a feature branch."
+
+**Relevance**: Direct basis for our branch model. We use `feat/` and `fix/` branches, merge to `main` with `--no-ff`, and delete branches after merge.
+
+---
+
+## 65. Git Cheat Sheet — Git SCM
+
+**Source**: Git SCM. Git Cheat Sheet. https://git-scm.com/cheat-sheet
+
+**Key Insight**: Quick reference for everyday commands. Covers `git merge-tree` for conflict detection without touching working tree, `git log --follow` for renamed files, and `git reflog` for recovery — all relevant to our workflow.
+
+**Relevance**: Operational reference for the SE when executing branch operations.
+
+---
+
+## 66. Common Git Issues & Anti-Patterns
+
+**Source**: Fowler, M. (2013). Patterns for Managing Source Code Branches. https://martinfowler.com/articles/branching-patterns.html
+
+**Key Insight**: Fowler contrasts "feature branching" (short-lived branches, frequent integration) with "release branching" (long-lived stabilization branches). Our model is feature branching: branches live only for the duration of one feature, then merge to `main`.
+
+**Anti-patterns to avoid**:
+- **Long-lived feature branches**: increase merge conflict risk and integration pain
+- **Force push on shared branches**: destroys history that others may have fetched
+- **Squash merge on collaborative branches**: erases individual commit authorship and makes bisect harder
+- **Committing directly to main**: bypasses review and breaks the closed loop
+
+**Relevance**: Validates our WIP=1 approach and our safety protocol against force push and history rewrite.
+
+---
+
+## 67. Merge vs. Rebase — When to Use Each
+
+**Source**: Atlassian Git Tutorial. Merging vs. Rebasing. https://www.atlassian.com/git/tutorials/merging-vs-rebasing
+
+**Key Insight**: Rebase rewrites commit history by replaying commits on top of a new base. This is fine for local, unpushed branches but dangerous for shared branches because it changes commit SHAs that others may reference. Merge preserves history but creates merge commits.
+
+**Our rule**: Never rebase a pushed branch. Use `git merge main` on the feature branch to resolve conflicts, then `--no-ff` merge the feature branch to `main`.
diff --git a/scope_journal.md b/scope_journal.md
new file mode 100644
index 0000000..6fe6902
--- /dev/null
+++ b/scope_journal.md
@@ -0,0 +1,32 @@
+# Scope Journal:
+
+---
+
+## YYYY-MM-DD — Session 1
+Status: IN-PROGRESS
+
+### General
+
+| ID | Question | Answer |
+|----|----------|--------|
+| Q1 | Who are the users? | ... |
+| Q2 | What does the product do at a high level? | ... |
+| Q3 | Why does it exist — what problem does it solve? | ... |
+| Q4 | When and where is it used? | ... |
+| Q5 | Success — what does "done" look like? | ... |
+| Q6 | Failure — what must never happen? | ... |
+| Q7 | Out-of-scope — what are we explicitly not building? | ... |
+
+###
+
+| ID | Question | Answer |
+|----|----------|--------|
+| Q8 | ... | ... |
+
+### Feature:
+
+| ID | Question | Answer |
+|----|----------|--------|
+| Q9 | ... | ... |
+
+Status: COMPLETE
diff --git a/system.md b/system.md
new file mode 100644
index 0000000..efb9650
--- /dev/null
+++ b/system.md
@@ -0,0 +1,40 @@
+# System:
+
+> Last updated: YYYY-MM-DD —
+
+**Purpose:**
+
+---
+
+## Actors
+
+| Actor | Needs |
+|-------|-------|
+| | |
+
+---
+
+## Structure
+
+| Module | Responsibility |
+|--------|----------------|
+| | |
+
+---
+
+## Key Decisions
+
+-
+
+---
+
+## External Dependencies
+
+| Dependency | What it provides | Why not replaced |
+|------------|------------------|-----------------|
+
+---
+
+## Active Constraints
+
+-
diff --git a/tests/report.html b/tests/report.html
new file mode 100644
index 0000000..8b55787
--- /dev/null
+++ b/tests/report.html
@@ -0,0 +1,1096 @@
+
+
+
+
+ Test Report
+
+
+
+
+
Test Report
+
Report generated on 21-Apr-2026 at 17:43:00 by pytest-html
+ v4.2.0
+
+
Environment
+
+
+
+
+
+
+
+
+
+
+
+
+
No results found. Check the filters.
+
+
+
+
+
+
+
+
+
+
+
+
+
<
+
+
+
+
+
>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Summary
+
+
+
1 test took 00:00:01.
+
(Un)check the boxes to filter the results.
+
+
+
There are still tests running. Reload this page to get the latest results!