{
    "@context": "https://schema.org",
    "@graph": [
        {
            "@type": "WebSite",
            "@id": "https://chengmarc.com/#website",
            "url": "https://chengmarc.com/",
            "name": "Zhongmang (Marc) Cheng",
            "inLanguage": "en"
        },
        {
            "@type": "ProfilePage",
            "@id": "https://chengmarc.com/#webpage",
            "url": "https://chengmarc.com/",
            "name": "Zhongmang (Marc) Cheng",
            "isPartOf": { "@id": "https://chengmarc.com/#website" },
            "about": { "@id": "https://chengmarc.com/#person" },
            "description": "Personal website of Zhongmang (Marc) Cheng."
        },
        {
            "@type": "Person",
            "@id": "https://chengmarc.com/#person",

            "name": "Zhongmang (Marc) Cheng",
            "alternateName": ["Marc Cheng", "Zhongmang Cheng", "chengmarc"],
            "givenName": "Zhongmang",
            "additionalName": "Marc",
            "familyName": "Cheng",

            "url": "https://chengmarc.com/",
            "contactPoint": [
                {
                    "@type": "ContactPoint",
                    "contactType": "industry",
                    "email": "marccheng@outlook.com"
                },
                {
                    "@type": "ContactPoint",
                    "contactType": "research",
                    "email": "marc.cheng@alumni.utoronto.ca"
                }
            ],
            "sameAs": [
                "https://www.wikidata.org/wiki/Q138601247",
                "https://github.com/chengmarc",
                "https://www.linkedin.com/in/chengmarc/",
                "https://orcid.org/0009-0008-1783-514X",
                "https://openreview.net/profile?id=%7EMarc_Cheng1",
                "https://www.researchgate.net/profile/Zhongmang-Cheng",
                "https://www.instagram.com/marcc.cheng/",
                "https://space.bilibili.com/1351785",
                "https://www.xiaohongshu.com/user/profile/5fea4e03000000000100ad52",
                "https://huggingface.co/chengmarc",
                "https://x.com/chengmarc_",
                "https://farcaster.xyz/chengmarc",
                "https://paragraph.com/@chengmarc",
                "https://steamcommunity.com/id/everwith/"
            ],

            "knowsLanguage": ["English", "French", "Chinese"],

            "alumniOf": [
                {
                    "@type": "CollegeOrUniversity",
                    "name": "University of Toronto",
                    "sameAs": "https://www.utoronto.ca/"
                },
                {
                    "@type": "CollegeOrUniversity",
                    "name": "Dawson College",
                    "sameAs": "https://www.dawsoncollege.qc.ca/"
                }
            ],

            "hasCredential": [
                {
                    "@type": "EducationalOccupationalCredential",
                    "credentialCategory": "degree",
                    "name": "Honours Bachelor of Science in Mathematics",
                    "description": "Mathematics Specialist (Probability/Statistics) and Mathematics Major, conferred with High Distinction. Cumulative GPA 3.6. Dean's List: 2021 Winter, 2022 Summer, 2023 Winter.",
                    "dateCreated": "2023-06",
                    "recognizedBy": {
                        "@type": "CollegeOrUniversity",
                        "name": "University of Toronto"
                    }
                },
                {
                    "@type": "EducationalOccupationalCredential",
                    "credentialCategory": "degree",
                    "name": "Diplôme d'études collégiales, Science (200.B0)",
                    "description": "Pre-university Science program, 58.66 credits. R-Score: 31.688.",
                    "dateCreated": "2019-06",
                    "recognizedBy": {
                        "@type": "CollegeOrUniversity",
                        "name": "Dawson College"
                    }
                },
                {
                    "@type": "EducationalOccupationalCredential",
                    "credentialCategory": "certificate",
                    "name": "Pleasure Craft Operator Card (PCOC)",
                    "description": "Canadian federal boating certification; completed Canada Boat Ed course.",
                    "recognizedBy": {
                        "@type": "GovernmentOrganization",
                        "name": "Transport Canada"
                    }
                },
                {
                    "@type": "EducationalOccupationalCredential",
                    "credentialCategory": "license",
                    "name": "Driver's License",
                    "description": "Ontario Class G (Canada); converted to China C1.",
                    "recognizedBy": {
                        "@type": "GovernmentOrganization",
                        "name": "Ontario Ministry of Transportation"
                    }
                }
            ],

            "award": [
                "Honours Bachelor of Science with High Distinction, University of Toronto (2023)",
                "Dean's List, University of Toronto (2021 Winter, 2022 Summer, 2023 Winter)",
                "Concours Opti-math, 5th place provincial (Université Laval, 2015)",
                "Concours Opti-math, 7th place provincial (Université Laval, 2014)",
                "Gauss Mathematics Contest, Distinction (University of Waterloo, 2013)"
            ],

            "worksFor": [
                {
                    "@type": "OrganizationRole",
                    "roleName": "Software Architect",
                    "startDate": "2025-06",
                    "description": "Architect and builder of a live signal delivery service for DoubleTrends. Used Claude Code and Codex as production coding agents; maintained context discipline across agents to prevent cascading refactors during live system changes. Designed a decoupled signal-and-delivery architecture on Cloudflare Workers + D1; enforced read-only access from the delivery layer to preserve signal accuracy. Implemented S&P 500, VIX, and FRED macro inputs as a daily regime state machine; abstracted regime classification as a first-class primitive extensible to new financial products. Modeled subscriber lifecycle (active, trial, lapsed) as a state machine with D1 as SSoT; routed Stripe webhooks and Telegram signals as state transition inputs to reduce debugging to a targeted D1 state lookup. System runs in production: delivers daily payloads to paying subscribers via Telegram and processes subscription billing through Stripe.",
                    "worksFor": {
                        "@type": "Organization",
                        "name": "DoubleTrends L.L.C.",
                        "url": "https://doubletrends.com"
                    }
                },
                {
                    "@type": "OrganizationRole",
                    "roleName": "Product Data Specialist",
                    "startDate": "2023-06",
                    "endDate": "2025-05",
                    "description": "Owned end-to-end implementation of Informatica P360 within a multi-system enterprise pipeline (AS400 → TIBCO → P360 → SAP Hybris → Webshop), managing 200,000+ SKU records. Designed data models, attribute classification rules, and workflow logic to standardize product data across the pipeline. Served as the primary data interface between ERP, PIM, and e-commerce platforms; audited and resolved cross-system inconsistencies affecting taxonomy, attribute completeness, and downstream accuracy. Coordinated cross-functional acceptance testing across Canada, India, and France to validate data integrity before go-live. Developed ETL automation scripts using AWS CLI and Bash for batch validation and pipeline monitoring across daily ingestion cycles.",
                    "worksFor": {
                        "@type": "Organization",
                        "name": "Rexel Canada",
                        "url": "https://www.rexel.ca"
                    }
                },
                {
                    "@type": "OrganizationRole",
                    "roleName": "Data Analyst (Co-op)",
                    "startDate": "2021-07",
                    "endDate": "2022-04",
                    "description": "Applied KNN, K-Means, and XGBoost to assess production anti-money laundering (AML) classification models for Scotiabank under SR 11-7. Evaluated feature engineering quality, behavioral segmentation logic, and statistical robustness on 1M–20M-record Oracle transaction datasets. Audited model pipelines and cross-validated classification outputs using Python, R, and SQL to verify behavioral profile accuracy against expected risk profiles. Evaluated loyalty segmentation models (LoyaltyOne) via SSMS; audited behavioral profile construction logic and pipeline integrity for downstream scoring accuracy. Verified derivatives valuation and pricing models using Bloomberg Terminal; cross-referenced outputs against live market data to assess pricing assumption validity.",
                    "worksFor": {
                        "@type": "Organization",
                        "name": "Deloitte Canada",
                        "url": "https://www2.deloitte.com/ca/en.html"
                    }
                }
            ],

            "knowsAbout": ["DeFi", "Mechanism Design", "Protocol Research", "AMM", "Perpetual Contracts", "Blockchain", "EVM", "Smart Contracts", "Solidity", "Machine Learning", "LLM Architectures", "State Space Models", "Neural Network Interpretability", "Agent Orchestration", "Agentic Workflows", "Data Engineering", "Data Warehousing", "ETL", "Master Data Management", "Product Information Management", "Data Quality", "Data Governance", "Big Data", "Hadoop", "PySpark", "Anti-Money Laundering", "Model Validation", "Model Risk Management", "Quantitative Finance", "Monte Carlo Simulation", "Mathematics", "Statistics", "Probability", "Time Series Analysis", "Python", "PyTorch", "scikit-learn", "numpy", "pandas", "TypeScript", "Node.js", "SQL", "R", "Bash", "LaTeX", "Git", "GitHub Actions", "Cloudflare Workers", "Serverless Architecture", "Tailwind CSS", "Tokenization", "HTML/CSS/JavaScript", "Visual Art", "Drawing", "Oil Painting", "Watercolor", "Guitar", "Ukulele", "Piano"],

            "author": [
                {
                    "@type": "ScholarlyArticle",
                    "headline": "When the Loop Closes: Architectural Limits of In-Context Isolation, Metacognitive Co-option, and the Two-Target Design Problem in Human-LLM Systems",
                    "author": [
                        {
                            "@type": "Person",
                            "name": "Z. Cheng"
                        },
                        {
                            "@type": "Person",
                            "name": "N. Song"
                        }
                    ],
                    "datePublished": "2026-04",
                    "publisher": {
                        "@type": "Organization",
                        "name": "arXiv"
                    },
                    "sameAs": "https://arxiv.org/abs/2604.15343",
                    "description": "Demonstrates that softmax attention structurally cannot zero out attention weights via isolation instructions; documents metacognitive co-option in human-LLM interaction loops; proposes a two-target ethical framework distinguishing users seeking to preserve versus relinquish agency."
                },
                {
                    "@type": "ScholarlyArticle",
                    "headline": "Every Token of cl100k_base in tiktoken",
                    "author": {
                        "@type": "Person",
                        "name": "Zhongmang (Marc) Cheng"
                    },
                    "datePublished": "2026-05-13",
                    "publisher": {
                        "@type": "Organization",
                        "name": "GitHub"
                    },
                    "sameAs": "https://raw.githubusercontent.com/chengmarc/every-token/main/paper/every_token.pdf",
                    "description": "Systematic enumeration of all 100,277 tokens in the cl100k_base tiktoken vocabulary."
                },
                {
                    "@type": "Article",
                    "headline": "When x·y = k Breaks: A Mechanism Derivation of On-Chain Perpetuals, Funding Rates, and a Step-by-Step Replication of a $4.9M Exploit",
                    "author": {
                        "@type": "Person",
                        "name": "Zhongmang (Marc) Cheng"
                    },
                    "datePublished": "2026-04-30",
                    "publisher": {
                        "@type": "Organization",
                        "name": "Paragraph"
                    },
                    "sameAs": "https://paragraph.com/@chengmarc/when-x%C2%B7y%3Dk-breaks",
                    "description": "Derives why x·y=k AMM conservation fails for perpetuals; defines δ=(L−S)/(L+S); derives funding rate r(δ)=r₀·δ/(1−δ²); reconstructs the $4.9M POPCAT bad debt exploit (2025-11-12)."
                },
                {
                    "@type": "Article",
                    "headline": "Extend, Never Correct: From SSTORE to RISC-V, the Backward Compatibility Constraint Governing EVM Evolution",
                    "author": {
                        "@type": "Person",
                        "name": "Zhongmang (Marc) Cheng"
                    },
                    "datePublished": "2026-05-16",
                    "publisher": {
                        "@type": "Organization",
                        "name": "Paragraph"
                    },
                    "sameAs": "https://paragraph.com/@chengmarc/extend-never-correct",
                    "description": "Demonstrates why the EVM can only be extended, never corrected — deployed contracts execute under original rules indefinitely. Four opcodes as case studies: CHAINID (environmental), SSTORE (semantic — gas reduction invalidated reentrancy guarantees, reverted within one upgrade cycle), SELFDESTRUCT (security — cleanup tool weaponized), PUSH0 (efficiency). Discusses EOF removal from Fusaka and the RISC-V proposal as the larger alternative."
                }
            ],

            "creator": [
                {
                    "@type": "SoftwareSourceCode",
                    "name": "wechat-to-LLM",
                    "codeRepository": "https://github.com/chengmarc/wechat-to-LLM",
                    "programmingLanguage": ["Python", "SQL"],
                    "description": "Agent-based WeChat chat export tool supporting dual-person and group conversations with LLM-readable compressed output."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "paysim-dw",
                    "codeRepository": "https://github.com/chengmarc/paysim-dw",
                    "programmingLanguage": ["PySpark", "Hive", "Hadoop"],
                    "description": "Three-layer data warehouse (ODS / DWD / ADS) in PySpark, Hive, and Hadoop processing 6.3M PaySim financial transactions. ODS enforces immutability via EXTERNAL TABLE schema — any mutation to source data invalidates downstream integrity guarantees. DWD handles transformation; ADS delivers query-facing fraud analytics partitioned by (tx_type, tx_day), where tx_type is the primary fraud dimension and date the secondary filter."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "state-space-mamba",
                    "codeRepository": "https://github.com/chengmarc/state-space-mamba",
                    "programmingLanguage": ["Python", "PyTorch"],
                    "description": "MAMBA-based Bitcoin price forecasting model adopted by Stoic AI. Chose MAMBA for its selective state space mechanism, which captures sequential change over time better than attention-based models. Benchmarked against five baselines (SegRNN, LSTM, Seq2Seq LSTM, Attention LSTM, Transformer); identified lag-1 degeneracy as the root failure mode and designed a log detrending pipeline to eliminate it, forcing the model to predict stationary residuals rather than the raw trend."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "GPT-replication",
                    "codeRepository": "https://github.com/chengmarc/GPT-replication",
                    "programmingLanguage": ["Python", "PyTorch"],
                    "description": "Ground-up replication of GPT-2 including custom tokenization, multi-head causal self-attention, and layer normalization."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "quant-analysis",
                    "codeRepository": "https://github.com/chengmarc/quant-analysis",
                    "programmingLanguage": ["Python", "pandas", "BigQuery"],
                    "description": "Cross-sectional quantitative analysis across 28,000+ CoinGecko assets; applied Monte Carlo simulation for halving-cycle price forecasting and BigQuery + CoinMetrics for on-chain behavioral analytics. Built a Binance real-time trading framework with trend-pattern probability matrices; normalized volatility to make position sizing comparable across assets with different risk magnitudes."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "image-labelling",
                    "codeRepository": "https://github.com/chengmarc/image-labelling",
                    "programmingLanguage": ["Python", "PyTorch"],
                    "description": "Multi-dataset image classification on MNIST, EMNIST, CIFAR-10, and CIFAR-100 using ResNet-34 and ResNet-101."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "every-token",
                    "codeRepository": "https://github.com/chengmarc/every-token",
                    "programmingLanguage": ["Python"],
                    "description": "Systematic enumeration of all 100,277 tokens in the cl100k_base tiktoken vocabulary."
                },
                {
                    "@type": "SoftwareSourceCode",
                    "name": "ERC-20-memecoin",
                    "codeRepository": "https://github.com/chengmarc/ERC-20-memecoin",
                    "programmingLanguage": ["Solidity"],
                    "description": "ERC-20 token smart contract implementation deployed on the Ethereum mainnet."
                }
            ]
        }
    ]
}
