# core URL: /api/lexer/core/ Section: lexer -------------------------------------------------------------------------------- core - Patitas window.BENGAL_THEME_DEFAULTS = { appearance: 'light', palette: 'brown-bengal' }; window.Bengal = window.Bengal || {}; window.Bengal.enhanceBaseUrl = '/patitas/assets/js/enhancements'; window.Bengal.watchDom = true; window.Bengal.debug = false; window.Bengal.enhanceUrls = { 'toc': '/patitas/assets/js/enhancements/toc.632a9783.js', 'docs-nav': '/patitas/assets/js/enhancements/docs-nav.57e4b129.js', 'tabs': '/patitas/assets/js/enhancements/tabs.aac9e817.js', 'lightbox': '/patitas/assets/js/enhancements/lightbox.1ca22aa1.js', 'interactive': '/patitas/assets/js/enhancements/interactive.fc077855.js', 'mobile-nav': '/patitas/assets/js/enhancements/mobile-nav.d991657f.js', 'action-bar': '/patitas/assets/js/enhancements/action-bar.d62417f4.js', 'copy-link': '/patitas/assets/js/enhancements/copy-link.7d9a5c29.js', 'data-table': '/patitas/assets/js/enhancements/data-table.1f5bc1eb.js', 'lazy-loaders': '/patitas/assets/js/enhancements/lazy-loaders.a5c38245.js', 'holo': '/patitas/assets/js/enhancements/holo.ee13c841.js', 'link-previews': '/patitas/assets/js/enhancements/link-previews.8d906535.js' }; (function () { try { var defaults = window.BENGAL_THEME_DEFAULTS || { appearance: 'system', palette: '' }; var defaultAppearance = defaults.appearance; if (defaultAppearance === 'system') { defaultAppearance = (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) ? 'dark' : 'light'; } var storedTheme = localStorage.getItem('bengal-theme'); var storedPalette = localStorage.getItem('bengal-palette'); var theme = storedTheme ? (storedTheme === 'system' ? defaultAppearance : storedTheme) : defaultAppearance; var palette = storedPalette ?? 
defaults.palette; document.documentElement.setAttribute('data-theme', theme); if (palette) { document.documentElement.setAttribute('data-palette', palette); } } catch (e) { document.documentElement.setAttribute('data-theme', 'light'); } })(); { "prerender": [ { "where": { "and": [ { "href_matches": "/docs/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ], "prefetch": [ { "where": { "and": [ { "href_matches": "/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ] } Skip to main content Magnifying Glass ESC Recent Clear Magnifying Glass No results for "" Start typing to search... ↑↓ Navigate ↵ Open ESC Close Powered by Lunr ฅᨐฅ DocumentationArrow ClockwiseGet StartedCodeSyntaxDirectivesStarburstExtendingBookmarkReferenceInfoAboutTroubleshootingReleasesDevGitHubPatitas API Reference Magnifying Glass Search ⌘K Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal List ฅᨐฅ Magnifying Glass Search X Close Documentation Caret Down Arrow Clockwise Get Started Code Syntax Directives Starburst Extending Bookmark Reference Info About Troubleshooting Releases Dev Caret Down GitHub Patitas API Reference Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal Patitas API Reference Caret Right Directives Caret Right Builtins admonition container dropdown tabs contracts decorator options protocol registry Caret Right Lexer Caret Right Classifiers directive fence footnote heading html link_ref list quote thematic Caret Right Scanners block directive fence html core modes Caret Right Parsing Caret Right Blocks Caret Right List blank_line indent item_blocks marker mixin nested trace types core directive footnote table Caret Right Inline core emphasis links 
match_registry special tokens charsets containers token_nav Caret Right Plugins autolinks footnotes math strikethrough table task_lists Caret Right Renderers html Caret Right Roles Caret Right Builtins formatting icons math reference protocol registry Caret Right Utils hashing logger text errors highlighting icons location nodes parser patitas protocols stringbuilder tokens Patitas API ReferenceLexer ᗢ Caret Down Link Copy URL External Open LLM text Copy Copy LLM text Share with AI Ask Claude Ask ChatGPT Ask Gemini Ask Copilot Module lexer.core State-machine lexer with O(n) guaranteed performance. Implements a window-based approach: scan entire lines, classify, then commit. This eliminates position rewinds and guarantees forward progress. No regex in the hot path. Zero ReDoS vulnerability by construction. Thread Safety: Lexer instances are single-use. Create one per source string. All state is instance-local; no shared mutable state. 1Class Classes Lexer 12 ▼ State-machine lexer with O(n) guaranteed performance. Uses a window-based approach for block s… State-machine lexer with O(n) guaranteed performance. Uses a window-based approach for block scanning: 1. Scan to end of line (find window) 2. Classify the line (pure logic, no position changes) 3. Commit position (always advances) This eliminates rewinds and guarantees forward progress. Usage: >>> lexer = Lexer("# Hello\n\nWorld") >>> for token in lexer.tokenize(): ... print(token) Token(ATX_HEADING, '# Hello', 1:1) Token(BLANK_LINE, '', 2:1) Token(PARAGRAPH_LINE, 'World', 3:1) Token(EOF, '', 3:6) Thread Safety: Lexer instances are single-use. Create one per source string. All state is instance-local; no shared mutable state. Methods tokenize 0 Iterator[Token] ▼ Tokenize source into token stream. def tokenize(self) -> Iterator[Token] Returns Iterator[Token] Internal Methods 11 ▼ __init__ 3 ▼ Initialize lexer with source text. 
def __init__(self, source: str, source_file: str | None = None, text_transformer: Callable[[str], str] | None = None) -> None Parameters Name Type Description source — Markdown source text source_file — Optional source file path for error messages Default: None text_transformer — Optional callback to transform plain text lines Default: None _dispatch_mode 0 Iterator[Token] ▼ Dispatch to appropriate scanner based on current mode. def _dispatch_mode(self) -> Iterator[Token] Returns Iterator[Token] _find_line_end 0 int ▼ Find the end of the current line (position of \n or EOF). Uses str.find for O(… def _find_line_end(self) -> int Find the end of the current line (position of \n or EOF). Uses str.find for O(n) with low constant factor (C implementation). Returns int Position of newline or end of source. _calc_indent 1 tuple[int, int] ▼ Calculate indent level and content start position. Spaces count as 1, tabs exp… def _calc_indent(self, line: str) -> tuple[int, int] Calculate indent level and content start position. Spaces count as 1, tabs expand to next multiple of 4. Parameters Name Type Description line — Line content Returns tuple[int, int] (indent_spaces, content_start_index) _expand_tabs 2 str ▼ Expand tabs in text to spaces based on start_col (1-indexed). def _expand_tabs(self, text: str, start_col: int = 1) -> str Parameters Name Type Description text — start_col — Default: 1 Returns str _commit_to 1 ▼ Commit position to line_end, consuming newline if present. Sets self._consumed… def _commit_to(self, line_end: int) -> None Commit position to line_end, consuming newline if present. Sets self._consumed_newline to indicate if a newline was consumed. Uses optimized string operations instead of character-by-character loop. Parameters Name Type Description line_end — Position to commit to. _peek 0 str ▼ Peek at current character without advancing. def _peek(self) -> str Returns str Current character or empty string at end of input. 
_advance 0 str ▼ Advance position by one character. Updates line/column tracking. def _advance(self) -> str Returns str The consumed character. _save_location 0 ▼ Save current location for O(1) token location creation. Call this at the START… def _save_location(self) -> None Save current location for O(1) token location creation. Call this at the START of scanning a line, before any position changes. _location 0 SourceLocation ▼ Get current source location. def _location(self) -> SourceLocation Returns SourceLocation SourceLocation at current position. _location_from 3 SourceLocation ▼ Get source location from saved position. O(1) - uses pre-saved location from _… def _location_from(self, start_pos: int, start_col: int | None = None, end_pos: int | None = None) -> SourceLocation Get source location from saved position. O(1) - uses pre-saved location from _save_location() call. Parameters Name Type Description start_pos — Start position in source. start_col — Optional column override (1-indexed). Default: None end_pos — Optional end position override. Default: None Returns SourceLocation SourceLocation spanning from start_pos to current or end_pos. 
Next → lexer List © 2026 Patitas built in ᓚᘏᗢ { "linkPreviews": { "enabled": true, "hoverDelay": 200, "hideDelay": 150, "showSection": true, "showReadingTime": true, "showWordCount": true, "showDate": true, "showTags": true, "maxTags": 3, "includeSelectors": [".prose"], "excludeSelectors": ["nav", ".toc", ".breadcrumb", ".pagination", ".card", "[class*='-card']", ".tab-nav", "[class*='-widget']", ".child-items", ".content-tiles"], "allowedHosts": [], "allowedSchemes": ["https"], "hostFailureThreshold": 3 } } window.BENGAL_LAZY_ASSETS = { tabulator: '/patitas/assets/js/tabulator.min.js', dataTable: '/patitas/assets/js/data-table.js', mermaidToolbar: '/patitas/assets/js/mermaid-toolbar.9de5abba.js', mermaidTheme: '/patitas/assets/js/mermaid-theme.344822c5.js', graphMinimap: '/patitas/assets/js/graph-minimap.ff04e939.js', graphContextual: '/patitas/assets/js/graph-contextual.355458ba.js' }; window.BENGAL_ICONS = { close: '/patitas/assets/icons/close.911d4fe1.svg', enlarge: '/patitas/assets/icons/enlarge.652035e5.svg', copy: '/patitas/assets/icons/copy.3d56e945.svg', 'download-svg': '/patitas/assets/icons/download.04f07e1b.svg', 'download-png': '/patitas/assets/icons/image.c34dfd40.svg', 'zoom-in': '/patitas/assets/icons/zoom-in.237b4a83.svg', 'zoom-out': '/patitas/assets/icons/zoom-out.38857c77.svg', reset: '/patitas/assets/icons/reset.d26dba29.svg' }; Arrow Up -------------------------------------------------------------------------------- Metadata: - Word Count: 1184 - Reading Time: 6 minutes