# python_sm URL: /api/lexers/python_sm/ Section: lexers -------------------------------------------------------------------------------- python_sm - Rosettes window.BENGAL_THEME_DEFAULTS = { appearance: 'light', palette: 'brown-bengal' }; window.Bengal = window.Bengal || {}; window.Bengal.enhanceBaseUrl = '/rosettes/assets/js/enhancements'; window.Bengal.watchDom = true; window.Bengal.debug = false; window.Bengal.enhanceUrls = { 'toc': '/rosettes/assets/js/enhancements/toc.632a9783.js', 'docs-nav': '/rosettes/assets/js/enhancements/docs-nav.57e4b129.js', 'tabs': '/rosettes/assets/js/enhancements/tabs.aac9e817.js', 'lightbox': '/rosettes/assets/js/enhancements/lightbox.1ca22aa1.js', 'interactive': '/rosettes/assets/js/enhancements/interactive.fc077855.js', 'mobile-nav': '/rosettes/assets/js/enhancements/mobile-nav.d991657f.js', 'action-bar': '/rosettes/assets/js/enhancements/action-bar.d62417f4.js', 'copy-link': '/rosettes/assets/js/enhancements/copy-link.7d9a5c29.js', 'data-table': '/rosettes/assets/js/enhancements/data-table.1f5bc1eb.js', 'lazy-loaders': '/rosettes/assets/js/enhancements/lazy-loaders.a5c38245.js', 'holo': '/rosettes/assets/js/enhancements/holo.ee13c841.js', 'link-previews': '/rosettes/assets/js/enhancements/link-previews.8d906535.js' }; (function () { try { var defaults = window.BENGAL_THEME_DEFAULTS || { appearance: 'system', palette: '' }; var defaultAppearance = defaults.appearance; if (defaultAppearance === 'system') { defaultAppearance = (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) ? 'dark' : 'light'; } var storedTheme = localStorage.getItem('bengal-theme'); var storedPalette = localStorage.getItem('bengal-palette'); var theme = storedTheme ? (storedTheme === 'system' ? defaultAppearance : storedTheme) : defaultAppearance; var palette = storedPalette ?? 
defaults.palette; document.documentElement.setAttribute('data-theme', theme); if (palette) { document.documentElement.setAttribute('data-palette', palette); } } catch (e) { document.documentElement.setAttribute('data-theme', 'light'); } })(); { "prerender": [ { "where": { "and": [ { "href_matches": "/docs/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ], "prefetch": [ { "where": { "and": [ { "href_matches": "/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ] } Skip to main content Magnifying Glass ESC Recent Clear Magnifying Glass No results for "" Start typing to search... ↑↓ Navigate ↵ Open ESC Close Powered by Lunr ⌾⌾⌾ DocumentationInfoAboutArrow ClockwiseGet StartedCodeHighlightingPaletteStylingStarburstExtendingFormattersNoteTutorialsBookmarkReferenceReleasesDevGitHubAPI Reference Magnifying Glass Search ⌘K Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal List ⌾⌾⌾ Magnifying Glass Search X Close Documentation Caret Down Info About Arrow Clockwise Get Started Code Highlighting Palette Styling Starburst Extending Formatters Note Tutorials Bookmark Reference Releases Dev Caret Down GitHub API Reference Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal Rosettes API Reference Caret Right Formatters html null terminal Caret Right Lexers _scanners _state_machine bash_sm c_sm clojure_sm cpp_sm css_sm csv_sm cuda_sm cue_sm dart_sm diff_sm dockerfile_sm elixir_sm gleam_sm go_sm graphql_sm groovy_sm haskell_sm hcl_sm html_sm ini_sm java_sm javascript_sm jinja_sm json_sm julia_sm kida_sm kotlin_sm lua_sm makefile_sm markdown_sm mojo_sm nginx_sm nim_sm perl_sm php_sm pkl_sm plaintext_sm powershell_sm protobuf_sm python_sm r_sm ruby_sm 
rust_sm scala_sm sql_sm stan_sm swift_sm toml_sm tree_sm triton_sm typescript_sm v_sm xml_sm yaml_sm zig_sm Caret Right Themes _mapping _palette _roles palettes _config _escape _formatter_registry _parallel _protocol _registry _types delegate rosettes Rosettes API ReferenceLexers ᗢ Caret Down Link Copy URL External Open LLM text Copy Copy LLM text Share with AI Ask Claude Ask ChatGPT Ask Gemini Ask Copilot Module lexers.python_sm Hand-written Python lexer using state machine approach. O(n) guaranteed, zero regex, thread-safe. Design Philosophy: This is the reference implementation for Rosettes lexers. It demonstrates: State Machine Architecture: Character-by-character processing with explicit state (position, line, column) as local variables. Frozen Lookup Tables: Keywords, builtins, and operators as frozensets for O(1) membership testing and thread-safety. Fast Path / Slow Path: Simple cases (identifiers, operators) handled inline; complex cases (strings, numbers) delegated to helper methods. Architecture: Main Loop (tokenize): pos = 0 while pos < length: char = code[pos] # Dispatch based on first character if char is whitespace: ... elif char is comment: ... elif char is string: ... # etc. Helper Methods: _scan_string_literal(): Handles prefixed and triple-quoted strings _scan_number(): Handles int, float, hex, octal, binary, complex _classify_word(): Maps identifiers to KEYWORD, BUILTIN, NAME Python Language Support: All Python 3.x syntax including 3.14 F-strings (prefix detection) Type hints (annotations) Walrus operator (:=) Match/case statements (3.10+) Type parameter syntax (3.12+) Unicode identifiers (PEP 3131) Performance: ~50µs per 100-line file O(n) guaranteed (single pass, no backtracking) ~500 tokens/ms throughput Thread-Safety: All state is local to tokenize(). Class attributes are frozen: _KEYWORDS: frozenset _BUILTINS: frozenset _TWO_CHAR_OPS: frozenset etc. Adding New Lexers: Use this file as a template. 
Key patterns to follow: Frozen lookup tables as module constants Local variables for all state (pos, line, col) Character-by-character dispatch in main loop Helper methods for complex constructs See Also: rosettes.lexers._state_machine: Base class and helper functions rosettes._registry: How lexers are registered 1Class Classes PythonStateMachineLexer 13 ▼ Hand-written Python 3 lexer. O(n) guaranteed, zero regex, thread-safe. Handles all Python 3.x synt… Hand-written Python 3 lexer. O(n) guaranteed, zero regex, thread-safe. Handles all Python 3.x syntax including f-strings, type hints, walrus operator. This is the reference implementation for Rosettes lexers. Use it as a template when adding new language support. Performance: ~50µs per 100-line file, ~500 tokens/ms throughput. Attributes Name Type Description name — Canonical language name ("python") aliases — Alternative names for registry lookup ("py", "python3", "py3") filenames — Glob patterns for file detection ("*.py", "*.pyw", "*.pyi") mimetypes — MIME types ("text/x-python", "application/x-python") Thread-Safety: All class attributes are frozen (frozenset). The tokenize() method uses only local variables for state (pos, line, col). Methods tokenize 2 Iterator[Token] ▼ Tokenize Python source code. Single-pass, character-by-character. O(n) guarant… def tokenize(self, code: str, config: LexerConfig | None = None) -> Iterator[Token] Tokenize Python source code. Single-pass, character-by-character. O(n) guaranteed. Parameters Name Type Description code — config — Default: None Returns Iterator[Token] Internal Methods 8 ▼ _scan_string_literal 2 tuple[TokenType, int, in… ▼ Scan a string literal with optional prefix. Returns (token_type, end_position,… def _scan_string_literal(self, code: str, pos: int) -> tuple[TokenType, int, int] Scan a string literal with optional prefix. Returns (token_type, end_position, newline_count). 
Parameters Name Type Description code — pos — Returns tuple[TokenType, int, int] _scan_number 2 tuple[TokenType, int] ▼ Scan a numeric literal. Returns (token_type, end_position). def _scan_number(self, code: str, pos: int) -> tuple[TokenType, int] Parameters Name Type Description code — pos — Returns tuple[TokenType, int] _scan_digits_with_underscore 2 int ▼ Scan digits with optional underscores. def _scan_digits_with_underscore(self, code: str, pos: int) -> int Parameters Name Type Description code — pos — Returns int _scan_hex_digits 2 int ▼ Scan hex digits with optional underscores. def _scan_hex_digits(self, code: str, pos: int) -> int Parameters Name Type Description code — pos — Returns int _scan_octal_digits 2 int ▼ Scan octal digits with optional underscores. def _scan_octal_digits(self, code: str, pos: int) -> int Parameters Name Type Description code — pos — Returns int _scan_binary_digits 2 int ▼ Scan binary digits with optional underscores. def _scan_binary_digits(self, code: str, pos: int) -> int Parameters Name Type Description code — pos — Returns int _scan_exponent 2 int ▼ Scan optional exponent part of number. def _scan_exponent(self, code: str, pos: int) -> int Parameters Name Type Description code — pos — Returns int _classify_word 1 TokenType ▼ Classify an identifier into the appropriate token type. 
def _classify_word(self, word: str) -> TokenType Parameters Name Type Description word — Returns TokenType ← Previous protobuf_sm Next → r_sm List © 2026 Rosettes built in ᓚᘏᗢ { "linkPreviews": { "enabled": true, "hoverDelay": 200, "hideDelay": 150, "showSection": true, "showReadingTime": true, "showWordCount": true, "showDate": true, "showTags": true, "maxTags": 3, "includeSelectors": [".prose"], "excludeSelectors": ["nav", ".toc", ".breadcrumb", ".pagination", ".card", "[class*='-card']", ".tab-nav", "[class*='-widget']", ".child-items", ".content-tiles"], "allowedHosts": [], "allowedSchemes": ["https"], "hostFailureThreshold": 3 } } window.BENGAL_LAZY_ASSETS = { tabulator: '/rosettes/assets/js/tabulator.min.js', dataTable: '/rosettes/assets/js/data-table.js', mermaidToolbar: '/rosettes/assets/js/mermaid-toolbar.9de5abba.js', mermaidTheme: '/rosettes/assets/js/mermaid-theme.344822c5.js', graphMinimap: '/rosettes/assets/js/graph-minimap.ff04e939.js', graphContextual: '/rosettes/assets/js/graph-contextual.355458ba.js' }; window.BENGAL_ICONS = { close: '/rosettes/assets/icons/close.911d4fe1.svg', enlarge: '/rosettes/assets/icons/enlarge.652035e5.svg', copy: '/rosettes/assets/icons/copy.3d56e945.svg', 'download-svg': '/rosettes/assets/icons/download.04f07e1b.svg', 'download-png': '/rosettes/assets/icons/image.c34dfd40.svg', 'zoom-in': '/rosettes/assets/icons/zoom-in.237b4a83.svg', 'zoom-out': '/rosettes/assets/icons/zoom-out.38857c77.svg', reset: '/rosettes/assets/icons/reset.d26dba29.svg' }; Arrow Up -------------------------------------------------------------------------------- Metadata: - Word Count: 1168 - Reading Time: 6 minutes