# html_sm URL: /api/lexers/html_sm/ Section: lexers -------------------------------------------------------------------------------- html_sm - Rosettes window.BENGAL_THEME_DEFAULTS = { appearance: 'light', palette: 'brown-bengal' }; window.Bengal = window.Bengal || {}; window.Bengal.enhanceBaseUrl = '/rosettes/assets/js/enhancements'; window.Bengal.watchDom = true; window.Bengal.debug = false; window.Bengal.enhanceUrls = { 'toc': '/rosettes/assets/js/enhancements/toc.632a9783.js', 'docs-nav': '/rosettes/assets/js/enhancements/docs-nav.57e4b129.js', 'tabs': '/rosettes/assets/js/enhancements/tabs.aac9e817.js', 'lightbox': '/rosettes/assets/js/enhancements/lightbox.1ca22aa1.js', 'interactive': '/rosettes/assets/js/enhancements/interactive.fc077855.js', 'mobile-nav': '/rosettes/assets/js/enhancements/mobile-nav.d991657f.js', 'action-bar': '/rosettes/assets/js/enhancements/action-bar.d62417f4.js', 'copy-link': '/rosettes/assets/js/enhancements/copy-link.7d9a5c29.js', 'data-table': '/rosettes/assets/js/enhancements/data-table.1f5bc1eb.js', 'lazy-loaders': '/rosettes/assets/js/enhancements/lazy-loaders.a5c38245.js', 'holo': '/rosettes/assets/js/enhancements/holo.ee13c841.js', 'link-previews': '/rosettes/assets/js/enhancements/link-previews.8d906535.js' }; (function () { try { var defaults = window.BENGAL_THEME_DEFAULTS || { appearance: 'system', palette: '' }; var defaultAppearance = defaults.appearance; if (defaultAppearance === 'system') { defaultAppearance = (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) ? 'dark' : 'light'; } var storedTheme = localStorage.getItem('bengal-theme'); var storedPalette = localStorage.getItem('bengal-palette'); var theme = storedTheme ? (storedTheme === 'system' ? defaultAppearance : storedTheme) : defaultAppearance; var palette = storedPalette ?? defaults.palette; document.documentElement.setAttribute('data-theme', theme); if (palette) { document.documentElement.setAttribute('data-palette', palette); } } catch (e) { document.documentElement.setAttribute('data-theme', 'light'); } })(); { "prerender": [ { "where": { "and": [ { "href_matches": "/docs/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ], "prefetch": [ { "where": { "and": [ { "href_matches": "/*" }, { "not": { "selector_matches": "[data-external], [target=_blank], .external" } } ] }, "eagerness": "conservative" } ] } Skip to main content Magnifying Glass ESC Recent Clear Magnifying Glass No results for "" Start typing to search... ↑↓ Navigate ↵ Open ESC Close Powered by Lunr ⌾⌾⌾ DocumentationInfoAboutArrow ClockwiseGet StartedCodeHighlightingPaletteStylingStarburstExtendingFormattersNoteTutorialsBookmarkReferenceReleasesDevGitHubAPI Reference Magnifying Glass Search ⌘K Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal List ⌾⌾⌾ Magnifying Glass Search X Close Documentation Caret Down Info About Arrow Clockwise Get Started Code Highlighting Palette Styling Starburst Extending Formatters Note Tutorials Bookmark Reference Releases Dev Caret Down GitHub API Reference Palette Appearance Chevron Down Mode Monitor System Sun Light Moon Dark Palette Snow Lynx Brown Bengal Silver Bengal Charcoal Bengal Blue Bengal Rosettes API Reference Caret Right Formatters html null terminal Caret Right Lexers _scanners _state_machine bash_sm c_sm clojure_sm cpp_sm css_sm csv_sm cuda_sm cue_sm dart_sm diff_sm dockerfile_sm elixir_sm gleam_sm go_sm graphql_sm groovy_sm haskell_sm hcl_sm html_sm ini_sm java_sm javascript_sm jinja_sm json_sm julia_sm kida_sm kotlin_sm lua_sm makefile_sm markdown_sm mojo_sm nginx_sm nim_sm perl_sm php_sm pkl_sm plaintext_sm powershell_sm protobuf_sm python_sm r_sm ruby_sm rust_sm scala_sm sql_sm stan_sm swift_sm toml_sm tree_sm triton_sm typescript_sm v_sm xml_sm yaml_sm zig_sm Caret Right Themes _mapping _palette _roles palettes _config _escape _formatter_registry _parallel _protocol _registry _types delegate rosettes Rosettes API ReferenceLexers ᗢ Caret Down Link Copy URL External Open LLM text Copy Copy LLM text Share with AI Ask Claude Ask ChatGPT Ask Gemini Ask Copilot Module lexers.html_sm Hand-written HTML lexer using state machine approach. O(n) guaranteed, zero regex, thread-safe. Language Support: HTML5 syntax Comments (<!-- -->) DOCTYPE declarations Tags with attributes (quoted and unquoted values) Self-closing tags Script and style blocks (minimal handling) Token Classification: Tag names: NAME_TAG (div, span, p, etc.) Attribute names: NAME_ATTRIBUTE (class, id, href, etc.) Attribute values: STRING Comments: COMMENT_MULTILINE Text content: TEXT Performance: ~45µs per 100-line file. Thread-Safety: Uses only local variables in tokenize(). See Also: rosettes.lexers.xml_sm: XML lexer (stricter syntax) rosettes.lexers.css_sm: CSS lexer (for style content) 1Class Classes HtmlStateMachineLexer 1 ▼ HTML lexer with tag, attribute, and comment parsing. Handles HTML5 syntax including comments, doct… HTML lexer with tag, attribute, and comment parsing. Handles HTML5 syntax including comments, doctype, and tag attributes. Methods tokenize 2 Iterator[Token] ▼ def tokenize(self, code: str, config: LexerConfig | None = None) -> Iterator[Token] Parameters Name Type Description code — config — Default: None Returns Iterator[Token] ← Previous hcl_sm Next → ini_sm List © 2026 Rosettes built in ᓚᘏᗢ { "linkPreviews": { "enabled": true, "hoverDelay": 200, "hideDelay": 150, "showSection": true, "showReadingTime": true, "showWordCount": true, "showDate": true, "showTags": true, "maxTags": 3, "includeSelectors": [".prose"], "excludeSelectors": ["nav", ".toc", ".breadcrumb", ".pagination", ".card", "[class*='-card']", ".tab-nav", "[class*='-widget']", ".child-items", ".content-tiles"], "allowedHosts": [], "allowedSchemes": ["https"], "hostFailureThreshold": 3 } } window.BENGAL_LAZY_ASSETS = { tabulator: '/rosettes/assets/js/tabulator.min.js', dataTable: '/rosettes/assets/js/data-table.js', mermaidToolbar: '/rosettes/assets/js/mermaid-toolbar.9de5abba.js', mermaidTheme: '/rosettes/assets/js/mermaid-theme.344822c5.js', graphMinimap: '/rosettes/assets/js/graph-minimap.ff04e939.js', graphContextual: '/rosettes/assets/js/graph-contextual.355458ba.js' }; window.BENGAL_ICONS = { close: '/rosettes/assets/icons/close.911d4fe1.svg', enlarge: '/rosettes/assets/icons/enlarge.652035e5.svg', copy: '/rosettes/assets/icons/copy.3d56e945.svg', 'download-svg': '/rosettes/assets/icons/download.04f07e1b.svg', 'download-png': '/rosettes/assets/icons/image.c34dfd40.svg', 'zoom-in': '/rosettes/assets/icons/zoom-in.237b4a83.svg', 'zoom-out': '/rosettes/assets/icons/zoom-out.38857c77.svg', reset: '/rosettes/assets/icons/reset.d26dba29.svg' }; Arrow Up -------------------------------------------------------------------------------- Metadata: - Word Count: 660 - Reading Time: 3 minutes