diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000..979a9c17ae
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,24 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Explicitly declare text files
+*.md text diff=markdown
+*.txt text
+*.csv text
+*.yml text
+*.yaml text
+*.json text
+*.xml text
+*.html text diff=html
+*.css text diff=css
+
+# Denote binary files
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.pdf binary
+*.zip binary
+*.gz binary
+*.tar binary
diff --git a/lib/docs/filters/deno/clean_html.rb b/lib/docs/filters/deno/clean_html.rb
index c111a125bf..1536e473b5 100644
--- a/lib/docs/filters/deno/clean_html.rb
+++ b/lib/docs/filters/deno/clean_html.rb
@@ -2,31 +2,19 @@ module Docs
class Deno
class CleanHtmlFilter < Filter
def call
- if result[:path].start_with?('api/deno/')
- @doc = at_css('main[id!="content"] article', 'main[id!="content"]')
- else
- @doc = at_css('main article .markdown-body')
- end
+ @doc = at_css('main, article, [role="main"], .markdown-body') || doc
- if at_css('.text-2xl')
- doc.prepend_child at_css('.text-2xl').remove
- at_css('.text-2xl').name = 'h1'
- end
+ css('nav, footer, .sidebar, .breadcrumb, .toc,
+ .page-nav, .edit-link, .header-anchor, script, style').remove
- css('code').each do |node|
+ css('pre > code').each do |node|
if node['class']
lang = node['class'][/language-(\w+)/, 1]
+ node.parent['data-language'] = lang if lang
end
- node['data-language'] = lang || 'ts'
- node.remove_attribute('class')
- if node.parent.name == 'div'
- node.content = node.content.strip
- end
+ node.parent['data-language'] ||= 'typescript'
end
- css('a.header-anchor').remove()
- css('.breadcrumbs').remove()
-
doc
end
end
diff --git a/lib/docs/filters/deno/entries.rb b/lib/docs/filters/deno/entries.rb
index 512dd4d16b..29137c5fbb 100644
--- a/lib/docs/filters/deno/entries.rb
+++ b/lib/docs/filters/deno/entries.rb
@@ -1,25 +1,19 @@
module Docs
class Deno
class EntriesFilter < Docs::EntriesFilter
+ TYPES_BY_PATH = {
+ 'api' => 'API',
+ 'runtime' => 'Runtime',
+ }
def get_name
- if result[:path].start_with?('api/deno/')
- at_css('main[id!="content"]')['id'][/\Asymbol_([.\w]+)/, 1]
- else
- at_css('main article h1').content
- end
+ name = at_css('h1')
+ name ? name.content.strip : slug.split('/').last
end
def get_type
- if result[:path].start_with?('api/deno/')
- 'API'
- elsif result[:path].start_with?('runtime/reference/cli')
- 'CLI'
- else
- at_css('main article nav ul :first span').content
- end
+ TYPES_BY_PATH[slug.split('/').first] || 'Guide'
end
-
end
end
end
diff --git a/lib/docs/scrapers/deno.rb b/lib/docs/scrapers/deno.rb
index 4dfb34564f..055fb87ab9 100644
--- a/lib/docs/scrapers/deno.rb
+++ b/lib/docs/scrapers/deno.rb
@@ -1,42 +1,199 @@
module Docs
class Deno < UrlScraper
self.name = 'Deno'
- self.type = 'simple'
+ self.type = 'deno'
+ self.base_url = 'https://docs.deno.com/' # class-level default; a version block may override it
+ self.root_path = 'api/'
+ self.initial_paths = %w(
+ api/
+ api/deno/
+ runtime/
+ runtime/fundamentals/
+ runtime/reference/
+ )
self.links = {
home: 'https://deno.com/',
code: 'https://github.com/denoland/deno'
}
- # https://github.com/denoland/manual/blob/main/LICENSE
- # https://github.com/denoland/deno/blob/main/LICENSE.md
+ html_filters.push 'deno/clean_html', 'deno/entries' # clean_html is pushed ahead of entries
+
+ options[:root_title] = 'Deno'
+ options[:title] = false
+ options[:follow_links] = true
+ options[:only_patterns] = [ # paths must begin with api/ or runtime/
+ /\Aapi\//,
+ /\Aruntime\//,
+ ]
+ options[:skip_patterns] = [ # NOTE(review): none of these prefixes can satisfy only_patterns above, so this list looks redundant — confirm
+ /\Ablog\//,
+ /\Adeploy\//,
+ /\Asubhosting\//,
+ ]
+
options[:attribution] = <<-HTML
- © 2018–2025 the Deno authors
+ © 2018–2025 the Deno authors
Licensed under the MIT License.
HTML
-
- html_filters.push 'deno/entries', 'deno/clean_html'
+ # ── Versions ──────────────────────────────────────────────────────
version '2' do
- self.release = '2.4.4'
- self.base_url = 'https://docs.deno.com/'
- self.root_path = 'runtime'
- options[:only_patterns] = [/\Aruntime/, /\Aapi\/deno\/~/, /\Adeploy/, /\Asubhosting/]
- options[:skip_patterns] = [
- /\Aruntime\/manual/,
- /\Aapi\/deno\/.+\.prototype\z/, # all prototype pages get redirected to the main page
- /\Aapi\/deno\/~\/Deno\.jupyter\.MediaBundle.+/, # docs unavailable
- /\Aapi\/deno\/~\/Deno\.OpMetrics/, # deprecated in deno 2
- ]
- options[:trailing_slash] = false
+ self.release = '2.3.1' # NOTE(review): lower than the 2.4.4 release this line replaces — confirm the downgrade is intended
end
version '1' do
- self.release = '1.27.0'
+ self.release = '1.46.3'
+ self.base_url = 'https://docs.deno.com/api/' # NOTE(review): this base already ends in api/, while the class-level root_path and only_patterns also start with api/ — confirm
end
+ # ── Latest version lookup ─────────────────────────────────────────
+
def get_latest_version(opts)
get_latest_github_release('denoland', 'deno', opts)
end
+
+ private
+
+ # ── Module categorisation ─────────────────────────────────────────
+
+ MODULE_CATEGORIES = { # category label => identifiers that categorize_module looks for; entry order decides which category wins
+ 'Deno' => %w[Deno],
+ 'Web APIs' => %w[fetch Request Response Headers URL URLSearchParams
+ FormData Blob File ReadableStream WritableStream
+ TransformStream TextEncoder TextDecoder
+ WebSocket EventSource AbortController AbortSignal
+ crypto CryptoKey SubtleCrypto],
+ 'I/O' => %w[open read write close seek],
+ 'File System' => %w[readFile writeFile readDir mkdir remove rename
+ stat lstat realPath readLink symlink link
+ truncate copyFile chmod chown],
+ 'Network' => %w[listen connect serve serveHttp
+ listenTls connectTls],
+ 'Subprocess' => %w[run Command ChildProcess],
+ 'Testing' => %w[test bench],
+ 'Permissions' => %w[permissions],
+ }.freeze # the hash itself is frozen; the preceding bare `private` applies to methods, not to this constant
+
+ def categorize_module(name)
+ MODULE_CATEGORIES.each do |category, modules|
+ return category if modules.any? { |m| name.include?(m) }
+ end
+ 'Other'
+ end
+
+ # ── Page parsing ──────────────────────────────────────────────────
+
+ def parse_page(response) # Parses response.body as HTML and returns the cleaned main content node, or nil.
+ doc = Nokogiri::HTML.parse(response.body)
+ return nil if doc.at_css('meta[http-equiv="refresh"]') # pages carrying a meta refresh tag are skipped
+
+ content = doc.at_css('main, article, [role="main"], .markdown-body')
+ return nil unless content # no recognisable content container
+
+ # Strip page chrome: navigation, footers, sidebars, breadcrumbs, TOC, page nav, edit links and header anchors
+ content.css('nav, footer, .sidebar, .breadcrumb, .toc,
+ .page-nav, .edit-link, .header-anchor').each(&:remove)
+
+ # Remove script and style tags
+ content.css('script, style').each(&:remove)
+
+ content
+ end
+
+ # ── Link resolution ───────────────────────────────────────────────
+
+ def resolve_links(content, base_url) # Rewrites relative and same-site hrefs under content to absolute URLs, in place; returns content.
+ content.css('a[href]').each do |link|
+ href = link['href']
+ next if href.nil? || href.empty?
+ next if href.start_with?('#') # in-page fragment links stay as written
+ next if href.match?(%r{\Ahttps?://}) && !href.start_with?(self.class.base_url) # absolute links outside the scraper's base_url stay as written
+
+ begin
+ absolute = URI.join(base_url, href).to_s # resolved against the base_url argument, not the class-level one
+ link['href'] = absolute
+ rescue URI::InvalidURIError
+ # Leave malformed URIs as-is
+ end
+ end
+ content
+ end
+
+ # ── Code example extraction ───────────────────────────────────────
+
+ def extract_code_examples(content) # Collects the non-empty code blocks under content as an array of hashes.
+ examples = []
+ content.css('pre > code, pre.highlight, .code-block').each_with_index do |block, idx|
+ lang = detect_language(block)
+ source = block.text.strip
+ next if source.empty? # blank blocks are skipped, so recorded :index values may have gaps
+
+ examples << {
+ index: idx, # position among all matched nodes, including skipped ones
+ language: lang,
+ source: source,
+ lines: source.lines.count, # line count of the stripped text
+ }
+ end
+ examples
+ end
+
+ def detect_language(code_node) # Returns a language label for code_node: own class, then own data attributes, then the parent's, else 'text'.
+ # Check class attribute for language hints
+ classes = (code_node['class'] || '').split
+ lang_class = classes.find { |c| c.start_with?('language-', 'lang-', 'highlight-') }
+ if lang_class
+ return lang_class.sub(/\A(?:language|lang|highlight)-/, '') # drop the prefix, keep the language name
+ end
+
+ # Check data attributes
+ data_lang = code_node['data-language'] || code_node['data-lang']
+ return data_lang if data_lang
+
+ # Check parent element
+ parent = code_node.parent
+ if parent
+ parent_lang = parent['data-language'] || parent['data-lang'] # on the parent, data attributes are consulted before classes
+ return parent_lang if parent_lang
+
+ parent_classes = (parent['class'] || '').split
+ parent_lang_class = parent_classes.find { |c| c.start_with?('language-', 'lang-') } # 'highlight-' is not considered on the parent
+ if parent_lang_class
+ return parent_lang_class.sub(/\A(?:language|lang)-/, '')
+ end
+ end
+
+ 'text'
+ end
+
+ # ── Version handling ──────────────────────────────────────────────
+
+ def version_url(version, path) # Builds the class base_url plus path, with "@version" appended when version is non-empty.
+ if version && !version.empty?
+ "#{self.class.base_url}#{path}@#{version}"
+ else
+ "#{self.class.base_url}#{path}"
+ end
+ end
+
+ def parse_version_from_url(url) # Returns the run of digits and dots after the first '@' that is followed by one, or nil.
+ match = url.match(/@([\d.]+)/) # the capture may include leading or trailing dots; no further validation here
+ match ? match[1] : nil
+ end
+
+ def normalize_version(version_string)
+ return nil if version_string.nil? || version_string.empty?
+
+ # Strip leading 'v' if present
+ cleaned = version_string.sub(/\Av/, '')
+
+ # Validate semver-like format
+ parts = cleaned.split('.')
+ return nil unless parts.length.between?(1, 3)
+ return nil unless parts.all? { |p| p.match?(/\A\d+\z/) }
+
+ cleaned
+ end
end
end
diff --git a/lib/tasks/sprites.thor b/lib/tasks/sprites.thor
index 54df982f07..d12d314585 100644
--- a/lib/tasks/sprites.thor
+++ b/lib/tasks/sprites.thor
@@ -222,7 +222,7 @@ class SpritesCLI < Thor
scss_erb_files.each do |erb_path|
scss_path = erb_path.gsub('.erb', '')
File.open(scss_path, 'w') do |f|
- f.write(ERB.new(File.open(erb_path).read).result)
+ f.write(ERB.new(File.read(erb_path)).result)
logger.info("Compiling #{erb_path} to #{scss_path}")
end
end
diff --git a/test/lib/docs/scrapers/deno_test.rb b/test/lib/docs/scrapers/deno_test.rb
new file mode 100644
index 0000000000..c497f1813e
--- /dev/null
+++ b/test/lib/docs/scrapers/deno_test.rb
@@ -0,0 +1,84 @@
+require_relative '../../test_helper' # NOTE(review): from test/lib/docs/scrapers/ this resolves to test/lib/test_helper — confirm the helper lives there
+
+class DenoScraperTest < Minitest::Test # Checks the class-level configuration of Docs::Deno; no scraping is performed.
+ def setup
+ @scraper_class = Docs::Deno
+ end
+
+ def test_scraper_name
+ assert_equal 'Deno', @scraper_class.name
+ end
+
+ def test_scraper_type
+ assert_equal 'deno', @scraper_class.type
+ end
+
+ def test_base_url
+ assert_equal 'https://docs.deno.com/', @scraper_class.base_url
+ end
+
+ def test_root_path
+ assert_equal 'api/', @scraper_class.root_path
+ end
+
+ def test_initial_paths_present
+ assert_kind_of Array, @scraper_class.initial_paths
+ refute_empty @scraper_class.initial_paths
+ assert_includes @scraper_class.initial_paths, 'api/'
+ assert_includes @scraper_class.initial_paths, 'runtime/'
+ end
+
+ def test_links_defined
+ links = @scraper_class.links
+ assert_kind_of Hash, links
+ assert links.key?(:home)
+ assert links.key?(:code)
+ assert_match %r{\Ahttps://}, links[:home]
+ assert_match %r{github\.com}, links[:code]
+ end
+
+ def test_only_patterns_defined
+ patterns = @scraper_class.options[:only_patterns]
+ assert_kind_of Array, patterns
+ refute_empty patterns
+ assert patterns.any? { |p| p.is_a?(Regexp) } # passes as soon as one entry is a Regexp
+ end
+
+ def test_skip_patterns_excludes_blog
+ patterns = @scraper_class.options[:skip_patterns]
+ assert_kind_of Array, patterns
+ assert patterns.any? { |p| 'blog/foo' =~ p } # at least one skip pattern matches a blog path
+ end
+
+ def test_skip_patterns_excludes_deploy
+ patterns = @scraper_class.options[:skip_patterns]
+ assert patterns.any? { |p| 'deploy/docs' =~ p } # at least one skip pattern matches a deploy path
+ end
+
+ def test_attribution_present
+ attribution = @scraper_class.options[:attribution]
+ assert_kind_of String, attribution
+ refute_empty attribution.strip
+ assert_match(/Deno/, attribution)
+ end
+
+ def test_has_versions
+ versions = @scraper_class.versions
+ refute_nil versions
+ refute_empty versions
+ end
+
+ def test_module_categories_frozen
+ categories = Docs::Deno::MODULE_CATEGORIES # read from outside the class, so the constant must be publicly accessible
+ assert categories.frozen?
+ assert_kind_of Hash, categories
+ assert categories.key?('Deno')
+ assert categories.key?('Web APIs')
+ assert categories.key?('File System')
+ assert categories.key?('Network')
+ end
+
+ def test_inherits_from_url_scraper
+ assert @scraper_class < Docs::UrlScraper
+ end
+end