Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Auto detect text files and perform LF normalization
* text=auto

# Explicitly declare text files
*.md text diff=markdown
*.txt text
*.csv text
*.yml text
*.yaml text
*.json text
*.xml text
*.html text diff=html
*.css text diff=css

# Denote binary files
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.pdf binary
*.zip binary
*.gz binary
*.tar binary
24 changes: 6 additions & 18 deletions lib/docs/filters/deno/clean_html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,19 @@ module Docs
class Deno
class CleanHtmlFilter < Filter
def call
if result[:path].start_with?('api/deno/')
@doc = at_css('main[id!="content"] article', 'main[id!="content"]')
else
@doc = at_css('main article .markdown-body')
end
@doc = at_css('main, article, [role="main"], .markdown-body') || doc

if at_css('.text-2xl')
doc.prepend_child at_css('.text-2xl').remove
at_css('.text-2xl').name = 'h1'
end
css('nav, footer, .sidebar, .breadcrumb, .toc,
.page-nav, .edit-link, .header-anchor, script, style').remove

css('code').each do |node|
css('pre > code').each do |node|
if node['class']
lang = node['class'][/language-(\w+)/, 1]
node.parent['data-language'] = lang if lang
end
node['data-language'] = lang || 'ts'
node.remove_attribute('class')
if node.parent.name == 'div'
node.content = node.content.strip
end
node.parent['data-language'] ||= 'typescript'
end

css('a.header-anchor').remove()
css('.breadcrumbs').remove()

doc
end
end
Expand Down
20 changes: 7 additions & 13 deletions lib/docs/filters/deno/entries.rb
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
module Docs
class Deno
class EntriesFilter < Docs::EntriesFilter
TYPES_BY_PATH = {
'api' => 'API',
'runtime' => 'Runtime',
}

def get_name
if result[:path].start_with?('api/deno/')
at_css('main[id!="content"]')['id'][/\Asymbol_([.\w]+)/, 1]
else
at_css('main article h1').content
end
name = at_css('h1')
name ? name.content.strip : slug.split('/').last
end

def get_type
if result[:path].start_with?('api/deno/')
'API'
elsif result[:path].start_with?('runtime/reference/cli')
'CLI'
else
at_css('main article nav ul :first span').content
end
TYPES_BY_PATH[slug.split('/').first] || 'Guide'
end

end
end
end
193 changes: 175 additions & 18 deletions lib/docs/scrapers/deno.rb
Original file line number Diff line number Diff line change
@@ -1,42 +1,199 @@
module Docs
class Deno < UrlScraper
self.name = 'Deno'
self.type = 'simple'
self.type = 'deno'
self.base_url = 'https://docs.deno.com/'
self.root_path = 'api/'
self.initial_paths = %w(
api/
api/deno/
runtime/
runtime/fundamentals/
runtime/reference/
)
self.links = {
home: 'https://deno.com/',
code: 'https://github.com/denoland/deno'
}

# https://github.com/denoland/manual/blob/main/LICENSE
# https://github.com/denoland/deno/blob/main/LICENSE.md
html_filters.push 'deno/clean_html', 'deno/entries'

options[:root_title] = 'Deno'
options[:title] = false
options[:follow_links] = true
options[:only_patterns] = [
/\Aapi\//,
/\Aruntime\//,
]
options[:skip_patterns] = [
/\Ablog\//,
/\Adeploy\//,
/\Asubhosting\//,
]

options[:attribution] = <<-HTML
&copy; 20182025 the Deno authors<br>
&copy; 2018&ndash;2025 the Deno authors<br>
Licensed under the MIT License.
HTML


html_filters.push 'deno/entries', 'deno/clean_html'
# ── Versions ──────────────────────────────────────────────────────

version '2' do
self.release = '2.4.4'
self.base_url = 'https://docs.deno.com/'
self.root_path = 'runtime'
options[:only_patterns] = [/\Aruntime/, /\Aapi\/deno\/~/, /\Adeploy/, /\Asubhosting/]
options[:skip_patterns] = [
/\Aruntime\/manual/,
/\Aapi\/deno\/.+\.prototype\z/, # all prototype pages get redirected to the main page
/\Aapi\/deno\/~\/Deno\.jupyter\.MediaBundle.+/, # docs unavailable
/\Aapi\/deno\/~\/Deno\.OpMetrics/, # deprecated in deno 2
]
options[:trailing_slash] = false
self.release = '2.3.1'
end

version '1' do
self.release = '1.27.0'
self.release = '1.46.3'
self.base_url = 'https://docs.deno.com/api/'
end

# ── Latest version lookup ─────────────────────────────────────────

# Reports the newest released Deno version.
#
# Delegates to get_latest_github_release for the denoland/deno repository,
# passing +opts+ through untouched, and returns whatever that helper returns.
def get_latest_version(opts)
  owner = 'denoland'
  repo = 'deno'
  get_latest_github_release(owner, repo, opts)
end

private

# ── Module categorisation ─────────────────────────────────────────

# Display category => API names that belong to it.
#
# Declaration order is significant: it is the tie-break order used by the
# substring fallback in categorize_module (earlier categories win).
MODULE_CATEGORIES = {
  'Deno' => %w[Deno],
  'Web APIs' => %w[fetch Request Response Headers URL URLSearchParams
                   FormData Blob File ReadableStream WritableStream
                   TransformStream TextEncoder TextDecoder
                   WebSocket EventSource AbortController AbortSignal
                   crypto CryptoKey SubtleCrypto],
  'I/O' => %w[open read write close seek],
  'File System' => %w[readFile writeFile readDir mkdir remove rename
                      stat lstat realPath readLink symlink link
                      truncate copyFile chmod chown],
  'Network' => %w[listen connect serve serveHttp
                  listenTls connectTls],
  'Subprocess' => %w[run Command ChildProcess],
  'Testing' => %w[test bench],
  'Permissions' => %w[permissions],
}.freeze

# Returns the category label for +name+, or 'Other' when nothing matches.
#
# Lookup happens in two passes:
# 1. An exact match against the table. Without this pass, names such as
#    "readFile" or "copyFile" were captured by shorter tokens listed in
#    earlier categories ("File", "read") and could never reach their own
#    'File System' entry.
# 2. The substring scan, in declaration order, for names that are not listed
#    verbatim (e.g. "Deno.foo" => 'Deno').
#
# +name+ is converted with to_s, so nil yields 'Other' instead of raising.
def categorize_module(name)
  label = name.to_s

  exact = MODULE_CATEGORIES.find { |_category, modules| modules.include?(label) }
  return exact.first if exact

  MODULE_CATEGORIES.each do |category, modules|
    return category if modules.any? { |m| label.include?(m) }
  end
  'Other'
end

# ── Page parsing ──────────────────────────────────────────────────

# Parses an HTTP response body and returns the page's main content node.
#
# Returns nil when the document contains a meta-refresh tag, or when none of
# the content selectors matches. Otherwise the matched node is returned after
# navigation/sidebar/footer elements and script/style tags have been removed
# from it in place.
def parse_page(response)
  page = Nokogiri::HTML.parse(response.body)
  return if page.at_css('meta[http-equiv="refresh"]')

  main_node = page.at_css('main, article, [role="main"], .markdown-body')
  return unless main_node

  # First the page chrome, then executable/presentational tags.
  chrome = "nav, footer, .sidebar, .breadcrumb, .toc,\n.page-nav, .edit-link, .header-anchor"
  [chrome, 'script, style'].each do |selector|
    main_node.css(selector).each(&:remove)
  end

  main_node
end

# ── Link resolution ───────────────────────────────────────────────

# Rewrites anchors inside +content+ so their href is absolute.
#
# Left untouched: anchors with a nil or empty href, fragment-only hrefs
# ("#..."), and http(s) URLs that do not start with the scraper's base_url.
# Everything else is joined onto +base_url+. An href that URI cannot parse
# keeps its original value. Mutates +content+ and returns it.
def resolve_links(content, base_url)
  content.css('a[href]').each do |anchor|
    target = anchor['href']
    next if target.nil? || target.empty? || target.start_with?('#')

    offsite = target.match?(%r{\Ahttps?://}) && !target.start_with?(self.class.base_url)
    next if offsite

    absolute =
      begin
        URI.join(base_url, target).to_s
      rescue URI::InvalidURIError
        nil # malformed URI: keep the anchor exactly as written
      end
    anchor['href'] = absolute if absolute
  end

  content
end

# ── Code example extraction ───────────────────────────────────────

# Collects the code samples found under +content+.
#
# Returns an array of hashes, one per non-blank sample, with keys:
#   :index    - position of the node among all matched nodes (blank samples
#               still consume a position, so indices may have gaps)
#   :language - result of detect_language for the node
#   :source   - the node's text with surrounding whitespace stripped
#   :lines    - number of lines in :source
def extract_code_examples(content)
  nodes = content.css('pre > code, pre.highlight, .code-block')

  nodes.each_with_index.each_with_object([]) do |(node, position), found|
    language = detect_language(node)
    text = node.text.strip
    next if text.empty?

    found << { index: position, language: language, source: text, lines: text.lines.count }
  end
end

# Works out which language a code node is written in.
#
# Sources are consulted in this order, first hit wins:
#   1. a class token on the node prefixed with language-, lang- or highlight-
#   2. the node's data-language, then data-lang attribute
#   3. the parent's data-language, then data-lang attribute
#   4. a class token on the parent prefixed with language- or lang-
# Falls back to 'text' when none of them is present.
def detect_language(code_node)
  # Returns the first class token matching +prefix+ with the prefix removed,
  # or nil when the node has no such token.
  class_hint = lambda do |node, prefix|
    token = (node['class'] || '').split.find { |name| name.match?(prefix) }
    token&.sub(prefix, '')
  end

  own_class = class_hint.call(code_node, /\A(?:language|lang|highlight)-/)
  return own_class if own_class

  own_data = code_node['data-language'] || code_node['data-lang']
  return own_data if own_data

  wrapper = code_node.parent
  return 'text' unless wrapper

  wrapper['data-language'] || wrapper['data-lang'] ||
    class_hint.call(wrapper, /\A(?:language|lang)-/) || 'text'
end

# ── Version handling ──────────────────────────────────────────────

# Builds the URL for +path+ under the scraper's base_url.
#
# When +version+ is present and non-empty, "@<version>" is appended to the
# path; otherwise the plain URL is returned.
def version_url(version, path)
  unversioned = "#{self.class.base_url}#{path}"
  pinned = version && !version.empty?

  pinned ? "#{unversioned}@#{version}" : unversioned
end

# Extracts the version that follows an "@" in +url+.
#
# Returns the first run of digits and dots found directly after an "@", or
# nil when the URL contains no such run.
def parse_version_from_url(url)
  url[/@([\d.]+)/, 1]
end

# Normalises a version string to bare "MAJOR[.MINOR[.PATCH]]" form.
#
# A single leading "v" is stripped. Returns the cleaned string when it
# consists of one to three dot-separated runs of digits, and nil for nil,
# empty, or otherwise malformed input.
def normalize_version(version_string)
  return nil if version_string.nil? || version_string.empty?

  # Strip leading 'v' if present
  cleaned = version_string.sub(/\Av/, '')

  # Validate the whole string in one anchored match. Splitting on '.' is not
  # sufficient: String#split discards trailing empty fields, so inputs such
  # as "1.2." or "1." would pass a per-part check and be returned with the
  # trailing dot still attached.
  cleaned.match?(/\A\d+(?:\.\d+){0,2}\z/) ? cleaned : nil
end
end
end
2 changes: 1 addition & 1 deletion lib/tasks/sprites.thor
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class SpritesCLI < Thor
scss_erb_files.each do |erb_path|
scss_path = erb_path.gsub('.erb', '')
File.open(scss_path, 'w') do |f|
f.write(ERB.new(File.open(erb_path).read).result)
f.write(ERB.new(File.read(erb_path)).result)
logger.info("Compiling #{erb_path} to #{scss_path}")
end
end
Expand Down
Loading