#!/usr/bin/env ruby
require 'open-uri'
require 'open3'
require 'yaml'
require 'fileutils'
require 'set'
require 'json'
require 'tsort'
Dir.chdir(File.dirname(__dir__))
BASE_URL = 'https://raw.githubusercontent.com/github/linguist/master'
def camelize(str)
str.split(/[-_\.]/).map { |a| a.capitalize }.join("")
end
def except(hash, *keys)
h = hash.dup
keys.each { |k| h.delete(k) }
h
end
def load_data()
packages = Hash[YAML.load_stream(File.read('packages.yaml'))
.group_by { |a| a.fetch("name") }
.map { |a, b| [a, b.first] }]
deps = Hash.new { |h, k| h[k] = [] }
for package in packages.values
for name in [package.fetch("after", [])].flatten
packages[name] or raise "#{package["name"]} depends on unknown package: #{name}"
deps[name] << package["name"]
end
end
each_node = lambda {|&b| packages.keys.each(&b) }
each_child = lambda {|n, &b| deps[n].each(&b) }
languages = load_languages
# Reason can have ocaml as interpreter but let's not depend on it...
languages["Reason"]["interpreters"] -= ["ocaml"]
packages = TSort.tsort(each_node, each_child).map { |a| packages[a] }
for package in packages
for filetype in package["filetypes"]
if filetype["linguist"]
if filetype["extensions"]
raise "#{package["name"]} #{filetype["name"]}: extensions can't be set when linguist is defined"
end
if filetype["filenames"]
raise "#{package["name"]} #{filetype["name"]}: filenames can't be set when linguist is defined"
end
linguist = languages.fetch(filetype["linguist"])
filetype["extensions"] = (linguist["extensions"] || []).map { |e| e[1..-1] } |
filetype.fetch("extra_extensions", []) -
filetype.fetch("ignored_extensions", []).uniq
filetype["filenames"] = (linguist["filenames"] || []) |
filetype.fetch("extra_filenames", []) -
filetype.fetch("ignored_filenames", []).uniq
filetype["interpreters"] = (linguist["interpreters"] || []) |
filetype.fetch("extra_interpreters", []) -
filetype.fetch("ignored_interpreters", []).uniq
else
filetype["extensions"] ||= []
filetype["filenames"] ||= []
filetype["interpreters"] ||= []
end
end
end
heuristics = YAML.load_stream(File.read('heuristics.yaml'))
[packages, transform_patterns(heuristics)]
end
def parallel(*procs)
threads = procs.map { |p| Thread.new { method(p).call } }
threads.map(&:join).map(&:value)
end
def read_strings(data, keys, print=false)
if data.is_a?(Hash)
data.flat_map do |key, val|
read_strings(val, keys, keys.include?(key))
end
elsif data.is_a?(Array)
data.flat_map { |d| read_strings(d, keys, print) }
elsif data.is_a?(String)
print ? [data] : []
else
[]
end
end
def patterns_to_vim_patterns(patterns)
stdin, stdout, stderr = Open3.popen3('vim', '-V', '--clean', '/dev/stdin', '-es', '-c', "echo expand('%:p:h') | source #{__dir__}/eregex.vim", '-c', "for line in range(0, line('$')) | call setline(line, ExtendedRegex2VimRegex(getline(line))) | endfor", '-c', ':wq! /dev/stdout', chdir: __dir__)
stdin.write(patterns.join("\n"))
stdin.close
stdout.readlines.map(&:chomp).map do |r|
r.gsub('\b', '\(\<\|\>\)')
end
end
def each_hash(data, &block)
if data.is_a?(Hash)
yield data
data.each do |key, val|
each_hash(val, &block)
end
elsif data.is_a?(Array)
data.map { |d| each_hash(d, &block) }
end
end
def transform_patterns(heuristics)
patterns = []
each_hash(heuristics) do |h|
if h.has_key?("pattern")
patterns << h["pattern"]
end
end
patterns_mapping = Hash[patterns.zip(patterns_to_vim_patterns(patterns))]
each_hash(heuristics) do |h|
if h.has_key?("pattern")
h["pattern"] = patterns_mapping.fetch(h["pattern"])
end
end
heuristics
end
def load_languages
url = "#{BASE_URL}/lib/linguist/languages.yml"
data = URI.open(url) { |io| YAML.load(io.read) }
end
def parse_remote(remote)
match = remote.match(/(?<repo>[^@:]+)(?:@(?<branch>[^:]+))?(?::(?<path>.*))?/)
[match[:repo], match[:branch] || "master", match[:path]]
end
def copy_file(package, src, dest)
FileUtils.mkdir_p(File.dirname(dest))
name = package.fetch("name")
open(src, "r") do |input|
open(dest, "a+") do |output|
if name == "jsx"
output << "if !exists('g:polyglot_disabled') || (index(g:polyglot_disabled, 'javascript') == -1 && index(g:polyglot_disabled, 'jsx') == -1)\n\n"
else
output << "if !exists('g:polyglot_disabled') || index(g:polyglot_disabled, '#{name}') == -1\n\n"
end
IO.copy_stream(input, output)
output << "\nendif\n"
end
end
end
def download(packages)
packages.map { |p| p["remote"] or raise "No remote for: " + p["name"] }.uniq.each_slice(20) do |remotes|
remotes.map do |remote|
Thread.new do
repo, branch, path = parse_remote(remote)
dir = "tmp/" + repo
unless File.exist?(dir)
FileUtils.mkdir_p(dir)
url = "https://codeload.github.com/#{repo}/tar.gz/#{branch}"
`curl --silent -fL #{url} | tar -zx -C "#{dir}" --strip 1`
end
progress
end
end.map(&:join)
end
end
$i = 0
LYRICS = "Never gonna give you up. Never gonna let you down. " +
"Never gonna run around and desert you. " +
"Never gonna make you cry. Never gonna say goodbye. " +
"Never gonna tell a lie and hurt you."
$mutex = Mutex.new
def progress
$mutex.synchronize do
$stdout.write(LYRICS[$i] || ".")
$i += 1
end
end
def indent(str, amount)
str.gsub(/^(?!$)/, " " * amount).gsub(/\s+$/, "").gsub(/^ +\n/, "")
end
def pattern_to_condition(rule)
if rule.has_key?("or")
return rule["or"].map { |p| pattern_to_condition(p) }.join(" || ")
end
if rule.has_key?("or")
return rule["and"].map { |p| pattern_to_condition(p) }.join(" && ")
end
operator = (rule["negative"] ? "!" : "=") + "~" + (rule["ignore_case"] ? "?" : "#")
return "line #{operator} '#{rule["pattern"]}'"
end
def rules_to_code(rules)
output = ""
vars = []
each_hash(rules) do |h|
if h.has_key?("set")
vars << h["set"]
end
end
if vars.size > 0
output << vars.uniq.sort.map do |var|
"let #{var} = 0"
end.join("\n") + "\n"
end
output << rule_to_code(rules)
end
def rule_to_code(rule)
if rule.has_key?("lines")
if rule["lines"] == 1
return <<~EOS
let line = getline(1)
#{indent(rule_to_code(except(rule, "lines")), 0)}
EOS
else
return <<~EOS
for lnum in range(1, min([line("$"), #{rule["lines"]}]))
let line = getline(lnum)
#{indent(rule_to_code(except(rule, "lines")), 2)}
endfor
EOS
end
end
if rule.has_key?("rules")
return rule["rules"].map { |r| indent(rule_to_code(r), 0) }.join("\n")
end
if rule.has_key?("pattern") || rule.has_key?("or") || rule.has_key?("and")
return <<~EOS
if #{pattern_to_condition(rule)}
#{indent(rule_to_code(except(rule, "pattern", "or", "and", "ignore_case", "negative")), 2)}
endif
EOS
end
if rule.has_key?("if_set")
return <<~EOS
if #{rule["negative"] ? "!" : ""}#{rule["if_set"]}
#{indent(rule_to_code(except(rule, "if_set", "negative")), 2)}
endif
EOS
end
if rule.has_key?("set")
return <<~EOS
let #{rule["set"]} = 1
#{indent(rule_to_code(except(rule, "set")), 0)}
EOS
end
if (rule.keys - ["filetype", "override", "set"]).size > 0
raise "Unknown rule: #{JSON.generate(rule)}"
end
if rule.has_key?("override")
return <<~EOS
if exists("#{rule["override"]}")
exe "setf " . #{rule["override"]} | return
endif
EOS
end
if rule.has_key?("filetype")
return "setf #{rule["filetype"]} | return"
end
return ""
end
def extract(packages)
all_dirs = %w(syntax indent doc compiler autoload ftplugin ctags extras after)
default_dirs = %w(
syntax indent doc compiler autoload ftplugin ctags extras
after/syntax after/indent after/ftplugin
)
FileUtils.rm_rf(all_dirs)
output = []
packages.map do |package|
repo, branch, path = parse_remote(package["remote"])
dir = "tmp/" + repo
dirs = package.fetch("dirs", default_dirs)
ignored_dirs = package.fetch("ignored_dirs", [])
if ignored_dirs.size > 0
dirs = dirs.reject { |d| ignored_dirs.any? { |id| d.start_with?(id) } }
end
dirs |= package.fetch("extra_dirs", [])
for subdir in dirs
subtree = "#{dir}/#{path ? path + "/" : ""}"
subpath = "#{subtree}#{subdir}"
if FileTest.directory?(subpath)
if repo == "vim/vim" && (["glob", "globs"] & package.keys).size == 0
raise "Package from vim/vim should define glob or globs: #{package["name"]}"
end
glob = package.fetch("glob", package.fetch('globs', '**/*.{vim,ctags,vital,txt}'))
Dir.glob("#{subdir}/#{glob}", base: subtree).each do |p|
next unless File.file?("#{subtree}/#{p}")
if p.include?("samba")
raise package["name"]
end
copy_file(package, "#{subtree}/#{p}", p)
end
elsif File.exist?(subpath)
copy_file(package, subpath, subdir)
end
end
if branch != "master" || path
if path
output << "- [#{package["name"]}](https://github.com/#{repo}/tree/#{branch}/#{path})"
else
output << "- [#{package["name"]}](https://github.com/#{repo}/tree/#{branch})"
end
else
output << "- [#{package["name"]}](https://github.com/#{repo})"
end
progress
end
readme = File.read('README.md')
readme.gsub!(
%r{(?<=<!--Package Count-->).*?(?=<!--/Package Count-->)},
output.size.to_s
)
readme.gsub!(
%r{(?<=<!--Language Packs-->).*?(?=<!--/Language Packs-->)}m,
"\n" + output.sort.join("\n") + "\n"
)
File.write('README.md', readme)
end
def generate_ftdetect(packages, heuristics)
output = <<~EOS
" don't spam the user when Vim is started in Vi compatibility mode
let s:cpo_save = &cpo
set cpo&vim
" Disable all native vim ftdetect
if exists('g:polyglot_test')
autocmd!
endif
let s:disabled_packages = {}
if exists('g:polyglot_disabled')
for pkg in g:polyglot_disabled
let s:disabled_packages[pkg] = 1
endfor
endif
function! s:SetDefault(name, value)
if !exists(a:name)
let {a:name} = a:value
endif
endfunction
call s:SetDefault('g:markdown_enable_spell_checking', 0)
call s:SetDefault('g:markdown_enable_input_abbreviations', 0)
call s:SetDefault('g:markdown_enable_mappings', 0)
" Enable jsx syntax by default
call s:SetDefault('g:jsx_ext_required', 0)
" Needed for sql highlighting
call s:SetDefault('g:javascript_sql_dialect', 'sql')
" Make csv loading faster
call s:SetDefault('g:csv_start', 1)
call s:SetDefault('g:csv_end', 2)
" Disable json concealing by default
call s:SetDefault('g:vim_json_syntax_conceal', 0)
call s:SetDefault('g:filetype_euphoria', 'elixir')
if !exists('g:python_highlight_all')
call s:SetDefault('g:python_highlight_builtins', 1)
call s:SetDefault('g:python_highlight_builtin_objs', 1)
call s:SetDefault('g:python_highlight_builtin_types', 1)
call s:SetDefault('g:python_highlight_builtin_funcs', 1)
call s:SetDefault('g:python_highlight_builtin_funcs_kwarg', 1)
call s:SetDefault('g:python_highlight_exceptions', 1)
call s:SetDefault('g:python_highlight_string_formatting', 1)
call s:SetDefault('g:python_highlight_string_format', 1)
call s:SetDefault('g:python_highlight_string_templates', 1)
call s:SetDefault('g:python_highlight_indent_errors', 1)
call s:SetDefault('g:python_highlight_space_errors', 1)
call s:SetDefault('g:python_highlight_doctests', 1)
call s:SetDefault('g:python_highlight_func_calls', 1)
call s:SetDefault('g:python_highlight_class_vars', 1)
call s:SetDefault('g:python_highlight_operators', 1)
call s:SetDefault('g:python_highlight_file_headers_as_comments', 1)
call s:SetDefault('g:python_slow_sync', 1)
endif
EOS
extensions = Hash.new { |h, k| h[k] = [] }
for package in packages
for filetype in package["filetypes"]
for ext in filetype["extensions"]
extensions[ext] << filetype["name"]
end
end
end
ambiguous_extensions = extensions
.select { |a, b| b.uniq.size > 1 }.keys.sort
expected_filetypes = detect_filetypes
for package in packages
name = package.fetch("name")
output << "if !has_key(s:disabled_packages, '#{name}')\n"
filetypes = package["filetypes"] or raise "Unknown filetype for: #{package["name"]}"
package_heuristics = []
for filetype in filetypes
name = filetype.fetch("name")
syntax = filetype["syntax"] ? " | set syntax=#{filetype["syntax"]}" : ""
set_command = "setf #{name}"
if filetype["syntax"]
set_command = "if !did_filetype() | set ft=#{name} syntax=#{filetype["syntax"]} | endif"
end
if filetype["custom_set"]
set_command = filetype["custom_set"]
end
extensions = filetype["extensions"]
filenames = filetype["filenames"]
if expected_filetypes[name] && !filetype["syntax"]
for e in expected_filetypes.fetch(name)[:extensions] - extensions - expand_all(filetype.fetch("ignored_extensions", []))
puts "Probable missing extension for #{name}: #{e}"
end
for e in expected_filetypes.fetch(name)[:filenames] - expand_all(filenames).flat_map { |e| [e, e.gsub(/^\./, '')] } - expand_all(filetype.fetch("ignored_filenames", [])) - ['*']
puts "Probable missing filename for #{name}: #{e}"
end
end
for extension in extensions.sort
outer_filetype = filetype["outer_filetype"]
if outer_filetype
output << " au BufNewFile *.*.#{extension} execute \"do BufNewFile filetypedetect \" . expand(\"<afile>:r\") | #{outer_filetype}\n"
output << " au BufReadPre *.*.#{extension} execute \"do BufRead filetypedetect \" . expand(\"<afile>:r\") | #{outer_filetype}\n"
end
heuristic = heuristics.find { |h| h["extensions"].include?(extension) }
if heuristic
package_heuristics << heuristic
else
# if ambiguous_extensions.include?(extension)
# puts "Ambiguous extension without heuristic: #{extension} => #{filetype["name"]}"
# end
#
output << " au BufNewFile,BufRead *.#{extension} #{set_command}\n"
end
end
for filename in filenames.sort
if filename[0] == "."
filename = "{.,}" + filename[1..]
end
output << " au BufNewFile,BufRead #{filename} #{set_command}\n"
end
end
for heuristic in package_heuristics.uniq
extensions = heuristic["extensions"].map { |e| "*.#{e}" }
output << " au! BufNewFile,BufRead #{extensions.join(",")} call polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype()\n"
end
output << "endif\n\n"
end
output << <<~EOS
au BufNewFile,BufRead,StdinReadPost *
\\ if !did_filetype() && expand("<amatch>") !~ g:ft_ignore_pat
\\ | call polyglot#Heuristics() | endif
" restore Vi compatibility settings
let &cpo = s:cpo_save
unlet s:cpo_save
EOS
File.write('ftdetect/polyglot.vim', output)
output = <<~EOS
" Line continuation is used here, remove 'C' from 'cpoptions'
let s:cpo_save = &cpo
set cpo&vim
func! polyglot#Heuristics()
" Try to detect filetype from shebang
let l:filetype = polyglot#Shebang()
if l:filetype != ""
exec "setf " . l:filetype
return
endif
endfunc
let s:interpreters = {
EOS
for filetype in packages.flat_map { |p| p.fetch("filetypes", []) }.sort_by { |a| a["name"] }
for interpreter in filetype["interpreters"]
output << " \\ '#{interpreter}': '#{filetype["name"]}',\n"
end
end
output << <<~EOS
\\ }
let s:r_hashbang = '^#!\\s*\\(\\S\\+\\)\\s*\\(.*\\)\\s*'
let s:r_envflag = '%(\\S\\+=\\S\\+\\|-[iS]\\|--ignore-environment\\|--split-string\\)'
let s:r_env = '^\\%(\\' . s:r_envflag . '\\s\\+\\)*\\(\\S\\+\\)'
func! polyglot#Shebang()
let l:line1 = getline(1)
if l:line1 !~# "^#!"
return
endif
let l:pathrest = matchlist(l:line1, s:r_hashbang)
if len(l:pathrest) == 0
return
endif
let [_, l:path, l:rest; __] = l:pathrest
let l:script = split(l:path, "/")[-1]
if l:script == "env"
let l:argspath = matchlist(l:rest, s:r_env)
if len(l:argspath) == 0
return
endif
let l:script = l:argspath[1]
endif
if has_key(s:interpreters, l:script)
return s:interpreters[l:script]
endif
for interpreter in keys(s:interpreters)
if l:script =~# '^' . interpreter
return s:interpreters[interpreter]
endif
endfor
endfunc
EOS
for heuristic in heuristics
output << <<~EOS
func! polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype()
#{indent(rules_to_code(heuristic), 2)}
endfunc
EOS
end
output << <<~EOS
" Restore 'cpoptions'
let &cpo = s:cpo_save
unlet s:cpo_save
EOS
File.write('autoload/polyglot.vim', output)
end
def generate_tests(packages)
output = <<~EOS
function! TestFiletype(filetype)
try
enew
exec 'set ft=' . a:filetype
catch
echo 'Error loading filetype ' . a:filetype . ':'
echo v:exception
echo v:throwpoint
exec ':cq!'
endtry
endfunction
EOS
for package in packages
for filetype in package.fetch("filetypes", [])
output << "call TestFiletype('#{filetype["name"]}')\n"
end
end
File.write('scripts/test_filetypes.vim', output)
end
def brace_expansion(s)
r=1 # Dummy value to forward-declare the parse function `r`
t=->x{ # Function to parse a bracket block
x=x[0].gsub(/^{(.*)}$/){$1} # Remove outer brackets if both are present
# x[0] is required because of quirks in the `scan` function
x=x.scan(/(({(\g<1>|,)*}|[^,{}]|(?<=,|^)(?=,|$))+)/)
# Regex black magic: collect elements of outer bracket
x.map{|i|i=i[0];i[?{]?r[i]:i}.flatten # For each element with brackets, run parse function
}
r=->x{ # Function to parse bracket expansions a{b,c}{d,e}
i=x.scan(/({(\g<1>)*}|[^{} ]+)/) # Regex black magic: scan for adjacent sets of brackets
i=i.map(&t) # Map all elements against the bracket parser function `t`
i.shift.product(*i).map &:join # Combine the adjacent sets with cartesian product and join them together
}
s.split.map(&r).flatten
end
def square_expansion(s)
return [s] unless s.include?('[')
s.scan(/(\[[^\]]+\]|[^\[]+)/).map { |x| x[0] }
.map { |x| x[0] == "[" ? x[1..-2].split("") : [x] }
.reduce(&:product).map(&:flatten).map(&:join)
end
def comma_expanson(s)
s.scan(/{[^{]+}|[^{]+/).map { |a| a[0] == "{" ? a : a.split(",", -1) }.reduce([]) do |a, b|
a.size > 0 ?
(b.is_a?(String) ?
a[0..-2] + [a[-1] + b] :
a[0..-2] + [a[-1] + b[0]] + b[1..-1]) :
[b].flatten
end
end
def expand_all(pattern)
if pattern.is_a?(Array)
return pattern.flat_map { |p| expand_all(p) }
end
comma_expanson(pattern).flat_map do |e|
brace_expansion(e).flat_map do |e2|
square_expansion(e2)
end
end
end
def detect_filetypes
filetypes = Dir['tmp/**/ftdetect/*.vim'].flat_map do |file|
contents = File.read(file).gsub(/^\s*au(tocmd)?!?\s*$/, '')
results = contents.scan(/^\s*(?:au!|au|au[^g][^ ]*) +(?:\S+)\s+(\S+)[\s\\]+([^\n]+)/)
results = results.map do |a, b|
[
a,
b.gsub(/call (?:s:setf|s:StarSetf)\('([^']+)'\)/i, 'setf \1')
.gsub(/set(?:local)?\s+(?:ft|filetype)=(\S+)/, 'setf \1')
.gsub(/setf\S*/, 'setf')
.gsub(/.*setf\s+(\S+).*/, 'setf \1')
]
end.select { |a, b| b.match(/setf \S+/) }.map { |a, b| [a, b.split(" ")[1]] }
results
end
Hash[filetypes.flat_map do |ext, filetype|
expand_all(ext).map { |e| [filetype, e] }
end.group_by { |a, b| a }.map { |a, b| [a, b.map { |c, d| d }] }.map { |a, b|
[a, {
extensions: b.select { |x| x.match(/^\*\.[^\/]+$/) }.map { |a| a.strip[2..] },
filenames: expand_all(b.select { |x| !x.match(/^\*\.[^\/]+$/) })
}]
}]
end
def generate_plugins(packages)
FileUtils.mkdir_p('autoload/polyglot')
output = "let s:globs = {\n"
patterns = Hash.new { |h, k| h[k] = [] }
for package in packages
for filetype in package["filetypes"]
extensions = (filetype["extensions"] || []).map { |e| "*.#{e}" }
files = (filetype["filenames"] || []).reject { |e| e.match(/\*\*|\//) }
patterns[filetype["name"]].concat(extensions)
patterns[filetype["name"]].concat(files)
end
end
for filetype in patterns.keys.sort
output << " \\ '#{filetype}': '{#{patterns[filetype].uniq.join(",")},}',\n"
end
output << " \\}\n\n"
output << <<~EOS
func! sleuth#GlobForFiletype(type)
return get(s:globs, a:type, '')
endfunc
EOS
File.write('autoload/sleuth.vim', output)
end
if __FILE__ == $0
if !ENV["DEV"]
FileUtils.rm_rf("tmp")
end
packages, heuristics = load_data()
download(packages)
extract(packages)
generate_ftdetect(packages, heuristics)
generate_plugins(packages)
generate_tests(packages)
puts(" Bye! Have a wonderful time!")
if !ENV["DEV"]
FileUtils.rm_rf("tmp")
end
end