#!/usr/bin/env ruby require 'open-uri' require 'open3' require 'yaml' require 'fileutils' require 'set' require 'json' require 'tsort' Dir.chdir(File.dirname(__dir__)) BASE_URL = 'https://raw.githubusercontent.com/github/linguist/master' def camelize(str) str.split(/[-_\.]/).map { |a| a.capitalize }.join("") end def except(hash, *keys) h = hash.dup keys.each { |k| h.delete(k) } h end def load_data() packages = Hash[YAML.load_stream(File.read('packages.yaml')) .group_by { |a| a.fetch("name") } .map { |a, b| [a, b.first] }] deps = Hash.new { |h, k| h[k] = [] } for package in packages.values for name in [package.fetch("after", [])].flatten packages[name] or raise "#{package["name"]} depends on unknown package: #{name}" deps[name] << package["name"] end end puts deps["javascript"] each_node = lambda {|&b| packages.keys.each(&b) } each_child = lambda {|n, &b| deps[n].each(&b) } languages = load_languages # Reason can have ocaml as interpreter but let's not depend on it... languages["Reason"]["interpreters"] -= ["ocaml"] packages = TSort.tsort(each_node, each_child).map { |a| packages[a] } for package in packages for filetype in package["filetypes"] if filetype["linguist"] if filetype["extensions"] raise "#{package["name"]} #{filetype["name"]}: extensions can't be set when linguist is defined" end if filetype["filenames"] raise "#{package["name"]} #{filetype["name"]}: filenames can't be set when linguist is defined" end linguist = languages.fetch(filetype["linguist"]) filetype["extensions"] = (linguist["extensions"] || []).map { |e| e[1..-1] } | filetype.fetch("extra_extensions", []) extra_ignored_extensions = filetype.fetch("ignored_extensions", []) - filetype["extensions"] if extra_ignored_extensions.size > 0 raise StandardError.new("[#{filetype["name"]}]: Unnecessary ignored extensions: #{extra_ignored_extensions.join(", ")}") end filetype["extensions"] -= filetype.fetch("ignored_extensions", []) filetype["filenames"] = (( (linguist["filenames"] || []) | filetype.fetch("extra_filenames", []) ) - filetype.fetch("ignored_filenames", [])).uniq filetype["interpreters"] = (( (linguist["interpreters"] || []) | filetype.fetch("extra_interpreters", []) ) - filetype.fetch("ignored_interpreters", []).uniq) else filetype["extensions"] ||= [] filetype["filenames"] ||= [] filetype["interpreters"] ||= [] filetype.keys.each do |key| if key.start_with?("extra_") raise "[#{filetype["name"]}]: #{key} is not allowed if linguist is not used" end end end end end heuristics = YAML.load_stream(File.read('heuristics.yaml')) [packages, transform_patterns(heuristics)] end def parallel(*procs) threads = procs.map { |p| Thread.new { method(p).call } } threads.map(&:join).map(&:value) end def read_strings(data, keys, print=false) if data.is_a?(Hash) data.flat_map do |key, val| read_strings(val, keys, keys.include?(key)) end elsif data.is_a?(Array) data.flat_map { |d| read_strings(d, keys, print) } elsif data.is_a?(String) print ? [data] : [] else [] end end def patterns_to_vim_patterns(patterns) stdin, stdout, stderr = Open3.popen3('vim', '-V', '--clean', '/dev/stdin', '-es', '-c', "echo expand('%:p:h') | source #{__dir__}/eregex.vim", '-c', "for line in range(0, line('$')) | call setline(line, ExtendedRegex2VimRegex(getline(line))) | endfor", '-c', ':wq! /dev/stdout', chdir: __dir__) stdin.write(patterns.join("\n")) stdin.close stdout.readlines.map(&:chomp).map do |r| r.gsub('\b', '\(\<\|\>\)') end end def each_hash(data, &block) if data.is_a?(Hash) yield data data.each do |key, val| each_hash(val, &block) end elsif data.is_a?(Array) data.map { |d| each_hash(d, &block) } end end def transform_patterns(heuristics) patterns = [] each_hash(heuristics) do |h| if h.has_key?("pattern") patterns << h["pattern"] end end patterns_mapping = Hash[patterns.zip(patterns_to_vim_patterns(patterns))] each_hash(heuristics) do |h| if h.has_key?("pattern") h["pattern"] = patterns_mapping.fetch(h["pattern"]) end end heuristics end def load_languages url = "#{BASE_URL}/lib/linguist/languages.yml" data = URI.open(url) { |io| YAML.load(io.read) } end def parse_remote(remote) match = remote.match(/(?[^@:]+)(?:@(?[^:]+))?(?::(?.*))?/) dir = "tmp/" + match[:repo] + (match[:branch] ? "-#{match[:branch]}" : "") [match[:repo], match[:branch] || "master", match[:path], dir] end def copy_file(package, src, dest) FileUtils.mkdir_p(File.dirname(dest)) name = package.fetch("name") header = '" Polyglot metafile' if File.exist?(dest) meta_dest = dest new_dest = dest i = 0 while File.exist?(new_dest) i += 1 new_dest = "#{dest.gsub(/\.vim$/, '')}-#{i}.vim" end if File.read(dest).include?(header) dest = new_dest else FileUtils.mv(dest, new_dest) File.write(meta_dest, "#{header}\n") open(meta_dest, "a+") do |output| output << "source :h/#{File.basename(new_dest)}\n" end dest = "#{dest.gsub(/\.vim$/, '')}-#{i+1}.vim" end open(meta_dest, "a+") do |output| output << "source :h/#{File.basename(dest)}\n" end end open(src, "r") do |input| open(dest, "w") do |output| if name == "jsx" output << "if !exists('g:polyglot_disabled') || (index(g:polyglot_disabled, 'javascript') == -1 && index(g:polyglot_disabled, 'jsx') == -1)\n\n" else output << "if !exists('g:polyglot_disabled') || index(g:polyglot_disabled, '#{name}') == -1\n\n" end contents = File.read(input) contents.gsub!(' ..= ', ' .= ') output << contents output << "\nendif\n" end end end def download(packages) packages.map { |p| p["remote"] or raise "No remote for: " + p["name"] }.uniq.each_slice(20) do |remotes| remotes.map do |remote| Thread.new do repo, branch, path, dir = parse_remote(remote) unless File.exist?(dir) FileUtils.mkdir_p(dir) url = "https://codeload.github.com/#{repo}/tar.gz/#{branch}" `curl --silent -fL #{url} | tar -zx -C "#{dir}" --strip 1` end progress end end.map(&:join) end end $i = 0 LYRICS = "Never gonna give you up. Never gonna let you down. " + "Never gonna run around and desert you. " + "Never gonna make you cry. Never gonna say goodbye. " + "Never gonna tell a lie and hurt you." $mutex = Mutex.new def progress $mutex.synchronize do $stdout.write(LYRICS[$i] || ".") $i += 1 end end def indent(str, amount) str.gsub(/^(?!$)/, " " * amount).gsub(/\s+$/, "").gsub(/^ +\n/, "") end def pattern_to_condition(rule) if rule.has_key?("or") return rule["or"].map { |p| pattern_to_condition(p) }.join(" || ") end if rule.has_key?("or") return rule["and"].map { |p| pattern_to_condition(p) }.join(" && ") end operator = (rule["negative"] ? "!" : "=") + "~" + (rule["ignore_case"] ? "?" : "#") return "line #{operator} '#{rule["pattern"]}'" end def rules_to_code(rules) output = "" vars = [] each_hash(rules) do |h| if h.has_key?("set") vars << h["set"] end end if vars.size > 0 output << vars.uniq.sort.map do |var| "let #{var} = 0" end.join("\n") + "\n" end output << rule_to_code(rules) end def rule_to_code(rule) if rule.has_key?("lines") if rule["lines"] == 1 return <<~EOS let line = getline(1) #{indent(rule_to_code(except(rule, "lines")), 0)} EOS else return <<~EOS for lnum in range(1, min([line("$"), #{rule["lines"]}])) let line = getline(lnum) #{indent(rule_to_code(except(rule, "lines")), 2)} endfor EOS end end if rule.has_key?("pattern") || rule.has_key?("or") || rule.has_key?("and") return <<~EOS if #{pattern_to_condition(rule)} #{indent(rule_to_code(except(rule, "pattern", "or", "and", "ignore_case", "negative")), 2)} endif EOS end if rule.has_key?("rules") return rule["rules"].map { |r| indent(rule_to_code(r), 0) }.join("\n") end if rule.has_key?("if_set") return <<~EOS if #{rule["negative"] ? "!" : ""}#{rule["if_set"]} #{indent(rule_to_code(except(rule, "if_set", "negative")), 2)} endif EOS end if rule.has_key?("if_exists") return <<~EOS if #{rule["negative"] ? "!" : ""}exists("#{rule["if_exists"]}") #{indent(rule_to_code(except(rule, "if_exists", "negative")), 2)} endif EOS end if rule.has_key?("set") return <<~EOS let #{rule["set"]} = 1 #{indent(rule_to_code(except(rule, "set")), 0)} EOS end if (rule.keys - ["filetype", "override", "set"]).size > 0 raise "Unknown rule: #{JSON.generate(rule)}" end if rule.has_key?("override") return <<~EOS if exists("#{rule["override"]}") exe "setf " . #{rule["override"]} | return endif EOS end if rule.has_key?("filetype") return "setf #{rule["filetype"]} | return" end return "" end def extract(packages) all_dirs = %w(syntax indent doc compiler autoload ftplugin ctags extras after) default_dirs = %w( syntax indent doc compiler autoload ftplugin ctags extras after/syntax after/indent after/ftplugin ) FileUtils.rm_rf(all_dirs) output = [] # We need to reverse packages so they are included in proper order packages.reverse.map do |package| repo, branch, path, dir = parse_remote(package["remote"]) dirs = package.fetch("dirs", default_dirs) ignored_dirs = package.fetch("ignored_dirs", []) if ignored_dirs.size > 0 dirs = dirs.reject { |d| ignored_dirs.any? { |id| d.start_with?(id) } } end dirs |= package.fetch("extra_dirs", []) for subdir in dirs subtree = "#{dir}/#{path ? path + "/" : ""}" subpath = "#{subtree}#{subdir}" if FileTest.directory?(subpath) if repo == "vim/vim" && (["glob", "globs"] & package.keys).size == 0 raise "Package from vim/vim should define glob or globs: #{package["name"]}" end glob = package.fetch("glob", package.fetch('globs', '**/*.{vim,ctags,vital,txt}')) Dir.glob("#{subdir}/#{glob}", base: subtree).each do |p| next unless File.file?("#{subtree}/#{p}") if p.include?("samba") raise package["name"] end copy_file(package, "#{subtree}/#{p}", p) end elsif File.exist?(subpath) copy_file(package, subpath, subdir) end end if branch != "master" || path if path output << "- [#{package["name"]}](https://github.com/#{repo}/tree/#{branch}/#{path})" else output << "- [#{package["name"]}](https://github.com/#{repo}/tree/#{branch})" end else output << "- [#{package["name"]}](https://github.com/#{repo})" end progress end readme = File.read('README.md') readme.gsub!( %r{(?<=).*?(?=)}, output.size.to_s ) readme.gsub!( %r{(?<=).*?(?=)}m, "\n" + output.sort.join("\n") + "\n" ) File.write('README.md', readme) end def generate_ftdetect(packages, heuristics) output = "\n" extensions = Hash.new { |h, k| h[k] = [] } for package in packages for filetype in package["filetypes"] for ext in filetype["extensions"] extensions[ext] << filetype["name"] end end end ambiguous_extensions = extensions .select { |a, b| b.uniq.size > 1 }.keys.sort expected_filetypes = detect_filetypes('tmp/**/ftdetect/*.vim') native_filetypes = detect_filetypes('tmp/vim/vim/runtime/filetype.vim') native_extensions = native_filetypes.flat_map { |k, v| v["extensions"] } all_filetypes = Hash.new { |h, k| h[k] = { extensions: [], filenames: [] } } for k, v in expected_filetypes all_filetypes[k][:extensions].concat(v[:extensions]) all_filetypes[k][:filenames].concat(v[:filenames]) end for k, v in native_filetypes all_filetypes[k][:extensions].concat(v[:extensions]) all_filetypes[k][:filenames].concat(v[:filenames]) end for package in packages name = package.fetch("name") to_disable = [] for filetype in package["filetypes"] for extension in filetype["extensions"] if native_filetypes.has_key?(extension) to_disable << "*." + extension end end end if to_disable.size > 0 output << "if !has_key(s:disabled_packages, '#{name}')\n" output << " au! BufRead,BufNewFile #{to_disable.join(",")}\n" output << "endif\n\n" end end for package in packages filetypes = package["filetypes"] or raise "Unknown filetype for: #{package["name"]}" package_heuristics = [] autocommands = "" for filetype in filetypes name = filetype.fetch("name") syntax = filetype["syntax"] ? " | set syntax=#{filetype["syntax"]}" : "" set_command = "setf #{name}" if filetype["syntax"] set_command = "if !did_filetype() | set ft=#{name} syntax=#{filetype["syntax"]} | endif" end if filetype["custom_set"] set_command = filetype["custom_set"] end extensions = filetype["extensions"] filenames = filetype["filenames"] expected_extensions = (all_filetypes.has_key?(name) ? all_filetypes.fetch(name)[:extensions] : []) ignored_extensions = expand_all(filetype.fetch("ignored_extensions", [])) ignored_warnings = expand_all(filetype.fetch("ignored_warnings", [])) if all_filetypes[name] && !filetype["syntax"] for e in expected_extensions - extensions - ignored_extensions - ignored_warnings puts "Probable missing extension for #{name}: #{e}" end for e in all_filetypes.fetch(name)[:filenames] - expand_all(filenames).flat_map { |e| [e, e.gsub(/^\./, '')] } - expand_all(filetype.fetch("ignored_warnings", [])) - ['*'] puts "Probable missing filename for #{name}: #{e}" end end for extension in extensions.sort outer_filetype = filetype["outer_filetype"] if outer_filetype autocommands << " au BufNewFile *.*.#{extension} execute \"do BufNewFile filetypedetect \" . expand(\":r\") | #{outer_filetype}\n" autocommands << " au BufReadPre *.*.#{extension} execute \"do BufRead filetypedetect \" . expand(\":r\") | #{outer_filetype}\n" end heuristic = heuristics.find { |h| h["extensions"].include?(extension) } if heuristic package_heuristics << heuristic else # if ambiguous_extensions.include?(extension) # puts "Ambiguous extension without heuristic: #{extension} => #{filetype["name"]}" # end # autocommands << " au BufNewFile,BufRead *.#{extension} #{set_command}\n" end end for filename in filenames.sort if filename[0] == "." filename = "{.,}" + filename[1..] end if filename[-1] == "*" autocommands << " au BufNewFile,BufRead #{filename} call s:StarSetf('#{name}')\n" else autocommands << " au BufNewFile,BufRead #{filename} #{set_command}\n" end end end for heuristic in package_heuristics.uniq extensions = heuristic["extensions"].map { |e| "*.#{e}" } autocommands << " au! BufNewFile,BufRead #{extensions.join(",")} call polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype()\n" end if autocommands != "" output << "if !has_key(s:disabled_packages, '#{package["name"]}')\n" output << autocommands output << "endif\n\n" end end ftdetect = File.read('ftdetect/polyglot.vim') File.write('ftdetect/polyglot.vim', ftdetect.gsub(/(?<=" filetypes\n).*(?=\n" end filetypes)/m, output)) output = <<~EOS " Line continuation is used here, remove 'C' from 'cpoptions' let s:cpo_save = &cpo set cpo&vim func! polyglot#Heuristics() " Try to detect filetype from shebang let l:filetype = polyglot#Shebang() if l:filetype != "" exec "setf " . l:filetype return endif endfunc let s:interpreters = { EOS for filetype in packages.flat_map { |p| p.fetch("filetypes", []) }.sort_by { |a| a["name"] } for interpreter in filetype["interpreters"] output << " \\ '#{interpreter}': '#{filetype["name"]}',\n" end end output << <<~EOS \\ } let s:r_hashbang = '^#!\\s*\\(\\S\\+\\)\\s*\\(.*\\)\\s*' let s:r_envflag = '%(\\S\\+=\\S\\+\\|-[iS]\\|--ignore-environment\\|--split-string\\)' let s:r_env = '^\\%(\\' . s:r_envflag . '\\s\\+\\)*\\(\\S\\+\\)' func! polyglot#Shebang() let l:line1 = getline(1) if l:line1 !~# "^#!" return endif let l:pathrest = matchlist(l:line1, s:r_hashbang) if len(l:pathrest) == 0 return endif let [_, l:path, l:rest; __] = l:pathrest let l:script = split(l:path, "/")[-1] if l:script == "env" let l:argspath = matchlist(l:rest, s:r_env) if len(l:argspath) == 0 return endif let l:script = l:argspath[1] endif if has_key(s:interpreters, l:script) return s:interpreters[l:script] endif for interpreter in keys(s:interpreters) if l:script =~# '^' . interpreter return s:interpreters[interpreter] endif endfor endfunc EOS for heuristic in heuristics output << <<~EOS func! polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype() #{indent(rules_to_code(heuristic), 2)} endfunc EOS end output << <<~EOS " Restore 'cpoptions' let &cpo = s:cpo_save unlet s:cpo_save EOS File.write('autoload/polyglot.vim', output) end def generate_tests(packages) output = <<~EOS function! TestFiletype(filetype) try enew exec 'set ft=' . a:filetype catch echo 'Error loading filetype ' . a:filetype . ':' echo v:exception echo v:throwpoint exec ':cq!' endtry endfunction EOS for package in packages for filetype in package.fetch("filetypes", []) output << "call TestFiletype('#{filetype["name"]}')\n" end end File.write('scripts/test_filetypes.vim', output) end def brace_expansion(s) r=1 # Dummy value to forward-declare the parse function `r` t=->x{ # Function to parse a bracket block x=x[0].gsub(/^{(.*)}$/){$1} # Remove outer brackets if both are present # x[0] is required because of quirks in the `scan` function x=x.scan(/(({(\g<1>|,)*}|[^,{}]|(?<=,|^)(?=,|$))+)/) # Regex black magic: collect elements of outer bracket x.map{|i|i=i[0];i[?{]?r[i]:i}.flatten # For each element with brackets, run parse function } r=->x{ # Function to parse bracket expansions a{b,c}{d,e} i=x.scan(/({(\g<1>)*}|[^{} ]+)/) # Regex black magic: scan for adjacent sets of brackets i=i.map(&t) # Map all elements against the bracket parser function `t` i.shift.product(*i).map &:join # Combine the adjacent sets with cartesian product and join them together } s.split.map(&r).flatten end def square_expansion(s) return [s] unless s.include?('[') s.scan(/(\[[^\]]+\]|[^\[]+)/).map { |x| x[0] } .map { |x| x[0] == "[" ? x[1..-2].split("") : [x] } .reduce(&:product).map(&:flatten).map(&:join) end def comma_expanson(s) s.scan(/{[^{]+}|[^{]+/).map { |a| a[0] == "{" ? a : a.split(",", -1) }.reduce([]) do |a, b| a.size > 0 ? (b.is_a?(String) ? a[0..-2] + [a[-1] + b] : a[0..-2] + [a[-1] + b[0]] + b[1..-1]) : [b].flatten end end def expand_all(pattern) if pattern.is_a?(Array) return pattern.flat_map { |p| expand_all(p) } end comma_expanson(pattern).flat_map do |e| brace_expansion(e).flat_map do |e2| square_expansion(e2) end end end def detect_filetypes(glob) filetypes = Dir[glob].flat_map do |file| contents = File.read(file).gsub(/^\s*au(tocmd)?!?\s*$/, '') results = contents.scan(/^\s*(?:au!|au|au[^g][^ ]*) +(?:\S+)\s+(\S+)[\s\\]+([^\n]+)/) results = results.map do |a, b| [ a, b.gsub(/call (?:s:setf|s:StarSetf)\('([^']+)'\)/i, 'setf \1') .gsub(/set(?:local)?\s+(?:ft|filetype)=(\S+)/, 'setf \1') .gsub(/setf\S*/, 'setf') .gsub(/.*setf\s+(\S+).*/, 'setf \1') ] end.select { |a, b| b.match(/setf \S+/) }.map { |a, b| [a, b.split(" ")[1]] } results end Hash[filetypes.flat_map do |ext, filetype| expand_all(ext).map { |e| [filetype, e] } end.group_by { |a, b| a }.map { |a, b| [a, b.map { |c, d| d }] }.map { |a, b| [a, { extensions: b.select { |x| x.match(/^\*\.[^\/]+$/) }.map { |a| a.strip[2..] }, filenames: expand_all(b.select { |x| !x.match(/^\*\.[^\/]+$/) }) }] }] end def generate_plugins(packages) FileUtils.mkdir_p('autoload/polyglot') output = "let s:globs = {\n" patterns = Hash.new { |h, k| h[k] = [] } for package in packages for filetype in package["filetypes"] extensions = (filetype["extensions"] || []).map { |e| "*.#{e}" } files = (filetype["filenames"] || []).reject { |e| e.match(/\*\*|\//) || e[0] == "." } patterns[filetype["name"]].concat(extensions) patterns[filetype["name"]].concat(files) end end for filetype in patterns.keys.sort output << " \\ '#{filetype}': '#{patterns[filetype].uniq.join(",")}',\n" end output << " \\}\n\n" output << <<~EOS func! sleuth#GlobForFiletype(type) return get(s:globs, a:type, '') endfunc EOS File.write('autoload/sleuth.vim', output) end if __FILE__ == $0 if !ENV["DEV"] FileUtils.rm_rf("tmp") end packages, heuristics = load_data() download(packages) extract(packages) generate_ftdetect(packages, heuristics) generate_plugins(packages) generate_tests(packages) puts(" Bye! Have a wonderful time!") if !ENV["DEV"] FileUtils.rm_rf("tmp") end end