summaryrefslogblamecommitdiffstats
path: root/scripts/build
blob: 150f45c2b4b72bdea3a510546751fd8264e91f21 (plain) (tree)
1
2
3
4
5
6
7
8
9
                   
 



                   
             
              
               
 
                                
 
                                                                     
 





































































                                                                                                          
 



                                                           
 












                                                      
 





                                                                                                                                                                                                                                                                                                        

     
 









                                         
 
 






                                  
                                                                           


                                                         

       
            
   
 



                                                  
 



                                                                                
 
                                 
                                       

                              

                                
                      

                                                                                                                                                                                                           
                                                                                                        





                                   
 


                                                                                                          
                   






                                                                      



                   
   
 
      



                                                                  
 






                                    
 







































































































                                                                                     

             
                           
                                                        

                                              



                                                                             

                                           


                                                  




                                                                                          
                                                   


                                 

                                                  




                                           








                                                                                             

            
 
                                 
 



                                                              
 



                                                                 
 

                                 
 
                                           




                                                                      










                                        










































                                                                         

     






                                            
       



                                                

                                       
 
                         
                                

                                                             


                                                                                        

                           
                             
                                   
                                                                              
 
                                  
 









                                                                                                
 




                                                                                                                                
                                                                                                                                                                                         



                                                            


                                                   
                                                                                                                                              
                                                                                                                                           
           
 


                                                                               
            



                                                                                               

                                                                             









                                                                        




                                                                                                                                             



                         



                                                                     



                                       
 
                                             





















































































                                                                                                

   
                            














                                                          
                         





                                                            


















































                                                                                                                     
                                                             


















                                                                                         
                                                                      

      

   















                                         
#!/usr/bin/env ruby

require 'open-uri'
require 'open3'
require 'yaml'
require 'fileutils'
require 'set'
require 'json'
require 'tsort'

# Work from the parent of the directory that contains this script, so that the
# relative paths used throughout (packages.yaml, heuristics.yaml, tmp/,
# README.md, ftdetect/, autoload/, scripts/) resolve against that directory.
Dir.chdir(File.dirname(__dir__))

# Base URL that load_languages prefixes to the path of linguist's languages.yml.
BASE_URL = 'https://raw.githubusercontent.com/github/linguist/master'

# Converts a dash-, underscore- or dot-separated name to CamelCase.
#
# Each segment is passed through String#capitalize (first letter upper-cased,
# the rest lower-cased) and the segments are concatenated without a separator.
def camelize(str)
  segments = str.split(/[-_\.]/)
  segments.map(&:capitalize).join
end

# Returns a shallow copy of +hash+ with the given +keys+ removed.
#
# The original hash is left untouched; keys that are not present are ignored.
def except(hash, *keys)
  hash.dup.tap do |copy|
    keys.each { |key| copy.delete(key) }
  end
end

# Combines the values linguist provides for one filetype attribute with the
# package's own overrides.
#
# key is "extensions", "filenames" or "interpreters"; the filetype hash may
# carry "extra_<key>" (values to add) and "ignored_<key>" (values to drop).
# The ignored values are removed from the whole union, including the values
# that came from linguist.
def merge_linguist_values(filetype, key, linguist_values)
  (linguist_values | filetype.fetch("extra_#{key}", [])) -
    filetype.fetch("ignored_#{key}", [])
end

# Loads the package definitions and the filetype heuristics.
#
# Reads every YAML document of packages.yaml (keeping the first document per
# "name") and orders the packages with TSort, where the children of a package
# are the packages that list it under "after"; TSort emits children before
# their parents. Filetypes that declare "linguist" get their "extensions",
# "filenames" and "interpreters" filled in from the linguist language of that
# name; all other filetypes get empty-array defaults for missing keys.
#
# Returns [packages, heuristics], where heuristics is the content of
# heuristics.yaml after transform_patterns has rewritten its patterns.
#
# Raises RuntimeError when "after" names an unknown package or when a
# linguist-backed filetype also sets "extensions" or "filenames"; raises
# KeyError when the referenced linguist language does not exist.
def load_data
  packages = Hash[YAML.load_stream(File.read('packages.yaml'))
    .group_by { |a| a.fetch("name") }
    .map { |a, b| [a, b.first] }]

  # deps[x] lists the packages that declared `after: x`.
  deps = Hash.new { |h, k| h[k] = [] }

  packages.each_value do |package|
    # "after" may be a single name or a list of names.
    [package.fetch("after", [])].flatten.each do |name|
      packages[name] or raise "#{package["name"]} depends on unknown package: #{name}"
      deps[name] << package["name"]
    end
  end

  each_node = lambda { |&b| packages.keys.each(&b) }
  each_child = lambda { |n, &b| deps[n].each(&b) }

  languages = load_languages

  # Reason can have ocaml as interpreter but let's not depend on it...
  languages["Reason"]["interpreters"] -= ["ocaml"]

  packages = TSort.tsort(each_node, each_child).map { |a| packages[a] }

  packages.each do |package|
    package["filetypes"].each do |filetype|
      if filetype["linguist"]
        if filetype["extensions"]
          raise "#{package["name"]} #{filetype["name"]}: extensions can't be set when linguist is defined"
        end

        if filetype["filenames"]
          raise "#{package["name"]} #{filetype["name"]}: filenames can't be set when linguist is defined"
        end

        linguist = languages.fetch(filetype["linguist"])

        # linguist extensions carry a leading dot ("." + ext); strip it.
        linguist_extensions = (linguist["extensions"] || []).map { |e| e[1..-1] }

        # The previous `a | b - c` form parsed as `a | (b - c)`, so ignored
        # values were never removed from what linguist contributed.
        filetype["extensions"] = merge_linguist_values(filetype, "extensions", linguist_extensions)
        filetype["filenames"] = merge_linguist_values(filetype, "filenames", linguist["filenames"] || [])
        filetype["interpreters"] = merge_linguist_values(filetype, "interpreters", linguist["interpreters"] || [])
      else
        filetype["extensions"] ||= []
        filetype["filenames"] ||= []
        filetype["interpreters"] ||= []
      end
    end
  end

  heuristics = YAML.load_stream(File.read('heuristics.yaml'))

  [packages, transform_patterns(heuristics)]
end

# Runs the named methods concurrently, one thread per name, and returns their
# results in the order the names were given.
#
# procs - method names (as accepted by Object#method) that take no arguments.
#
# All threads are joined before any value is collected.
def parallel(*procs)
  workers = procs.map do |name|
    Thread.new { method(name).call }
  end
  workers.each(&:join)
  workers.map(&:value)
end

# Collects the strings found under selected keys of a nested structure.
#
# data  - any mix of Hash, Array, String and other values.
# keys  - collection of hash keys whose values should be collected.
# print - whether a String reached at this level is collected; set for the
#         value of a hash entry whose key is in +keys+ and carried unchanged
#         through arrays.
#
# Returns a flat Array of Strings in traversal order.
def read_strings(data, keys, print=false)
  case data
  when Hash
    data.flat_map { |name, value| read_strings(value, keys, keys.include?(name)) }
  when Array
    data.flat_map { |item| read_strings(item, keys, print) }
  when String
    print ? [data] : []
  else
    []
  end
end

# Converts regular-expression patterns to Vim regex syntax by running them
# through eregex.vim's ExtendedRegex2VimRegex in a headless vim.
#
# patterns - Array of pattern strings; they are fed to vim on stdin, one per
#            line.
#
# Returns an Array with one converted string per line vim wrote to stdout,
# with every literal `\b` replaced by the Vim word-boundary alternation.
#
# Open3.capture3 is used so that stdout and stderr are drained concurrently,
# all pipes are closed and the child is reaped; vim's exit status is ignored.
def patterns_to_vim_patterns(patterns)
  converted, _errors, _status = Open3.capture3(
    'vim', '-V', '--clean', '/dev/stdin', '-es',
    '-c', "echo expand('%:p:h') | source #{__dir__}/eregex.vim",
    '-c', "for line in range(0, line('$')) | call setline(line, ExtendedRegex2VimRegex(getline(line))) | endfor",
    '-c', ':wq! /dev/stdout',
    stdin_data: patterns.join("\n"),
    chdir: __dir__
  )

  converted.lines.map(&:chomp).map do |r|
    r.gsub('\b', '\(\<\|\>\)')
  end
end

# Yields every Hash found in a nested structure, parents before children.
#
# A Hash is yielded first and then each of its values is searched; an Array
# has each element searched; any other value is ignored.
def each_hash(data, &block)
  case data
  when Hash
    yield data
    data.each_value { |value| each_hash(value, &block) }
  when Array
    data.map { |item| each_hash(item, &block) }
  end
end


# Rewrites every "pattern" value inside +heuristics+ to its Vim-regex form.
#
# All patterns are gathered first so that a single patterns_to_vim_patterns
# call converts them, then each hash that has a "pattern" key is updated in
# place with the converted value.
#
# Returns the same (mutated) +heuristics+ object.
def transform_patterns(heuristics)
  found = []
  each_hash(heuristics) do |node|
    found << node["pattern"] if node.has_key?("pattern")
  end

  converted = patterns_to_vim_patterns(found)
  lookup = found.zip(converted).to_h

  each_hash(heuristics) do |node|
    next unless node.has_key?("pattern")
    node["pattern"] = lookup.fetch(node["pattern"])
  end

  heuristics
end

# Downloads linguist's languages.yml from BASE_URL and returns the parsed YAML.
def load_languages
  URI.open("#{BASE_URL}/lib/linguist/languages.yml") do |response|
    YAML.load(response.read)
  end
end

# Splits a remote spec of the form "repo[@branch][:path]" into its parts.
#
# Returns [repo, branch, path]; branch falls back to "master" and path is nil
# when the corresponding part is absent.
def parse_remote(remote)
  parts = remote.match(/(?<repo>[^@:]+)(?:@(?<branch>[^:]+))?(?::(?<path>.*))?/)
  repo, branch, path = parts.captures
  [repo, branch || "master", path]
end

# Appends the content of +src+ to +dest+, wrapped in a Vim `if ... endif`
# guard that skips the content when the package is listed in
# g:polyglot_disabled.
#
# package - package hash; only its "name" is read (KeyError if missing).
# src     - path of the file to copy from.
# dest    - path of the file to append to; parent directories are created.
#
# The destination is opened in append mode, so repeated calls with the same
# +dest+ accumulate one guarded section per call.
def copy_file(package, src, dest)
  FileUtils.mkdir_p(File.dirname(dest))
  name = package.fetch("name")

  # File.open rather than Kernel#open: both arguments are plain paths, and
  # File.open never treats a leading "|" as a command to run.
  File.open(src, "r") do |input|
    File.open(dest, "a+") do |output|
      if name == "jsx"
        # NOTE(review): this guard tests 'typescript' twice; one of the two was
        # presumably meant to be another package name. Left as-is pending
        # confirmation, since it changes the generated files.
        output << "if !exists('g:polyglot_disabled') || !(index(g:polyglot_disabled, 'typescript') != -1 || index(g:polyglot_disabled, 'typescript') != -1 || index(g:polyglot_disabled, 'jsx') != -1)\n\n"
      else
        output << "if !exists('g:polyglot_disabled') || index(g:polyglot_disabled, '#{name}') == -1\n\n"
      end
      IO.copy_stream(input, output)
      output << "\nendif\n"
    end
  end
end

# Fetches the GitHub tarball of every distinct package remote into tmp/<repo>.
#
# Remotes are processed in batches of 20, one thread per remote, and each
# batch is joined before the next one starts. A repository whose tmp/<repo>
# directory already exists is not downloaded again. progress is called once
# per remote.
#
# Raises RuntimeError when a package has no "remote".
def download(packages)
  remotes = packages.map { |pkg| pkg["remote"] || raise("No remote for: " + pkg["name"]) }.uniq

  remotes.each_slice(20) do |batch|
    workers = batch.map do |remote|
      Thread.new do
        repo, branch, _path = parse_remote(remote)
        dir = "tmp/" + repo
        unless File.exist?(dir)
          FileUtils.mkdir_p(dir)
          url = "https://codeload.github.com/#{repo}/tar.gz/#{branch}"
          `curl --silent -fL #{url} | tar -zx -C "#{dir}" --strip 1`
        end
        progress
      end
    end
    workers.each(&:join)
  end
end

# Number of progress ticks printed so far.
$i = 0

# Text that progress prints one character at a time.
LYRICS = "Never gonna give you up.  Never gonna let you down.  " \
         "Never gonna run around and desert you.  " \
         "Never gonna make you cry.  Never gonna say goodbye.  " \
         "Never gonna tell a lie and hurt you."

# Guards $i and the write to $stdout, since progress is called from threads.
$mutex = Mutex.new

# Prints one progress character: the next character of LYRICS, or "." once the
# text is exhausted, and advances the tick counter.
def progress
  $mutex.synchronize do
    tick = LYRICS[$i] || "."
    $stdout.write(tick)
    $i += 1
  end
end

# Indents every non-empty line of +str+ by +amount+ spaces.
#
# After indenting, whitespace in front of a line end (including a newline that
# directly precedes another line end, and the final newline) is removed, as
# are any lines consisting only of spaces.
def indent(str, amount)
  padding = " " * amount
  padded = str.gsub(/^(?!$)/, padding)
  trimmed = padded.gsub(/\s+$/, "")
  trimmed.gsub(/^ +\n/, "")
end

# Builds the Vim condition that matches `line` against a rule's pattern.
#
# rule["negative"] selects `!~` instead of `=~`; rule["ignore_case"] selects
# the `?` suffix instead of `#`. The pattern is placed in single quotes as-is.
def pattern_to_condition(rule)
  match_op = rule["negative"] ? "!" : "="
  case_flag = rule["ignore_case"] ? "?" : "#"

  "line #{match_op}~#{case_flag} '#{rule["pattern"]}'"
end

# Generates the Vim script body for one heuristic.
#
# Every variable named by a "set" key anywhere in +rules+ is initialised to 0
# up front (one `let` per distinct name, sorted), followed by the code that
# rule_to_code produces for +rules+.
def rules_to_code(rules)
  flags = []
  each_hash(rules) do |node|
    flags << node["set"] if node.has_key?("set")
  end

  code = ""
  unless flags.empty?
    declarations = flags.uniq.sort.map { |flag| "let #{flag} = 0" }
    code << declarations.join("\n") + "\n"
  end

  code << rule_to_code(rules)
end

# Recursively translates one heuristic rule (a Hash) into Vim script.
#
# The keys are handled in a fixed order; each handled key wraps the code
# generated for the rule with that key removed (via except):
#   "lines"    - bind `line` to line 1, or loop over the first N lines
#   "rules"    - concatenate the code of each sub-rule
#   "pattern"  - `if line =~ ...` guard (with "ignore_case" / "negative")
#   "if_set"   - `if <var>` guard (with "negative")
#   "set"      - `let <var> = 1`
#   "override" - `setf` to the value of a variable, if that variable exists
#   "filetype" - `setf <filetype> | return`
#
# Returns the generated code as a String ("" when nothing applies).
# Raises RuntimeError for a rule that still has unrecognised keys once the
# wrapping keys above have been consumed.
def rule_to_code(rule)
  if rule.has_key?("lines")
    if rule["lines"] == 1
      # Single-line check: no loop needed.
      return <<~EOS
        let line = getline(1)

        #{indent(rule_to_code(except(rule, "lines")), 0)}
      EOS
    else
      # Scan at most rule["lines"] lines, bounded by the buffer length.
      return <<~EOS
        for lnum in range(1, min([line("$"), #{rule["lines"]}]))
          let line = getline(lnum)

        #{indent(rule_to_code(except(rule, "lines")), 2)}
        endfor
      EOS
    end
  end

  if rule.has_key?("rules")
    return rule["rules"].map { |r| indent(rule_to_code(r), 0) }.join("\n")
  end

  if rule.has_key?("pattern")
    # "ignore_case" and "negative" modify the pattern test, so they are
    # consumed together with "pattern".
    return <<~EOS
      if #{pattern_to_condition(rule)}
      #{indent(rule_to_code(except(rule, "pattern", "ignore_case", "negative")), 2)}
      endif
    EOS
  end

  if rule.has_key?("if_set")
    return <<~EOS
      if #{rule["negative"] ? "!" : ""}#{rule["if_set"]}
      #{indent(rule_to_code(except(rule, "if_set", "negative")), 2)}
      endif
    EOS
  end

  if rule.has_key?("set")
    return <<~EOS
      let #{rule["set"]} = 1
      #{indent(rule_to_code(except(rule, "set")), 0)}
    EOS
  end

  # Only terminal keys may remain at this point. ("set" cannot actually be
  # present here because the branch above already returned for it.)
  if (rule.keys - ["filetype", "override", "set"]).size > 0
    raise "Unknown rule: #{JSON.generate(rule)}"
  end
  
  # NOTE(review): a rule carrying both "override" and "filetype" only emits
  # the override branch, because this returns without recursing.
  if rule.has_key?("override")
    return <<~EOS
      if exists("#{rule["override"]}")
        exe "setf " . #{rule["override"]} | return
      endif
    EOS
  end

  if rule.has_key?("filetype")
    return "setf #{rule["filetype"]} | return"
  end

  return ""
end

# Copies the runtime files of every package out of tmp/<repo> into the working
# directory and refreshes the package list in README.md.
#
# The top-level output directories are removed first. For each package the
# directories to copy are "dirs" (or the defaults), minus anything starting
# with an "ignored_dirs" entry, plus "extra_dirs". A directory is copied file
# by file using the package's "glob"/"globs" (or the default glob); a plain
# file is copied as-is. progress is called once per package.
#
# Raises RuntimeError when a vim/vim package defines neither "glob" nor
# "globs", and when a globbed path contains "samba".
def extract(packages)
  all_dirs = %w(syntax indent doc compiler autoload ftplugin ctags extras after)

  default_dirs = %w(
    syntax indent doc compiler autoload ftplugin ctags extras
    after/syntax after/indent after/ftplugin
  )

  FileUtils.rm_rf(all_dirs)

  links = []
  packages.each do |package|
    repo, branch, path = parse_remote(package["remote"])
    checkout = "tmp/" + repo

    wanted = package.fetch("dirs", default_dirs)
    skipped = package.fetch("ignored_dirs", [])
    unless skipped.empty?
      wanted = wanted.reject { |d| skipped.any? { |prefix| d.start_with?(prefix) } }
    end
    wanted |= package.fetch("extra_dirs", [])

    # Root inside the checkout that the package's files are relative to.
    subtree = "#{checkout}/#{path ? path + "/" : ""}"

    wanted.each do |subdir|
      subpath = "#{subtree}#{subdir}"
      if FileTest.directory?(subpath)
        if repo == "vim/vim" && (["glob", "globs"] & package.keys).size == 0
          raise "Package from vim/vim should define glob or globs: #{package["name"]}"
        end
        glob = package.fetch("glob", package.fetch('globs', '**/*.{vim,ctags,vital,txt}'))
        Dir.glob("#{subdir}/#{glob}", base: subtree).each do |relative|
          next unless File.file?("#{subtree}/#{relative}")
          raise package["name"] if relative.include?("samba")
          copy_file(package, "#{subtree}/#{relative}", relative)
        end
      elsif File.exist?(subpath)
        copy_file(package, subpath, subdir)
      end
    end

    # Link to the tree view only when a non-default branch or a sub-path is used.
    target = "https://github.com/#{repo}"
    if branch != "master" || path
      target += "/tree/#{branch}"
      target += "/#{path}" if path
    end
    links << "- [#{package["name"]}](#{target})"

    progress
  end

  updated = File.read('README.md')
    .gsub(
      %r{(?<=<!--Package Count-->).*?(?=<!--/Package Count-->)},
      links.size.to_s
    )
    .gsub(
      %r{(?<=<!--Language Packs-->).*?(?=<!--/Language Packs-->)}m,
      "\n" + links.sort.join("\n") + "\n"
    )

  File.write('README.md', updated)
end

# Generates the two Vim script files that perform filetype detection.
#
# packages   - ordered package hashes; each filetype is expected to carry
#              "name", "extensions", "filenames" and "interpreters".
# heuristics - heuristic hashes; each has an "extensions" list and the rule
#              keys understood by rules_to_code.
#
# Writes ftdetect/polyglot.vim (defaults, per-package autocommands and the
# catch-all shebang autocommand) and autoload/polyglot.vim (the shebang
# interpreter table and one polyglot#Detect<Ext>Filetype function per
# heuristic). Also prints "Probable missing ..." hints for extensions and
# filenames that detect_filetypes found but the package does not declare.
#
# Raises RuntimeError when a package has no "filetypes".
def generate_ftdetect(packages, heuristics)
  # Fixed preamble of ftdetect/polyglot.vim.
  output = <<~EOS
    " don't spam the user when Vim is started in Vi compatibility mode
    let s:cpo_save = &cpo
    set cpo&vim

    " Disable all native vim ftdetect
    if exists('g:polyglot_test')
      autocmd!
    endif

    let s:disabled_packages = {}

    if exists('g:polyglot_disabled')
      for pkg in g:polyglot_disabled
        let s:disabled_packages[pkg] = 1
      endfor
    endif

    function! s:SetDefault(name, value)
      if !exists(a:name)
        let {a:name} = a:value
      endif
    endfunction

    call s:SetDefault('g:markdown_enable_spell_checking', 0)
    call s:SetDefault('g:markdown_enable_input_abbreviations', 0)
    call s:SetDefault('g:markdown_enable_mappings', 0)

    " Enable jsx syntax by default
    call s:SetDefault('g:jsx_ext_required', 0)

    " Make csv loading faster
    call s:SetDefault('g:csv_start', 1)
    call s:SetDefault('g:csv_end', 2)

    " Disable json concealing by default
    call s:SetDefault('g:vim_json_syntax_conceal', 0)

    call s:SetDefault('g:filetype_euphoria', 'elixir')

    if !exists('g:python_highlight_all')
      call s:SetDefault('g:python_highlight_builtins', 1)
      call s:SetDefault('g:python_highlight_builtin_objs', 1)
      call s:SetDefault('g:python_highlight_builtin_types', 1)
      call s:SetDefault('g:python_highlight_builtin_funcs', 1)
      call s:SetDefault('g:python_highlight_builtin_funcs_kwarg', 1)
      call s:SetDefault('g:python_highlight_exceptions', 1)
      call s:SetDefault('g:python_highlight_string_formatting', 1)
      call s:SetDefault('g:python_highlight_string_format', 1)
      call s:SetDefault('g:python_highlight_string_templates', 1)
      call s:SetDefault('g:python_highlight_indent_errors', 1)
      call s:SetDefault('g:python_highlight_space_errors', 1)
      call s:SetDefault('g:python_highlight_doctests', 1)
      call s:SetDefault('g:python_highlight_func_calls', 1)
      call s:SetDefault('g:python_highlight_class_vars', 1)
      call s:SetDefault('g:python_highlight_operators', 1)
      call s:SetDefault('g:python_highlight_file_headers_as_comments', 1)
      call s:SetDefault('g:python_slow_sync', 1)
    endif
  EOS

  # extension => names of all filetypes that claim it.
  extensions = Hash.new { |h, k| h[k] = [] }

  for package in packages
    for filetype in package["filetypes"]
      for ext in filetype["extensions"]
        extensions[ext] << filetype["name"]
      end
    end
  end

  # Extensions claimed by more than one filetype. Only referenced by the
  # commented-out warning further down.
  ambiguous_extensions = extensions
    .select { |a, b| b.uniq.size > 1 }.keys.sort

  # What the downloaded plugins' own ftdetect scripts detect, keyed by filetype.
  expected_filetypes = detect_filetypes

  for package in packages
    name = package.fetch("name")

    # Everything for this package is skipped when the package is disabled.
    output << "if !has_key(s:disabled_packages, '#{name}')\n"

    filetypes = package["filetypes"] or raise "Unknown filetype for: #{package["name"]}"

    # Heuristics that cover at least one extension of this package.
    package_heuristics = []

    for filetype in filetypes
      # From here on `name` is the filetype name, not the package name.
      name = filetype.fetch("name")
      # NOTE(review): `syntax` is assigned but not read anywhere below.
      syntax = filetype["syntax"] ? " | set syntax=#{filetype["syntax"]}" : ""

      # Command run by the autocommands; later assignments take priority:
      # plain setf < explicit ft+syntax < "custom_set".
      set_command = "setf #{name}"

      if filetype["syntax"]
        set_command = "if !did_filetype() | set ft=#{name} syntax=#{filetype["syntax"]} | endif"
      end

      if filetype["custom_set"]
        set_command = filetype["custom_set"]
      end

      # Rebinds `extensions`: from here on it is this filetype's list, not the
      # extension => filetypes hash built above.
      extensions = filetype["extensions"]
      filenames = filetype["filenames"]

      # Report patterns the upstream ftdetect scripts handle but this filetype
      # does not declare (and has not explicitly ignored).
      if expected_filetypes[name] && !filetype["syntax"]
        for e in expected_filetypes.fetch(name)[:extensions] - extensions - expand_all(filetype.fetch("ignored_extensions", []))
          puts "Probable missing extension for #{name}: #{e}"
        end

        for e in expected_filetypes.fetch(name)[:filenames] - expand_all(filenames).flat_map { |e| [e, e.gsub(/^\./, '')] } - expand_all(filetype.fetch("ignored_filenames", [])) - ['*']
          puts "Probable missing filename for #{name}: #{e}"
        end
      end

      for extension in extensions.sort
        outer_filetype = filetype["outer_filetype"]
        if outer_filetype
          # For names like foo.bar.<extension>: trigger detection on the name
          # with the last extension stripped, then run the outer_filetype command.
          output << "  au BufNewFile *.*.#{extension} execute \"do BufNewFile filetypedetect \" . expand(\"<afile>:r\") | #{outer_filetype}\n"
          output << "  au BufReadPre *.*.#{extension} execute \"do BufRead filetypedetect \" . expand(\"<afile>:r\") | #{outer_filetype}\n"
        end

        # An extension covered by a heuristic is handled by that heuristic's
        # detect function (emitted after this loop) instead of a direct setf.
        heuristic = heuristics.find { |h| h["extensions"].include?(extension) }
        if heuristic
          package_heuristics << heuristic
        else
          # if ambiguous_extensions.include?(extension)
          #   puts "Ambiguous extension without heuristic: #{extension} => #{filetype["name"]}"
          # end
          #
          output << "  au BufNewFile,BufRead *.#{extension} #{set_command}\n"
        end
      end

      for filename in filenames.sort
        # A leading dot becomes the glob "{.,}" so the name matches with or
        # without the dot.
        if filename[0] == "."
          filename = "{.,}" + filename[1..]
        end
        output << "  au BufNewFile,BufRead #{filename} #{set_command}\n"
      end
    end

    # One autocommand per distinct heuristic; the function name is derived
    # from the heuristic's first extension.
    for heuristic in package_heuristics.uniq
      extensions = heuristic["extensions"].map { |e| "*.#{e}" }
      output << "  au! BufNewFile,BufRead #{extensions.join(",")} call polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype()\n"
    end

    output << "endif\n\n"
  end

  # Catch-all autocommand and epilogue of ftdetect/polyglot.vim.
  output << <<~EOS
    au BufNewFile,BufRead,StdinReadPost * 
      \\ if !did_filetype() && expand("<amatch>") !~ g:ft_ignore_pat 
      \\ | call polyglot#Heuristics() | endif

    " restore Vi compatibility settings
    let &cpo = s:cpo_save
    unlet s:cpo_save
  EOS
 
  File.write('ftdetect/polyglot.vim', output)

  # Second file: autoload/polyglot.vim. `output` is restarted from scratch.
  output = <<~EOS
    " Line continuation is used here, remove 'C' from 'cpoptions'
    let s:cpo_save = &cpo
    set cpo&vim

    func! polyglot#Heuristics()
      " Try to detect filetype from shebang
      let l:filetype = polyglot#Shebang()
      if l:filetype != ""
        exec "setf " . l:filetype
        return
      endif
    endfunc
    
    let s:interpreters = {
    EOS

    # One dictionary entry per interpreter, filetypes sorted by name.
    for filetype in packages.flat_map { |p| p.fetch("filetypes", []) }.sort_by { |a| a["name"] }
      for interpreter in filetype["interpreters"]
        output << "  \\ '#{interpreter}': '#{filetype["name"]}',\n"
      end
    end

  # Closes the s:interpreters dictionary and defines polyglot#Shebang.
  output << <<~EOS
      \\ }

    let s:r_hashbang = '^#!\\s*\\(\\S\\+\\)\\s*\\(.*\\)\\s*'
    let s:r_envflag = '%(\\S\\+=\\S\\+\\|-[iS]\\|--ignore-environment\\|--split-string\\)'
    let s:r_env = '^\\%(\\' . s:r_envflag . '\\s\\+\\)*\\(\\S\\+\\)'

    func! polyglot#Shebang()
      let l:line1 = getline(1)

      if l:line1 !~# "^#!"
        return
      endif

      let l:pathrest = matchlist(l:line1, s:r_hashbang)

      if len(l:pathrest) == 0
        return 
      endif

      let [_, l:path, l:rest; __] = l:pathrest

      let l:script = split(l:path, "/")[-1]

      if l:script == "env"
        let l:argspath = matchlist(l:rest, s:r_env)
        if len(l:argspath) == 0
          return
        endif

        let l:script = l:argspath[1]
      endif

      if has_key(s:interpreters, l:script)
        return s:interpreters[l:script]
      endif

      for interpreter in keys(s:interpreters)
        if l:script =~# '^' . interpreter
          return s:interpreters[interpreter]
        endif
      endfor
    endfunc

  EOS

  # One detect function per heuristic, named after its first extension to
  # match the autocommands emitted into ftdetect/polyglot.vim.
  for heuristic in heuristics
    output << <<~EOS
      func! polyglot#Detect#{camelize(heuristic["extensions"].first)}Filetype()
      #{indent(rules_to_code(heuristic), 2)}
      endfunc

    EOS
  end

  output << <<~EOS
    " Restore 'cpoptions'
    let &cpo = s:cpo_save
    unlet s:cpo_save
  EOS

  File.write('autoload/polyglot.vim', output)
end

# Writes scripts/test_filetypes.vim: a TestFiletype helper followed by one
# `call TestFiletype('<name>')` line for every filetype of every package, in
# package order. Packages without "filetypes" contribute nothing.
def generate_tests(packages)
  harness = <<~EOS
    function! TestFiletype(filetype)
      try
        enew
        exec 'set ft=' . a:filetype
      catch
        echo 'Error loading filetype ' . a:filetype  . ':'
        echo v:exception
        echo v:throwpoint
        exec ':cq!'
      endtry
    endfunction

  EOS

  calls = packages
    .flat_map { |package| package.fetch("filetypes", []) }
    .map { |filetype| "call TestFiletype('#{filetype["name"]}')\n" }

  File.write('scripts/test_filetypes.vim', harness + calls.join)
end

# Expands shell-style brace groups, e.g. "a{b,c}d" => ["abd", "acd"].
#
# The input is split on whitespace and each word is expanded independently;
# the results are returned as one flat Array. Nested groups are handled by
# two mutually recursive lambdas.
def brace_expansion(s)
  # Declared up front so that expand_block can close over the variable before
  # the sequence parser is assigned to it.
  expand_sequence = nil

  # Expands one scanned token: strips a surrounding pair of braces, splits the
  # content into its comma-separated elements and recursively expands every
  # element that still contains a brace. Receives a `scan` result row, hence
  # the [0].
  expand_block = lambda do |captures|
    inner = captures[0].gsub(/^{(.*)}$/) { $1 }
    elements = inner.scan(/(({(\g<1>|,)*}|[^,{}]|(?<=,|^)(?=,|$))+)/)
    expanded = elements.map do |groups|
      element = groups[0]
      element.include?("{") ? expand_sequence[element] : element
    end
    expanded.flatten
  end

  # Expands a sequence of adjacent parts such as a{b,c}{d,e}: every part is
  # turned into its list of alternatives and the lists are combined with a
  # cartesian product.
  expand_sequence = lambda do |text|
    parts = text.scan(/({(\g<1>)*}|[^{} ]+)/)
    choices = parts.map(&expand_block)
    leading = choices.shift
    leading.product(*choices).map(&:join)
  end

  s.split.map(&expand_sequence).flatten
end

# Expands glob-style character sets, e.g. "[Mm]akefile" =>
# ["Makefile", "makefile"].
#
# Every "[...]" group contributes one alternative per character inside the
# brackets (characters are taken literally; ranges such as a-z are not
# interpreted), and the alternatives of all groups are combined in order.
#
# Returns an Array of Strings. A string without "[" - or one from which no
# segment can be scanned, such as a lone "[" - is returned as the only element.
def square_expansion(s)
  return [s] unless s.include?('[')

  segments = s.scan(/(\[[^\]]+\]|[^\[]+)/).flatten.map do |token|
    token[0] == "[" ? token[1..-2].split("") : [token]
  end
  return [s] if segments.empty?

  # product(*rest) also covers the single-segment case ("[ab]"), where folding
  # the segments pairwise would hand back bare strings instead of arrays.
  first, *rest = segments
  first.product(*rest).map(&:join)
end

# Splits a pattern list on commas that are outside brace groups, e.g.
# "a,b{c,d},e" => ["a", "b{c,d}", "e"].
#
# The string is scanned into "{...}" groups and the text between them. Text
# outside braces is split on "," (keeping empty fields); a brace group is kept
# whole. Each token continues the last piece collected so far, and any further
# comma-separated fields start new pieces.
def comma_expanson(s)
  pieces = []

  s.scan(/{[^{]+}|[^{]+/).each do |token|
    fields = token[0] == "{" ? [token] : token.split(",", -1)

    if pieces.empty?
      pieces = fields
    else
      head, *tail = fields
      pieces = pieces[0..-2] + [pieces[-1] + head] + tail
    end
  end

  pieces
end

# Fully expands a pattern (or an Array of patterns) into concrete strings.
#
# A single pattern goes through comma_expanson, then brace_expansion, then
# square_expansion; an Array is expanded element by element. The result is a
# flat Array in expansion order.
def expand_all(pattern)
  return pattern.flat_map { |item| expand_all(item) } if pattern.is_a?(Array)

  comma_expanson(pattern)
    .flat_map { |piece| brace_expansion(piece) }
    .flat_map { |candidate| square_expansion(candidate) }
end

# Scans the ftdetect scripts of the downloaded packages (tmp/**/ftdetect/*.vim)
# for autocommands that set a filetype.
#
# Returns a Hash keyed by filetype name; each value is
#   { extensions: [...], filenames: [...] }
# where extensions come from patterns of the form "*.ext" (without a slash)
# with the leading "*." removed, and filenames are all remaining patterns,
# expanded once more with expand_all.
def detect_filetypes
  detected = Dir['tmp/**/ftdetect/*.vim'].flat_map do |file|
    # Drop bare `au` / `autocmd` / `au!` lines so they are not mistaken for rules.
    contents = File.read(file).gsub(/^\s*au(tocmd)?!?\s*$/, '')
    rules = contents.scan(/^\s*(?:au!|au|au[^g][^ ]*) +(?:\S+)\s+(\S+)[\s\\]+([^\n]+)/)

    # Reduce the various ways of setting a filetype to the form "setf <name>".
    normalized = rules.map do |glob, command|
      setf = command
        .gsub(/call (?:s:setf|s:StarSetf)\('([^']+)'\)/i, 'setf \1')
        .gsub(/set(?:local)?\s+(?:ft|filetype)=(\S+)/, 'setf \1')
        .gsub(/setf\S*/, 'setf')
        .gsub(/.*setf\s+(\S+).*/, 'setf \1')
      [glob, setf]
    end

    normalized
      .select { |_glob, setf| setf.match(/setf \S+/) }
      .map { |glob, setf| [glob, setf.split(" ")[1]] }
  end

  by_filetype = detected
    .flat_map { |glob, filetype| expand_all(glob).map { |expanded| [filetype, expanded] } }
    .group_by { |filetype, _expanded| filetype }

  by_filetype.map do |filetype, entries|
    globs = entries.map { |_filetype, expanded| expanded }
    [filetype, {
      extensions: globs.select { |g| g.match(/^\*\.[^\/]+$/) }.map { |g| g.strip[2..] },
      filenames: expand_all(globs.select { |g| !g.match(/^\*\.[^\/]+$/) })
    }]
  end.to_h
end

# Script entry point: runs the full build when the file is executed directly.
if __FILE__ == $0
  # The download cache in tmp/ is wiped before and after the build unless the
  # DEV environment variable is set.
  FileUtils.rm_rf("tmp") unless ENV["DEV"]

  packages, heuristics = load_data
  download(packages)
  extract(packages)
  generate_ftdetect(packages, heuristics)
  generate_tests(packages)
  puts(" Bye! Have a wonderful time!")

  FileUtils.rm_rf("tmp") unless ENV["DEV"]
end