From 548be074597207ef47ff4b037a51f4baaa173d50 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 17 Nov 2025 12:08:54 +0100 Subject: [PATCH] Remove `bin/prism encoding` I don't think there will be many more encodings added to ruby at this time --- bin/prism | 76 ------------------------------------------------------- 1 file changed, 76 deletions(-) diff --git a/bin/prism b/bin/prism index b6301b3dd9..1401a89e27 100755 --- a/bin/prism +++ b/bin/prism @@ -12,7 +12,6 @@ module Prism when "bundle" then bundle(argv) when "console" then console when "dot" then dot(argv) - when "encoding" then encoding(argv) when "error" then error(argv) when "lex" then lex(argv) when "lex_compat" then lex_compat(argv) @@ -29,7 +28,6 @@ module Prism bin/prism bundle [...] bin/prism console bin/prism dot [source] - bin/prism encoding [encoding] bin/prism error [name] [source] bin/prism lex [source] bin/prism lex_compat [source] @@ -148,20 +146,6 @@ module Prism ) end - # bin/prism encoding [encoding] - def encoding(argv) - found = Encoding.find(argv.shift) - found = Encoding::ASCII_8BIT if found == Encoding::US_ASCII - - if !found.ascii_compatible? - warn("Encoding `#{found.name}' is not ASCII compatible") - exit(1) - end - - lookup_table(found) - unicode_lists(found) if found == Encoding::UTF_8 || found == Encoding::UTF8_MAC - end - # bin/prism error [name] [source] def error(argv) name = argv.shift @@ -377,66 +361,6 @@ module Prism pp parser_tokens end - # Generate the list of values that will be used in a lookup table for a - # given encoding. - def lookup_table_values(encoding) - (0...256).each_slice(16).map.with_index do |slice, row_index| - slice.map do |codepoint| - character = codepoint.chr(encoding) - - values = 0 - values |= (1 << 0) if character.match?(/[[:alpha:]]/) - values |= (1 << 1) if character.match?(/[[:alnum:]]/) - values |= (1 << 2) if character.match?(/[[:upper:]]/) - values - rescue RangeError - 0 - end - end - end - - # Generate a lookup table for a given encoding. - def lookup_table(encoding) - encoding_values = lookup_table_values(encoding) - if encoding_values == lookup_table_values(Encoding::US_ASCII) - puts "static const uint8_t pm_encoding_ascii_table[256] = {" - else - puts "static const uint8_t pm_encoding_#{encoding.name.downcase}_table[256] = {" - end - - puts "// #{(0...16).map { |value| value.to_s(16).upcase }.join(" ")}" - encoding_values.each_with_index do |row, row_index| - puts " #{row.join(", ")}, // #{row_index.to_s(16).upcase}x" - end - puts "};" - end - - # Generate lists of unicode codepoints for a given encoding. - def unicode_lists(encoding) - encoding = Encoding::UTF_8 - range = (0x100..0xD7FF).to_a.concat((0xE000..0x10FFFF).to_a) - - { alpha: /[[:alpha:]]/, alnum: /[[:alnum:]]/, isupper: /[[:upper:]]/ }.map do |kind, regex| - codepoints = range.select { |codepoint| codepoint.chr(encoding).match?(regex) } - - previous = nil - groups = - codepoints.slice_before do |codepoint| - (!previous.nil? && (codepoint - previous) != 1).tap { previous = codepoint } - end - - matched = - groups.flat_map do |group| - ["0x#{group.first.to_s(16).upcase}", "0x#{group.last.to_s(16).upcase}"] - end - - puts "\n#define UNICODE_#{kind.upcase}_CODEPOINTS_LENGTH #{matched.length}" - puts "unicode_codepoint_t unicode_#{kind}_codepoints[UNICODE_#{kind.upcase}_CODEPOINTS_LENGTH] = {" - matched.each_slice(2) { |slice| puts " #{slice.join(", ")}," } - puts "};" - end - end - # Parse the source code indicated by the command-line arguments. def parse_source(argv) command_line = +""