diff --git a/bignum.c b/bignum.c index 8d3eac8e0a9173..ed53d75149085d 100644 --- a/bignum.c +++ b/bignum.c @@ -4515,7 +4515,7 @@ rb_str2big_gmp(VALUE arg, int base, int badcheck) #if HAVE_LONG_LONG -static VALUE +VALUE rb_ull2big(unsigned LONG_LONG n) { long i; @@ -4537,7 +4537,7 @@ rb_ull2big(unsigned LONG_LONG n) return big; } -static VALUE +VALUE rb_ll2big(LONG_LONG n) { long neg = 0; @@ -4575,7 +4575,7 @@ rb_ll2inum(LONG_LONG n) #endif /* HAVE_LONG_LONG */ #ifdef HAVE_INT128_T -static VALUE +VALUE rb_uint128t2big(uint128_t n) { long i; diff --git a/doc/language/globals.md b/doc/language/globals.md index b22363f1da1b71..716e62a7df87d5 100644 --- a/doc/language/globals.md +++ b/doc/language/globals.md @@ -176,7 +176,7 @@ No \English. ### `$/` (Input Record Separator) An input record separator, initially newline. -Set by the command-line option `-0`. +Set by the [command-line option `-0`]. Setting to non-nil value by other than the command-line option is deprecated. @@ -188,7 +188,9 @@ Aliased as `$-0`. ### `$\` (Output Record Separator) An output record separator, initially `nil`. -Copied from `$/` when the command-line option `-l` is given. + +Copied from `$/` when the [command-line option `-l`] is +given. Setting to non-nil value by other than the command-line option is deprecated. @@ -328,8 +330,8 @@ The value returned by method ARGF.filename. ### `$DEBUG` -Initially `true` if command-line option `-d` or `--debug` is given, -otherwise initially `false`; +Initially `true` if [command-line option `-d`] or +[`--debug`][command-line option `-d`] is given, otherwise initially `false`; may be set to either value in the running program. When `true`, prints each raised exception to `$stderr`. @@ -338,8 +340,8 @@ Aliased as `$-d`. ### `$VERBOSE` -Initially `true` if command-line option `-v` or `-w` is given, -otherwise initially `false`; +Initially `true` if [command-line option `-v`] or +[`-w`][command-line option `-w`] is given, otherwise initially `false`; may be set to either value, or to `nil`, in the running program. When `true`, enables Ruby warnings. @@ -353,7 +355,7 @@ Aliased as `$-v` and `$-w`. ### `$-F` The default field separator in String#split; must be a String or a -Regexp, and can be set with command-line option `-F`. +Regexp, and can be set with [command-line option `-F`]. Setting to non-nil value by other than the command-line option is deprecated. @@ -362,28 +364,28 @@ Aliased as `$;`. ### `$-a` -Whether command-line option `-a` was given; read-only. +Whether [command-line option `-a`] was given; read-only. ### `$-i` -Contains the extension given with command-line option `-i`, +Contains the extension given with [command-line option `-i`], or `nil` if none. An alias of ARGF.inplace_mode. ### `$-l` -Whether command-line option `-l` was set; read-only. +Whether [command-line option `-l`] was set; read-only. ### `$-p` -Whether command-line option `-p` was given; read-only. +Whether [command-line option `-p`] was given; read-only. ### `$F` -If the command line option `-a` is given, the array obtained by -splitting `$_` by `$-F` is assigned at the start of each `-l`/`-p` -loop. +If the [command-line option `-a`] is given, the array +obtained by splitting `$_` by `$-F` is assigned at the start of each +`-l`/`-p` loop. ## Deprecated @@ -594,3 +596,14 @@ Output: "Bar\n" "Baz\n" ``` + + +[command-line option `-0`]: rdoc-ref:language/options.md@0-3A+Set+-24-2F+-28Input+Record+Separator-29 +[command-line option `-F`]: rdoc-ref:language/options.md@F-3A+Set+Input+Field+Separator +[command-line option `-a`]: rdoc-ref:language/options.md@a-3A+Split+Input+Lines+into+Fields +[command-line option `-d`]: rdoc-ref:language/options.md@d-3A+Set+-24DEBUG+to+true +[command-line option `-i`]: rdoc-ref:language/options.md@i-3A+Set+ARGF+In-Place+Mode +[command-line option `-l`]: rdoc-ref:language/options.md@l-3A+Set+Output+Record+Separator-3B+Chop+Lines +[command-line option `-p`]: rdoc-ref:language/options.md@p-3A+-n-2C+with+Printing +[command-line option `-v`]: rdoc-ref:language/options.md@v-3A+Print+Version-3B+Set+-24VERBOSE +[command-line option `-w`]: rdoc-ref:language/options.md@w-3A+Synonym+for+-W1 diff --git a/doc/string/aref.rdoc b/doc/string/aref.rdoc index bd33c4c0504523..ee4c3d33d4e655 100644 --- a/doc/string/aref.rdoc +++ b/doc/string/aref.rdoc @@ -8,7 +8,7 @@ returns the 1-character substring found in self at character offset index: 'hello'[0] # => "h" 'hello'[4] # => "o" 'hello'[5] # => nil - 'тест'[2] # => "с" + 'Привет'[2] # => "и" 'こんにちは'[4] # => "は" With negative integer argument +index+ given, @@ -92,7 +92,7 @@ returns the matching substring of +self+, if found: 'hello'['ell'] # => "ell" 'hello'[''] # => "" 'hello'['nosuch'] # => nil - 'тест'['ес'] # => "ес" + 'Привет'['ив'] # => "ив" 'こんにちは'['んにち'] # => "んにち" Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/capitalize.rdoc b/doc/string/capitalize.rdoc new file mode 100644 index 00000000000000..9b26c0215342db --- /dev/null +++ b/doc/string/capitalize.rdoc @@ -0,0 +1,28 @@ +Returns a string containing the characters in +self+, +each with possibly changed case: + +- The first character made uppercase. +- All other characters are made lowercase. + +Examples: + + 'hello'.capitalize # => "Hello" + 'HELLO'.capitalize # => "Hello" + 'straße'.capitalize # => "Straße" # Lowercase 'ß' not changed. + 'STRAẞE'.capitalize # => "Straße" # Uppercase 'ẞ' downcased to 'ß'. + 'привет'.capitalize # => "Привет" + 'ПРИВЕТ'.capitalize # => "Привет" + +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.capitalize == s # => true + s = 'こんにちは' + s.capitalize == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/downcase.rdoc b/doc/string/downcase.rdoc index 0fb67daaebe634..d5fffa037b89d0 100644 --- a/doc/string/downcase.rdoc +++ b/doc/string/downcase.rdoc @@ -1,12 +1,20 @@ Returns a new string containing the downcased characters in +self+: - 'Hello, World!'.downcase # => "hello, world!" - 'ТЕСТ'.downcase # => "тест" - 'よろしくお願いします'.downcase # => "よろしくお願いします" + 'HELLO'.downcase # => "hello" + 'STRAẞE'.downcase # => "straße" + 'ПРИВЕТ'.downcase # => "привет" + 'RubyGems.org'.downcase # => "rubygems.org" -Some characters do not have upcased and downcased versions. +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: -The casing may be affected by the given +mapping+; -see {Case Mapping}[rdoc-ref:case_mapping.rdoc]. + s = '1, 2, 3, ...' + s.downcase == s # => true + s = 'こんにちは' + s.downcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/swapcase.rdoc b/doc/string/swapcase.rdoc index 93859ec8237fcc..916e711b7ec7b2 100644 --- a/doc/string/swapcase.rdoc +++ b/doc/string/swapcase.rdoc @@ -5,15 +5,28 @@ Returns a string containing the characters in +self+, with cases reversed: Examples: - 'Hello World!'.swapcase # => "hELLO wORLD!" - 'тест'.swapcase # => "ТЕСТ" + 'Hello'.swapcase # => "hELLO" + 'Straße'.swapcase # => "sTRASSE" + 'Привет'.swapcase # => "пРИВЕТ" + 'RubyGems.org'.swapcase # => "rUBYgEMS.ORG" -Some characters (and even character sets) do not have casing: +The sizes of +self+ and the upcased result may differ: - '12345'.swapcase # => "12345" - 'こんにちは'.swapcase # => "こんにちは" + s = 'Straße' + s.size # => 6 + s.swapcase # => "sTRASSE" + s.swapcase.size # => 7 -The casing may be affected by the given +mapping+; -see {Case Mapping}[rdoc-ref:case_mapping.rdoc]. +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: + + s = '1, 2, 3, ...' + s.swapcase == s # => true + s = 'こんにちは' + s.swapcase == s # => true + +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/string/upcase.rdoc b/doc/string/upcase.rdoc index 5a9cce217f7972..ad859e89736687 100644 --- a/doc/string/upcase.rdoc +++ b/doc/string/upcase.rdoc @@ -1,6 +1,9 @@ Returns a new string containing the upcased characters in +self+: - 'Hello, World!'.upcase # => "HELLO, WORLD!" + 'hello'.upcase # => "HELLO" + 'straße'.upcase # => "STRASSE" + 'привет'.upcase # => "ПРИВЕТ" + 'RubyGems.org'.upcase # => "RUBYGEMS.ORG" The sizes of +self+ and the upcased result may differ: @@ -9,12 +12,16 @@ The sizes of +self+ and the upcased result may differ: s.upcase # => "STRASSE" s.upcase.size # => 7 -Some characters (and some character sets) do not have upcased and downcased versions: +Some characters (and some character sets) do not have upcase and downcase versions; +see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: - s = 'よろしくお願いします' + s = '1, 2, 3, ...' + s.upcase == s # => true + s = 'こんにちは' s.upcase == s # => true -The casing may be affected by the given +mapping+; -see {Case Mapping}[rdoc-ref:case_mapping.rdoc]. +The casing is affected by the given +mapping+, +which may be +:ascii+, +:fold+, or +:turkic+; +see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/doc/stringio/stringio.md b/doc/stringio/stringio.md index 345fc5f20707df..aebfa6d6f1f4c2 100644 --- a/doc/stringio/stringio.md +++ b/doc/stringio/stringio.md @@ -1,18 +1,39 @@ -\IO streams for strings, with access similar to -{IO}[rdoc-ref:IO]; -see {IO}[rdoc-ref:IO]. +\Class \StringIO supports accessing a string as a stream, +similar in some ways to [class IO][io class]. -### About the Examples +You can create a \StringIO instance using: + +- StringIO.new: returns a new \StringIO object containing the given string. +- StringIO.open: passes a new \StringIO object to the given block. + +Like an \IO stream, a \StringIO stream has certain properties: + +- **Read/write mode**: whether the stream may be read, written, appended to, etc.; + see [Read/Write Mode][read/write mode]. +- **Data mode**: text-only or binary; + see [Data Mode][data mode]. +- **Encodings**: internal and external encodings; + see [Encodings][encodings]. +- **Position**: where in the stream the next read or write is to occur; + see [Position][position]. +- **Line number**: a special, line-oriented, "position" (different from the position mentioned above); + see [Line Number][line number]. +- **Open/closed**: whether the stream is open or closed, for reading or writing. + see [Open/Closed Streams][open/closed streams]. +- **BOM**: byte mark order; + see [Byte Order Mark][bom (byte order mark)]. + +## About the Examples Examples on this page assume that \StringIO has been required: -``` +```ruby require 'stringio' ``` -And that these constants have been defined: +And that this constant has been defined: -``` +```ruby TEXT = <'r': read-only | No | Anywhere | Error | +| 'w': write-only | Yes | Error | Anywhere | +| 'a': append-only | No | Error | End only | +| 'r+': read/write | No | Anywhere | Anywhere | +| 'w+': read-write | Yes | Anywhere | Anywhere | +| 'a+': read/append | No | Anywhere | End only | + +Each section below describes a read/write mode. + +Any of the modes may be given as a string or as file constants; +example: + +```ruby +strio = StringIO.new('foo', 'a') +strio = StringIO.new('foo', File::WRONLY | File::APPEND) +``` + +#### `'r'`: Read-Only + +Mode specified as one of: + +- String: `'r'`. +- Constant: `File::RDONLY`. + +Initial state: + +```ruby +strio = StringIO.new('foobarbaz', 'r') +strio.pos # => 0 # Beginning-of-stream. +strio.string # => "foobarbaz" # Not cleared. +``` + +May be read anywhere: + +```ruby +strio.gets(3) # => "foo" +strio.gets(3) # => "bar" +strio.pos = 9 +strio.gets(3) # => nil +``` + +May not be written: + +```ruby +strio.write('foo') # Raises IOError: not opened for writing +``` + +#### `'w'`: Write-Only + +Mode specified as one of: + +- String: `'w'`. +- Constant: `File::WRONLY`. + +Initial state: + +```ruby +strio = StringIO.new('foo', 'w') +strio.pos # => 0 # Beginning of stream. +strio.string # => "" # Initially cleared. +``` + +May be written anywhere (even past end-of-stream): + +```ruby +strio.write('foobar') +strio.string # => "foobar" +strio.rewind +strio.write('FOO') +strio.string # => "FOObar" +strio.pos = 3 +strio.write('BAR') +strio.string # => "FOOBAR" +strio.pos = 9 +strio.write('baz') +strio.string # => "FOOBAR\u0000\u0000\u0000baz" # Null-padded. +``` + +May not be read: + +```ruby +strio.read # Raises IOError: not opened for reading +``` + +#### `'a'`: Append-Only + +Mode specified as one of: + +- String: `'a'`. +- Constant: `File::WRONLY | File::APPEND`. + +Initial state: + +```ruby +strio = StringIO.new('foo', 'a') +strio.pos # => 0 # Beginning-of-stream. +strio.string # => "foo" # Not cleared. +``` + +May be written only at the end; position does not affect writing: + +```ruby +strio.write('bar') +strio.string # => "foobar" +strio.write('baz') +strio.string # => "foobarbaz" +strio.pos = 400 +strio.write('bat') +strio.string # => "foobarbazbat" +``` + +May not be read: + +```ruby +strio.gets # Raises IOError: not opened for reading +``` + +#### `'r+'`: Read/Write + +Mode specified as one of: + +- String: `'r+'`. +- Constant: `File::RDRW`. + +Initial state: + +```ruby +strio = StringIO.new('foobar', 'r+') +strio.pos # => 0 # Beginning-of-stream. +strio.string # => "foobar" # Not cleared. +``` + +May be written anywhere (even past end-of-stream): + +```ruby +strio.write('FOO') +strio.string # => "FOObar" +strio.write('BAR') +strio.string # => "FOOBAR" +strio.write('BAZ') +strio.string # => "FOOBARBAZ" +strio.pos = 12 +strio.write('BAT') +strio.string # => "FOOBARBAZ\u0000\u0000\u0000BAT" # Null padded. +``` + +May be read anywhere: + +```ruby +strio.pos = 0 +strio.gets(3) # => "FOO" +strio.pos = 6 +strio.gets(3) # => "BAZ" +strio.pos = 400 +strio.gets(3) # => nil +``` + +#### `'w+'`: Read/Write (Initially Clear) + +Mode specified as one of: + +- String: `'w+'`. +- Constant: `File::RDWR | File::TRUNC`. + +Initial state: + +```ruby +strio = StringIO.new('foo', 'w+') +strio.pos # => 0 # Beginning-of-stream. +strio.string # => "" # Truncated. +``` + +May be written anywhere (even past end-of-stream): + +```ruby +strio.write('foobar') +strio.string # => "foobar" +strio.rewind +strio.write('FOO') +strio.string # => "FOObar" +strio.write('BAR') +strio.string # => "FOOBAR" +strio.write('BAZ') +strio.string # => "FOOBARBAZ" +strio.pos = 12 +strio.write('BAT') +strio.string # => "FOOBARBAZ\u0000\u0000\u0000BAT" # Null-padded. +``` + +May be read anywhere: + +```ruby +strio.rewind +strio.gets(3) # => "FOO" +strio.gets(3) # => "BAR" +strio.pos = 12 +strio.gets(3) # => "BAT" +strio.pos = 400 +strio.gets(3) # => nil +``` + +#### `'a+'`: Read/Append + +Mode specified as one of: + +- String: `'a+'`. +- Constant: `File::RDWR | File::APPEND`. + +Initial state: + +```ruby +strio = StringIO.new('foo', 'a+') +strio.pos # => 0 # Beginning-of-stream. +strio.string # => "foo" # Not cleared. +``` + +May be written only at the end; #rewind; position does not affect writing: + +```ruby +strio.write('bar') +strio.string # => "foobar" +strio.write('baz') +strio.string # => "foobarbaz" +strio.pos = 400 +strio.write('bat') +strio.string # => "foobarbazbat" +``` + +May be read anywhere: + +```ruby +strio.rewind +strio.gets(3) # => "foo" +strio.gets(3) # => "bar" +strio.pos = 9 +strio.gets(3) # => "bat" +strio.pos = 400 +strio.gets(3) # => nil +``` + +### Data Mode + +To specify whether the stream is to be treated as text or as binary data, +either of the following may be suffixed to any of the string read/write modes above: + +- `'t'`: Text; + initializes the encoding as Encoding::UTF_8. +- `'b'`: Binary; + initializes the encoding as Encoding::ASCII_8BIT. + +If neither is given, the stream defaults to text data. + +Examples: + +```ruby +strio = StringIO.new('foo', 'rt') +strio.external_encoding # => # +data = "\u9990\u9991\u9992\u9993\u9994" +strio = StringIO.new(data, 'rb') +strio.external_encoding # => # +``` + +When the data mode is specified, the read/write mode may not be omitted: + +```ruby +StringIO.new(data, 'b') # Raises ArgumentError: invalid access mode b +``` + +A text stream may be changed to binary by calling instance method #binmode; +a binary stream may not be changed to text. + +### Encodings + +A stream has an encoding; see the [encodings document][encodings document]. + +The initial encoding for a new or re-opened stream depends on its [data mode][data mode]: -RUSSIAN = 'тест' -DATA = "\u9990\u9991\u9992\u9993\u9994" +- Text: `Encoding::UTF_8`. +- Binary: `Encoding::ASCII_8BIT`. + +These instance methods are relevant: + +- #external_encoding: returns the current encoding of the stream as an `Encoding` object. +- #internal_encoding: returns +nil+; a stream does not have an internal encoding. +- #set_encoding: sets the encoding for the stream. +- #set_encoding_by_bom: sets the encoding for the stream to the stream's BOM (byte order mark). + +Examples: + +```ruby +strio = StringIO.new('foo', 'rt') # Text mode. +strio.external_encoding # => # +data = "\u9990\u9991\u9992\u9993\u9994" +strio = StringIO.new(data, 'rb') # Binary mode. +strio.external_encoding # => # +strio = StringIO.new('foo') +strio.external_encoding # => # +strio.set_encoding('US-ASCII') +strio.external_encoding # => # +``` + +### Position + +A stream has a _position_, and integer offset (in bytes) into the stream. +The initial position of a stream is zero. + +#### Getting and Setting the Position + +Each of these methods initializes (to zero) the position of a new or re-opened stream: + +- ::new: returns a new stream. +- ::open: passes a new stream to the block. +- #reopen: re-initializes the stream. + +Each of these methods queries, gets, or sets the position, without otherwise changing the stream: + +- #eof?: returns whether the position is at end-of-stream. +- #pos: returns the position. +- #pos=: sets the position. +- #rewind: sets the position to zero. +- #seek: sets the position. + +Examples: + +```ruby +strio = StringIO.new('foobar') +strio.pos # => 0 +strio.pos = 3 +strio.pos # => 3 +strio.eof? # => false +strio.rewind +strio.pos # => 0 +strio.seek(0, IO::SEEK_END) +strio.pos # => 6 +strio.eof? # => true +``` + +#### Position Before and After Reading + +Except for #pread, a stream reading method (see [Basic Reading][basic reading]) +begins reading at the current position. + +Except for #pread, a read method advances the position past the read substring. + +Examples: + +```ruby +strio = StringIO.new(TEXT) +strio.string # => "First line\nSecond line\n\nFourth line\nFifth line\n" +strio.pos # => 0 +strio.getc # => "F" +strio.pos # => 1 +strio.gets # => "irst line\n" +strio.pos # => 11 +strio.pos = 24 +strio.gets # => "Fourth line\n" +strio.pos # => 36 + +strio = StringIO.new('тест') # Four 2-byte characters. +strio.pos = 0 # At first byte of first character. +strio.read # => "тест" +strio.pos = 1 # At second byte of first character. +strio.read # => "\x82ест" +strio.pos = 2 # At first of second character. +strio.read # => "ест" + +strio = StringIO.new(TEXT) +strio.pos = 15 +a = [] +strio.each_line {|line| a.push(line) } +a # => ["nd line\n", "\n", "Fourth line\n", "Fifth line\n"] +strio.pos # => 47 ## End-of-stream. +``` + +#### Position Before and After Writing + +Each of these methods begins writing at the current position, +and advances the position to the end of the written substring: + +- #putc: writes the given character. +- #write: writes the given objects as strings. +- [Kernel#puts][kernel#puts]: writes given objects as strings, each followed by newline. + +Examples: + +```ruby +strio = StringIO.new('foo') +strio.pos # => 0 +strio.putc('b') +strio.string # => "boo" +strio.pos # => 1 +strio.write('r') +strio.string # => "bro" +strio.pos # => 2 +strio.puts('ew') +strio.string # => "brew\n" +strio.pos # => 5 +strio.pos = 8 +strio.write('foo') +strio.string # => "brew\n\u0000\u0000\u0000foo" +strio.pos # => 11 +``` + +Each of these methods writes _before_ the current position, and decrements the position +so that the written data is next to be read: + +- #ungetbyte: unshifts the given byte. +- #ungetc: unshifts the given character. + +Examples: + +```ruby +strio = StringIO.new('foo') +strio.pos = 2 +strio.ungetc('x') +strio.pos # => 1 +strio.string # => "fxo" +strio.ungetc('x') +strio.pos # => 0 +strio.string # => "xxo" +``` + +This method does not affect the position: + +- #truncate: truncates the stream's string to the given size. + +Examples: + +```ruby +strio = StringIO.new('foobar') +strio.pos # => 0 +strio.truncate(3) +strio.string # => "foo" +strio.pos # => 0 +strio.pos = 500 +strio.truncate(0) +strio.string # => "" +strio.pos # => 500 +``` + +### Line Number + +A stream has a line number, which initially is zero: + +- Method #lineno returns the line number. +- Method #lineno= sets the line number. + +The line number can be affected by reading (but never by writing); +in general, the line number is incremented each time the record separator (default: `"\n"`) is read. + +Examples: + +```ruby +strio = StringIO.new(TEXT) +strio.string # => "First line\nSecond line\n\nFourth line\nFifth line\n" +strio.lineno # => 0 +strio.gets # => "First line\n" +strio.lineno # => 1 +strio.getc # => "S" +strio.lineno # => 1 +strio.gets # => "econd line\n" +strio.lineno # => 2 +strio.gets # => "\n" +strio.lineno # => 3 +strio.gets # => "Fourth line\n" +strio.lineno # => 4 +``` + +Setting the position does not affect the line number: + +```ruby +strio.pos = 0 +strio.lineno # => 4 +strio.gets # => "First line\n" +strio.pos # => 11 +strio.lineno # => 5 +``` + +And setting the line number does not affect the position: + +```ruby +strio.lineno = 10 +strio.pos # => 11 +strio.gets # => "Second line\n" +strio.lineno # => 11 +strio.pos # => 23 ``` + +### Open/Closed Streams + +A new stream is open for either reading or writing, and may be open for both; +see [Read/Write Mode][read/write mode]. + +Each of these methods initializes the read/write mode for a new or re-opened stream: + +- ::new: returns a new stream. +- ::open: passes a new stream to the block. +- #reopen: re-initializes the stream. + +Other relevant methods: + +- #close: closes the stream for both reading and writing. +- #close_read: closes the stream for reading. +- #close_write: closes the stream for writing. +- #closed?: returns whether the stream is closed for both reading and writing. +- #closed_read?: returns whether the stream is closed for reading. +- #closed_write?: returns whether the stream is closed for writing. + +### BOM (Byte Order Mark) + +The string provided for ::new, ::open, or #reopen +may contain an optional [BOM][bom] (byte order mark) at the beginning of the string; +the BOM can affect the stream's encoding. + +The BOM (if provided): + +- Is stored as part of the stream's string. +- Does _not_ immediately affect the encoding. +- Is _initially_ considered part of the stream. + +```ruby +utf8_bom = "\xEF\xBB\xBF" +string = utf8_bom + 'foo' +string.bytes # => [239, 187, 191, 102, 111, 111] +strio.string.bytes.take(3) # => [239, 187, 191] # The BOM. +strio = StringIO.new(string, 'rb') +strio.string.bytes # => [239, 187, 191, 102, 111, 111] # BOM is part of the stored string. +strio.external_encoding # => # # Default for a binary stream. +strio.gets # => "\xEF\xBB\xBFfoo" # BOM is part of the stream. +``` + +You can call instance method #set_encoding_by_bom to "activate" the stored BOM; +after doing so the BOM: + +- Is _still_ stored as part of the stream's string. +- _Determines_ (and may have changed) the stream's encoding. +- Is _no longer_ considered part of the stream. + +```ruby +strio.set_encoding_by_bom +strio.string.bytes # => [239, 187, 191, 102, 111, 111] # BOM is still part of the stored string. +strio.external_encoding # => # # The new encoding. +strio.rewind # => 0 +strio.gets # => "foo" # BOM is not part of the stream. +``` + +## Basic Stream \IO + +### Basic Reading + +You can read from the stream using these instance methods: + +- #getbyte: reads and returns the next byte. +- #getc: reads and returns the next character. +- #gets: reads and returns all or part of the next line. +- #read: reads and returns all or part of the remaining data in the stream. +- #readlines: reads the remaining data the stream and returns an array of its lines. +- [Kernel#readline][kernel#readline]: like #gets, but raises an exception if at end-of-stream. + +You can iterate over the stream using these instance methods: + +- #each_byte: reads each remaining byte, passing it to the block. +- #each_char: reads each remaining character, passing it to the block. +- #each_codepoint: reads each remaining codepoint, passing it to the block. +- #each_line: reads all or part of each remaining line, passing the read string to the block + +This instance method is useful in a multi-threaded application: + +- #pread: reads and returns all or part of the stream. + +### Basic Writing + +You can write to the stream, advancing the position, using these instance methods: + +- #putc: writes a given character. +- #write: writes the given objects as strings. +- [Kernel#puts][kernel#puts] writes given objects as strings, each followed by newline. + +You can "unshift" to the stream using these instance methods; +each writes _before_ the current position, and decrements the position +so that the written data is next to be read. + +- #ungetbyte: unshifts the given byte. +- #ungetc: unshifts the given character. + +One more writing method: + +- #truncate: truncates the stream's string to the given size. + +## Line \IO + +Reading: + +- #gets: reads and returns the next line. +- [Kernel#readline][kernel#readline]: like #gets, but raises an exception if at end-of-stream. +- #readlines: reads the remaining data the stream and returns an array of its lines. +- #each_line: reads each remaining line, passing it to the block + +Writing: + +- [Kernel#puts][kernel#puts]: writes given objects, each followed by newline. + +## Character \IO + +Reading: + +- #each_char: reads each remaining character, passing it to the block. +- #getc: reads and returns the next character. + +Writing: + +- #putc: writes the given character. +- #ungetc.: unshifts the given character. + +## Byte \IO + +Reading: + +- #each_byte: reads each remaining byte, passing it to the block. +- #getbyte: reads and returns the next byte. + +Writing: + +- #ungetbyte: unshifts the given byte. + +## Codepoint \IO + +Reading: + +- #each_codepoint: reads each remaining codepoint, passing it to the block. + +[bom]: https://en.wikipedia.org/wiki/Byte_order_mark +[encodings document]: https://docs.ruby-lang.org/en/master/encodings_rdoc.html +[io class]: https://docs.ruby-lang.org/en/master/IO.html +[kernel#puts]: https://docs.ruby-lang.org/en/master/Kernel.html#method-i-puts +[kernel#readline]: https://docs.ruby-lang.org/en/master/Kernel.html#method-i-readline + +[basic reading]: rdoc-ref:StringIO@Basic+Reading +[basic writing]: rdoc-ref:StringIO@Basic+Writing +[bom (byte order mark)]: rdoc-ref:StringIO@BOM+-28Byte+Order+Mark-29 +[data mode]: rdoc-ref:StringIO@Data+Mode +[encodings]: rdoc-ref:StringIO@Encodings +[end-of-stream]: rdoc-ref:StringIO@End-of-Stream +[line number]: rdoc-ref:StringIO@Line+Number +[open/closed streams]: rdoc-ref:StringIO@Open-2FClosed+Streams +[position]: rdoc-ref:StringIO@Position +[read/write mode]: rdoc-ref:StringIO@Read-2FWrite+Mode diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 603d9c5e22611e..5556426fdc387a 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -774,7 +774,7 @@ strio_set_lineno(VALUE self, VALUE lineno) * binmode -> self * * Sets the data mode in +self+ to binary mode; - * see {Data Mode}[rdoc-ref:File@Data+Mode]. + * see {Data Mode}[rdoc-ref:StringIO@Data+Mode]. * */ static VALUE diff --git a/internal/bignum.h b/internal/bignum.h index e5b6b425631f80..0692bafed30d73 100644 --- a/internal/bignum.h +++ b/internal/bignum.h @@ -169,7 +169,13 @@ VALUE rb_str2big_gmp(VALUE arg, int base, int badcheck); VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp, size_t *ndigits, int base, int flags); RUBY_SYMBOL_EXPORT_END +#if HAVE_LONG_LONG +VALUE rb_ull2big(unsigned LONG_LONG n); +VALUE rb_ll2big(LONG_LONG n); +#endif + #if defined(HAVE_INT128_T) +VALUE rb_uint128t2big(uint128_t n); VALUE rb_int128t2big(int128_t n); #endif diff --git a/internal/numeric.h b/internal/numeric.h index 58f42f41ac4bea..75181a7f168620 100644 --- a/internal/numeric.h +++ b/internal/numeric.h @@ -127,6 +127,49 @@ VALUE rb_int_bit_length(VALUE num); VALUE rb_int_uminus(VALUE num); VALUE rb_int_comp(VALUE num); +// Unified 128-bit integer structures that work with or without native support: +union rb_uint128 { +#ifdef WORDS_BIGENDIAN + struct { + uint64_t high; + uint64_t low; + } parts; +#else + struct { + uint64_t low; + uint64_t high; + } parts; +#endif +#ifdef HAVE_UINT128_T + uint128_t value; +#endif +}; +typedef union rb_uint128 rb_uint128_t; + +union rb_int128 { +#ifdef WORDS_BIGENDIAN + struct { + uint64_t high; + uint64_t low; + } parts; +#else + struct { + uint64_t low; + uint64_t high; + } parts; +#endif +#ifdef HAVE_UINT128_T + int128_t value; +#endif +}; +typedef union rb_int128 rb_int128_t; + +// Conversion functions for 128-bit integers: +rb_uint128_t rb_numeric_to_uint128(VALUE x); +rb_int128_t rb_numeric_to_int128(VALUE x); +VALUE rb_uint128_to_numeric(rb_uint128_t n); +VALUE rb_int128_to_numeric(rb_int128_t n); + static inline bool INT_POSITIVE_P(VALUE num) { diff --git a/io_buffer.c b/io_buffer.c index 89f169176f48b5..0d2cbdb4b7e704 100644 --- a/io_buffer.c +++ b/io_buffer.c @@ -1864,6 +1864,9 @@ io_buffer_validate_type(size_t size, size_t offset) // :u64, :U64 | unsigned 64-bit integer. // :s64, :S64 | signed 64-bit integer. // +// :u128, :U128 | unsigned 128-bit integer. +// :s128, :S128 | signed 128-bit integer. +// // :f32, :F32 | 32-bit floating point number. // :f64, :F64 | 64-bit floating point number. @@ -1895,6 +1898,45 @@ ruby_swapf64(double value) return swap.value; } +// Structures and conversion functions are now in numeric.h/numeric.c +// Unified swap function for 128-bit integers (works with both signed and unsigned) +// Since both rb_uint128_t and rb_int128_t have the same memory layout, +// we can use a union to make the swap function work with both types +static inline rb_uint128_t +ruby_swap128_uint(rb_uint128_t x) +{ + rb_uint128_t result; +#ifdef HAVE_UINT128_T +#if __has_builtin(__builtin_bswap128) + result.value = __builtin_bswap128(x.value); +#else + // Manual byte swap for 128-bit integers + uint64_t low = (uint64_t)x.value; + uint64_t high = (uint64_t)(x.value >> 64); + low = ruby_swap64(low); + high = ruby_swap64(high); + result.value = ((uint128_t)low << 64) | high; +#endif +#else + // Fallback swap function using two 64-bit integers + // For big-endian data on little-endian host (or vice versa): + // 1. Swap bytes within each 64-bit part + // 2. Swap the order of the parts (since big-endian stores high first, little-endian stores low first) + result.parts.low = ruby_swap64(x.parts.high); + result.parts.high = ruby_swap64(x.parts.low); +#endif + return result; +} + +static inline rb_int128_t +ruby_swap128_int(rb_int128_t x) +{ + // Cast to unsigned, swap, then cast back + rb_uint128_t u = *(rb_uint128_t*)&x; + rb_uint128_t swapped = ruby_swap128_uint(u); + return *(rb_int128_t*)&swapped; +} + #define IO_BUFFER_DECLARE_TYPE(name, type, endian, wrap, unwrap, swap) \ static ID RB_IO_BUFFER_DATA_TYPE_##name; \ \ @@ -1941,6 +1983,11 @@ IO_BUFFER_DECLARE_TYPE(U64, uint64_t, RB_IO_BUFFER_BIG_ENDIAN, RB_ULL2NUM, RB_NU IO_BUFFER_DECLARE_TYPE(s64, int64_t, RB_IO_BUFFER_LITTLE_ENDIAN, RB_LL2NUM, RB_NUM2LL, ruby_swap64) IO_BUFFER_DECLARE_TYPE(S64, int64_t, RB_IO_BUFFER_BIG_ENDIAN, RB_LL2NUM, RB_NUM2LL, ruby_swap64) +IO_BUFFER_DECLARE_TYPE(u128, rb_uint128_t, RB_IO_BUFFER_LITTLE_ENDIAN, rb_uint128_to_numeric, rb_numeric_to_uint128, ruby_swap128_uint) +IO_BUFFER_DECLARE_TYPE(U128, rb_uint128_t, RB_IO_BUFFER_BIG_ENDIAN, rb_uint128_to_numeric, rb_numeric_to_uint128, ruby_swap128_uint) +IO_BUFFER_DECLARE_TYPE(s128, rb_int128_t, RB_IO_BUFFER_LITTLE_ENDIAN, rb_int128_to_numeric, rb_numeric_to_int128, ruby_swap128_int) +IO_BUFFER_DECLARE_TYPE(S128, rb_int128_t, RB_IO_BUFFER_BIG_ENDIAN, rb_int128_to_numeric, rb_numeric_to_int128, ruby_swap128_int) + IO_BUFFER_DECLARE_TYPE(f32, float, RB_IO_BUFFER_LITTLE_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf32) IO_BUFFER_DECLARE_TYPE(F32, float, RB_IO_BUFFER_BIG_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf32) IO_BUFFER_DECLARE_TYPE(f64, double, RB_IO_BUFFER_LITTLE_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf64) @@ -1965,6 +2012,10 @@ io_buffer_buffer_type_size(ID buffer_type) IO_BUFFER_DATA_TYPE_SIZE(U64) IO_BUFFER_DATA_TYPE_SIZE(s64) IO_BUFFER_DATA_TYPE_SIZE(S64) + IO_BUFFER_DATA_TYPE_SIZE(u128) + IO_BUFFER_DATA_TYPE_SIZE(U128) + IO_BUFFER_DATA_TYPE_SIZE(s128) + IO_BUFFER_DATA_TYPE_SIZE(S128) IO_BUFFER_DATA_TYPE_SIZE(f32) IO_BUFFER_DATA_TYPE_SIZE(F32) IO_BUFFER_DATA_TYPE_SIZE(f64) @@ -2021,6 +2072,11 @@ rb_io_buffer_get_value(const void* base, size_t size, ID buffer_type, size_t *of IO_BUFFER_GET_VALUE(s64) IO_BUFFER_GET_VALUE(S64) + IO_BUFFER_GET_VALUE(u128) + IO_BUFFER_GET_VALUE(U128) + IO_BUFFER_GET_VALUE(s128) + IO_BUFFER_GET_VALUE(S128) + IO_BUFFER_GET_VALUE(f32) IO_BUFFER_GET_VALUE(F32) IO_BUFFER_GET_VALUE(f64) @@ -2050,6 +2106,10 @@ rb_io_buffer_get_value(const void* base, size_t size, ID buffer_type, size_t *of * * +:U64+: unsigned integer, 8 bytes, big-endian * * +:s64+: signed integer, 8 bytes, little-endian * * +:S64+: signed integer, 8 bytes, big-endian + * * +:u128+: unsigned integer, 16 bytes, little-endian + * * +:U128+: unsigned integer, 16 bytes, big-endian + * * +:s128+: signed integer, 16 bytes, little-endian + * * +:S128+: signed integer, 16 bytes, big-endian * * +:f32+: float, 4 bytes, little-endian * * +:F32+: float, 4 bytes, big-endian * * +:f64+: double, 8 bytes, little-endian @@ -2287,6 +2347,11 @@ rb_io_buffer_set_value(const void* base, size_t size, ID buffer_type, size_t *of IO_BUFFER_SET_VALUE(s64); IO_BUFFER_SET_VALUE(S64); + IO_BUFFER_SET_VALUE(u128); + IO_BUFFER_SET_VALUE(U128); + IO_BUFFER_SET_VALUE(s128); + IO_BUFFER_SET_VALUE(S128); + IO_BUFFER_SET_VALUE(f32); IO_BUFFER_SET_VALUE(F32); IO_BUFFER_SET_VALUE(f64); @@ -3859,6 +3924,11 @@ Init_IO_Buffer(void) IO_BUFFER_DEFINE_DATA_TYPE(s64); IO_BUFFER_DEFINE_DATA_TYPE(S64); + IO_BUFFER_DEFINE_DATA_TYPE(u128); + IO_BUFFER_DEFINE_DATA_TYPE(U128); + IO_BUFFER_DEFINE_DATA_TYPE(s128); + IO_BUFFER_DEFINE_DATA_TYPE(S128); + IO_BUFFER_DEFINE_DATA_TYPE(f32); IO_BUFFER_DEFINE_DATA_TYPE(F32); IO_BUFFER_DEFINE_DATA_TYPE(f64); diff --git a/numeric.c b/numeric.c index bc0edd6abe91f9..d9e837644ffa05 100644 --- a/numeric.c +++ b/numeric.c @@ -3420,6 +3420,229 @@ rb_num2ull(VALUE val) #endif /* HAVE_LONG_LONG */ +// Conversion functions for unified 128-bit integer structures, +// These work with or without native 128-bit integer support. + +#ifndef HAVE_UINT128_T +// Helper function to build 128-bit value from bignum digits (fallback path). +static inline void +rb_uint128_from_bignum_digits_fallback(rb_uint128_t *result, BDIGIT *digits, size_t length) +{ + // Build the 128-bit value from bignum digits: + for (long i = length - 1; i >= 0; i--) { + // Shift both low and high parts: + uint64_t carry = result->parts.low >> (64 - (SIZEOF_BDIGIT * CHAR_BIT)); + result->parts.low = (result->parts.low << (SIZEOF_BDIGIT * CHAR_BIT)) | digits[i]; + result->parts.high = (result->parts.high << (SIZEOF_BDIGIT * CHAR_BIT)) | carry; + } +} + +// Helper function to convert absolute value of negative bignum to two's complement. +// Ruby stores negative bignums as absolute values, so we need to convert to two's complement. +static inline void +rb_uint128_twos_complement_negate(rb_uint128_t *value) +{ + if (value->parts.low == 0) { + value->parts.high = ~value->parts.high + 1; + } + else { + value->parts.low = ~value->parts.low + 1; + value->parts.high = ~value->parts.high + (value->parts.low == 0 ? 1 : 0); + } +} +#endif + +rb_uint128_t +rb_numeric_to_uint128(VALUE x) +{ + rb_uint128_t result = {0}; + if (RB_FIXNUM_P(x)) { + long value = RB_FIX2LONG(x); + if (value < 0) { + rb_raise(rb_eRangeError, "negative integer cannot be converted to unsigned 128-bit integer"); + } +#ifdef HAVE_UINT128_T + result.value = (uint128_t)value; +#else + result.parts.low = (uint64_t)value; + result.parts.high = 0; +#endif + return result; + } + else if (RB_BIGNUM_TYPE_P(x)) { + if (BIGNUM_NEGATIVE_P(x)) { + rb_raise(rb_eRangeError, "negative integer cannot be converted to unsigned 128-bit integer"); + } + size_t length = BIGNUM_LEN(x); +#ifdef HAVE_UINT128_T + if (length > roomof(SIZEOF_INT128_T, SIZEOF_BDIGIT)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'unsigned 128-bit integer'"); + } + BDIGIT *digits = BIGNUM_DIGITS(x); + result.value = 0; + for (long i = length - 1; i >= 0; i--) { + result.value = (result.value << (SIZEOF_BDIGIT * CHAR_BIT)) | digits[i]; + } +#else + // Check if bignum fits in 128 bits (16 bytes) + if (length > roomof(16, SIZEOF_BDIGIT)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'unsigned 128-bit integer'"); + } + BDIGIT *digits = BIGNUM_DIGITS(x); + rb_uint128_from_bignum_digits_fallback(&result, digits, length); +#endif + return result; + } + else { + rb_raise(rb_eTypeError, "not an integer"); + } +} + +rb_int128_t +rb_numeric_to_int128(VALUE x) +{ + rb_int128_t result = {0}; + if (RB_FIXNUM_P(x)) { + long value = RB_FIX2LONG(x); +#ifdef HAVE_UINT128_T + result.value = (int128_t)value; +#else + if (value < 0) { + // Two's complement representation: for negative values, sign extend + // Convert to unsigned: for -1, we want all bits set + result.parts.low = (uint64_t)value; // This will be the two's complement representation + result.parts.high = UINT64_MAX; // Sign extend: all bits set for negative + } + else { + result.parts.low = (uint64_t)value; + result.parts.high = 0; + } +#endif + return result; + } + else if (RB_BIGNUM_TYPE_P(x)) { + size_t length = BIGNUM_LEN(x); +#ifdef HAVE_UINT128_T + if (length > roomof(SIZEOF_INT128_T, SIZEOF_BDIGIT)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + BDIGIT *digits = BIGNUM_DIGITS(x); + uint128_t unsigned_result = 0; + for (long i = length - 1; i >= 0; i--) { + unsigned_result = (unsigned_result << (SIZEOF_BDIGIT * CHAR_BIT)) | digits[i]; + } + if (BIGNUM_NEGATIVE_P(x)) { + // Convert from two's complement + // Maximum negative value is 2^127 + if (unsigned_result > ((uint128_t)1 << 127)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + result.value = -(int128_t)(unsigned_result - 1) - 1; + } + else { + // Maximum positive value is 2^127 - 1 + if (unsigned_result > (((uint128_t)1 << 127) - 1)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + result.value = (int128_t)unsigned_result; + } +#else + if (length > roomof(16, SIZEOF_BDIGIT)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + BDIGIT *digits = BIGNUM_DIGITS(x); + rb_uint128_t unsigned_result = {0}; + rb_uint128_from_bignum_digits_fallback(&unsigned_result, digits, length); + if (BIGNUM_NEGATIVE_P(x)) { + // Check if value fits in signed 128-bit (max negative is 2^127) + uint64_t max_neg_high = (uint64_t)1 << 63; + if (unsigned_result.parts.high > max_neg_high || (unsigned_result.parts.high == max_neg_high && unsigned_result.parts.low > 0)) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + // Convert from absolute value to two's complement (Ruby stores negative as absolute value) + rb_uint128_twos_complement_negate(&unsigned_result); + result.parts.low = unsigned_result.parts.low; + result.parts.high = (int64_t)unsigned_result.parts.high; // Sign extend + } + else { + // Check if value fits in signed 128-bit (max positive is 2^127 - 1) + // Max positive: high = 0x7FFFFFFFFFFFFFFF, low = 0xFFFFFFFFFFFFFFFF + uint64_t max_pos_high = ((uint64_t)1 << 63) - 1; + if (unsigned_result.parts.high > max_pos_high) { + rb_raise(rb_eRangeError, "bignum too big to convert into 'signed 128-bit integer'"); + } + result.parts.low = unsigned_result.parts.low; + result.parts.high = unsigned_result.parts.high; + } +#endif + return result; + } + else { + rb_raise(rb_eTypeError, "not an integer"); + } +} + +VALUE +rb_uint128_to_numeric(rb_uint128_t n) +{ +#ifdef HAVE_UINT128_T + if (n.value <= (uint128_t)RUBY_FIXNUM_MAX) { + return LONG2FIX((long)n.value); + } + return rb_uint128t2big(n.value); +#else + // If high part is zero and low part fits in fixnum + if (n.parts.high == 0 && n.parts.low <= (uint64_t)RUBY_FIXNUM_MAX) { + return LONG2FIX((long)n.parts.low); + } + // Convert to bignum by building it from the two 64-bit parts + VALUE bignum = rb_ull2big(n.parts.low); + if (n.parts.high > 0) { + VALUE high_bignum = rb_ull2big(n.parts.high); + // Multiply high part by 2^64 and add to low part + VALUE shifted_value = rb_int_lshift(high_bignum, INT2FIX(64)); + bignum = rb_int_plus(bignum, shifted_value); + } + return bignum; +#endif +} + +VALUE +rb_int128_to_numeric(rb_int128_t n) +{ +#ifdef HAVE_UINT128_T + if (FIXABLE(n.value)) { + return LONG2FIX((long)n.value); + } + return rb_int128t2big(n.value); +#else + int64_t high = (int64_t)n.parts.high; + // If it's a small positive value that fits in fixnum + if (high == 0 && n.parts.low <= (uint64_t)RUBY_FIXNUM_MAX) { + return LONG2FIX((long)n.parts.low); + } + // Check if it's negative (high bit of high part is set) + if (high < 0) { + // Negative value - convert from two's complement to absolute value + rb_uint128_t unsigned_value = {0}; + if (n.parts.low == 0) { + unsigned_value.parts.low = 0; + unsigned_value.parts.high = ~n.parts.high + 1; + } + else { + unsigned_value.parts.low = ~n.parts.low + 1; + unsigned_value.parts.high = ~n.parts.high + (unsigned_value.parts.low == 0 ? 1 : 0); + } + VALUE bignum = rb_uint128_to_numeric(unsigned_value); + return rb_int_uminus(bignum); + } + else { + // Positive value + return rb_uint128_to_numeric(*(rb_uint128_t*)&n); + } +#endif +} + /******************************************************************** * * Document-class: Integer diff --git a/string.c b/string.c index 34c79eca8e5cff..9327306384a232 100644 --- a/string.c +++ b/string.c @@ -7966,7 +7966,7 @@ rb_str_upcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * upcase(mapping) -> string + * upcase(mapping = :ascii) -> new_string * * :include: doc/string/upcase.rdoc */ @@ -8052,7 +8052,7 @@ rb_str_downcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * downcase(mapping) -> string + * downcase(mapping = :ascii) -> new_string * * :include: doc/string/downcase.rdoc * @@ -8118,29 +8118,10 @@ rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * capitalize(mapping = :ascii) -> string + * capitalize(mapping = :ascii) -> new_string * - * Returns a string containing the characters in +self+, - * each with possibly changed case: + * :include: doc/string/capitalize.rdoc * - * - The first character is upcased. - * - All other characters are downcased. - * - * Examples: - * - * 'hello world'.capitalize # => "Hello world" - * 'HELLO WORLD'.capitalize # => "Hello world" - * - * Some characters do not have upcase and downcase, and so are not changed; - * see {Case Mapping}[rdoc-ref:case_mapping.rdoc]: - * - * '1, 2, 3, ...'.capitalize # => "1, 2, 3, ..." - * - * The casing is affected by the given +mapping+, - * which may be +:ascii+, +:fold+, or +:turkic+; - * see {Case Mappings}[rdoc-ref:case_mapping.rdoc@Case+Mappings]. - * - * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE @@ -8197,7 +8178,7 @@ rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str) /* * call-seq: - * swapcase(mapping) -> new_string + * swapcase(mapping = :ascii) -> new_string * * :include: doc/string/swapcase.rdoc * diff --git a/test/ruby/test_io_buffer.rb b/test/ruby/test_io_buffer.rb index 1e4a6e2fd86c40..9ff22b4bb356eb 100644 --- a/test/ruby/test_io_buffer.rb +++ b/test/ruby/test_io_buffer.rb @@ -405,6 +405,11 @@ def test_zero_length_get_string :u64 => [0, 2**64-1], :s64 => [-2**63, 0, 2**63-1], + :U128 => [0, 2**64, 2**127-1, 2**128-1], + :S128 => [-2**127, -2**63-1, -1, 0, 2**63, 2**127-1], + :u128 => [0, 2**64, 2**127-1, 2**128-1], + :s128 => [-2**127, -2**63-1, -1, 0, 2**63, 2**127-1], + :F32 => [-1.0, 0.0, 0.5, 1.0, 128.0], :F64 => [-1.0, 0.0, 0.5, 1.0, 128.0], } @@ -759,4 +764,146 @@ def test_bug_21210 assert_predicate buf, :valid? end + + def test_128_bit_integers + buffer = IO::Buffer.new(32) + + # Test unsigned 128-bit integers + test_values_u128 = [ + 0, + 1, + 2**64 - 1, + 2**64, + 2**127 - 1, + 2**128 - 1, + ] + + test_values_u128.each do |value| + buffer.set_value(:u128, 0, value) + assert_equal value, buffer.get_value(:u128, 0), "u128: #{value}" + + buffer.set_value(:U128, 0, value) + assert_equal value, buffer.get_value(:U128, 0), "U128: #{value}" + end + + # Test signed 128-bit integers + test_values_s128 = [ + -2**127, + -2**63 - 1, + -1, + 0, + 1, + 2**63, + 2**127 - 1, + ] + + test_values_s128.each do |value| + buffer.set_value(:s128, 0, value) + assert_equal value, buffer.get_value(:s128, 0), "s128: #{value}" + + buffer.set_value(:S128, 0, value) + assert_equal value, buffer.get_value(:S128, 0), "S128: #{value}" + end + + # Test size_of + assert_equal 16, IO::Buffer.size_of(:u128) + assert_equal 16, IO::Buffer.size_of(:U128) + assert_equal 16, IO::Buffer.size_of(:s128) + assert_equal 16, IO::Buffer.size_of(:S128) + assert_equal 32, IO::Buffer.size_of([:u128, :u128]) + end + + def test_integer_endianness_swapping + # Test that byte order is swapped correctly for all signed and unsigned integers > 1 byte + host_is_le = IO::Buffer::HOST_ENDIAN == IO::Buffer::LITTLE_ENDIAN + host_is_be = IO::Buffer::HOST_ENDIAN == IO::Buffer::BIG_ENDIAN + + # Test values that will produce different byte patterns when swapped + # Format: [little_endian_type, big_endian_type, test_value, expected_swapped_value] + # expected_swapped_value is the result when writing as le_type and reading as be_type + # (or vice versa) on a little-endian host + test_cases = [ + [:u16, :U16, 0x1234, 0x3412], + [:s16, :S16, 0x1234, 0x3412], + [:u32, :U32, 0x12345678, 0x78563412], + [:s32, :S32, 0x12345678, 0x78563412], + [:u64, :U64, 0x0123456789ABCDEF, 0xEFCDAB8967452301], + [:s64, :S64, 0x0123456789ABCDEF, -1167088121787636991], + [:u128, :U128, 0x0123456789ABCDEF0123456789ABCDEF, 0xEFCDAB8967452301EFCDAB8967452301], + [:u128, :U128, 0x0123456789ABCDEFFEDCBA9876543210, 0x1032547698BADCFEEFCDAB8967452301], + [:u128, :U128, 0xFEDCBA98765432100123456789ABCDEF, 0xEFCDAB89674523011032547698BADCFE], + [:u128, :U128, 0x123456789ABCDEF0FEDCBA9876543210, 0x1032547698BADCFEF0DEBC9A78563412], + [:s128, :S128, 0x0123456789ABCDEF0123456789ABCDEF, -21528975894082904073953971026863512831], + [:s128, :S128, 0x0123456789ABCDEFFEDCBA9876543210, 0x1032547698BADCFEEFCDAB8967452301], + ] + + test_cases.each do |le_type, be_type, value, expected_swapped| + buffer_size = IO::Buffer.size_of(le_type) + buffer = IO::Buffer.new(buffer_size * 2) + + # Test little-endian round-trip + buffer.set_value(le_type, 0, value) + result_le = buffer.get_value(le_type, 0) + assert_equal value, result_le, "#{le_type}: round-trip failed" + + # Test big-endian round-trip + buffer.set_value(be_type, buffer_size, value) + result_be = buffer.get_value(be_type, buffer_size) + assert_equal value, result_be, "#{be_type}: round-trip failed" + + # Verify byte patterns are different when endianness differs from host + le_bytes = buffer.get_string(0, buffer_size) + be_bytes = buffer.get_string(buffer_size, buffer_size) + + if host_is_le + # On little-endian host: le_type should match host, be_type should be swapped + # So the byte patterns should be different (unless value is symmetric) + # Read back with opposite endianness to verify swapping + result_le_read_as_be = buffer.get_value(be_type, 0) + result_be_read_as_le = buffer.get_value(le_type, buffer_size) + + # The swapped reads should NOT equal the original value (unless it's symmetric) + # For most values, this will be different + if value != 0 && value != -1 && value.abs != 1 + refute_equal value, result_le_read_as_be, "#{le_type} written, read as #{be_type} should be swapped on LE host" + refute_equal value, result_be_read_as_le, "#{be_type} written, read as #{le_type} should be swapped on LE host" + end + + # Verify that reading back with correct endianness works + assert_equal value, buffer.get_value(le_type, 0), "#{le_type} should read correctly on LE host" + assert_equal value, buffer.get_value(be_type, buffer_size), "#{be_type} should read correctly on LE host (with swapping)" + elsif host_is_be + # On big-endian host: be_type should match host, le_type should be swapped + result_le_read_as_be = buffer.get_value(be_type, 0) + result_be_read_as_le = buffer.get_value(le_type, buffer_size) + + # The swapped reads should NOT equal the original value (unless it's symmetric) + if value != 0 && value != -1 && value.abs != 1 + refute_equal value, result_le_read_as_be, "#{le_type} written, read as #{be_type} should be swapped on BE host" + refute_equal value, result_be_read_as_le, "#{be_type} written, read as #{le_type} should be swapped on BE host" + end + + # Verify that reading back with correct endianness works + assert_equal value, buffer.get_value(be_type, buffer_size), "#{be_type} should read correctly on BE host" + assert_equal value, buffer.get_value(le_type, 0), "#{le_type} should read correctly on BE host (with swapping)" + end + + # Verify that when we write with one endianness and read with the opposite, + # we get the expected swapped value + buffer.set_value(le_type, 0, value) + swapped_value_le_to_be = buffer.get_value(be_type, 0) + assert_equal expected_swapped, swapped_value_le_to_be, "#{le_type} written, read as #{be_type} should produce expected swapped value" + + # Also verify the reverse direction + buffer.set_value(be_type, buffer_size, value) + swapped_value_be_to_le = buffer.get_value(le_type, buffer_size) + assert_equal expected_swapped, swapped_value_be_to_le, "#{be_type} written, read as #{le_type} should produce expected swapped value" + + # Verify that writing the swapped value back and reading with original endianness + # gives us the original value (double-swap should restore original) + buffer.set_value(be_type, 0, swapped_value_le_to_be) + round_trip_value = buffer.get_value(le_type, 0) + assert_equal value, round_trip_value, "#{le_type}/#{be_type}: double-swap should restore original value" + end + end end