Skip to content

Commit

Permalink
Remove @buffer and process using only @scanner.
Browse files Browse the repository at this point in the history
  • Loading branch information
naitoh committed Jan 7, 2024
1 parent 81c556d commit 4c56eb8
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 28 deletions.
2 changes: 1 addition & 1 deletion lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def pull_event
else
@document_status = :after_doctype
if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
@source.scanner.string = @source.scanner.rest.force_encoding(::Encoding::UTF_8)
end
end
end
Expand Down
46 changes: 19 additions & 27 deletions lib/rexml/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def SourceFactory::create_from(arg)
# objects and provides consumption of text
class Source
include Encoding
# The current buffer (what we're going to read next)
attr_reader :buffer
# The current scanner (what we're going to read next)
attr_reader :scanner
# The line number of the last consumed text
attr_reader :line
Expand All @@ -42,8 +41,8 @@ class Source
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
@orig = @buffer = arg
@scanner = StringScanner.new(@buffer)
@orig = arg
@scanner = StringScanner.new(@orig)
if encoding
self.encoding = encoding
else
Expand All @@ -64,10 +63,8 @@ def read
end

def match(pattern, cons=false)
@scanner.string = @buffer
if cons
@scanner.scan(pattern)
@buffer = @scanner.rest if @scanner.matched?
else
@scanner.check(pattern)
end
Expand All @@ -88,34 +85,35 @@ def current_line
end

private

def detect_encoding
buffer_encoding = @buffer.encoding
scanner_encoding = @scanner.rest.encoding
detected_encoding = "UTF-8"
begin
@buffer.force_encoding("ASCII-8BIT")
if @buffer[0, 2] == "\xfe\xff"
@buffer[0, 2] = ""
@scanner.string = @scanner.rest.force_encoding("ASCII-8BIT")
if @scanner.rest[0, 2] == "\xfe\xff"
@scanner.string = @scanner.rest.delete_prefix("\xfe\xff")
detected_encoding = "UTF-16BE"
elsif @buffer[0, 2] == "\xff\xfe"
@buffer[0, 2] = ""
elsif @scanner.rest[0, 2] == "\xff\xfe"
@scanner.string = @scanner.rest.delete_prefix("\xff\xfe")
detected_encoding = "UTF-16LE"
elsif @buffer[0, 3] == "\xef\xbb\xbf"
@buffer[0, 3] = ""
elsif @scanner.rest[0, 3] == "\xef\xbb\xbf"
@scanner.string = @scanner.rest.delete_prefix("\xef\xbb\xbf")
detected_encoding = "UTF-8"
end
ensure
@buffer.force_encoding(buffer_encoding)
@scanner.string = @scanner.rest.force_encoding(scanner_encoding)
end
self.encoding = detected_encoding
end

def encoding_updated
if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@scanner.string = decode(@scanner.rest)
@to_utf = true
else
@to_utf = false
@buffer.force_encoding ::Encoding::UTF_8
@scanner.string = @scanner.rest.force_encoding(::Encoding::UTF_8)
end
end
end
Expand All @@ -138,7 +136,7 @@ def initialize(arg, block_size=500, encoding=nil)
end

if !@to_utf and
@buffer.respond_to?(:force_encoding) and
@orig.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
Expand All @@ -149,32 +147,26 @@ def initialize(arg, block_size=500, encoding=nil)

def read
begin
@buffer << readline
@scanner.string = @buffer
@scanner.string = @scanner.rest + readline
rescue Exception, NameError
@source = nil
end
end

def match( pattern, cons=false )
@scanner.string = @buffer
if cons
@scanner.scan(pattern)
@buffer = @scanner.rest if @scanner.matched?
else
@scanner.check(pattern)
end
while !@scanner.matched? and @source
begin
@buffer << readline
@scanner.string = @buffer
@scanner << readline
if cons
@scanner.scan(pattern)
@buffer = @scanner.rest if @scanner.matched?
else
@scanner.check(pattern)
end
@buffer = @scanner.rest if cons and @scanner.matched?
rescue
@source = nil
end
Expand Down Expand Up @@ -237,7 +229,7 @@ def encoding_updated
@source.set_encoding(@encoding, @encoding)
end
@line_break = encode(">")
@pending_buffer, @buffer = @buffer, ""
@pending_buffer, @scanner.string = @scanner.rest, ""
@pending_buffer.force_encoding(@encoding)
super
end
Expand Down

0 comments on commit 4c56eb8

Please sign in to comment.