root/trunk/activesupport/lib/active_support/multibyte/chars.rb
| Revision 8460, 5.4 kB (checked in by bitsweat, 9 months ago) |
|---|
| Line | |
|---|---|
| 1 | require 'active_support/multibyte/handlers/utf8_handler' |
| 2 | require 'active_support/multibyte/handlers/passthru_handler' |
| 3 | |
| 4 | # Encapsulates all the functionality related to the Chars proxy. |
| 5 | module ActiveSupport::Multibyte #:nodoc: |
| 6 | # Chars enables you to work transparently with multibyte encodings in the Ruby String class without having extensive |
| 7 | # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an |
| 8 | # encoding safe manner. All the normal String methods are also implemented on the proxy. |
| 9 | # |
| 10 | # String methods are proxied through the Chars object, and can be accessed through the +chars+ method. Methods |
| 11 | # which would normally return a String object now return a Chars object so methods can be chained. |
| 12 | # |
| 13 | # "The Perfect String ".chars.downcase.strip.normalize #=> "the perfect string" |
| 14 | # |
| 15 | # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made. |
| 16 | # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them. |
| 17 | # |
| 18 | # bad.explicit_checking_method "T".chars.downcase.to_s |
| 19 | # |
| 20 | # The actual operations on the string are delegated to handlers. Theoretically handlers can be implemented for |
| 21 | # any encoding, but the default handler handles UTF-8. This handler is set during initialization, if you want to |
| 22 | # use your own handler, you can set it on the Chars class. Look at the UTF8Handler source for an example how to |
| 23 | # implement your own handler. If you want your own handler to work on anything but UTF-8 you probably also |
| 24 | # want to override Chars#handler. |
| 25 | # |
| 26 | # ActiveSupport::Multibyte::Chars.handler = MyHandler |
| 27 | # |
| 28 | # Note that a few methods are defined on Chars instead of the handler because they are defined on Object or Kernel |
| 29 | # and method_missing can't catch them. |
class Chars

  # The contained string. +to_s+ is an alias and hands back the very same object, not a copy.
  attr_reader :string
  alias_method :to_s, :string

  include Comparable

  # The magic method to make String and Chars comparable: it lets core methods that expect a String
  # accept a Chars instance.
  def to_str
    # Using any other ways of overriding the String itself will lead you all the way from infinite loops to
    # core dumps. Don't go there.
    @string
  end

  # Make duck-typing with String possible.
  #
  # Returns true when the message is understood by Chars itself, by the contained string, or by the
  # current handler. A name ending in a single trailing <tt>!</tt> is also accepted when the handler
  # implements the non-bang variant, mirroring what +method_missing+ is able to dispatch.
  #
  # +include_private+ is the standard optional second argument of Object#respond_to? and is only
  # forwarded to the superclass check.
  def respond_to?(method, include_private = false)
    return true if super
    return true if @string.respond_to?(method) || handler.respond_to?(method)
    # Anchored so that only a genuine trailing bang counts (:"foo!bar" must not match).
    (method.to_s =~ /\A(.*)!\z/ && handler.respond_to?($1)) || false
  end

  # Create a new Chars instance. When +str+ itself exposes a +string+ reader (for example another Chars),
  # the value returned by that reader is wrapped instead of +str+.
  def initialize(str)
    @string = str.respond_to?(:string) ? str.string : str
  end

  # Returns -1, 0 or +1 depending on whether the Chars object is to be sorted before, equal or after the
  # object on the right side of the operation. It accepts any object that implements +to_s+. See String.<=>
  # for more details.
  def <=>(other)
    @string <=> other.to_s
  end

  # Works just like String#split, with the exception that the items in the resulting list are Chars
  # instances instead of String. This makes chaining methods easier.
  def split(*args)
    # Wrapped with Chars.new rather than String#chars: Ruby's own String#chars (1.8.7 and later) returns
    # the individual characters, not a Chars proxy.
    @string.split(*args).map { |piece| Chars.new(piece) }
  end

  # Gsub works exactly the same as gsub on a normal string, but the result is wrapped in a Chars object.
  def gsub(*a, &b)
    Chars.new(@string.gsub(*a, &b))
  end

  # Like String.=~ only the match position is passed through the handler's +translate_offset+, so it is
  # reported as a character offset (in codepoints) instead of a byte offset.
  def =~(other)
    handler.translate_offset(@string, @string =~ other)
  end

  # Try to forward all undefined methods to the handler, when a method is not defined on the handler, send it to
  # the contained string. Method_missing is also responsible for making the bang! methods destructive.
  #
  # String results are wrapped in a new Chars object so calls can be chained; every other result is
  # returned untouched. A bang method returns nil when it did not change the string.
  #
  # If the handler raises Handlers::EncodingError the contained string is cleaned with the handler's
  # +tidy_bytes+ and the call is attempted once more. A second failure is re-raised to the caller
  # instead of retrying forever.
  def method_missing(m, *a, &b)
    tidied = false
    begin
      # Simulate methods with a ! at the end because we can't touch the enclosed string from the handlers.
      bang_target = (m.to_s =~ /\A(.*)!\z/) ? $1 : nil
      if bang_target && handler.respond_to?(bang_target)
        result = handler.send(bang_target, @string, *a, &b)
        if result == @string
          # Same convention as String's own bang methods: nil signals "nothing changed".
          result = nil
        else
          @string.replace result
        end
      elsif handler.respond_to?(m)
        result = handler.send(m, @string, *a, &b)
      else
        result = @string.send(m, *a, &b)
      end
    rescue Handlers::EncodingError
      # Only one repair attempt: if tidying did not help, retrying again cannot succeed either.
      raise if tidied
      tidied = true
      @string.replace handler.tidy_bytes(@string)
      retry
    end

    result.kind_of?(String) ? Chars.new(result) : result
  end

  # Set the handler class for the Char objects.
  def self.handler=(klass)
    @@handler = klass
  end

  # Returns the proper handler for the contained string depending on $KCODE and the contained string. The
  # configured handler is only used when +utf8_pragma?+ holds; otherwise everything goes to the
  # PassthruHandler. This method is used internally to always redirect messages to the proper classes
  # depending on the context.
  def handler
    utf8_pragma? ? @@handler : ActiveSupport::Multibyte::Handlers::PassthruHandler
  end

  private

    # +utf8_pragma?+ checks if it can send this string to the handlers. It makes sure @string isn't nil and
    # $KCODE is set to 'UTF8'.
    def utf8_pragma?
      !@string.nil? && ($KCODE == 'UTF8')
    end
end
| 126 | end |
| 127 | |
| 128 | # When we can load the utf8proc library, override normalization with the faster methods |
# Pick the handler once: the utf8proc-backed one when both of its files load, otherwise the
# plain UTF8Handler. A LoadError from either require triggers the fallback.
selected_handler =
  begin
    require 'utf8proc_native'
    require 'active_support/multibyte/handlers/utf8_handler_proc'
    ActiveSupport::Multibyte::Handlers::UTF8HandlerProc
  rescue LoadError
    ActiveSupport::Multibyte::Handlers::UTF8Handler
  end
ActiveSupport::Multibyte::Chars.handler = selected_handler
Note: See TracBrowser for help on using the browser.