Changeset 8213
- Timestamp:
- 11/26/07 03:45:54 (1 year ago)
- Files:
-
- trunk/actionpack/CHANGELOG (modified) (1 diff)
- trunk/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb (modified) (1 diff)
- trunk/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb (added)
- trunk/actionpack/lib/action_view/helpers/sanitize_helper.rb (modified) (11 diffs)
- trunk/actionpack/test/controller/html-scanner/sanitizer_test.rb (added)
- trunk/actionpack/test/template/sanitize_helper_test.rb (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/actionpack/CHANGELOG
r8211 r8213 1 1 *SVN* 2 3 * Refactor sanitizer helpers into HTML classes and make it easy to swap them out with custom implementations. Closes #10129. [rick] 2 4 3 5 * Add deprecation for old subtemplate syntax for ActionMailer templates, use render :partial [rick] trunk/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
r7528 r8213 2 2 require 'html/node' 3 3 require 'html/selector' 4 require 'html/sanitizer' 4 5 5 6 module HTML #:nodoc: trunk/actionpack/lib/action_view/helpers/sanitize_helper.rb
r7825 r8213 50 50 # 51 51 def sanitize(html, options = {}) 52 return html if html.blank? || !html.include?('<') 53 54 attrs = options[:attributes] || sanitized_allowed_attributes 55 tags = options[:tags] || sanitized_allowed_tags 56 57 returning [] do |new_text| 58 tokenizer = HTML::Tokenizer.new(html) 59 parent = [] 60 61 while token = tokenizer.next 62 node = HTML::Node.parse(nil, 0, 0, token, false) 63 64 new_text << case node 65 when HTML::Tag 66 if node.closing == :close 67 parent.shift 68 else 69 parent.unshift node.name 70 end 71 72 node.attributes.keys.each do |attr_name| 73 value = node.attributes[attr_name].to_s 74 75 if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value) 76 node.attributes.delete(attr_name) 77 else 78 node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value) 79 end 80 end if node.attributes 81 82 tags.include?(node.name) ? node : nil 83 else 84 sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/</, "<") 85 end 86 end 87 end.join 52 self.class.white_list_sanitizer.sanitize(html, options) 88 53 end 89 54 90 55 # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute 91 56 def sanitize_css(style) 92 # disallow urls 93 style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') 94 95 # gauntlet 96 if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ || 97 style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/ 98 return '' 99 end 100 101 returning [] do |clean| 102 style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| 103 if sanitized_allowed_css_properties.include?(prop.downcase) 104 clean << prop + ': ' + val + ';' 105 elsif sanitized_shorthand_css_properties.include?(prop.split('-')[0].downcase) 106 unless val.split().any? 
do |keyword| 107 !sanitized_allowed_css_keywords.include?(keyword) && 108 keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ 109 end 110 clean << prop + ': ' + val + ';' 111 end 112 end 113 end 114 end.join(' ') 57 self.class.white_list_sanitizer.sanitize_css(style) 115 58 end 116 59 … … 130 73 # # => Welcome to my website! 131 74 def strip_tags(html) 132 return html if html.blank? || !html.index("<") 133 tokenizer = HTML::Tokenizer.new(html) 134 135 text = returning [] do |text| 136 while token = tokenizer.next 137 node = HTML::Node.parse(nil, 0, 0, token, false) 138 # result is only the content of any Text nodes 139 text << node.to_s if node.class == HTML::Text 140 end 141 end 142 143 # strip any comments, and if they have a newline at the end (ie. line with 144 # only a comment) strip that too 145 result = text.join.gsub(/<!--(.*?)-->[\n]?/m, "") 146 147 # Recurse - handle all dirty nested tags 148 result == html ? result : strip_tags(result) 75 self.class.full_sanitizer.sanitize(html) 149 76 end 150 77 … … 161 88 # # => Blog: Visit 162 89 def strip_links(html) 163 if !html.blank? && (html.index("<a") || html.index("<href")) && html.index(">") 164 tokenizer = HTML::Tokenizer.new(html) 165 result = returning [] do |result| 166 while token = tokenizer.next 167 node = HTML::Node.parse(nil, 0, 0, token, false) 168 result << node.to_s unless node.is_a?(HTML::Tag) && ["a", "href"].include?(node.name) 169 end 170 end.join 171 result == html ? result : strip_links(result) # Recurse - handle all dirty nested links 172 else 173 html 174 end 175 end 176 177 # A regular expression of the valid characters used to separate protocols like 178 # the ':' in 'http://foo.com' 179 @@sanitized_protocol_separator = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/ 180 mattr_accessor :sanitized_protocol_separator, :instance_writer => false 181 182 # Specifies a Set of HTML attributes that can have URIs. 
183 @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) 184 mattr_reader :sanitized_uri_attributes 185 186 # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed 187 # to just escaping harmless tags like <font> 188 @@sanitized_bad_tags = Set.new(%w(script)) 189 mattr_reader :sanitized_bad_tags 190 191 # Specifies the default Set of tags that the #sanitize helper will allow unscathed. 192 @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub 193 sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr 194 acronym a img blockquote del ins fieldset legend)) 195 mattr_reader :sanitized_allowed_tags 196 197 # Specifies the default Set of html attributes that the #sanitize helper will leave 198 # in the allowed tag. 199 @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) 200 mattr_reader :sanitized_allowed_attributes 201 202 # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. 203 @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse 204 border-color border-left-color border-right-color border-top-color clear color cursor direction display 205 elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height 206 overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation 207 speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space 208 width)) 209 mattr_reader :sanitized_allowed_css_properties 210 211 # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. 
212 @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center 213 collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal 214 nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) 215 mattr_reader :sanitized_allowed_css_keywords 216 217 # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. 218 @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding)) 219 mattr_reader :sanitized_shorthand_css_properties 220 221 # Specifies the default Set of protocols that the #sanitize helper will leave in 222 # protocol attributes. 223 @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) 224 mattr_reader :sanitized_allowed_protocols 90 self.class.link_sanitizer.sanitize(html) 91 end 225 92 226 93 module ClassMethods #:nodoc: 227 94 def self.extended(base) 228 95 class << base 96 attr_writer :full_sanitizer, :link_sanitizer, :white_list_sanitizer 97 229 98 # we want these to be class methods on ActionView::Base, they'll get mattr_readers for these below. 230 [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags,99 helper_def = [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags, 231 100 :sanitized_allowed_attributes, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords, 232 :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].each do |prop| 233 delegate prop, :to => SanitizeHelper 234 end 101 :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=].collect! 
do |prop| 102 prop = prop.to_s 103 "def #{prop}(#{:value if prop =~ /=$/}) white_list_sanitizer.#{prop.sub /sanitized_/, ''} #{:value if prop =~ /=$/} end" 104 end.join("\n") 105 eval helper_def 235 106 end 236 107 end 108 109 # Gets the HTML::FullSanitizer instance used by strip_tags. Replace with 110 # any object that responds to #sanitize 111 # 112 # Rails::Initializer.run do |config| 113 # config.action_view.full_sanitizer = MySpecialSanitizer.new 114 # end 115 # 116 def full_sanitizer 117 @full_sanitizer ||= HTML::FullSanitizer.new 118 end 119 120 # Gets the HTML::LinkSanitizer instance used by strip_links. Replace with 121 # any object that responds to #sanitize 122 # 123 # Rails::Initializer.run do |config| 124 # config.action_view.link_sanitizer = MySpecialSanitizer.new 125 # end 126 # 127 def link_sanitizer 128 @link_sanitizer ||= HTML::LinkSanitizer.new 129 end 130 131 # Gets the HTML::WhiteListSanitizer instance used by sanitize and sanitize_css. 132 # Replace with any object that responds to #sanitize 133 # 134 # Rails::Initializer.run do |config| 135 # config.action_view.white_list_sanitizer = MySpecialSanitizer.new 136 # end 137 # 138 def white_list_sanitizer 139 @white_list_sanitizer ||= HTML::WhiteListSanitizer.new 140 end 237 141 238 142 # Adds valid HTML attributes that the #sanitize helper checks for URIs. … … 243 147 # 244 148 def sanitized_uri_attributes=(attributes) 245 H elpers::SanitizeHelper.sanitized_uri_attributes.merge(attributes)149 HTML::WhiteListSanitizer.uri_attributes.merge(attributes) 246 150 end 247 151 … … 253 157 # 254 158 def sanitized_bad_tags=(attributes) 255 H elpers::SanitizeHelper.sanitized_bad_tags.merge(attributes)159 HTML::WhiteListSanitizer.bad_tags.merge(attributes) 256 160 end 257 161 # Adds to the Set of allowed tags for the #sanitize helper. 
… … 262 166 # 263 167 def sanitized_allowed_tags=(attributes) 264 H elpers::SanitizeHelper.sanitized_allowed_tags.merge(attributes)168 HTML::WhiteListSanitizer.allowed_tags.merge(attributes) 265 169 end 266 170 … … 272 176 # 273 177 def sanitized_allowed_attributes=(attributes) 274 H elpers::SanitizeHelper.sanitized_allowed_attributes.merge(attributes)178 HTML::WhiteListSanitizer.allowed_attributes.merge(attributes) 275 179 end 276 180 … … 282 186 # 283 187 def sanitized_allowed_css_properties=(attributes) 284 H elpers::SanitizeHelper.sanitized_allowed_css_properties.merge(attributes)188 HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes) 285 189 end 286 190 … … 292 196 # 293 197 def sanitized_allowed_css_keywords=(attributes) 294 H elpers::SanitizeHelper.sanitized_allowed_css_keywords.merge(attributes)198 HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes) 295 199 end 296 200 … … 302 206 # 303 207 def sanitized_shorthand_css_properties=(attributes) 304 H elpers::SanitizeHelper.sanitized_shorthand_css_properties.merge(attributes)208 HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes) 305 209 end 306 210 … … 312 216 # 313 217 def sanitized_allowed_protocols=(attributes) 314 Helpers::SanitizeHelper.sanitized_allowed_protocols.merge(attributes) 315 end 316 end 317 318 private 319 def contains_bad_protocols?(attr_name, value) 320 sanitized_uri_attributes.include?(attr_name) && 321 (value =~ /(^[^\/:]*):|(�*58)|(p)|(%|%)3A/ && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first)) 322 end 218 HTML::WhiteListSanitizer.allowed_protocols.merge(attributes) 219 end 220 end 323 221 end 324 222 end trunk/actionpack/test/template/sanitize_helper_test.rb
r7825 r8213 2 2 require "#{File.dirname(__FILE__)}/../testing_sandbox" 3 3 4 # The exhaustive tests are in test/controller/html-scanner/sanitizer_test.rb. 5 # This tests that the helpers hook up correctly to the sanitizer classes. 4 6 class SanitizeHelperTest < Test::Unit::TestCase 5 7 include ActionView::Helpers::SanitizeHelper … … 22 24 end 23 25 24 def test_sanitize_plaintext25 raw = "<plaintext><span>foo</span></plaintext>"26 assert_sanitized raw, "<span>foo</span>"27 end28 29 def test_sanitize_script30 raw = "a b c<script language=\"Javascript\">blah blah blah</script>d e f"31 assert_sanitized raw, "a b cd e f"32 end33 34 def test_sanitize_js_handlers35 raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}36 assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>}37 end38 39 def test_sanitize_javascript_href40 raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}41 assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}42 end43 44 def test_sanitize_image_src45 raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}46 assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}47 end48 49 ActionView::Helpers::SanitizeHelper.sanitized_allowed_tags.each do |tag_name|50 define_method "test_should_allow_#{tag_name}_tag" do51 assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)52 end53 end54 55 def test_should_allow_anchors56 assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>)57 end58 59 # RFC 3986, sec 4.260 def test_allow_colons_in_path_component61 assert_sanitized("<a href=\"./this:that\">foo</a>")62 end63 64 %w(src width height alt).each do |img_attr|65 
define_method "test_should_allow_image_#{img_attr}_attribute" do66 assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)67 end68 end69 70 def test_should_handle_non_html71 assert_sanitized 'abc'72 end73 74 def test_should_handle_blank_text75 assert_sanitized nil76 assert_sanitized ''77 end78 79 def test_should_allow_custom_tags80 text = "<u>foo</u>"81 assert_equal(text, sanitize(text, :tags => %w(u)))82 end83 84 def test_should_allow_only_custom_tags85 text = "<u>foo</u> with <i>bar</i>"86 assert_equal("<u>foo</u> with bar", sanitize(text, :tags => %w(u)))87 end88 89 def test_should_allow_custom_tags_with_attributes90 text = %(<fieldset foo="bar">foo</fieldset>)91 assert_equal(text, sanitize(text, :attributes => ['foo']))92 end93 94 [%w(img src), %w(a href)].each do |(tag, attr)|95 define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do96 assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)97 end98 end99 100 def test_should_flag_bad_protocols101 %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto|102 assert contains_bad_protocols?('src', "#{proto}://bad")103 end104 end105 106 def test_should_accept_good_protocols107 sanitized_allowed_protocols.each do |proto|108 assert !contains_bad_protocols?('src', "#{proto}://good")109 end110 end111 112 def test_should_reject_hex_codes_in_protocol113 assert contains_bad_protocols?('src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29")114 assert_sanitized %(<a href="%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29">1</a>), "<a>1</a>"115 end116 117 def test_should_block_script_tag118 assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""119 end120 121 [%(<IMG SRC="javascript:alert('XSS');">),122 %(<IMG 
SRC=javascript:alert('XSS')>),123 %(<IMG SRC=JaVaScRiPt:alert('XSS')>),124 %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">),125 %(<IMG SRC=javascript:alert("XSS")>),126 %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),127 %(<IMG SRC=javascript:alert('XSS')>),128 %(<IMG SRC=javascript:alert('XSS')>),129 %(<IMG SRC=javascript:alert('XSS')>),130 %(<IMG SRC="jav\tascript:alert('XSS');">),131 %(<IMG SRC="jav	ascript:alert('XSS');">),132 %(<IMG SRC="jav
ascript:alert('XSS');">),133 %(<IMG SRC="jav
ascript:alert('XSS');">),134 %(<IMG SRC="  javascript:alert('XSS');">),135 %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i|136 define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do137 assert_sanitized img_hack, "<img>"138 end139 end140 141 def test_should_sanitize_tag_broken_up_by_null142 assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")"143 end144 145 def test_should_sanitize_invalid_script_tag146 assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""147 end148 149 def test_should_sanitize_script_tag_with_multiple_open_brackets150 assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "<"151 assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(<a)152 end153 154 def test_should_sanitize_unclosed_script155 assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>"156 end157 158 def test_should_sanitize_half_open_scripts159 assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"160 end161 162 def test_should_not_fall_for_ridiculous_hack163 img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)164 assert_sanitized img_hack, "<img>"165 end166 167 def test_should_sanitize_attributes168 assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'><script>alert()</script>">blah</span>)169 end170 171 26 def test_should_sanitize_illegal_style_properties 172 27 raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) 173 28 expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;) 174 29 assert_equal expected, sanitize_css(raw) 175 end176 177 def 
test_should_sanitize_xul_style_attributes178 raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))179 assert_equal '', sanitize_css(raw)180 end181 182 def test_should_sanitize_invalid_tag_names183 assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")184 end185 186 def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags187 assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")188 end189 190 def test_should_sanitize_invalid_tag_names_in_single_tags191 assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")192 end193 194 def test_should_sanitize_img_dynsrc_lowsrc195 assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")196 end197 198 def test_should_sanitize_div_background_image_unicode_encoded199 raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029)200 assert_equal '', sanitize_css(raw)201 end202 203 def test_should_sanitize_div_style_expression204 raw = %(width: expression(alert('XSS'));)205 assert_equal '', sanitize_css(raw)206 end207 208 def test_should_sanitize_style_attribute209 raw = %(<div style="display:block; background:url(http://rubyonrails.com); background-image: url(rubyonrails)">foo</div>)210 assert_equal %(<div style="display: block; background: ; background-image: ;">foo</div>), sanitize(raw, :attributes => 'style')211 end212 213 def test_should_sanitize_img_vbscript214 assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'215 30 end 216 31