Ruby on Rails | Screencasts | Download | Documentation | Weblog | Community | Source

Ticket #9487: parallel_calculations_and_distinct_records_with_tests.diff

File parallel_calculations_and_distinct_records_with_tests.diff, 10.2 kB (added by jcoglan, 5 months ago)
  • activerecord/lib/active_record/calculations.rb

    old new  
    11module ActiveRecord 
    22  module Calculations #:nodoc: 
    3     CALCULATIONS_OPTIONS = [:conditions, :joins, :order, :select, :group, :having, :distinct, :limit, :offset, :include] 
     3    CALCULATIONS_OPTIONS = [:conditions, :joins, :order, :select, :group, :having, :distinct, :distinct_records, :limit, :offset, :include] 
    44    def self.included(base) 
    55      base.extend(ClassMethods) 
    66    end 
     
    105105      # * <tt>:select</tt> - By default, this is * as in SELECT * FROM, but can be changed if you for example want to do a join, but not 
    106106      #   include the joined columns. 
    107107      # * <tt>:distinct</tt> - Set this to true to make this a distinct calculation, such as SELECT COUNT(DISTINCT posts.id) ... 
     108      # * <tt>:distinct_records</tt> - Set this to true if you want to calculate over distinct records but the values you are summing are not 
     109      #   necessarily unique. You should use this if you are using <tt>:conditions</tt> based on tables included using <tt>:include</tt> or 
     110      #   <tt>:joins</tt>. The resulting LEFT OUTER JOIN statements can lead to duplicates of the base record in the result set, and using 
     111      #   this option avoids such problems. 
    108112      # 
    109113      # Examples: 
    110114      #   Person.calculate(:count, :all) # The same as Person.count 
    111115      #   Person.average(:age) # SELECT AVG(age) FROM people... 
    112116      #   Person.minimum(:age, :conditions => ['last_name != ?', 'Drake']) # Selects the minimum age for everyone with a last name other than 'Drake' 
    113117      #   Person.minimum(:age, :having => 'min(age) > 17', :group => :last_name) # Selects the minimum age for any family without any minors 
    114       def calculate(operation, column_name, options = {}) 
     118      # 
     119      # You can also execute several calculations using a single query to speed things along. To do this, pass a hash as the first argument, 
     120      # mapping each result name to an <tt>[operation, column]</tt> pair. The +column_name+ argument is omitted in this case. 
     121      # 
     122      # Examples: 
     123      # 
     124      #   Person.calculate(:how_many => [:count, '*'], :total_age => [:sum, :age]) 
     125      #   #=> {:how_many => 12, :total_age => 387} 
     126      #    
     127      #   Person.calculate({:how_many => [:count, '*'], :total_age => [:sum, :age]}, :conditions => ['age < ?', 30]) 
     128      #   #=> {:how_many => 7, :total_age => 94} 
     129      def calculate(operation, *args) 
     130        column_name = operation.is_a?(Hash) ? nil : args.shift 
     131        options = args.first || {} 
     132 
    115133        validate_calculation_options(operation, options) 
    116134        column_name     = options[:select] if options[:select] 
    117135        column_name     = '*' if column_name == :all 
     
    149167        end 
    150168 
    151169        def construct_calculation_sql(operation, column_name, options) #:nodoc: 
    152           operation = operation.to_s.downcase 
     170          operation = operation.to_s.downcase unless operation.is_a?(Hash) 
    153171          options = options.symbolize_keys 
    154172 
    155173          scope           = scope(:find) 
     
    167185            end 
    168186          end 
    169187 
    170           sql = "SELECT #{operation}(#{'DISTINCT ' if options[:distinct]}#{column_name}) AS #{aggregate_alias}" 
     188          if calculations = extract_query_information_from_operation(operation) 
     189            sql = "SELECT " + calculations.collect do |calc| 
     190              calculation_column_name = options[:distinct_records] ? calc[:distinct_alias] : calc[:column] 
     191              calculation_column_name = '*' if calc[:column] == '*' || calc[:column].blank? 
     192              "#{calc[:operation]}(#{'DISTINCT ' if options[:distinct]}#{calculation_column_name}) AS #{column_alias_for(calc[:operation], calc[:alias])}" 
     193            end.join(", ") 
     194          else 
     195            calculation_column_name = options[:distinct_records] ? 'calculation_column' : column_name 
     196            sql = "SELECT #{operation}(#{'DISTINCT ' if options[:distinct]}#{calculation_column_name}) AS #{aggregate_alias}" 
     197          end 
    171198 
    172199          # A (slower) workaround if we're using a backend, like sqlite, that doesn't support COUNT DISTINCT. 
    173200          sql = "SELECT COUNT(*) AS #{aggregate_alias}" if use_workaround 
    174201 
    175202          sql << ", #{options[:group_field]} AS #{options[:group_alias]}" if options[:group] 
    176           sql << " FROM (SELECT DISTINCT #{column_name}" if use_workaround 
     203          if options[:distinct_records] 
     204            sql << " FROM (SELECT DISTINCT " 
     205            if calculations 
     206              sql << "#{table_name}.#{primary_key}" 
     207              calculations.each do |calc| 
     208                sql << ", #{calc[:column]} AS #{calc[:distinct_alias]}" unless calc[:column] == '*' || calc[:column].blank? 
     209              end 
     210            else 
     211              select_columns = ["#{table_name}.#{primary_key}"] 
     212              select_columns << column_name unless (select_columns + [primary_key]).include?(column_name) 
     213              sql << "#{select_columns * ', '} AS #{calculation_column_name}" 
     214            end 
     215          else 
     216            sql << " FROM (SELECT DISTINCT #{column_name}" if use_workaround 
     217          end 
    177218          sql << " FROM #{connection.quote_table_name(table_name)} " 
    178219          if merged_includes.any? 
    179220            join_dependency = ActiveRecord::Associations::ClassMethods::JoinDependency.new(self, merged_includes, options[:joins]) 
     
    200241 
    201242          sql << " ORDER BY #{options[:order]} "       if options[:order] 
    202243          add_limit!(sql, options, scope) 
    203           sql << ')' if use_workaround 
     244          sql << ')' if use_workaround || options[:distinct_records] 
     245          sql << ' AS calculation_table' if options[:distinct_records] 
    204246          sql 
    205247        end 
    206248 
     249        def extract_query_information_from_operation(operation) 
     250          return nil unless operation.is_a?(Hash) 
     251          calculations = [] 
     252          operation.each do |key, value| 
     253            value = value.to_s.to_a unless value.is_a?(Array) 
     254            calculations << {:alias => key.to_s, :distinct_alias => "#{key}_#{value[0]}_alias", 
     255                :operation => value[0].to_s.downcase, :column => value[1].to_s} 
     256          end 
     257          calculations 
     258        end 
     259 
    207260        def execute_simple_calculation(operation, column_name, column, options) #:nodoc: 
    208           value = connection.select_value(construct_calculation_sql(operation, column_name, options)) 
    209           type_cast_calculated_value(value, column, operation) 
     261          results = connection.select_one(construct_calculation_sql(operation, column_name, options)) 
     262          if operation.is_a?(Hash) 
     263            calculation_results = {} 
     264            calculations = extract_query_information_from_operation(operation) 
     265            results.each do |key, value| 
     266              calculation = calculations.find { |calc| key == column_alias_for(calc[:operation], calc[:alias]) } 
     267              calculation_results[calculation[:alias].to_sym] = type_cast_calculated_value(value, column_for(calculation[:column]), calculation[:operation]) 
     268            end 
     269            calculation_results 
     270          else 
     271            type_cast_calculated_value(results.values.first, column, operation) 
     272          end 
    210273        end 
    211274 
    212275        def execute_grouped_calculation(operation, column_name, column, options) #:nodoc: 
  • activerecord/test/cases/calculations_test.rb

    old new  
    1515    assert_equal 318, Account.sum(:credit_limit) 
    1616  end 
    1717 
     18  def test_should_sum_field_without_repeated_records 
     19    assert_equal 7, Author.sum("#{Author.table_name}.id", :include => :posts) 
     20    assert_equal 3, Author.sum("#{Author.table_name}.id", :include => :posts, :distinct_records => true) 
     21    assert_equal 12, Post.sum(:author_id, :include => :comments) 
     22    assert_equal 7, Post.sum(:author_id, :include => :comments, :distinct_records => true) 
     23  end 
     24 
    1825  def test_should_average_field 
    1926    value = Account.average(:credit_limit) 
    2027    assert_kind_of Float, value 
     
    215222        # empty options are valid 
    216223        Company.send(:validate_calculation_options, func) 
    217224        # these options are valid for all calculations 
    218         [:select, :conditions, :joins, :order, :group, :having, :distinct].each do |opt| 
     225        [:select, :conditions, :joins, :order, :group, :having, :distinct, :distinct_records].each do |opt|  
    219226          Company.send(:validate_calculation_options, func, opt => true) 
    220227        end 
    221228      end 
     
    248255  def test_count_with_too_many_parameters_raises 
    249256    assert_raise(ArgumentError) { Account.count(1, 2, 3) } 
    250257  end 
     258 
     259  def test_parallel_calculations 
     260    account_calculations = Account.calculate(:how_many => :count, :total_credit => [:sum, :credit_limit], :max_limit => [:max, :credit_limit]) 
     261    assert_equal 6, account_calculations[:how_many] 
     262    assert_equal 318, account_calculations[:total_credit] 
     263    assert_equal 60, account_calculations[:max_limit] 
     264  end 
     265 
     266  def test_parallel_calculations_with_conditions 
     267    account_calculations = Account.calculate({:how_many => [:count, '*'], :total_credit => [:sum, :credit_limit]}, :conditions => {:credit_limit => 50}) 
     268    assert_equal 3, account_calculations[:how_many] 
     269    assert_equal 150, account_calculations[:total_credit] 
     270  end 
     271 
     272  def test_parallel_calculations_with_conditions_and_distinct 
     273    account_calculations = Account.calculate( 
     274        {:how_many => [:count, :credit_limit], :total_credit => [:sum, :credit_limit]}, 
     275        :conditions => {:credit_limit => 50}, 
     276        :distinct => true 
     277    ) 
     278    assert_equal 1, account_calculations[:how_many] 
     279    assert_equal 50, account_calculations[:total_credit] 
     280  end 
     281 
     282  def test_parallel_calculation_with_distinct_records 
     283    author_calculations = Author.calculate({:total_id => [:sum, "#{Author.table_name}.id"]}, :include => :posts) 
     284    distinct_author_calculations = Author.calculate({:total_id => [:sum, "#{Author.table_name}.id"]}, :include => :posts, :distinct_records => true) 
     285    assert_equal 7, author_calculations[:total_id] 
     286    assert_equal 3, distinct_author_calculations[:total_id] 
     287  end 
    251288end