今日のRuby

記述統計の基本的な関数群

# Basic descriptive-statistics helpers, callable as module functions,
# e.g. Statistics.mean(1, 2, 3) or Statistics.mean([1, 2, 3]).
#
# Every method accepts its samples either as separate arguments or as
# (possibly nested) arrays; the arguments are flattened before use.
module Statistics

  class << self

    # Arithmetic mean of the samples.
    #
    # Returns a Float; NaN when no samples are given (0.0 / 0).
    def mean(*args)
      values = args.flatten
      values.inject(0) { |sum, x| sum + x }.to_f / values.length
    end

    # Median of the samples.
    #
    # For an odd number of samples this is the middle element of the
    # sorted list; for an even number it is the average (as a Float) of
    # the two middle elements. Returns nil when no samples are given.
    def median(*args)
      sorted = args.flatten.sort
      return nil if sorted.empty?

      mid = sorted.size / 2
      if sorted.size.even?
        (sorted[mid - 1] + sorted[mid]) / 2.0
      else
        # Integer division already lands on the middle index for odd
        # sizes (e.g. size 5 -> index 2), so no "- 1" adjustment here.
        sorted[mid]
      end
    end

    # Unbiased sample variance (divides by n - 1).
    #
    # Returns a Float; NaN when fewer than two samples are given, since
    # the sample variance is undefined in that case.
    def variant(*args)
      values = args.flatten
      return Float::NAN if values.length < 2

      center = mean(values)
      squared_error = values.inject(0.0) { |sum, x| sum + (x - center) ** 2 }
      squared_error / (values.length - 1)
    end

    # Sample standard deviation: the square root of #variant.
    #
    # Returns a Float; NaN when fewer than two samples are given.
    def standard_deviasion(*args)
      Math.sqrt(variant(args))
    end

    # Most frequent sample value.
    #
    # When several values share the highest count, the one visited first
    # while iterating the frequency table wins (on Rubies with
    # insertion-ordered hashes, that is the value that appeared first in
    # the input). Returns nil when no samples are given.
    def mode(*args)
      counts = Hash.new(0)
      args.flatten.each { |x| counts[x] += 1 }

      best_value = nil
      best_count = 0
      counts.each do |value, count|
        # Compare occurrence counts with each other, never a count with
        # a sample value.
        if count > best_count
          best_value = value
          best_count = count
        end
      end
      best_value
    end

    # Correctly spelled aliases; the original names are kept so existing
    # callers continue to work.
    alias variance variant
    alias standard_deviation standard_deviasion

  end

end

Medianの定義がわかっていなくて、ちょっとハマってしまった。

テストケースはこんな感じ

require 'test/unit'
require File.join(File.dirname(__FILE__), "statistics")

# Checks the Statistics module against reference values for a fixed
# 100-element sample.
class StatisticsTest < Test::Unit::TestCase

  # Allowed absolute error for floating-point results. The reference
  # values were produced in Excel and are only given to limited
  # precision, so they cannot be compared for exact equality.
  DELTA = 0.000001

  def setup
    @data = [16, 26, 29, 26, 9, 0, 15, 21, 21, 17, 23, 23, 9,
             10, 14, 4, 23, 7, 1, 10, 4, 12, 9, 26, 4, 21, 9,
             20, 26, 29, 6, 27, 28, 21, 19, 11, 18, 21, 17, 4,
             29, 10, 24, 21, 22, 29, 3, 20, 3, 27, 25, 23, 26,
             20, 4, 19, 16, 23, 23, 3, 9, 29, 27, 6, 7, 10, 21,
             23, 8, 2, 23, 22, 17, 22, 6, 11, 10, 10, 5, 28, 8,
             12, 10, 19, 28, 1, 9, 0, 14, 19, 20, 15, 28, 12, 27,
             14, 20, 19, 8, 29]
  end

  def test_base
    # assert_in_delta bounds the error in both directions; a bare
    # "actual - expected < eps" would also accept results that are far
    # too small.
    assert_in_delta(16.14, Statistics.mean(@data), DELTA)
    assert_in_delta(74.1620202, Statistics.variant(@data), DELTA)
    assert_in_delta(8.61173751, Statistics.standard_deviasion(@data), DELTA)
    assert_equal(23, Statistics.mode(@data))
    assert_equal(17.5, Statistics.median(@data))
  end

end

variantとsdのところが誤差を許容しているのは、テストに使っている値をExcelで作ったから。
Excelデフォルトの精度と、Rubyのそれとの差を埋めるのが面倒だったので、
こういう書き方になっております。