|
| 1 | +require 'benchmark/ips' |
| 2 | + |
| | +# Stand-in for the hook mechanism described in the notes after __END__: |
| | +# captures the caller's block as `block` and runs it `n` times, each time via |
| | +# `instance_exec` on the current `self`. |
| | +# |
| | +# n     - number of times to execute the block. |
| | +# block - the block to execute; passed to `instance_exec` with no arguments. |
| 3 | +def before_n_times(n, &block) |
| 4 | + n.times { instance_exec(&block) } |
| 5 | +end |
| 6 | + |
| | +# Benchmark variant: takes the caller's block implicitly (there is no `&block` |
| | +# parameter) and invokes it with `yield` from inside the block handed to |
| | +# `before_n_times`. |
| | +# |
| | +# n - number of times the caller's block is invoked. |
| 7 | +def yield_n_times(n) |
| 8 | + before_n_times(n) { yield } |
| 9 | +end |
| 10 | + |
| | +# Benchmark variant: declares `&block`, so the caller's block is captured, but |
| | +# still invokes it with `yield`; the `block` variable itself is never read. |
| | +# NOTE(review): the unused parameter looks deliberate -- it appears to isolate |
| | +# the cost of capturing the block from the cost of invoking it. Do not remove. |
| | +# |
| | +# n     - number of times the caller's block is invoked. |
| | +# block - captured but unused; see note above. |
| 11 | +def capture_block_and_yield_n_times(n, &block) |
| 12 | + before_n_times(n) { yield } |
| 13 | +end |
| 14 | + |
| | +# Benchmark variant: captures the caller's block as `block` and invokes it with |
| | +# `block.call` from inside the block handed to `before_n_times`. |
| | +# |
| | +# n     - number of times the captured block is called. |
| | +# block - the block to call. |
| 15 | +def capture_block_and_call_n_times(n, &block) |
| 16 | + before_n_times(n) { block.call } |
| 17 | +end |
| 18 | + |
| | +# Driver: for each invocation count, print a header and run one |
| | +# `Benchmark.ips` comparison of the three variants defined above. |
| | +# Every variant is given an empty block (`{ }`), so the timed work is the block |
| | +# invocation machinery rather than anything the block does. |
| 19 | +[10, 25, 50, 100, 1000, 10000].each do |count| |
| 20 | + puts "\n\nInvoking the block #{count} times\n" |
| 21 | + |
| 22 | + Benchmark.ips do |x| |
| | +# Implicit block, invoked with `yield`. |
| 23 | + x.report("Yield #{count} times ") do |
| 24 | + yield_n_times(count) { } |
| 25 | + end |
| 26 | + |
| | +# Block captured via `&block`, but still invoked with `yield`. |
| 27 | + x.report("Capture block and yield #{count} times") do |
| 28 | + capture_block_and_yield_n_times(count) { } |
| 29 | + end |
| 30 | + |
| | +# Block captured via `&block` and invoked with `block.call`. |
| 31 | + x.report("Capture block and call #{count} times ") do |
| 32 | + capture_block_and_call_n_times(count) { } |
| 33 | + end |
| 34 | + end |
| 35 | +end |
| 36 | + |
| 37 | +__END__ |
| 38 | + |
| 39 | +This attempts to measure the performance of how `routes` works in RSpec. It's |
| 40 | +actually a method which delegates to `before`. RSpec executes `before` hooks by |
| 41 | +capturing the block and then performing an `instance_exec` on it later in the |
| 42 | +example context. |
| 43 | + |
| 44 | +rspec-core has already performed [many related benchmarks about |
| 45 | +this](https://github.com/rspec/rspec-core/tree/master/benchmarks): |
| 46 | + |
| 47 | +- [call vs yield](https://github.com/rspec/rspec-core/blob/master/benchmarks/call_v_yield.rb) |
| 48 | +- [capture block vs yield](https://github.com/rspec/rspec-core/blob/master/benchmarks/capture_block_vs_yield.rb) |
| 49 | +- [flat map vs inject](https://github.com/rspec/rspec-core/blob/master/benchmarks/flat_map_vs_inject.rb) |
| 50 | + |
| 51 | +The results are very interesting: |
| 52 | + |
| 53 | +> This benchmark demonstrates that capturing a block (e.g. `&block`) has |
| 54 | +> a high constant cost, taking about 5x longer than a single `yield` |
| 55 | +> (even if the block is never used!). |
| 56 | +> |
| 57 | +> However, forwarding a captured block can be faster than using `yield` |
| 58 | +> if the block is used many times (the breakeven point is at about 20-25 |
| 59 | +> invocations), so it appears that the per-invocation cost of `yield` |
| 60 | +> is higher than that of a captured-and-forwarded block. |
| 61 | +> |
| 62 | +> Note that there is no circumstance where using `block.call` is faster. |
| 63 | +> |
| 64 | +> See also `flat_map_vs_inject.rb`, which appears to contradict these |
| 65 | +> results a little bit. |
| 66 | +> |
| 67 | +> -- https://github.com/rspec/rspec-core/blob/master/benchmarks/capture_block_vs_yield.rb#L83-L95 |
| 68 | + |
| 69 | +and |
| 70 | + |
| 71 | +> Surprisingly, `flat_map(&block)` appears to be faster than |
| 72 | +> `flat_map { yield }` in spite of the fact that our array here |
| 73 | +> is smaller than the break-even point of 20-25 measured in the |
| 74 | +> `capture_block_vs_yield.rb` benchmark. In fact, the forwarded-block |
| 75 | +> version remains faster in my benchmarks here no matter how small |
| 76 | +> I shrink the `words` array. I'm not sure why! |
| 77 | +> |
| 78 | +> -- https://github.com/rspec/rspec-core/blob/master/benchmarks/flat_map_vs_inject.rb#L37-L42 |
| 79 | + |
| 80 | +This seems to show that the error margin is enough to negate any benefit from |
| 81 | +capturing the block initially. It also shows that not capturing the block is |
| 82 | +still faster at low rates of calling. If this holds for your system, I think |
| 83 | +this PR is good as is: we won't need to capture the block in the `routes` |
| 84 | +method and can keep using `yield`. |
| 85 | + |
| 86 | +My results using Ruby 2.2.0: |
| 87 | + |
| 88 | +Invoking the block 10 times |
| 89 | +Calculating ------------------------------------- |
| 90 | +Yield 10 times |
| 91 | + 13.127k i/100ms |
| 92 | +Capture block and yield 10 times |
| 93 | + 12.975k i/100ms |
| 94 | +Capture block and call 10 times |
| 95 | + 11.524k i/100ms |
| 96 | +------------------------------------------------- |
| 97 | +Yield 10 times |
| 98 | + 165.030k (± 5.7%) i/s - 827.001k |
| 99 | +Capture block and yield 10 times |
| 100 | + 163.866k (± 5.9%) i/s - 817.425k |
| 101 | +Capture block and call 10 times |
| 102 | + 137.892k (± 7.3%) i/s - 691.440k |
| 103 | + |
| 104 | + |
| 105 | +Invoking the block 25 times |
| 106 | +Calculating ------------------------------------- |
| 107 | +Yield 25 times |
| 108 | + 7.305k i/100ms |
| 109 | +Capture block and yield 25 times |
| 110 | + 7.314k i/100ms |
| 111 | +Capture block and call 25 times |
| 112 | + 6.047k i/100ms |
| 113 | +------------------------------------------------- |
| 114 | +Yield 25 times |
| 115 | + 84.167k (± 5.6%) i/s - 423.690k |
| 116 | +Capture block and yield 25 times |
| 117 | + 82.110k (± 6.4%) i/s - 409.584k |
| 118 | +Capture block and call 25 times |
| 119 | + 67.144k (± 6.2%) i/s - 338.632k |
| 120 | + |
| 121 | + |
| 122 | +Invoking the block 50 times |
| 123 | +Calculating ------------------------------------- |
| 124 | +Yield 50 times |
| 125 | + 4.211k i/100ms |
| 126 | +Capture block and yield 50 times |
| 127 | + 4.181k i/100ms |
| 128 | +Capture block and call 50 times |
| 129 | + 3.410k i/100ms |
| 130 | +------------------------------------------------- |
| 131 | +Yield 50 times |
| 132 | + 45.223k (± 5.0%) i/s - 227.394k |
| 133 | +Capture block and yield 50 times |
| 134 | + 45.253k (± 4.9%) i/s - 225.774k |
| 135 | +Capture block and call 50 times |
| 136 | + 36.181k (± 5.7%) i/s - 180.730k |
| 137 | + |
| 138 | + |
| 139 | +Invoking the block 100 times |
| 140 | +Calculating ------------------------------------- |
| 141 | +Yield 100 times |
| 142 | + 2.356k i/100ms |
| 143 | +Capture block and yield 100 times |
| 144 | + 2.305k i/100ms |
| 145 | +Capture block and call 100 times |
| 146 | + 1.842k i/100ms |
| 147 | +------------------------------------------------- |
| 148 | +Yield 100 times |
| 149 | + 23.677k (± 7.1%) i/s - 117.800k |
| 150 | +Capture block and yield 100 times |
| 151 | + 24.039k (± 4.7%) i/s - 122.165k |
| 152 | +Capture block and call 100 times |
| 153 | + 18.888k (± 6.6%) i/s - 95.784k |
| 154 | + |
| 155 | + |
| 156 | +Invoking the block 1000 times |
| 157 | +Calculating ------------------------------------- |
| 158 | +Yield 1000 times |
| 159 | + 244.000 i/100ms |
| 160 | +Capture block and yield 1000 times |
| 161 | + 245.000 i/100ms |
| 162 | +Capture block and call 1000 times |
| 163 | + 192.000 i/100ms |
| 164 | +------------------------------------------------- |
| 165 | +Yield 1000 times |
| 166 | + 2.540k (± 4.3%) i/s - 12.688k |
| 167 | +Capture block and yield 1000 times |
| 168 | + 2.499k (± 5.6%) i/s - 12.495k |
| 169 | +Capture block and call 1000 times |
| 170 | + 1.975k (± 5.1%) i/s - 9.984k |
| 171 | + |
| 172 | + |
| 173 | +Invoking the block 10000 times |
| 174 | +Calculating ------------------------------------- |
| 175 | +Yield 10000 times |
| 176 | + 24.000 i/100ms |
| 177 | +Capture block and yield 10000 times |
| 178 | + 24.000 i/100ms |
| 179 | +Capture block and call 10000 times |
| 180 | + 19.000 i/100ms |
| 181 | +------------------------------------------------- |
| 182 | +Yield 10000 times |
| 183 | + 232.923 (±15.5%) i/s - 1.128k |
| 184 | +Capture block and yield 10000 times |
| 185 | + 212.504 (±21.6%) i/s - 936.000 |
| 186 | +Capture block and call 10000 times |
| 187 | + 184.090 (±10.3%) i/s - 912.000 |
0 commit comments