Skip to content

Commit 9f69a44

Browse files
committed
image/draw: add RGBA64Image fast path
name               old time/op  new time/op  delta
GenericOver-4      15.0ms ± 1%   2.9ms ± 1%  -80.56%  (p=0.008 n=5+5)
GenericMaskOver-4  7.82ms ± 4%  1.69ms ± 2%  -78.38%  (p=0.008 n=5+5)
GenericSrc-4       6.13ms ± 3%  1.66ms ± 1%  -72.90%  (p=0.008 n=5+5)
GenericMaskSrc-4   11.5ms ± 1%   2.0ms ± 0%  -82.77%  (p=0.008 n=5+5)

Updates #44808.

Change-Id: I131cf6fad01708540390a8012d8f2a21e849fe9d
Reviewed-on: https://go-review.googlesource.com/c/go/+/340049
Reviewed-by: Dmitri Shuralyov <[email protected]>
Trust: Nigel Tao <[email protected]>
1 parent 065f380 commit 9f69a44

File tree

3 files changed

+272
-23
lines changed

3 files changed

+272
-23
lines changed

doc/go1.18.html

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,19 @@ <h3 id="minor_library_changes">Minor changes to the library</h3>
8383
TODO: complete this section
8484
</p>
8585

86+
<dl id="image/draw"><dt><a href="/pkg/image/draw/">image/draw</a></dt>
87+
<dd>
88+
<p><!-- CL 340049 -->
89+
The <code>Draw</code> and <code>DrawMask</code> fallback implementations
90+
(used when the arguments are not the most common image types) are now
91+
faster when those arguments implement the optional
92+
<a href="/pkg/image/draw/#RGBA64Image"><code>draw.RGBA64Image</code></a>
93+
and <a href="/pkg/image/#RGBA64Image"><code>image.RGBA64Image</code></a>
94+
interfaces that were added in Go 1.17.
95+
</p>
96+
</dd>
97+
</dl><!-- image/draw -->
98+
8699
<dl id="syscall"><dt><a href="/pkg/syscall/">syscall</a></dt>
87100
<dd>
88101
<p><!-- CL 336550 -->

src/image/draw/draw.go

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ func DrawMask(dst Image, r image.Rectangle, src image.Image, sp image.Point, mas
119119
return
120120
}
121121

122-
// Fast paths for special cases. If none of them apply, then we fall back to a general but slow implementation.
122+
// Fast paths for special cases. If none of them apply, then we fall back
123+
// to general but slower implementations.
123124
switch dst0 := dst.(type) {
124125
case *image.RGBA:
125126
if op == Over {
@@ -219,6 +220,84 @@ func DrawMask(dst Image, r image.Rectangle, src image.Image, sp image.Point, mas
219220
y0, y1, dy = y1-1, y0-1, -1
220221
}
221222

223+
// Try the draw.RGBA64Image and image.RGBA64Image interfaces, part of the
224+
// standard library since Go 1.17. These are like the draw.Image and
225+
// image.Image interfaces but they can avoid allocations from converting
226+
// concrete color types to the color.Color interface type.
227+
228+
if dst0, _ := dst.(RGBA64Image); dst0 != nil {
229+
if src0, _ := src.(image.RGBA64Image); src0 != nil {
230+
if mask == nil {
231+
sy := sp.Y + y0 - r.Min.Y
232+
my := mp.Y + y0 - r.Min.Y
233+
for y := y0; y != y1; y, sy, my = y+dy, sy+dy, my+dy {
234+
sx := sp.X + x0 - r.Min.X
235+
mx := mp.X + x0 - r.Min.X
236+
for x := x0; x != x1; x, sx, mx = x+dx, sx+dx, mx+dx {
237+
if op == Src {
238+
dst0.SetRGBA64(x, y, src0.RGBA64At(sx, sy))
239+
} else {
240+
srgba := src0.RGBA64At(sx, sy)
241+
a := m - uint32(srgba.A)
242+
drgba := dst0.RGBA64At(x, y)
243+
dst0.SetRGBA64(x, y, color.RGBA64{
244+
R: uint16((uint32(drgba.R)*a)/m) + srgba.R,
245+
G: uint16((uint32(drgba.G)*a)/m) + srgba.G,
246+
B: uint16((uint32(drgba.B)*a)/m) + srgba.B,
247+
A: uint16((uint32(drgba.A)*a)/m) + srgba.A,
248+
})
249+
}
250+
}
251+
}
252+
return
253+
254+
} else if mask0, _ := mask.(image.RGBA64Image); mask0 != nil {
255+
sy := sp.Y + y0 - r.Min.Y
256+
my := mp.Y + y0 - r.Min.Y
257+
for y := y0; y != y1; y, sy, my = y+dy, sy+dy, my+dy {
258+
sx := sp.X + x0 - r.Min.X
259+
mx := mp.X + x0 - r.Min.X
260+
for x := x0; x != x1; x, sx, mx = x+dx, sx+dx, mx+dx {
261+
ma := uint32(mask0.RGBA64At(mx, my).A)
262+
switch {
263+
case ma == 0:
264+
if op == Over {
265+
// No-op.
266+
} else {
267+
dst0.SetRGBA64(x, y, color.RGBA64{})
268+
}
269+
case ma == m && op == Src:
270+
dst0.SetRGBA64(x, y, src0.RGBA64At(sx, sy))
271+
default:
272+
srgba := src0.RGBA64At(sx, sy)
273+
if op == Over {
274+
drgba := dst0.RGBA64At(x, y)
275+
a := m - (uint32(srgba.A) * ma / m)
276+
dst0.SetRGBA64(x, y, color.RGBA64{
277+
R: uint16((uint32(drgba.R)*a + uint32(srgba.R)*ma) / m),
278+
G: uint16((uint32(drgba.G)*a + uint32(srgba.G)*ma) / m),
279+
B: uint16((uint32(drgba.B)*a + uint32(srgba.B)*ma) / m),
280+
A: uint16((uint32(drgba.A)*a + uint32(srgba.A)*ma) / m),
281+
})
282+
} else {
283+
dst0.SetRGBA64(x, y, color.RGBA64{
284+
R: uint16(uint32(srgba.R) * ma / m),
285+
G: uint16(uint32(srgba.G) * ma / m),
286+
B: uint16(uint32(srgba.B) * ma / m),
287+
A: uint16(uint32(srgba.A) * ma / m),
288+
})
289+
}
290+
}
291+
}
292+
}
293+
return
294+
}
295+
}
296+
}
297+
298+
// If none of the faster code paths above apply, use the draw.Image and
299+
// image.Image interfaces, part of the standard library since Go 1.0.
300+
222301
var out color.RGBA64
223302
sy := sp.Y + y0 - r.Min.Y
224303
my := mp.Y + y0 - r.Min.Y

src/image/draw/draw_test.go

Lines changed: 179 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,138 @@ import (
1313
"testing/quick"
1414
)
1515

16+
// slowestRGBA is a draw.Image like image.RGBA but it is a different type and
17+
// therefore does not trigger the draw.go fastest code paths.
18+
//
19+
// Unlike slowerRGBA, it does not implement the draw.RGBA64Image interface.
20+
type slowestRGBA struct {
21+
Pix []uint8
22+
Stride int
23+
Rect image.Rectangle
24+
}
25+
26+
func (p *slowestRGBA) ColorModel() color.Model { return color.RGBAModel }
27+
28+
func (p *slowestRGBA) Bounds() image.Rectangle { return p.Rect }
29+
30+
func (p *slowestRGBA) At(x, y int) color.Color {
31+
return p.RGBA64At(x, y)
32+
}
33+
34+
func (p *slowestRGBA) RGBA64At(x, y int) color.RGBA64 {
35+
if !(image.Point{x, y}.In(p.Rect)) {
36+
return color.RGBA64{}
37+
}
38+
i := p.PixOffset(x, y)
39+
s := p.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
40+
r := uint16(s[0])
41+
g := uint16(s[1])
42+
b := uint16(s[2])
43+
a := uint16(s[3])
44+
return color.RGBA64{
45+
(r << 8) | r,
46+
(g << 8) | g,
47+
(b << 8) | b,
48+
(a << 8) | a,
49+
}
50+
}
51+
52+
func (p *slowestRGBA) Set(x, y int, c color.Color) {
53+
if !(image.Point{x, y}.In(p.Rect)) {
54+
return
55+
}
56+
i := p.PixOffset(x, y)
57+
c1 := color.RGBAModel.Convert(c).(color.RGBA)
58+
s := p.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
59+
s[0] = c1.R
60+
s[1] = c1.G
61+
s[2] = c1.B
62+
s[3] = c1.A
63+
}
64+
65+
func (p *slowestRGBA) PixOffset(x, y int) int {
66+
return (y-p.Rect.Min.Y)*p.Stride + (x-p.Rect.Min.X)*4
67+
}
68+
69+
func init() {
70+
var p interface{} = (*slowestRGBA)(nil)
71+
if _, ok := p.(RGBA64Image); ok {
72+
panic("slowestRGBA should not be an RGBA64Image")
73+
}
74+
}
75+
76+
// slowerRGBA is a draw.Image like image.RGBA but it is a different type and
77+
// therefore does not trigger the draw.go fastest code paths.
78+
//
79+
// Unlike slowestRGBA, it still implements the draw.RGBA64Image interface.
80+
type slowerRGBA struct {
81+
Pix []uint8
82+
Stride int
83+
Rect image.Rectangle
84+
}
85+
86+
func (p *slowerRGBA) ColorModel() color.Model { return color.RGBAModel }
87+
88+
func (p *slowerRGBA) Bounds() image.Rectangle { return p.Rect }
89+
90+
func (p *slowerRGBA) At(x, y int) color.Color {
91+
return p.RGBA64At(x, y)
92+
}
93+
94+
func (p *slowerRGBA) RGBA64At(x, y int) color.RGBA64 {
95+
if !(image.Point{x, y}.In(p.Rect)) {
96+
return color.RGBA64{}
97+
}
98+
i := p.PixOffset(x, y)
99+
s := p.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
100+
r := uint16(s[0])
101+
g := uint16(s[1])
102+
b := uint16(s[2])
103+
a := uint16(s[3])
104+
return color.RGBA64{
105+
(r << 8) | r,
106+
(g << 8) | g,
107+
(b << 8) | b,
108+
(a << 8) | a,
109+
}
110+
}
111+
112+
func (p *slowerRGBA) Set(x, y int, c color.Color) {
113+
if !(image.Point{x, y}.In(p.Rect)) {
114+
return
115+
}
116+
i := p.PixOffset(x, y)
117+
c1 := color.RGBAModel.Convert(c).(color.RGBA)
118+
s := p.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
119+
s[0] = c1.R
120+
s[1] = c1.G
121+
s[2] = c1.B
122+
s[3] = c1.A
123+
}
124+
125+
func (p *slowerRGBA) SetRGBA64(x, y int, c color.RGBA64) {
126+
if !(image.Point{x, y}.In(p.Rect)) {
127+
return
128+
}
129+
i := p.PixOffset(x, y)
130+
s := p.Pix[i : i+4 : i+4] // Small cap improves performance, see https://golang.org/issue/27857
131+
s[0] = uint8(c.R >> 8)
132+
s[1] = uint8(c.G >> 8)
133+
s[2] = uint8(c.B >> 8)
134+
s[3] = uint8(c.A >> 8)
135+
}
136+
137+
func (p *slowerRGBA) PixOffset(x, y int) int {
138+
return (y-p.Rect.Min.Y)*p.Stride + (x-p.Rect.Min.X)*4
139+
}
140+
141+
func init() {
142+
var p interface{} = (*slowerRGBA)(nil)
143+
if _, ok := p.(RGBA64Image); !ok {
144+
panic("slowerRGBA should be an RGBA64Image")
145+
}
146+
}
147+
16148
func eq(c0, c1 color.Color) bool {
17149
r0, g0, b0, a0 := c0.RGBA()
18150
r1, g1, b1, a1 := c1.RGBA()
@@ -260,30 +392,55 @@ func TestDraw(t *testing.T) {
260392
for _, r := range rr {
261393
loop:
262394
for _, test := range drawTests {
263-
dst := hgradRed(255).(*image.RGBA).SubImage(r).(Image)
264-
// Draw the (src, mask, op) onto a copy of dst using a slow but obviously correct implementation.
265-
golden := makeGolden(dst, image.Rect(0, 0, 16, 16), test.src, image.ZP, test.mask, image.ZP, test.op)
266-
b := dst.Bounds()
267-
if !b.Eq(golden.Bounds()) {
268-
t.Errorf("draw %v %s: bounds %v versus %v", r, test.desc, dst.Bounds(), golden.Bounds())
269-
continue
270-
}
271-
// Draw the same combination onto the actual dst using the optimized DrawMask implementation.
272-
DrawMask(dst, image.Rect(0, 0, 16, 16), test.src, image.ZP, test.mask, image.ZP, test.op)
273-
if image.Pt(8, 8).In(r) {
274-
// Check that the resultant pixel at (8, 8) matches what we expect
275-
// (the expected value can be verified by hand).
276-
if !eq(dst.At(8, 8), test.expected) {
277-
t.Errorf("draw %v %s: at (8, 8) %v versus %v", r, test.desc, dst.At(8, 8), test.expected)
395+
for i := 0; i < 3; i++ {
396+
dst := hgradRed(255).(*image.RGBA).SubImage(r).(Image)
397+
// For i != 0, substitute a different-typed dst that will take
398+
// us off the fastest code paths. We should still get the same
399+
// result, in terms of final pixel RGBA values.
400+
switch i {
401+
case 1:
402+
d := dst.(*image.RGBA)
403+
dst = &slowerRGBA{
404+
Pix: d.Pix,
405+
Stride: d.Stride,
406+
Rect: d.Rect,
407+
}
408+
case 2:
409+
d := dst.(*image.RGBA)
410+
dst = &slowestRGBA{
411+
Pix: d.Pix,
412+
Stride: d.Stride,
413+
Rect: d.Rect,
414+
}
415+
}
416+
417+
// Draw the (src, mask, op) onto a copy of dst using a slow but obviously correct implementation.
418+
golden := makeGolden(dst, image.Rect(0, 0, 16, 16), test.src, image.ZP, test.mask, image.ZP, test.op)
419+
b := dst.Bounds()
420+
if !b.Eq(golden.Bounds()) {
421+
t.Errorf("draw %v %s on %T: bounds %v versus %v",
422+
r, test.desc, dst, dst.Bounds(), golden.Bounds())
278423
continue
279424
}
280-
}
281-
// Check that the resultant dst image matches the golden output.
282-
for y := b.Min.Y; y < b.Max.Y; y++ {
283-
for x := b.Min.X; x < b.Max.X; x++ {
284-
if !eq(dst.At(x, y), golden.At(x, y)) {
285-
t.Errorf("draw %v %s: at (%d, %d), %v versus golden %v", r, test.desc, x, y, dst.At(x, y), golden.At(x, y))
286-
continue loop
425+
// Draw the same combination onto the actual dst using the optimized DrawMask implementation.
426+
DrawMask(dst, image.Rect(0, 0, 16, 16), test.src, image.ZP, test.mask, image.ZP, test.op)
427+
if image.Pt(8, 8).In(r) {
428+
// Check that the resultant pixel at (8, 8) matches what we expect
429+
// (the expected value can be verified by hand).
430+
if !eq(dst.At(8, 8), test.expected) {
431+
t.Errorf("draw %v %s on %T: at (8, 8) %v versus %v",
432+
r, test.desc, dst, dst.At(8, 8), test.expected)
433+
continue
434+
}
435+
}
436+
// Check that the resultant dst image matches the golden output.
437+
for y := b.Min.Y; y < b.Max.Y; y++ {
438+
for x := b.Min.X; x < b.Max.X; x++ {
439+
if !eq(dst.At(x, y), golden.At(x, y)) {
440+
t.Errorf("draw %v %s on %T: at (%d, %d), %v versus golden %v",
441+
r, test.desc, dst, x, y, dst.At(x, y), golden.At(x, y))
442+
continue loop
443+
}
287444
}
288445
}
289446
}

0 commit comments

Comments
 (0)