@@ -32,28 +32,25 @@ int unpack(int x) {
32
32
33
33
shared vec4 sm[4 ][4 ];
34
34
35
- void main(void ) {
35
/*
 * Mip level 1: one linearly filtered fetch per work item.
 *
 * i - global (x, y) coordinate of this work item
 * t - receives the texel fetched for this work item; left unchanged when
 *     the work item lies outside the half-resolution grid
 *
 * The sampler on baseImage is expected to use a linear filter, so sampling
 * exactly between four texels yields their average in a single fetch.
 */
void mip1(ivec2 i, inout vec4 t) {
    ivec2 baseSize = textureSize(baseImage, 0);

    // Every work item already covers a 2x2 texel footprint of the base image,
    // so the grid of work items is only half the base image size.
    ivec2 gridSize = baseSize / ivec2(2);

    // Work items outside the half-resolution grid have nothing to do here.
    bool insideGrid = i.x < gridSize.x && i.y < gridSize.y;
    if (!insideGrid)
        return;

    // Texture coordinate right at the corner shared by the four texels.
    vec2 corner = vec2(i * 2) + vec2(1.0);
    vec2 uv = corner / vec2(baseSize);

    t = textureLod(baseImage, uv, 0.0);
    imageStore(mips[0], i, t);
}
56
52
53
+ void mip2(ivec2 i, inout vec4 t) {
57
54
// compute mip 2 using subgroup quad sharing
58
55
/*
59
56
* The trick here is to assume a 1:1 correspondence between subgroup invocation ids
@@ -68,19 +65,23 @@ void main(void) {
68
65
t = (t + h + v + d) * vec4 (0.25 );
69
66
if ((gl_SubgroupInvocationID & 3 ) == 0 )
70
67
imageStore(mips[1 ], i/ ivec2 (2 ), t);
68
+ }
71
69
70
/*
 * Mip level 3: average four values exchanged via subgroup xor shuffles.
 *
 * i - global (x, y) coordinate of this work item
 * t - in: this work item's value from the previous level;
 *     out: the average of that value and the three shuffled-in values
 *
 * Values are exchanged between subgroup items with a stride of 4 items,
 * which is what subgroupShuffleXor() with masks 4, 8 and 12 provides.
 */
void mip3(ivec2 i, inout vec4 t) {
    // All three shuffles read the incoming t, before it is overwritten below.
    vec4 fromXor4  = subgroupShuffleXor(t, 4);
    vec4 fromXor8  = subgroupShuffleXor(t, 8);
    vec4 fromXor12 = subgroupShuffleXor(t, 12);

    t = (t + fromXor4 + fromXor8 + fromXor12) * 0.25;

    // Only the first invocation of every 16 stores the result.
    bool firstOfSixteen = (gl_SubgroupInvocationID & 15) == 0;
    if (firstOfSixteen)
        imageStore(mips[2], i / ivec2(4), t);
}
83
83
84
+ void mip4(ivec2 l, ivec2 i, inout vec4 t) {
84
85
// compute mip 4 using shared memory
85
86
/*
86
87
* For mip 4 we essentially have 8x8 work items.
@@ -94,12 +95,14 @@ void main(void) {
94
95
t = (sm[smc.x][smc.y] + sm[smi.x][smc.y] + sm[smc.x][smi.y] + sm[smi.x][smi.y]) * 0.25 ;
95
96
imageStore(mips[3 ], i/ ivec2 (8 ), t);
96
97
}
98
+ }
97
99
100
+ void mip5(ivec2 l, ivec2 i, vec4 t) {
98
101
// compute mip 5 also using shared memory
99
102
/*
100
103
* For mip 5 we have 16x16 work items.
101
104
*/
102
- smc = l / ivec2 (8 );
105
+ ivec2 smc = l / ivec2 (8 );
103
106
if ((l.x & 7 ) == 0 && (l.y & 7 ) == 0 )
104
107
sm[smc.x][smc.y] = t;
105
108
barrier();
@@ -108,3 +111,19 @@ void main(void) {
108
111
imageStore(mips[4 ], i/ ivec2 (16 ), t);
109
112
}
110
113
}
114
+
115
/*
 * Entry point: derives this work item's local and global coordinates, then
 * runs the per-level helpers mip1..mip5 in order, threading the running
 * value t from one level to the next.
 */
void main(void) {
    // Position of this work item inside its workgroup, laid out along a
    // z-order curve over the flat local invocation index.
    uint flatIndex = gl_LocalInvocationID.x;
    ivec2 l = ivec2(unpack(int(flatIndex)),
                    unpack(int(flatIndex >> 1u)));

    // Global position: each workgroup covers a 16x16 tile of work items.
    ivec2 tileOrigin = ivec2(gl_WorkGroupID.xy) * ivec2(16);
    ivec2 i = tileOrigin + l;

    // NOTE(review): mip1 returns early for out-of-range work items without
    // writing t, so those items continue through mip2..mip5 with this zero
    // value - confirm that this is the intended behavior.
    vec4 t = vec4(0.0);

    mip1(i, t);
    mip2(i, t);
    mip3(i, t);
    mip4(l, i, t);
    mip5(l, i, t);
}
0 commit comments