Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f5eb99a

Browse files
committedOct 29, 2021
Refactor downsample compute shader
Separate the individual mip algorithms into their own functions.
1 parent a35f8c5 commit f5eb99a

File tree

1 file changed

+35
-16
lines changed

1 file changed

+35
-16
lines changed
 

‎res/org/lwjgl/demo/opengl/shader/downsampling/downsample.cs.glsl

+35-16
Original file line numberDiff line numberDiff line change
@@ -32,28 +32,25 @@ int unpack(int x) {
3232

3333
shared vec4 sm[4][4];
3434

35-
void main(void) {
35+
/*
 * Mip level 1: every work item takes a single sample from baseImage at the
 * corner shared by a 2x2 group of base-level texels and writes it to mips[0].
 * The sampler is expected to use linear filtering, so that one fetch yields
 * the average of the four surrounding texels.
 *
 * i - global (x, y) coordinate of this work item
 * t - receives the sampled value; it is left untouched when i lies outside
 *     the half-resolution grid
 */
void mip1(ivec2 i, inout vec4 t) {
  ivec2 baseSize = textureSize(baseImage, 0);

  // Each work item covers a 2x2 block of base texels, so the grid of valid
  // work items is only half the base image size along each axis.
  ivec2 gridSize = baseSize / ivec2(2);

  if (i.x < gridSize.x && i.y < gridSize.y) {
    // Texture coordinate right at the corner between the four texels.
    vec2 corner = vec2(i * 2) + vec2(1.0);
    vec2 uv = corner / vec2(baseSize);

    t = textureLod(baseImage, uv, 0.0);
    imageStore(mips[0], i, t);
  }
}
5652

53+
void mip2(ivec2 i, inout vec4 t) {
5754
// compute mip 2 using subgroup quad sharing
5855
/*
5956
* The trick here is to assume a 1:1 correspondence between subgroup invocation ids
@@ -68,19 +65,23 @@ void main(void) {
6865
t = (t + h + v + d) * vec4(0.25);
6966
if ((gl_SubgroupInvocationID & 3) == 0)
7067
imageStore(mips[1], i/ivec2(2), t);
68+
}
7169

70+
/*
 * Mip level 3: averages this invocation's value with the values held by the
 * three subgroup invocations whose ids differ from ours by xor 4, 8 and 12,
 * i.e. it exchanges data with a stride of 4 items via subgroupShuffleXor().
 *
 * i - global (x, y) coordinate of this work item
 * t - in: the value produced by the previous stage; out: the 4-way average
 */
void mip3(ivec2 i, inout vec4 t) {
  vec4 peer4  = subgroupShuffleXor(t, 4);
  vec4 peer8  = subgroupShuffleXor(t, 8);
  vec4 peer12 = subgroupShuffleXor(t, 12);

  vec4 sum = t + peer4 + peer8 + peer12;
  t = sum * vec4(0.25);

  // Only the invocation whose low four id bits are all zero writes the
  // result, so each group of 16 invocations produces one texel of mips[2].
  bool writesTexel = (gl_SubgroupInvocationID & 15) == 0;
  if (writesTexel)
    imageStore(mips[2], i/ivec2(4), t);
}
8383

84+
void mip4(ivec2 l, ivec2 i, inout vec4 t) {
8485
// compute mip 4 using shared memory
8586
/*
8687
* For mip 4 we essentially have 8x8 work items.
@@ -94,12 +95,14 @@ void main(void) {
9495
t = (sm[smc.x][smc.y] + sm[smi.x][smc.y] + sm[smc.x][smi.y] + sm[smi.x][smi.y]) * 0.25;
9596
imageStore(mips[3], i/ivec2(8), t);
9697
}
98+
}
9799

100+
void mip5(ivec2 l, ivec2 i, vec4 t) {
98101
// compute mip 5 also using shared memory
99102
/*
100103
* For mip 5 we have 16x16 work items.
101104
*/
102-
smc = l / ivec2(8);
105+
ivec2 smc = l / ivec2(8);
103106
if ((l.x & 7) == 0 && (l.y & 7) == 0)
104107
sm[smc.x][smc.y] = t;
105108
barrier();
@@ -108,3 +111,19 @@ void main(void) {
108111
imageStore(mips[4], i/ivec2(16), t);
109112
}
110113
}
114+
115+
/*
 * Shader entry point: derives the local and global coordinates of this work
 * item and then runs the five mip stages in order, threading the running
 * value through them.
 *
 * NOTE(review): mip1 returns without touching the value when the work item
 * is outside the half-size grid, so the later stages then run on the
 * vec4(0.0) initial value - confirm this is the intended behaviour.
 */
void main(void) {
  // (x, y) position of this work item within its workgroup, decoded from the
  // linear local invocation id along a z-order curve.
  uint linearId = gl_LocalInvocationID.x;
  ivec2 localPos = ivec2(unpack(int(linearId)),
                         unpack(int(linearId >> 1u)));

  // Global (x, y) position: workgroups are laid out 16 work items apart.
  ivec2 groupOrigin = ivec2(gl_WorkGroupID.xy) * ivec2(16);
  ivec2 globalPos = groupOrigin + localPos;

  vec4 value = vec4(0.0);
  mip1(globalPos, value);
  mip2(globalPos, value);
  mip3(globalPos, value);
  mip4(localPos, globalPos, value);
  mip5(localPos, globalPos, value);
}

0 commit comments

Comments
 (0)
Please sign in to comment.