Skip to content

Commit

Permalink
Bug 1881638 - Port quad instances to make use of int gpu buffer r=gfx-reviewers,lsalzman
Browse files Browse the repository at this point in the history

This allows extending render task indices to 32 bits without
requiring an increase in the vertex / instance size.

Differential Revision: https://phabricator.services.mozilla.com/D202526

[ghsync] From https://hg.mozilla.org/mozilla-central/rev/b9f2c3f5a805a94fdebf367e5e0bdb964074a3e7
  • Loading branch information
Glenn Watson committed Feb 26, 2024
1 parent dc353f4 commit 315b45c
Show file tree
Hide file tree
Showing 15 changed files with 163 additions and 101 deletions.
7 changes: 6 additions & 1 deletion webrender/res/gpu_buffer.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuBufferF;
uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuBufferI;
uniform HIGHP_SAMPLER_FLOAT isampler2D sGpuBufferI;

ivec2 get_gpu_buffer_uv(HIGHP_FS_ADDRESS int address) {
return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
Expand Down Expand Up @@ -41,3 +41,8 @@ vec4[4] fetch_from_gpu_buffer_4f(HIGHP_FS_ADDRESS int address) {
TEXEL_FETCH(sGpuBufferF, uv, 0, ivec2(3, 0))
);
}

// Fetch a single ivec4 texel from the integer GPU buffer (sGpuBufferI)
// at the given linear `address`.
ivec4 fetch_from_gpu_buffer_1i(HIGHP_FS_ADDRESS int address) {
    // Map the linear address to 2D texel coordinates and read mip level 0.
    return texelFetch(sGpuBufferI, get_gpu_buffer_uv(address), 0);
}
49 changes: 32 additions & 17 deletions webrender/res/ps_quad.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -81,37 +81,51 @@ QuadPrimitive fetch_primitive(int index) {
return prim;
}

// Per-quad header values read from the integer GPU buffer by fetch_header().
struct QuadHeader {
// Id passed to fetch_transform() in ps_quad_main.
int transform_id;
// Converted to float and used as the z value in ps_quad_main.
int z_id;
};

// Read the QuadHeader stored at `address` in the integer GPU buffer.
QuadHeader fetch_header(int address) {
    ivec4 texel = fetch_from_gpu_buffer_1i(address);

    // Only .x (transform id) and .y (z id) are consumed here;
    // the .z and .w components of the texel are ignored.
    return QuadHeader(texel.x, texel.y);
}

struct QuadInstance {
// x
int prim_address;
int prim_address_i;

// y
int quad_flags;
int edge_flags;
int picture_task_address;
int prim_address_f;

// z
int quad_flags;
int edge_flags;
int part_index;
int z_id;
int segment_index;

// w
int segment_index;
int transform_id;
int picture_task_address;
};

QuadInstance decode_instance() {
QuadInstance qi = QuadInstance(
aData.x,

(aData.y >> 24) & 0xff,
(aData.y >> 16) & 0xff,
aData.y & 0xffff,
aData.y,

(aData.z >> 24) & 0xff,
aData.z & 0xffffff,
(aData.z >> 16) & 0xff,
(aData.z >> 8) & 0xff,
(aData.z >> 0) & 0xff,

(aData.w >> 24) & 0xff,
aData.w & 0xffffff
aData.w
);

return qi;
Expand Down Expand Up @@ -165,17 +179,18 @@ float edge_aa_offset(int edge, int flags) {
PrimitiveInfo ps_quad_main(void) {
QuadInstance qi = decode_instance();

Transform transform = fetch_transform(qi.transform_id);
QuadHeader qh = fetch_header(qi.prim_address_i);
Transform transform = fetch_transform(qh.transform_id);
PictureTask task = fetch_picture_task(qi.picture_task_address);
QuadPrimitive prim = fetch_primitive(qi.prim_address);
float z = float(qi.z_id);
QuadPrimitive prim = fetch_primitive(qi.prim_address_f);
float z = float(qh.z_id);

QuadSegment seg;
if (qi.segment_index == INVALID_SEGMENT_INDEX) {
seg.rect = prim.bounds;
seg.uv_rect = vec4(0.0);
} else {
seg = fetch_segment(qi.prim_address, qi.segment_index);
seg = fetch_segment(qi.prim_address_f, qi.segment_index);
}

// The local space rect that we will draw, which is effectively:
Expand Down
35 changes: 26 additions & 9 deletions webrender/src/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ use crate::prim_store::{VECS_PER_SEGMENT, PrimitiveInstanceIndex};
use crate::render_target::RenderTargetContext;
use crate::render_task_graph::{RenderTaskId, RenderTaskGraph};
use crate::render_task::{RenderTaskAddress, RenderTaskKind, SubPass};
use crate::renderer::{BlendMode, ShaderColorMode};
use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, GpuBufferBuilderF, GpuBufferAddress};
use crate::renderer::{BlendMode, GpuBufferBuilder, ShaderColorMode};
use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, GpuBufferAddress};
use crate::resource_cache::{GlyphFetchResult, ImageProperties};
use crate::space::SpaceMapper;
use crate::visibility::{PrimitiveVisibilityFlags, VisibilityState};
Expand Down Expand Up @@ -803,14 +803,15 @@ impl BatchBuilder {
&mut self,
prim_instance_index: PrimitiveInstanceIndex,
transform_id: TransformPaletteId,
gpu_buffer_address: GpuBufferAddress,
prim_address_f: GpuBufferAddress,
quad_flags: QuadFlags,
edge_flags: EdgeAaSegmentMask,
segment_index: u8,
task_id: RenderTaskId,
z_generator: &mut ZBufferIdGenerator,
prim_instances: &[PrimitiveInstance],
render_tasks: &RenderTaskGraph,
gpu_buffer_builder: &mut GpuBufferBuilder,
) {
let prim_instance = &prim_instances[prim_instance_index.0 as usize];
let prim_info = &prim_instance.vis;
Expand All @@ -820,13 +821,14 @@ impl BatchBuilder {
add_quad_to_batch(
self.batcher.render_task_address,
transform_id,
gpu_buffer_address,
prim_address_f,
quad_flags,
edge_flags,
segment_index,
task_id,
z_id,
render_tasks,
gpu_buffer_builder,
|key, instance| {
let batch = self.batcher.set_params_and_get_batch(
key,
Expand Down Expand Up @@ -857,7 +859,7 @@ impl BatchBuilder {
surface_spatial_node_index: SpatialNodeIndex,
z_generator: &mut ZBufferIdGenerator,
prim_instances: &[PrimitiveInstance],
_gpu_buffer_builder: &mut GpuBufferBuilderF,
gpu_buffer_builder: &mut GpuBufferBuilder,
segments: &[RenderTaskId],
) {
let (prim_instance_index, extra_prim_gpu_address) = match cmd {
Expand All @@ -883,6 +885,7 @@ impl BatchBuilder {
z_generator,
prim_instances,
render_tasks,
gpu_buffer_builder,
);
} else {
for (i, task_id) in segments.iter().enumerate() {
Expand All @@ -900,6 +903,7 @@ impl BatchBuilder {
z_generator,
prim_instances,
render_tasks,
gpu_buffer_builder,
);
}
}
Expand Down Expand Up @@ -3837,13 +3841,14 @@ impl<'a, 'rc> RenderTargetContext<'a, 'rc> {
pub fn add_quad_to_batch<F>(
render_task_address: RenderTaskAddress,
transform_id: TransformPaletteId,
gpu_buffer_address: GpuBufferAddress,
prim_address_f: GpuBufferAddress,
quad_flags: QuadFlags,
edge_flags: EdgeAaSegmentMask,
segment_index: u8,
task_id: RenderTaskId,
z_id: ZBufferId,
render_tasks: &RenderTaskGraph,
gpu_buffer_builder: &mut GpuBufferBuilder,
mut f: F,
) where F: FnMut(BatchKey, PrimitiveInstanceData) {

Expand All @@ -3857,6 +3862,15 @@ pub fn add_quad_to_batch<F>(
All = 5,
}

let mut writer = gpu_buffer_builder.i32.write_blocks(1);
writer.push_one([
transform_id.0 as i32,
z_id.0,
0,
0,
]);
let prim_address_i = writer.finish();

let texture = match task_id {
RenderTaskId::INVALID => {
TextureSource::Invalid
Expand Down Expand Up @@ -3898,7 +3912,8 @@ pub fn add_quad_to_batch<F>(
if edge_flags.is_empty() {
let instance = QuadInstance {
render_task_address,
prim_address: gpu_buffer_address,
prim_address_i,
prim_address_f,
z_id,
transform_id,
edge_flags: edge_flags_bits,
Expand All @@ -3911,7 +3926,8 @@ pub fn add_quad_to_batch<F>(
} else if quad_flags.contains(QuadFlags::USE_AA_SEGMENTS) {
let main_instance = QuadInstance {
render_task_address,
prim_address: gpu_buffer_address,
prim_address_i,
prim_address_f,
z_id,
transform_id,
edge_flags: edge_flags_bits,
Expand Down Expand Up @@ -3956,7 +3972,8 @@ pub fn add_quad_to_batch<F>(
} else {
let instance = QuadInstance {
render_task_address,
prim_address: gpu_buffer_address,
prim_address_i,
prim_address_f,
z_id,
transform_id,
edge_flags: edge_flags_bits,
Expand Down
33 changes: 16 additions & 17 deletions webrender/src/frame_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::prim_store::{PictureIndex, PrimitiveScratchBuffer};
use crate::prim_store::{DeferredResolve, PrimitiveInstance};
use crate::profiler::{self, TransactionProfile};
use crate::render_backend::{DataStores, ScratchBuffer};
use crate::renderer::{GpuBufferF, GpuBufferBuilderF, GpuBufferI, GpuBufferBuilderI};
use crate::renderer::{GpuBufferF, GpuBufferBuilderF, GpuBufferI, GpuBufferBuilderI, GpuBufferBuilder};
use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget, PictureCacheTargetKind};
use crate::render_target::{RenderTargetContext, RenderTargetKind, AlphaRenderTarget, ColorRenderTarget};
use crate::render_task_graph::{RenderTaskGraph, Pass, SubPassSurface};
Expand Down Expand Up @@ -171,8 +171,7 @@ pub struct FrameBuildingState<'a> {
pub surface_builder: SurfaceBuilder,
pub cmd_buffers: &'a mut CommandBufferList,
pub clip_tree: &'a ClipTree,
pub frame_gpu_data_f: &'a mut GpuBufferBuilderF,
pub frame_gpu_data_i: &'a mut GpuBufferBuilderI,
pub frame_gpu_data: &'a mut GpuBufferBuilder,
}

impl<'a> FrameBuildingState<'a> {
Expand Down Expand Up @@ -277,8 +276,7 @@ impl FrameBuilder {
tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
spatial_tree: &SpatialTree,
cmd_buffers: &mut CommandBufferList,
frame_gpu_data_f: &mut GpuBufferBuilderF,
frame_gpu_data_i: &mut GpuBufferBuilderI,
frame_gpu_data: &mut GpuBufferBuilder,
profile: &mut TransactionProfile,
) {
profile_scope!("build_layer_screen_rects_and_cull_layers");
Expand Down Expand Up @@ -430,8 +428,7 @@ impl FrameBuilder {
surface_builder: SurfaceBuilder::new(),
cmd_buffers,
clip_tree: &mut scene.clip_tree,
frame_gpu_data_f,
frame_gpu_data_i,
frame_gpu_data,
};

// Push a default dirty region which culls primitives
Expand Down Expand Up @@ -561,8 +558,10 @@ impl FrameBuilder {
let mut cmd_buffers = CommandBufferList::new();

// TODO(gw): Recycle backing vec buffers for gpu buffer builder between frames
let mut gpu_buffer_builder_f = GpuBufferBuilderF::new();
let mut gpu_buffer_builder_i = GpuBufferBuilderI::new();
let mut gpu_buffer_builder = GpuBufferBuilder {
f32: GpuBufferBuilderF::new(),
i32: GpuBufferBuilderI::new(),
};

self.build_layer_screen_rects_and_cull_layers(
scene,
Expand All @@ -580,8 +579,7 @@ impl FrameBuilder {
tile_caches,
spatial_tree,
&mut cmd_buffers,
&mut gpu_buffer_builder_f,
&mut gpu_buffer_builder_i,
&mut gpu_buffer_builder,
profile,
);

Expand Down Expand Up @@ -637,7 +635,7 @@ impl FrameBuilder {
output_size,
&mut ctx,
gpu_cache,
&mut gpu_buffer_builder_f,
&mut gpu_buffer_builder,
&render_tasks,
&scene.clip_store,
&mut transform_palette,
Expand Down Expand Up @@ -695,8 +693,8 @@ impl FrameBuilder {
scene.clip_store.end_frame(&mut scratch.clip_store);
scratch.end_frame();

let gpu_buffer_f = gpu_buffer_builder_f.finalize(&render_tasks);
let gpu_buffer_i = gpu_buffer_builder_i.finalize(&render_tasks);
let gpu_buffer_f = gpu_buffer_builder.f32.finalize(&render_tasks);
let gpu_buffer_i = gpu_buffer_builder.i32.finalize(&render_tasks);

Frame {
device_rect: DeviceIntRect::from_origin_and_size(
Expand Down Expand Up @@ -901,7 +899,7 @@ pub fn build_render_pass(
screen_size: DeviceIntSize,
ctx: &mut RenderTargetContext,
gpu_cache: &mut GpuCache,
gpu_buffer_builder: &mut GpuBufferBuilderF,
gpu_buffer_builder: &mut GpuBufferBuilder,
render_tasks: &RenderTaskGraph,
clip_store: &ClipStore,
transforms: &mut TransformPalette,
Expand Down Expand Up @@ -974,7 +972,6 @@ pub fn build_render_pass(
let task_id = sub_pass.task_ids[0];
let task = &render_tasks[task_id];
let target_rect = task.get_target_rect();
let mut gpu_buffer_builder = GpuBufferBuilderF::new();

match task.kind {
RenderTaskKind::Picture(ref pic_task) => {
Expand Down Expand Up @@ -1005,7 +1002,7 @@ pub fn build_render_pass(
pic_task.surface_spatial_node_index,
z_generator,
prim_instances,
&mut gpu_buffer_builder,
gpu_buffer_builder,
segments,
);
});
Expand Down Expand Up @@ -1079,6 +1076,7 @@ pub fn build_render_pass(
z_generator,
prim_instances,
cmd_buffers,
gpu_buffer_builder,
);
pass.alpha.build(
ctx,
Expand All @@ -1089,6 +1087,7 @@ pub fn build_render_pass(
z_generator,
prim_instances,
cmd_buffers,
gpu_buffer_builder,
);

pass
Expand Down
Loading

0 comments on commit 315b45c

Please sign in to comment.