Regarding GL profiling

Sage
Posts: 1,199
Joined: 2004.10
Post: #16
Now, as promised, a LOT of code. First, the C++ which causes vertices to be submitted.

The data structure
Code:
struct foliage_vertex
{
    static const int VBO_FORMAT = GL_T2F_C3F_V3F;
    
    inline foliage_vertex( void ) {}
    
    inline foliage_vertex( const foliage_vertex &c ):
        texcoord( c.texcoord ),
        color( c.color ),
        position( c.position )
    {}

    inline foliage_vertex( const sgf::vec2 &tc, const sgf::vec3 &c, const sgf::vec3 &p ):
        texcoord( tc ),
        color( c ),
        position( p )
    {}
    
    inline foliage_vertex &operator = ( const foliage_vertex &c )
    {
        texcoord = c.texcoord;
        color = c.color;
        position = c.position;
        return *this;
    }
    
    sgf::vec2 texcoord;
    sgf::vec3 color, position;
};

struct foliage_quad {
    foliage_vertex a,b,c,d;
    
    inline foliage_quad( void ) {}
    inline foliage_quad( const foliage_quad &f ):
        a( f.a ),
        b( f.b ),
        c( f.c ),
        d( f.d )
    {}

    inline foliage_quad &operator = ( const foliage_quad &f )
    {
        a = f.a;
        b = f.b;
        c = f.c;
        d = f.d;
        return *this;
    }
};

The code that submits foliage_quad instances ( stored in a std::vector )
Code:
void
Foliage::Patch::display( const FrameRenderState &renderState )
{

    switch( renderState.pass )
    {
        case RenderPass::OpaqueAmbient:
        case RenderPass::OpaqueLit:
        {
            //
            //  Work out visibility -- don't draw unless the patch is within max visible distance
            //
            
            float dist = renderState.camera->position().distance( _position ) - _radius;
            if ( dist < _foliage->_visibleEnd )
            {
                float visibility = 1.0f;
                if ( dist > _foliage->_visibleStart )
                {
                    visibility = 1.0 - ((dist - _foliage->_visibleStart) / (_foliage->_visibleEnd - _foliage->_visibleStart));      
                }
            
                _material->setupForEntity( renderState, shared_from_this() );
                const ShaderRef &shader = _material->shader()->shaderForPass( renderState );
                shader->setUniform( "Normal", _normal );
                shader->setUniform( "Visibility", visibility );

                //
                //  Ramp alpha func to 1.0 when visibility == 0
                //
                
                glAlphaFunc( GL_GREATER, lrp( visibility, 1.0f, AlphaFuncMin ));

                glInterleavedArrays( foliage_vertex::VBO_FORMAT, 0, &(_quads.front()) );
                glDrawArrays( GL_QUADS, 0, _quads.size() * 4 );
            }

            break;
        }

        default:
        {}
    }
}

OK, now that that's covered, GLSL. I structure my GLSL into several files.
./ambient.[vs|fs] -- handles the ambient lighting pass
./lit_d.[vs|fs] -- handles directional lighting passes. I've also got positional & spot passes, but I'm not including them right now, since my demo has only 1 single directional light.
./foliage_setup.vs -- performs billboarding common to both ambient and lit passes
./lit_common.vs -- handles some setup used in all lit passes
/Shaders/Shadow/shadow_directional.[fs|vs] Cascading shadow mapping implementation
/Shaders/Materials/Common/fog.[fs|vs] Common fog code -- for distance and planar fog

Whew.

Now, here they are
ambient.vs
Code:
#include "foliage_setup.vs"
#include <Shaders/Materials/Common/fog.vs>

///////////////////////////////////////////////////////////////////////

varying float Intensity;

///////////////////////////////////////////////////////////////////////
    
// Ambient-pass vertex shader entry point for foliage.
void main()
{
    // These two outputs do not depend on the displaced vertex position.
    Intensity = gl_Color.z;
    gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;

    // foliage_setup() writes gl_Position and returns the eye-space position,
    // which fog_setup() turns into the fog varyings.
    fog_setup( foliage_setup() );
}

ambient.fs
Code:
#include <Shaders/Materials/Common/fog.fs>

///////////////////////////////////////////////////////////////////////

uniform vec4 Ambient;
uniform sampler2D ColorMap;
uniform float Visibility;

///////////////////////////////////////////////////////////////////////

varying float Intensity;

// Ambient-pass fragment shader: ambient-tinted, intensity-scaled texel with fog applied to rgb.
void main()
{
    vec4 texel = texture2D( ColorMap, gl_TexCoord[0].st );
    vec4 shaded = Ambient * Intensity * texel;

    // Fog only touches the color channels; alpha passes through untouched.
    gl_FragColor = vec4( fog_apply_ambient( shaded.rgb ), shaded.a );
}

lit_d.vs
Code:
#include <Shaders/Shadow/shadow_directional.vs>
#include "lit_common.vs"

///////////////////////////////////////////////////////////////////////

// Directional-light vertex shader entry point: shared lit setup, then the shadow hook.
void main()
{
    lit_common();
    directional_shadow_setup();
}

lit_d.fs
Code:
#include <Shaders/Shadow/shadow_directional.fs>
#include <Shaders/Materials/Common/fog.fs>

///////////////////////////////////////////////////////////////////////

uniform vec4 Diffuse, Ambient;
uniform sampler2D ColorMap;
uniform float Visibility;

///////////////////////////////////////////////////////////////////////

varying vec3 N, ECPos;
varying float Intensity;

///////////////////////////////////////////////////////////////////////

// Directional-light fragment shader entry point for foliage.
void main()
{
    // Light 0's position is used directly as the light direction; it is not normalized here.
    vec3 lightDir = gl_LightSource[0].position.xyz;

    // Diffuse term, clamped so back-facing light contributes nothing.
    vec4 lambert = Diffuse * max( dot( N, lightDir ), 0.0 );
    vec4 texel = Intensity * texture2D( ColorMap, gl_TexCoord[0].st );

    // NOTE(review): only texel.a scales the lighting; texel.rgb never reaches
    // the output of this pass -- confirm that is intended.
    vec3 lit = texel.a * shadow_coefficient_low( ECPos ) * ( Ambient + lambert ).rgb;

    gl_FragColor = vec4( fog_apply_lit( lit ), texel.a );
}

foliage_setup.vs
Code:
uniform vec2 Size;
uniform float Time;
uniform vec3 WindDirection;
uniform float WavePeriod, WaveMagnitude;

#ifdef BILLBOARD
uniform vec3 CameraPosition;
#endif

/*
    Displaces the incoming vertex (optional billboarding, then wind sway),
    writes gl_Position, and returns the displaced vertex in eye coordinates.

    gl_Color.xy is remapped with (x * 2 - 1) and used as the corner offset;
    gl_Color.z shifts the wave phase so the foliage does not all sway in unison.
*/
vec3 foliage_setup()
{
    vec2 corner = ( gl_Color.xy * 2.0 ) - 1.0;
    vec4 displaced = gl_Vertex;

    #ifdef BILLBOARD

        // Push the vertex out along an axis perpendicular to both the normal
        // and the direction to the camera, and up along the normal.
        vec2 extent = corner * Size;
        vec3 toCamera = normalize( CameraPosition - gl_Vertex.xyz );
        vec3 side = normalize( cross( gl_Normal, toCamera ));

        displaced.xyz += ( extent.x * side ) + ( extent.y * gl_Normal );

    #endif

    // Wind sway, scaled by corner.y.
    float phase = ( Time + gl_Color.z * 10.0 ) * 3.14159 / WavePeriod;
    displaced.xyz += WindDirection * WaveMagnitude * corner.y * sin( phase );

    // Clip-space position goes to gl_Position; the eye-space position is the return value.
    gl_Position = gl_ModelViewProjectionMatrix * displaced;
    return ( gl_ModelViewMatrix * displaced ).xyz;
}

lit_common.vs
Code:
#include "foliage_setup.vs"
#include <Shaders/Materials/Common/fog.vs>

///////////////////////////////////////////////////////////////////////

uniform vec3 Normal;

///////////////////////////////////////////////////////////////////////

varying vec3 N, ECPos;
varying float Intensity;

///////////////////////////////////////////////////////////////////////

// Vertex setup shared by the lit passes: position, fog, normal, intensity and texture coordinate.
void lit_common()
{
    // foliage_setup() writes gl_Position and returns the eye-space position.
    ECPos = foliage_setup();
    fog_setup( ECPos );

    // The normal comes from the "Normal" uniform, not from a per-vertex attribute.
    N = normalize( gl_NormalMatrix * Normal );

    Intensity = gl_Color.z;
    gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;
}

shadow_directional.vs
Code:
// Intentionally a no-op: kept as the vertex-stage shadow hook that lit_d.vs calls.
void directional_shadow_setup()
{
}

shadow_directional.fs
Code:
///////////////////////////////////////////////////////////////////////

uniform sampler2DShadow ShadowMap;
uniform float InvShadowSize;
uniform float Penumbra;
uniform vec4 SplitDepths;

uniform mat4 ShadowMat0;
uniform mat4 ShadowMat1;
uniform mat4 ShadowMat2;
uniform mat4 ShadowMat3;

///////////////////////////////////////////////////////////////////////

/*
    Placeholder for the full-quality shadow lookup, which is not used by the
    foliage shaders.

    Returns 1.0 so that this non-void function always yields a defined value;
    1.0 is the same "unshadowed" value shadow_coefficient_low() returns when
    SHADOWED is not defined.
*/
float shadow_coefficient( in vec3 ecPos )
{
    // not being used here
    return 1.0;
}

/*
    Single-tap cascaded shadow lookup.

    ecPos is transformed by all four shadow matrices; the one whose depth
    interval contains gl_FragCoord.z is selected with a 0/1 mask, and the
    result of one shadow2D() lookup is returned.

    Returns 1.0 when SHADOWED is not defined.
*/
float shadow_coefficient_low( in vec3 ecPos )
{
    #ifdef SHADOWED
        // Generate the comparison mask: component i is 1.0 when gl_FragCoord.z lies strictly
        // between the previous split depth (0 for the first split) and SplitDepths[i], else 0.0.
        // NOTE(review): because both comparisons are strict, a depth exactly equal to a split
        // value, or not below SplitDepths.w, selects no split; sc is then the zero vector and
        // sc.xyz / sc.w below divides by zero -- confirm this cannot happen in practice.
        vec4 lt = vec4( lessThan( vec4( gl_FragCoord.z ), SplitDepths ) );
        vec4 gt = vec4( greaterThan( vec4( gl_FragCoord.z ), vec4( 0, SplitDepths.x, SplitDepths.y, SplitDepths.z )) );
        vec4 comparison = lt * gt;
    
        vec4 vPos = vec4( ecPos, 1.0 );
        
        // Project into every split, then keep only the masked one.
        vec4 shadowCoord0 = ShadowMat0*vPos,
             shadowCoord1 = ShadowMat1*vPos,
             shadowCoord2 = ShadowMat2*vPos,
             shadowCoord3 = ShadowMat3*vPos;

        vec4 sc = comparison.x * shadowCoord0 +
                  comparison.y * shadowCoord1 +
                  comparison.z * shadowCoord2 +
                  comparison.w * shadowCoord3;
        vec3 shadowCoord = sc.xyz / sc.w;
        
        // Per-split penumbra scale. It is computed here but not read again in this function.
        float scale = Penumbra * InvShadowSize;    
        scale = comparison.x * scale +
                comparison.y * ( scale * 0.25 ) +
                comparison.z * ( scale * 0.125 ) +
                comparison.w * ( scale * 0.125 );

        float shadow = shadow2D(ShadowMap, shadowCoord).x;

        #ifdef SELF_SHADOW_FIX
            // Face normal rebuilt from the screen-space derivatives of the eye-space position.
            // note to self: this doesn't work non-normalized. So don't try.
            vec3 ec_normal = normalize(cross(dFdx(ecPos), dFdy(ecPos)));
            float lambertian = dot(ec_normal,gl_LightSource[0].position.xyz);

            // step() zeroes the result when lambertian is below 0.004.
            return step( 0.004, lambertian ) * shadow;
        #else
            return shadow;
        #endif
    #else
        return 1.0;
    #endif
}

fog.vs
Code:
uniform float FogDistance;
uniform float FogPlaneVisibleDistance;

#ifdef FOG_PLANE
    uniform vec4 FogPlane;
#endif

///////////////////////////////////////////////////////////////////////

varying float Fog_DistanceContribution;

#ifdef FOG_PLANE
    varying float Fog_PlaneContribution;
#endif

///////////////////////////////////////////////////////////////////////
// Plane maths

#ifdef FOG_PLANE
/*
        Classify a point against a plane.

        Evaluates plane.xyz . v + plane.w and clamps the result to [-1, 1], so
        the return value is
               > 0 if v is in front of the plane
                 0 if v is on the plane
               < 0 if v is behind the plane

        Only the sign is a classification; the magnitude is the clamped plane
        equation value, not always exactly 1.
*/

    float halfspace( in vec4 plane, in vec3 v )
    {
        float dt = plane.x*v.x + plane.y*v.y + plane.z*v.z + plane.w;
        return clamp( dt, -1.0, 1.0 );
    }

/*
        Intersect a ray with a plane
        -plane The plane
        -p The origin of the ray
        -dir The normalized direction of the ray
        -intersection The intersection will be written here
        -distance The distance from `intersection` to `p` will be written here

        NOTE(review): no check is made for a ray parallel to the plane; in that
        case dNV is 0 and t is the result of a division by zero.
*/
    void rayIntersection( in vec4 plane, in vec3 p, in vec3 dir, out vec3 intersection, out float distance )
    {
        float dNV = dot( plane.xyz, dir );
    
        // q is used as a point on the plane.
        // NOTE(review): halfspace() evaluates plane.xyz . v + plane.w, for which a point on the
        // plane would be -plane.xyz * plane.w (unit normal); here the sign of plane.w is the
        // opposite -- confirm which convention is intended.
        vec3 q = plane.xyz * plane.w;
        float t = dot( plane.xyz, ( q - p ) ) / dNV;
        intersection = p + ( dir * t );
        distance = length( intersection - p );
    }
#endif

///////////////////////////////////////////////////////////////////////

/*
    Computes the per-vertex fog varyings from an eye-space position.

    Always writes Fog_DistanceContribution; additionally writes
    Fog_PlaneContribution when FOG_PLANE is defined. In both, 1 means
    "no fog" and 0 means "fully fogged".
*/
void fog_setup( in vec3 ecPosition )
{
    //
    // Calculate distance fog contribution. Note, we store it as the complement
    // of the squared, clamped distance ratio: 1 means no fog, 0 means full fog.
    // NOTE(review): FogDistance == 0 would divide by zero here -- confirm the
    // application never sets it to 0.
    //

    float fragDistance = length( ecPosition );
    Fog_DistanceContribution = min( fragDistance / FogDistance, 1.0 );
    Fog_DistanceContribution = 1.0 - ( Fog_DistanceContribution * Fog_DistanceContribution );

    #ifdef FOG_PLANE
        //
        // Calculate fog plane contribution
        //

        // In eye coordinates the camera sits at the origin.
        vec3 cameraPosition = vec3( 0,0,0 );
        vec3 rayDirection = normalize( -ecPosition );

        float cameraClassification = halfspace( FogPlane, cameraPosition );
        float vertexClassification = halfspace( FogPlane, ecPosition );

        vec3 intersection;
        float distance;
        rayIntersection( FogPlane, cameraPosition, rayDirection, intersection, distance );          


        //
        // We need to determine which of the four ray transits this vertex represents:
        // 1) camera and vertex are above the fog plane
        // 2) camera is above, the vertex is below
        // 3) camera is below and vertex is above
        // 4) camera and vertex are below
        //
        // Case 1 leaves the contribution at 0.0, which becomes 1.0 (no plane fog)
        // after the final 1 - sqrt() step. A classification of exactly 0 (on the
        // plane) is treated as "not above" by the comparisons below.
        //

        Fog_PlaneContribution = 0.0;
        if ( cameraClassification > 0.0 )
        {
            if ( vertexClassification < 0.0 )
            {      
                // Case 2: only the part of the ray beyond the plane hit is fogged.
                Fog_PlaneContribution = min(( fragDistance - distance ) / FogPlaneVisibleDistance, 1.0 );
            }
        }
        else
        {
            if ( vertexClassification > 0.0 )
            {
                // Case 3: the ray is fogged from the camera up to the plane hit.
                Fog_PlaneContribution = min( distance / FogPlaneVisibleDistance, 1.0 );
            }
            else
            {
                // Case 4: the whole camera-to-vertex distance is fogged.
                Fog_PlaneContribution = min( fragDistance / FogPlaneVisibleDistance, 1.0 );
            }
        }

        // Convert "amount of fog" into "amount of visibility".
        Fog_PlaneContribution = 1.0 - sqrt(Fog_PlaneContribution);
    #endif
}

fog.fs
Code:
uniform vec4 FogColor;

#ifdef FOG_PLANE
uniform vec4 FogPlaneColor;
#endif

///////////////////////////////////////////////////////////////////////

varying float Fog_DistanceContribution;

#ifdef FOG_PLANE
varying float Fog_PlaneContribution;
#endif

///////////////////////////////////////////////////////////////////////

/*
    Apply both distance and planar fog contribution for an ambient render pass.

    Distance fog blends toward the FogColor uniform in both configurations.
    The FOG_PLANE branch previously read gl_Fog.color instead, which disagreed
    with the non-FOG_PLANE branch. With FOG_PLANE defined, the distance-fogged
    color is then blended toward FogPlaneColor.

    Both contribution varyings use 1 for "no fog" and 0 for "fully fogged".
*/
vec3 fog_apply_ambient( in vec3 color )
{
    vec3 distanceFogged = mix( FogColor.rgb, color, Fog_DistanceContribution );

    #ifdef FOG_PLANE
        return mix( FogPlaneColor.rgb, distanceFogged, Fog_PlaneContribution );
    #else
        return distanceFogged;
    #endif
}

/*
    Ambient pass, planar fog ONLY: blends toward FogPlaneColor by
    Fog_PlaneContribution. Without FOG_PLANE the color is returned unchanged.
*/
vec3 fog_plane_apply_ambient( in vec3 color )
{
    vec3 fogged = color;

    #ifdef FOG_PLANE
        fogged = mix( FogPlaneColor.rgb, color, Fog_PlaneContribution );
    #endif

    return fogged;
}

/*
    Lit pass, distance AND planar fog.

    Lit passes never blend toward a fog color: the fragment is simply faded
    toward black, which reduces its contribution to the scene.
*/
vec3 fog_apply_lit( in vec3 color )
{
    float fade = Fog_DistanceContribution;

    #ifdef FOG_PLANE
        fade = Fog_PlaneContribution * fade;
    #endif

    return fade * color;
}

/*
    Lit pass, planar fog ONLY.

    As with the other lit-pass helper, no fog color is mixed in; the fragment
    is faded toward black by Fog_PlaneContribution. Without FOG_PLANE the
    color is returned unchanged.
*/
vec3 fog_plane_apply_lit( in vec3 color )
{
    vec3 faded = color;

    #ifdef FOG_PLANE
        faded = Fog_PlaneContribution * color;
    #endif

    return faded;
}

NOTE: The planar fog stuff is not enabled here, so it's just distance fog only.

ALSO: That was a lot of code, I'm sorry.
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #17
A little more info. I wrote some code to determine how many "patches" are being drawn, and other stats. For the following screenshot:

[Image: Terrain-2009-06-27-01.png]

Code:
numPatches: 908
visiblePatches: 551
visibleDrawingPatches: 141
numQuadsDrawn: 40363

numPatches is the number of patches in the spherical region around the camera

visiblePatches is the number intersecting the view frustum

visibleDrawingPatches is the number within viewing distance

numQuadsDrawn is the number of quads submitted by the patches actually drawing.

40k quads is really high, and makes me wonder if something wonky is going on.

EDIT: I determined average quads-per-patch to generally be between 200 and 300.
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #18
And, an interesting note. I was not calling glDisableClientState after my batches -- putting that in brought up some of my unhappy 6fps scenes to a solid 20. Which is very encouraging.

My graph sorts objects to be drawn by a hash of their required GL state. It's very coarse, but it does mean that all visible foliage patches draw with only one setup and one teardown; but I wasn't disabling GL_TEXTURE_COORD_ARRAY etc.

I think this fixes some problems. I still have massive fillrate hits, and am looking into optimizing the GLSL now.
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #19
And, some more good news. I carefully transitioned to VBOs, profiling every step of the way. Frame rate hovers in the mid to high 20s now, even 30 in some places. Plus, shark tells me that the function Foliage::Patch::display() used to take ~45% CPU time, now takes ~7%

I think I've made a lot of progress here. Still need to optimize GLSL. I'm considering various approaches, but so far it's been nothing but micro-optimizations, probably stuff which the GL driver is doing for me anyway and getting me nothing.
Quote this message in a reply
Member
Posts: 45
Joined: 2008.04
Post: #20
I'd love to play with a demo of this to see what your engine can do
Quote this message in a reply
Oldtimer
Posts: 834
Joined: 2002.09
Post: #21
Shamyl,
Sorry about not getting back to you earlier, and I'm afraid my analysis is outdated now. ;P Still, take a look at the blue line across the top - the CPU Wait For GPU track. It is the only one that looks suspicious. I put the numbers into Excel and graphed them linearly on their own:

[Image: sham.png]

As you can see, they swing wildly, topping out at .6 seconds of stall time. Most of the time though, you're in the 0.1 second zone, which would keep you moored at roughly 10-15 FPS, which is what we're seeing here. These stalls might well have been caused by the non-disabled client state, but it is not inconceivable that they're also rasterization waits – fill-rate hits.

If you want to, please take the Driver Monitor for another spin and monitor only the CPU Wait For GPU value. Ideally, you should be able to push that down a lot lower. Perhaps it already is? Smile
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #22
Fenris Wrote:Shamyl,
Sorry about not getting back to you earlier, and I'm afraid my analysis is outdated now. ;P Still, take a look at the blue line across the top - the CPU Wait For GPU track. It is the only one that looks suspicious. I put the numbers into Excel and graphed them linearly on their own:

[Image: sham.png]

As you can see, they swing wildly, topping out at .6 seconds of stall time. Most of the time though, you're in the 0.1 second zone, which would keep you moored at roughly 10-15 FPS, which is what we're seeing here. These stalls might well have been caused by the non-disabled client state, but it is not inconceivable that they're also rasterization waits – fill-rate hits.

If you want to, please take the Driver Monitor for another spin and monitor only the CPU Wait For GPU value. Ideally, you should be able to push that down a lot lower. Perhaps it already is? Smile

Fenris, thanks -- I really appreciate it.
Here's the current output of just CPU wait for GPU:
Code:
hardwareWaitTime
455794117
433694458
540784762
585982457
612162749
646250570
717378550
676363768
686416101
670520965
591551307
457011817
523798910
606432477
613555042
664094150
635096273
548312722
468069030
595018091
563517755
617394543
604106488
438840833
588295992
589241584
488645395
555236457
610107907
626142860
572552485
497520299
580748334
545845013
468425823
505180382
560881013
575032137
528387347
520461090
677468940
515180850
582633027
653562884
731548466
699453255
693942490
732870498
612767027
687935312
662571115

Now, I have noticed something very odd. I have a simple test PNG which I used to verify that the texture atlas works correctly.

[Image: TestParticles.png]

And this is a "real" foliage atlas:
[Image: FoliageAtlas.png]

My performance is in the mid to high 20s with TestParticles.png; but it's 5 to 10fps lower with FoliageAtlas.png.

???
Quote this message in a reply
Oldtimer
Posts: 834
Joined: 2002.09
Post: #23
Shamyl,
The only reason I can see for it to be slow on the foliage texture is that it contains more texels that need "interesting" blending, but why that should drop it so completely is a mystery. What happens if you turn blending off at this point?

The new numbers look saner, but I'm on my iPod at the moment so I can't verify. Could I possibly ask you for a release-built binary to profile? It's an intriguing behaviour, to say the least...
Quote this message in a reply
Member
Posts: 87
Joined: 2006.08
Post: #24
If you're using alpha test, then it is possible the more complex foliage texture is limiting the GPU's ability to compress the depth buffer, increasing the amount of memory traffic required when rendering a large number of widely spaced pixels.

That should be easy to test by replacing the alpha channel in the foliage texture with the one from the letter texture.
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #25
Fenris,

Sorry it took so long -- I had quite a bit of domestic responsibility yesterday Rasp

First, here's the app and a terrain model for it. http://shamyl.zakariya.net/etc/gl/TerrainDemo.zip

Second, here's the info on it.

1) I wanted this to at least be a little "fun" so I included 4x4 car driving stuff. Drive with the arrow keys. When you inevitably flip the car, hit backspace to right it. If you have a generic USB gamepad, you can steer with the D-pad and accelerate/reverse with the 1st and 2nd buttons respectively.

2) I included two foliage types, the first is your generic grass, billboarded and densely populated. The second is shrubbery which is not billboarded. The latter is disabled, but:

3) There are two toggles at the top of the screen. They let you disable the Foliage & Ferns, use the "Debug" texture atlas, or use the real texture atlas.

You'll notice that with foliage running with the debug texture the performance isn't too bad, but switching to the real foliage texture, performance tanks. Both textures are the same size!

Thanks,

EDIT:
I should have mentioned the app requires an Intel Mac with a dedicated GPU. I know it works on the ATI x1600, the Nvidia 9400, 9600
Quote this message in a reply
Sage
Posts: 1,199
Joined: 2004.10
Post: #26
Frogblast Wrote:If you're using alpha test, then it is possible the more complex foliage texture is limiting the GPU's ability to compress the depth buffer, increasing the amount of memory traffic required when rendering a large number of widely spaced pixels.

That should be easy to test by replacing the alpha channel in the foliage texture with the one from the letter texture.

Sounds kind of like I'm screwed if that's the case Cry

That being said, I've come up with some simple GLSL optimizations to implement which may make for some good speedups.
Quote this message in a reply
Oldtimer
Posts: 834
Joined: 2002.09
Post: #27
Quote:Application Specific Information:
*** Terminating app due to uncaught exception 'NSXError', reason: 'Error Domain=NSMachErrorDomain Code=-536870203 UserInfo=0xceae80 "Operation could not be completed. (Mach error -536870203 - (iokit/common) exclusive access and device already open)"'

Thread 0 Crashed:
0 com.apple.CoreFoundation 0x9503df54 ___TERMINATING_DUE_TO_UNCAUGHT_EXCEPTION___ + 4
1 libobjc.A.dylib 0x955e6e3b objc_exception_throw + 40
2 com.apple.CoreFoundation 0x9503dbd1 -[NSException raise] + 17
3 org.zakariya.sgf 0x008232ad -[DDHidDevice openWithOptions:] + 244
4 org.zakariya.sgf 0x00822ce6 -[DDHidDevice startListening] + 74
5 org.zakariya.sgf 0x007b703b -[MouseAccess initWithDelegate:] + 211
6 org.zakariya.sgf 0x007b8796 sgf::hid::MouseManager_Impl::MouseManager_Impl(sgf::hid::MouseManager*) + 102
7 org.zakariya.sgf 0x007b7a12 sgf::hid::MouseManager::MouseManager(sgf::DisplayDelegate*) + 284
8 org.zakariya.sgf 0x007ac8a4 sgf::hid::DeviceHub::DeviceHub(sgf::DisplayDelegate*) + 134
9 org.zakariya.sgf 0x0078484d sgf::DisplayDelegate::toolkitReady() + 41
10 org.zakariya.sre 0x004ad600 sre::Application::toolkitReady() + 32
11 org.zakariya.sgf 0x0082101d -[SGFOpenGLView setDelegate:] + 79
12 org.zakariya.sgf 0x00820244 SGFApplicationMain(sgf::Application*, int, char const**) + 394
13 org.zakariya.Terrain 0x00001f9c main + 82
14 org.zakariya.Terrain 0x00001f0e start + 54

Looks like you've got an interesting crash here... 15" MBP, standard setup... :/
Quote this message in a reply
Member
Posts: 45
Joined: 2008.04
Post: #28
Interesting demo, lots of "how does he do it questions", but first I have a more important dumb user question: how to change the two toggles at the top of the screen? When I mouse over I see a darker bar temporarily under the options, but no amount of mouse clicking or dragging lets me actually change them....
Quote this message in a reply
Member
Posts: 87
Joined: 2006.08
Post: #29
TomorrowPlusX Wrote:Sounds kind of like I'm screwed if that's the case Cry

That being said, I've come up with some simple GLSL optimizations to implement which may make for some good speedups.

More tests to run:

1) How much does performance improve when alpha test is disabled?

2) Can you use occlusion queries to count the total number of your pixels covered by the foliage? (disable alpha test and depth testing when performing this test). How does this compare to the number of pixels in the whole framebuffer?
Quote this message in a reply
DoG
Moderator
Posts: 869
Joined: 2003.01
Post: #30
I find it a bit odd that the performance on the X1600 and GF8600 is about the same. I have the impression that you are doing something very wonky syncing drawing to your physics simulation.

Also, GL Profiler shows you seem to have too many state changes, and a lot of glVertex calls which you should also get rid of.
Quote this message in a reply
Post Reply