doom3-gpl
Doom 3 GPL source release
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Simd_AltiVec.h
Go to the documentation of this file.
1 /*
2 ===========================================================================
3 
4 Doom 3 GPL Source Code
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
6 
7 This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
8 
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
21 
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
23 
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
25 
26 ===========================================================================
27 */
28 
29 #ifndef __MATH_SIMD_ALTIVEC_H__
30 #define __MATH_SIMD_ALTIVEC_H__
31 
32 /*
33 ===============================================================================
34 
35  AltiVec implementation of idSIMDProcessor
36 
37 ===============================================================================
38 */
39 
40 // Defines for enabling parts of the library
41 
42 // Turns on/off the simple math routines (add, sub, div, etc)
43 #define ENABLE_SIMPLE_MATH
44 
45 // Turns on/off the dot routines
46 #define ENABLE_DOT
47 
48 // Turns on/off the compare routines
49 #define ENABLE_COMPARES
50 
51 // The MinMax routines introduce a couple of bugs. In the bathroom of the alphalabs2 map, the
52 // wrong surface appears in the mirror at times. They also introduce a noticeable delay when map
53 // data is loaded, such as when going through doors.
54 // Turns on/off MinMax routines
55 //#define ENABLE_MINMAX
56 
57 // Turns on/off Clamp routines
58 #define ENABLE_CLAMP
59 
60 // Turns on/off XXX16 routines
61 #define ENABLE_16ROUTINES
62 
63 // Turns on/off LowerTriangularSolve, LowerTriangularSolveTranspose, and MatX_LDLTFactor
64 #define ENABLE_LOWER_TRIANGULAR
65 
66 // Turns on/off TracePointCull, DecalPointCull, and OverlayPointCull
67 // The ENABLE_CULL routines break the g_decals functionality; DecalPointCull is
68 // the likely suspect. Bullet holes do not appear on the walls when this optimization
69 // is enabled.
70 //#define ENABLE_CULL
71 
72 // Turns on/off DeriveTriPlanes, DeriveTangents, DeriveUnsmoothedTangents, NormalizeTangents
73 #define ENABLE_DERIVE
74 
75 // Turns on/off CreateTextureSpaceLightVectors, CreateSpecularTextureCoords, CreateShadowCache, CreateVertexProgramShadowCache
76 #define ENABLE_CREATE
77 
78 // Turns on/off the sound routines
79 #define ENABLE_SOUND_ROUTINES
80 
81 // Turns on/off the routines that are not covered by any other switch
82 // Currently: BlendJoints, TransformJoints, UntransformJoints, TransformVerts, ConvertJointQuatsToJointMats, and
83 // ConvertJointMatsToJointQuats
84 #define LIVE_VICARIOUSLY
85 
86 // This assumes that the dest (and mixBuffer) arrays passed to the sound functions are aligned. If this is not true, we take a large
87 // performance hit from having to do unaligned stores
88 //#define SOUND_DEST_ALIGNED
89 
90 // This assumes that the vertexCache array to CreateShadowCache and CreateVertexProgramShadowCache is aligned. If it's not,
91 // then we take a big performance hit from unaligned stores.
92 //#define VERTEXCACHE_ALIGNED
93 
94 // This turns on support for PPC intrinsics in the SIMD_AltiVec.cpp file. Right now it's only used for frsqrte. GCC
95 // supports these intrinsics but XLC does not.
96 //#define PPC_INTRINSICS
97 
98 // This assumes that the idDrawVert array that is used in DeriveUnsmoothedTangents is aligned. If it's not aligned,
99 // then we don't get any speedup
100 //#define DERIVE_UNSMOOTH_DRAWVERT_ALIGNED
101 
102 // Disable DRAWVERT_PADDED since we disabled the ENABLE_CULL optimizations and the default
103 // implementation does not allow for the extra padding.
104 // This assumes that idDrawVert has been padded by 4 bytes so that xyz always starts at an aligned
105 // address
106 //#define DRAWVERT_PADDED
107 
109 #if defined(MACOS_X) && defined(__ppc__)
110 public:
111 
112  virtual const char * VPCALL GetName( void ) const;
113 
114 #ifdef ENABLE_SIMPLE_MATH
115  // Basic math, works for both aligned and unaligned data
116  virtual void VPCALL Add( float *dst, const float constant, const float *src, const int count );
117  virtual void VPCALL Add( float *dst, const float *src0, const float *src1, const int count );
118  virtual void VPCALL Sub( float *dst, const float constant, const float *src, const int count );
119  virtual void VPCALL Sub( float *dst, const float *src0, const float *src1, const int count );
120  virtual void VPCALL Mul( float *dst, const float constant, const float *src, const int count);
121  virtual void VPCALL Mul( float *dst, const float *src0, const float *src1, const int count );
122  virtual void VPCALL Div( float *dst, const float constant, const float *divisor, const int count );
123  virtual void VPCALL Div( float *dst, const float *src0, const float *src1, const int count );
124  virtual void VPCALL MulAdd( float *dst, const float constant, const float *src, const int count );
125  virtual void VPCALL MulAdd( float *dst, const float *src0, const float *src1, const int count );
126  virtual void VPCALL MulSub( float *dst, const float constant, const float *src, const int count );
127  virtual void VPCALL MulSub( float *dst, const float *src0, const float *src1, const int count );
128 #endif
129 
130 #ifdef ENABLE_DOT
131  // Dot products, expects data structures in contiguous memory
132  virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count );
133  virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count );
134  virtual void VPCALL Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count );
135  virtual void VPCALL Dot( float *dst, const idPlane &constant,const idVec3 *src, const int count );
136  virtual void VPCALL Dot( float *dst, const idPlane &constant,const idPlane *src, const int count );
137  virtual void VPCALL Dot( float *dst, const idPlane &constant,const idDrawVert *src, const int count );
138  virtual void VPCALL Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count );
139  virtual void VPCALL Dot( float &dot, const float *src1, const float *src2, const int count );
140 #endif
141 
142 #ifdef ENABLE_COMPARES
143  // Comparisons, works for both aligned and unaligned data
144  virtual void VPCALL CmpGT( byte *dst, const float *src0, const float constant, const int count );
145  virtual void VPCALL CmpGT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
146  virtual void VPCALL CmpGE( byte *dst, const float *src0, const float constant, const int count );
147  virtual void VPCALL CmpGE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
148  virtual void VPCALL CmpLT( byte *dst, const float *src0, const float constant, const int count );
149  virtual void VPCALL CmpLT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
150  virtual void VPCALL CmpLE( byte *dst, const float *src0, const float constant, const int count );
151  virtual void VPCALL CmpLE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count );
152 #endif
153 
154 #ifdef ENABLE_MINMAX
155  // Min/Max. Expects data structures in contiguous memory
156  virtual void VPCALL MinMax( float &min, float &max, const float *src, const int count );
157  virtual void VPCALL MinMax( idVec2 &min, idVec2 &max, const idVec2 *src, const int count );
158  virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idVec3 *src, const int count );
159  virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int count );
160  virtual void VPCALL MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count );
161 #endif
162 
163 #ifdef ENABLE_CLAMP
164  // Clamp operations. Works for both aligned and unaligned data
165  virtual void VPCALL Clamp( float *dst, const float *src, const float min, const float max, const int count );
166  virtual void VPCALL ClampMin( float *dst, const float *src, const float min, const int count );
167  virtual void VPCALL ClampMax( float *dst, const float *src, const float max, const int count );
168 #endif
169 
170  // These are already using memcpy and memset functions. Leaving default implementation
171 // virtual void VPCALL Memcpy( void *dst, const void *src, const int count );
172 // virtual void VPCALL Memset( void *dst, const int val, const int count );
173 
174 #ifdef ENABLE_16ROUTINES
175  // Operations that expect 16-byte aligned data and 16-byte padded memory (with zeros), generally faster
176  virtual void VPCALL Zero16( float *dst, const int count );
177  virtual void VPCALL Negate16( float *dst, const int count );
178  virtual void VPCALL Copy16( float *dst, const float *src, const int count );
179  virtual void VPCALL Add16( float *dst, const float *src1, const float *src2, const int count );
180  virtual void VPCALL Sub16( float *dst, const float *src1, const float *src2, const int count );
181  virtual void VPCALL Mul16( float *dst, const float *src1, const float constant, const int count );
182  virtual void VPCALL AddAssign16( float *dst, const float *src, const int count );
183  virtual void VPCALL SubAssign16( float *dst, const float *src, const int count );
184  virtual void VPCALL MulAssign16( float *dst, const float constant, const int count );
185 #endif
186 
187 // Most of these deal with tiny matrices or vectors, generally not worth altivec'ing since
188 // the scalar code is already really fast
189 
190 // virtual void VPCALL MatX_MultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
191 // virtual void VPCALL MatX_MultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
192 // virtual void VPCALL MatX_MultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
193 // virtual void VPCALL MatX_TransposeMultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
194 // virtual void VPCALL MatX_TransposeMultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
195 // virtual void VPCALL MatX_TransposeMultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec );
196 // virtual void VPCALL MatX_MultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
197 // virtual void VPCALL MatX_TransposeMultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 );
198 
199 #ifdef ENABLE_LOWER_TRIANGULAR
200  virtual void VPCALL MatX_LowerTriangularSolve( const idMatX &L, float *x, const float *b, const int n, int skip = 0 );
201  virtual void VPCALL MatX_LowerTriangularSolveTranspose( const idMatX &L, float *x, const float *b, const int n );
202  virtual bool VPCALL MatX_LDLTFactor( idMatX &mat, idVecX &invDiag, const int n );
203 #endif
204 #ifdef LIVE_VICARIOUSLY
205  virtual void VPCALL BlendJoints( idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints );
206  virtual void VPCALL ConvertJointQuatsToJointMats( idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints );
207  virtual void VPCALL ConvertJointMatsToJointQuats( idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints );
208 #endif
209 
210 #ifdef LIVE_VICARIOUSLY
211  virtual void VPCALL TransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
212  virtual void VPCALL UntransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint );
213  virtual void VPCALL TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights );
214 #endif
215 
216 #ifdef ENABLE_CULL
217  virtual void VPCALL TracePointCull( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts );
218  virtual void VPCALL DecalPointCull( byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts );
219  virtual void VPCALL OverlayPointCull( byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts );
220 #endif
221 
222 #ifdef ENABLE_DERIVE
223  virtual void VPCALL DeriveTriPlanes( idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
224  virtual void VPCALL DeriveTangents( idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
225  virtual void VPCALL DeriveUnsmoothedTangents( idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts );
226  virtual void VPCALL NormalizeTangents( idDrawVert *verts, const int numVerts );
227 #endif
228 
229 #ifdef ENABLE_CREATE
230  virtual void VPCALL CreateTextureSpaceLightVectors( idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
231  virtual void VPCALL CreateSpecularTextureCoords( idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes );
232  virtual int VPCALL CreateShadowCache( idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts );
233  virtual int VPCALL CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts );
234 #endif
235 
236 #ifdef ENABLE_SOUND_ROUTINES
237  // Sound upsampling and mixing routines, works for aligned and unaligned data
238  virtual void VPCALL UpSamplePCMTo44kHz( float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels );
239  virtual void VPCALL UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels );
240  virtual void VPCALL MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
241  virtual void VPCALL MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] );
242  virtual void VPCALL MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
243  virtual void VPCALL MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] );
244  virtual void VPCALL MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples );
245 #endif
246 #endif
247 
248 };
249 
250 #endif /* !__MATH_SIMD_ALTIVEC_H__ */
virtual void VPCALL MatX_LowerTriangularSolve(const idMatX &L, float *x, const float *b, const int n, int skip=0)
virtual void VPCALL CreateSpecularTextureCoords(idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
virtual void VPCALL TransformVerts(idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, const int numWeights)
#define min(a, b)
virtual void VPCALL AddAssign16(float *dst, const float *src, const int count)
const GLbyte * weights
Definition: glext.h:3273
virtual void VPCALL Dot(float *dst, const idVec3 &constant, const idVec3 *src, const int count)
virtual void VPCALL BlendJoints(idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints)
virtual void VPCALL MixedSoundToSamples(short *samples, const float *mixBuffer, const int numSamples)
GLenum GLsizei n
Definition: glext.h:3705
virtual void VPCALL ClampMax(float *dst, const float *src, const float max, const int count)
Definition: Vector.h:316
virtual void VPCALL TracePointCull(byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Add(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL CmpGE(byte *dst, const float *src0, const float constant, const int count)
virtual void VPCALL MixSoundTwoSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])
GLuint src
Definition: glext.h:5390
virtual void VPCALL MatX_LowerTriangularSolveTranspose(const idMatX &L, float *x, const float *b, const int n)
GLenum GLint x
Definition: glext.h:2849
virtual void VPCALL Sub16(float *dst, const float *src1, const float *src2, const int count)
virtual void VPCALL ClampMin(float *dst, const float *src, const float min, const int count)
virtual void VPCALL DeriveUnsmoothedTangents(idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts)
virtual void VPCALL MulAssign16(float *dst, const float constant, const int count)
virtual void VPCALL MixSoundTwoSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2])
GLuint dst
Definition: glext.h:5285
GLuint GLuint GLsizei count
Definition: glext.h:2845
Definition: Vector.h:52
virtual void VPCALL OverlayPointCull(byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Negate16(float *dst, const int count)
GLuint index
Definition: glext.h:3476
Definition: Vector.h:808
virtual void VPCALL DecalPointCull(byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts)
virtual void VPCALL SubAssign16(float *dst, const float *src, const int count)
virtual void VPCALL CmpLE(byte *dst, const float *src0, const float constant, const int count)
virtual void VPCALL Div(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL ConvertJointQuatsToJointMats(idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints)
Definition: Plane.h:71
Definition: eax4.h:1413
virtual void VPCALL DeriveTriPlanes(idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
INT64 INT64 divisor
Definition: wglext.h:504
virtual bool VPCALL MatX_LDLTFactor(idMatX &mat, idVecX &invDiag, const int n)
virtual void VPCALL MulSub(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL MixSoundSixSpeakerStereo(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])
GLubyte GLubyte b
Definition: glext.h:4662
virtual void VPCALL MixSoundSixSpeakerMono(float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6])
virtual void VPCALL CreateTextureSpaceLightVectors(idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
virtual void VPCALL UpSamplePCMTo44kHz(float *dest, const short *pcm, const int numSamples, const int kHz, const int numChannels)
virtual void VPCALL DeriveTangents(idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes)
virtual void VPCALL Mul16(float *dst, const float *src1, const float constant, const int count)
unsigned char byte
Definition: Lib.h:75
virtual int VPCALL CreateShadowCache(idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts)
virtual void VPCALL Zero16(float *dst, const int count)
virtual void VPCALL Mul(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL NormalizeTangents(idDrawVert *verts, const int numVerts)
idVertexCache vertexCache
Definition: VertexCache.cpp:41
virtual void VPCALL Clamp(float *dst, const float *src, const float min, const float max, const int count)
#define VPCALL
Definition: Simd.h:63
virtual void VPCALL UntransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)
float dot(float a[], float b[])
Definition: Model_lwo.cpp:3883
virtual int VPCALL CreateVertexProgramShadowCache(idVec4 *vertexCache, const idDrawVert *verts, const int numVerts)
virtual void VPCALL ConvertJointMatsToJointQuats(idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints)
virtual void VPCALL MulAdd(float *dst, const float constant, const float *src, const int count)
#define max(x, y)
Definition: os.h:70
virtual void VPCALL MinMax(float &min, float &max, const float *src, const int count)
virtual void VPCALL Copy16(float *dst, const float *src, const int count)
virtual void VPCALL Sub(float *dst, const float constant, const float *src, const int count)
virtual void VPCALL TransformJoints(idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint)
virtual void VPCALL CmpLT(byte *dst, const float *src0, const float constant, const int count)
virtual void VPCALL Add16(float *dst, const float *src1, const float *src2, const int count)
virtual void VPCALL UpSampleOGGTo44kHz(float *dest, const float *const *ogg, const int numSamples, const int kHz, const int numChannels)
virtual void VPCALL CmpGT(byte *dst, const float *src0, const float constant, const int count)
virtual const char *VPCALL GetName(void) const