diff --git a/.gitattributes b/.gitattributes index 007c8708542989d20da3e5d1682a742890d461b8..d4fda8e07331c0953a0f686f6d095bdd186809e8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4316,32 +4316,6 @@ RTCP/Cobalt/OutputProc/test/tTBB_Writer-transient.in_1/tTBB_Writer-transient-ref RTCP/Cobalt/OutputProc/test/tTBB_Writer-transient.run eol=lf RTCP/Cobalt/OutputProc/test/tTBB_Writer-transient.sh eol=lf RTCP/Cobalt/Tools/plot_cobalt_flagging.py -text -RTCP/Cobalt/clAmdFft/appmlEnv.sh -text -RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client -text -RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client-1.8.291 -text -RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client -text -RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client-1.8.291 -text -RTCP/Cobalt/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz -text -RTCP/Cobalt/clAmdFft/clAmdFft-EULA.txt -text -RTCP/Cobalt/clAmdFft/clAmdFft-README.txt -text -RTCP/Cobalt/clAmdFft/clAmdFft1.8.291.tar.gz -text -RTCP/Cobalt/clAmdFft/doc/clAmdFft.refman.pdf -text svneol=unset#unset -RTCP/Cobalt/clAmdFft/include/clAmdFft.h -text -RTCP/Cobalt/clAmdFft/include/clAmdFft.version.h -text -RTCP/Cobalt/clAmdFft/install-clAmdFft-1.8.291.sh -text -RTCP/Cobalt/clAmdFft/samples/CMakeLists.txt -text -RTCP/Cobalt/clAmdFft/samples/amd-unicode.h -text -RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.cpp -text -RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.h -text -RTCP/Cobalt/clAmdFft/samples/clAmdFft.h -text -RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.cpp -text -RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.h -text -RTCP/Cobalt/clAmdFft/samples/clMemcpy.cpp -text -RTCP/Cobalt/clAmdFft/samples/statisticalTimer.cpp -text -RTCP/Cobalt/clAmdFft/samples/statisticalTimer.h -text -RTCP/Cobalt/clAmdFft/samples/stdafx.cpp -text -RTCP/Cobalt/clAmdFft/samples/stdafx.h -text -RTCP/Cobalt/clAmdFft/samples/targetver.h -text RTCP/Cobalt/doc/package.dox -text RTCP/doc/package.dox -text SAS/DataManagement/CMakeLists.txt -text diff --git a/RTCP/Cobalt/clAmdFft/appmlEnv.sh b/RTCP/Cobalt/clAmdFft/appmlEnv.sh deleted file mode 100755 index 1752540a870c8cc213a24d0610241bec79af922d..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/appmlEnv.sh +++ /dev/null @@ -1,24 +0,0 @@ -#! /bin/bash -# Short script meant to automate the task of setting up a terminal window to -# use the APPML library - -# Verify that this script has been sourced, not directly executed -if [[ "${BASH_SOURCE[0]}" == "${0}" ]] -then - echo "This script is meant to be sourced '.', as it modifies environmental variables" - echo "Try running as: '. $(basename ${0})'" - exit -fi - -# This is a sequence of bash commands to get the directory of this script -scriptDir=$(dirname $(readlink -f ${BASH_SOURCE[0]})) -# echo Script dir is: ${scriptDir} - -# Bash regexp to determine if the terminal is set up to point to APPML -if [[ ${LD_LIBRARY_PATH} = *${scriptDir}/lib64:${scriptDir}/lib32* ]] -then - echo "APPML math libraries is set in LD_LIBRARY_PATH" -else - echo "Patching LD_LIBRARY_PATH to include APPML math libraries" - export LD_LIBRARY_PATH=${scriptDir}/lib64:${scriptDir}/lib32:${LD_LIBRARY_PATH} -fi diff --git a/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client b/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client deleted file mode 120000 index 4c429f2ba6b154932f86938a7f85885fd7c7d73a..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client +++ /dev/null @@ -1 +0,0 @@ -clAmdFft.Client-1.8.291 \ No newline at end of file diff --git a/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client-1.8.291 b/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client-1.8.291 deleted file mode 100755 index 7f34771aed6d08c2fc6704228a854c21f9a9aa74..0000000000000000000000000000000000000000 Binary files a/RTCP/Cobalt/clAmdFft/bin32/clAmdFft.Client-1.8.291 and /dev/null differ diff --git a/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client b/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client deleted file mode 120000 index 4c429f2ba6b154932f86938a7f85885fd7c7d73a..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client +++ /dev/null @@ -1 +0,0 @@ -clAmdFft.Client-1.8.291 \ No newline at end of file diff --git a/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client-1.8.291 b/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client-1.8.291 deleted file mode 100755 index 8dbf34568a62e89262fe165bfe7edf5a621406d8..0000000000000000000000000000000000000000 Binary files a/RTCP/Cobalt/clAmdFft/bin64/clAmdFft.Client-1.8.291 and /dev/null differ diff --git a/RTCP/Cobalt/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz b/RTCP/Cobalt/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz deleted file mode 100644 index 8f495d1388576ab1abf15db4727c2a7c1a0d2f35..0000000000000000000000000000000000000000 Binary files a/RTCP/Cobalt/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz and /dev/null differ diff --git a/RTCP/Cobalt/clAmdFft/clAmdFft-EULA.txt b/RTCP/Cobalt/clAmdFft/clAmdFft-EULA.txt deleted file mode 100644 index 8cf752a15d2d961d9c640ad0bd67efd695c150a3..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/clAmdFft-EULA.txt +++ /dev/null @@ -1,402 +0,0 @@ -LICENSE AGREEMENT - -IMPORTANT-READ CAREFULLY: Do not install, copy or use the enclosed Materials -(defined below) until carefully reading and agreeing to the following terms -and conditions. This is a legal agreement ("Agreement") between you (either -an individual or an entity) ("You") and Advanced Micro Devices, Inc. -("AMD"). If You do not agree to the terms of this Agreement, do not install, -copy or use the Materials or any portion thereof. By installing, copying or -using the Materials provided herewith or that is made available by AMD to -download from any media, You agree to all of the terms of this Agreement. -Note that these Materials are AMD Confidential Information and may not be -shared with any third party except as expressly provided below. - -1. DEFINITIONS. - -In addition to those definitions set forth elsewhere in this Agreement, the -following terms have the meanings specified below: -a) "Distributed Software" means software developed or modified by You either -statically linked to Libraries or dynamically linked to Runtimes, and/or -derivative works of the Sample Source or modifiable Documentation. -b) "Documentation" means associated install scripts and online or electronic -documentation included as part of the deliverables in the Materials, or other -related materials or any portion thereof. -c) "Free Software License" means any software license that requires as a -condition of use, modification, adaptation or distribution of such licensed -software that other software derived from, distributed with or incorporated -into at the source code level be disclosed or distributed in Source Code -form. By way of example, Free Software License includes, but is in no way -limited to any of the following licenses or distribution models, or licenses -or distribution models similar to any of the following: (i) GNU's General -Public License (GPL) or Lesser/Library GPL (LGPL), (ii) The Artistic License -(e.g., PERL), (iii) the Mozilla Public License, (iv) the Netscape Public -License, (v) the Sun Community Source License (SCSL), and (vi) the Sun -Industry Standards Source License (SISSL). -d) "Intellectual Property Rights" means any rights under any patents, -trademarks, copyrights, mask works, trade secret information, intellectual -property, license or similar materials. -e) "Libraries" means libraries in Object Code included as part of the -deliverables in the Materials that may be statically linked into Your -software for the Licensed Purpose. -f) "Licensed Purpose" means: (i) test and evaluate the Materials internally; -(ii) use of the Materials to create Distributed Software; and (iii) -distributing and sublicensing to end users the Distributed Software and -Runtimes. -g) "Materials" means AMD Advanced Parallel Processing Math Library (APPML), -including but not limited to Documentation, Libraries, Runtimes, Object Code, -Sample Source and Tools. -h) "Object Code" means machine readable computer programming code files, -which is not in a human readable form and which does not include debug -symbols similar in detail to Source Code. -i) "Runtimes" means programs or dynamically linked libraries in Object Code -which are included as part of the deliverables in the Materials. -j) "Sample Source" means header files and sample code in Source Code form -which are included as part of the deliverables in the Materials. -k) "Tools" means any tools or utilities in the Materials. -l) "Source Code" means human readable form computer programming code and -related system level documentation, including all comments, symbols and any -procedural code such as job control language. - -2. LICENSE. Subject to the terms and conditions of this Agreement, AMD -hereby grants You a non-exclusive, royalty-free, revocable, non-transferable, -non-assignable limited copyright license to: -a) install, use and reproduce the Materials internally at Your site(s) -solely for the purpose of internal testing and evaluation; -b) modify the Sample Source or Documentation to create Distributed Software; -c) statically link the Libraries or dynamically link Runtimes to Your -Software; and -d) distribute and sublicense to end users in Object Code form only the -Distributed Software and Runtimes for the Licensed Purpose. Your right to -distribute the Distributed Software and Runtimes to end users includes the -right to distribute through distributors including multiple layers of -distributors. - -3. REQUIREMENTS. You will sublicense the end users to use Distributed -Software, Libraries and Runtimes in accordance with terms and conditions that -are substantially similar to the terms and conditions contained in Schedule A -hereof. You may include these terms in Your standard form agreement. You -must reproduce all AMD trademark and/or copyright notices on any copy of the -Distributed Software and Runtimes that You distribute. - -4. RESTRICTIONS. Restrictions regarding Your use of the Materials are as -follows. You may not: -a) distribute, publish or sublicense the Documentation, the Sample Source, -the Libraries (except when built into the Distributed Software), the Tools or -any Source Code in the Materials to anyone; -b) reproduce copies of the Materials other than what is reasonably required -for the Licensed Purpose; -c) decompile, reverse engineer, disassemble or otherwise reduce the Object -Code contained in the Materials to a human-perceivable form; -d) alter any copyright, trademark or patent notice(s) in the Materials; -e) use AMD's trademarks in Your software or product names or in a way that -suggests the Distributed Software comes from AMD or is endorsed by AMD; -f) use AMD's trademarks in Your software or product names or in a way that -that suggests that any of the Materials are endorsed by AMD; -g) include contents in malicious, deceptive or unlawful programs; -h) modify and/or distribute any of the Materials so that any part of thereof -becomes subject to a Free Software License; -i) use the Materials to enable, support or otherwise aid You or a third -party to develop technology competitive with the AMD technology embodied in -Materials or relating to the AMD products; or -j) rent, lease or lend the Materials or transfer the Materials to any third -party except as expressly provided herein. - -You also agree that the Materials are licensed, not sold by AMD. - -Except as expressly provided in Section 2, AMD does not grant, by -implication, estoppel or otherwise any other Intellectual Property Rights. -You agree that all licenses granted herein are conditioned upon the use of -the Materials for the Licensed Purpose. You agree that the Materials and all -partial versions thereto, including without limitation all modifications, -enhancements, updates, bug fixes, inventions, know-how, as well as all -Intellectual Property Rights and all other information relating thereto are -and will remain the sole and exclusive property of AMD. You shall have no -right, title or interest therein except for the limited licenses set forth in -Section 2 of this Agreement. AMD agrees that the foregoing shall not grant -AMD any right, title or interest in Your Distributed Software that is not -provided as part of the Materials, and Intellectual Property Rights therein -are and will remain Your sole and exclusive property. Nothing in this -Agreement shall be construed to limit AMD's right to independently develop or -acquire software or products similar to those of Your software or products -including any Intellectual Property Rights therein. - -The Materials may include third party technologies (e.g. third party -libraries) for which You must obtain licenses from parties other than AMD. -You agree that AMD has not obtained or conveyed to You--and that You shall be -responsible for obtaining--Intellectual Property Rights to use and/or -distribute the applicable, underlying Intellectual Property Rights related to -the third party technologies. These third party technologies are not -licensed as part of the Materials and are not licensed under this Agreement. - -Without limiting Section 10, You agree that AMD has no duty to defend You for -any infringement claims related to the standards and third party -technologies. You agree to indemnify and hold AMD harmless for any costs or -damages that result from such claims. - -5. NO SUPPORT. AMD is under no obligation to provide any kind of technical, -development or end-user support for the Materials. - -6. UPDATES. AMD may provide updates from time to time. If AMD provides -updates, these updates are licensed under the terms of this Agreement. - -7. FEEDBACK. You have no obligation to give AMD any suggestions, comments -or other feedback ("Feedback") relating to the Materials. However, AMD may -use and include any Feedback that You provide to improve the Materials or -other related AMD products and technologies. You grant AMD and its -affiliates and subsidiaries a worldwide, non-exclusive, irrevocable, -royalty-free, perpetual license to, directly or indirectly, use, reproduce, -license, sublicense, distribute, make, have made, sell and otherwise -commercialize the Feedback in the Materials or other AMD technologies. You -further agree not to provide any Feedback that (a) You know is subject to any -patent, copyright or other intellectual property claim or right of any third -party; (b) is subject to a Free Software License; or (c) is subject to -license terms which seek to require any products incorporating or derived -from such Feedback, or other AMD intellectual property, to be licensed to or -otherwise shared with any third party. - -8. CONFIDENTIALITY. You shall refrain from disclosing any Confidential -Information to third parties and will take reasonable security precautions, -at least as great as the precautions it takes to protect its own confidential -information, but no less than reasonable care, to keep confidential the -Confidential Information. For the purposes hereof, "Confidential -Information" means all information disclosed between the parties in -connection with this Agreement, including the Materials and any other -business or technical information provided to You by AMD. You will only -disclose the Confidential Information to Your employees or on-site -subcontractors (a) who have a need to know in furtherance of the Licensed -Purpose; and (b) who have signed a confidentiality agreement with You at -least as restrictive as this Agreement. If at any future time AMD, directly -or indirectly, discloses any other related technology or information to You, -including without limitation any updated versions of the Materials, such -disclosure will also be deemed to be confidential, part of the Materials and -will be subject to the provisions of this Agreement. You may disclose -Confidential Information in accordance with a judicial or other governmental -order, provided that You give AMD reasonable notice prior to such disclosure -to allow AMD a reasonable opportunity to seek a protective order or equivalent. - -9. DISCLAIMER OF WARRANTY. YOU EXPRESSLY ACKNOWLEDGES AND AGREES THAT USE -OF THE MATERIALS ARE AT YOUR SOLE RISK. THE MATERIALS ARE PROVIDED "AS IS" -AND WITHOUT WARRANTY OF ANY KIND AND AMD EXPRESSLY DISCLAIMS ALL WARRANTIES, -EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, -OR THOSE ARISING FROM CUSTOM OF TRADE OR COURSE OF USAGE. AMD DOES NOT -WARRANT THAT THE MATERIALS WILL MEET YOUR REQUIREMENTS, OR THAT THE OPERATION -OF THE MATERIALS WILL BE UNINTERRUPTED OR ERROR-FREE. THE ENTIRE RISK -ASSOCIATED WITH THE USE OF THE MATERIALS IS ASSUMED BY YOU. FURTHERMORE, AMD -DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OR THE RESULTS -OF THE USE OF THE MATERIALS IN TERMS OF THEIR CORRECTNESS, ACCURACY, -RELIABILITY, CURRENTNESS, OR OTHERWISE. SHOULD THE CONTENTS OF THE MATERIALS -PROVE DEFECTIVE, YOU ASSUME THE ENTIRE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF -IMPLIED WARRANTIES, SO THE ABOVE EXCLUSION MAY NOT APPLY TO YOU. - -10. LIMITATION OF LIABILITY AND INDEMNIFICATION. IN NO EVENT SHALL, SHALL -AMD, OR ITS DIRECTORS, OFFICERS, EMPLOYEES OR AGENTS ("AUTHORIZED -REPRESENTATIVES"), ITS SUPPLIERS OR ITS LICENSORS, BE LIABLE TO YOU FOR ANY -PUNITIVE, DIRECT, INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES -(INCLUDING DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS -OF BUSINESS INFORMATION, AND THE LIKE) ARISING OUT OF THE USE, MISUSE OR -INABILITY TO USE THE MATERIALS, BREACH OR DEFAULT, INCLUDING THOSE ARISING -FROM INFRINGEMENT OR ALLEGED INFRINGEMENT OF ANY PATENT, TRADEMARK, COPYRIGHT -OR OTHER INTELLECTUAL PROPERTY RIGHT, BY AMD, EVEN IF AMD AND/OR ITS -AUTHORIZED REPRESENTATIVES HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. AMD WILL NOT BE LIABLE FOR LOSS OF, OR DAMAGE TO, YOUR EQUIPMENT, -RECORDS OR DATA OR ANY DAMAGES CLAIMED BY YOU BASED ON ANY THIRD PARTY -CLAIM. IN NO EVENT SHALL AMD'S TOTAL LIABILITY TO YOU FOR ALL DAMAGES, -LOSSES, AND CAUSES OF ACTION WHETHER IN CONTRACT, TORT (INCLUDING NEGLIGENCE) -EXCEED THE AMOUNT OF $10 USD. BY USING THE MATERIALS WITHOUT CHARGE, YOU -ACCEPT THIS ALLOCATION OF RISK. YOU AGREE TO DEFEND, INDEMNIFY AND HOLD -HARMLESS AMD AND ANY OF ITS AUTHORIZED REPRESENTATIVES FROM AND AGAINST ANY -AND ALL LOSS, DAMAGE, LIABILITY AND OTHER EXPENSES (INCLUDING REASONABLE -ATTORNEYS' FEES), ARISING OUT OF OR IN CONNECTION WITH ANY BREACH OF YOUR -OBLIGATIONS UNDER THIS AGREEMENT. - -11. TERMINATION. This Agreement is effective until terminated. You can -terminate this Agreement at any time by destroying the Materials, and all -copies You have made. This Agreement will terminate immediately without -notice from AMD if You fail to comply with any provision of this Agreement. -Upon termination You must destroy the Materials and all copies You have -made. The termination of this Agreement shall: (i) immediately result in the -termination of all sublicenses previously granted by You to third party -distributors and contract manufacturers under Section 3; and (ii) have no -effect on any sublicenses previously granted by You to end users under -Subsection 3, which sublicenses shall survive in accordance with their terms. - -12. GOVERNMENT END USERS. If You are acquiring the Materials on behalf of -any unit or agency of the United States Government, the following provisions -apply. The Government agrees the Materials were developed at private expense -and are provided with "RESTRICTED RIGHTS". Use, duplication, or disclosure -by the Government is subject to restrictions as set forth in DFARS -227.7202-1(a) and 227.7202-3(a) (1995), DFARS 252.227-7013(c) (1) (ii) (Oct -1988), FAR 12.212(a) (1995), FAR 52.227-19, (June 1987) or FAR 52.227-14(ALT -III) (June 1987), as amended from time to time. In the event that this -Agreement, or any part thereof, is deemed inconsistent with the minimum -rights identified in the Restricted Rights provisions, the minimum rights -shall prevail. - -13. EXPORT RESTRICTIONS. You shall adhere to all U.S. and other applicable -export laws, including but not limited to the U.S. Export Administration -Regulations ("EAR"), currently found at 15 C.F.R. Sections 730 through 744. -Further, pursuant to 15 C.F.R Section 740.6, You hereby certifies that, -except pursuant to a license granted by the United States Department of -Commerce Bureau of Industry and Security or as otherwise permitted pursuant -to a License Exception under the EAR, You will not (1) export, re-export or -release to a national of a country in Country Groups D:1 or E:2 any -restricted technology, software, or source code it receives from AMD, or (2) -export to Country Groups D:1 or E:2 the direct product of such technology or -software, if such foreign produced direct product is subject to national -security controls as identified on the Commerce Control List (currently found -in Supplement 1 to Part 774 of EAR). For the most current Country Group -listings, or for additional information about the EAR or Your obligations -under those regulations, please refer to the U.S. Bureau of Industry and -Security's website at http://www.bis.doc.gov/. These export requirements -shall survive any expiration or termination of this Agreement. - -14. CONTROLLING LAW AND SEVERABILITY. This Agreement will be governed by and -construed under the laws of the State of California without reference to its -conflicts of law principles. The rights and obligations under this Agreement -shall not be governed by the United Nations Convention on Contracts or the -International Sale of Goods, the application of which is expressly excluded. -Each party hereto submits to the jurisdiction of the state and federal courts -of Santa Clara County and the Northern District of California for the purpose -of all legal proceedings arising out of or relating to this Agreement or the -subject matter hereof. Each party waives any objection which it may have to -contest such forum. - -15. SURVIVING OBLIGATIONS. Sections 1, 3-16, inclusive, shall survive any -termination of this Agreement and shall bind the parties and their legal -representatives, successors, heirs and assigns. - -16. COMPLETE AGREEMENT. This Agreement constitutes the entire agreement -between the parties and supersedes any prior or contemporaneous oral or -written agreements with respect to the subject matter of this Agreement. No -waiver, amendment or modification of any provision of this Agreement will be -effective unless in writing and signed by the party against whom enforcement -is sought. - -If You agree to abide by the terms and conditions of this Agreement, please -press "Accept." If You do not agree to abide by the terms and conditions of -this Agreement and press "Decline," You may not use the Materials. -SCHEDULE A -END USER LICENSE AGREEMENT -PLEASE READ THIS LICENSE CAREFULLY BEFORE USING THE SOFTWARE. BY USING THE -SOFTWARE, YOU ARE AGREEING TO BE BOUND BY THE TERMS OF THIS LICENSE. IF YOU -DO NOT AGREE TO THESE TERMS AND CONDITIONS, DO NOT USE THE SOFTWARE. -1. License. The software accompanying this License (hereinafter "Software"), -regardless of the media on which it is distributed, are licensed to you by -Advanced Micro Devices, Inc. ("AMD"). You own the medium on which the -Software is recorded, but AMD and AMD's Licensors (referred to collectively -as "AMD") retain title to the Software and related documentation. You may: -a) use the Software.; and -b) make a reasonable number of copies necessary for the purposes of this -License. You must reproduce on such copy AMD's copyright notice and any -other proprietary legends that were on the original copy of the Software -2. Restrictions. The Software contains copyrighted and patented material, -trade secrets and other proprietary material. In order to protect them, and -except as permitted by applicable legislation, you may not: -a) decompile, reverse engineer, disassemble or otherwise reduce the Software -to a human-perceivable form; -b) modify, network, rent, lend, loan, distribute or create derivative works -based upon the Software in whole or in part; or -c) electronically transmit the Software from one computer to another or over -a network or otherwise transfer the Software except as permitted by this -License. -3. Termination. This License is effective until terminated. You may -terminate this License at any time by destroying the Software, related -documentation and all copies thereof. This License will terminate -immediately without notice from AMD if you fail to comply with any provision -of this License. Upon termination you must destroy the Software, related -documentation and all copies thereof. -4. Government End Users. If you are acquiring the Software on behalf of any -unit or agency of the United States Government, the following provisions -apply. The Government agrees the Software and documentation were developed -at private expense and are provided with "RESTRICTED RIGHTS". Use, -duplication, or disclosure by the Government is subject to restrictions as -set forth in DFARS 227.7202-1(a) and 227.7202-3(a) (1995), DFARS -252.227-7013(c)(1)(ii) (Oct 1988), FAR 12.212(a)(1995), FAR 52.227-19, (June -1987) or FAR 52.227-14(ALT III) (June 1987), as amended from time to time. -In the event that this License, or any part thereof, is deemed inconsistent -with the minimum rights identified in the Restricted Rights provisions, the -minimum rights shall prevail. -5. No Other License. No rights or licenses are granted by AMD under this -License, expressly or by implication, with respect to any proprietary -information or patent, copyright, trade secret or other intellectual property -right owned or controlled by AMD, except as expressly provided in this License. -6. Additional Licenses. DISTRIBUTION OR USE OF THE SOFTWARE WITH AN -OPERATING SYSTEM MAY REQUIRE ADDITIONAL LICENSES FROM THE OPERATING SYSTEM -VENDOR. Additional third party licenses may also be required and you agree -that you shall be solely responsible for obtaining such license rights. -7. Disclaimer of Warranty on Software. You expressly acknowledge and agree -that use of the Software is at your sole risk. The Software and related -documentation are provided "AS IS" and without warranty of any kind and AMD -EXPRESSLY DISCLAIMS ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, ACCURACY, CONDITION, -OWNERSHIP, FITNESS FOR A PARTICULAR PURPOSE, AND/OR OF NON-INFRINGEMENT OF -THIRD PARTY INTELLECTUAL PROPERTY RIGHTS, AND THOSE ARISING FROM CUSTOM OR -TRADE OR COURSE OF USAGE. AMD DOES NOT WARRANT THAT THE FUNCTIONS CONTAINED -IN THE SOFTWARE WILL MEET YOUR REQUIREMENTS, OR THAT THE OPERATION OF THE -SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT DEFECTS IN THE SOFTWARE -WILL BE CORRECTED. THE ENTIRE RISK AS TO THE RESULTS AND PERFORMANCE OF THE -SOFTWARE IS ASSUMED BY YOU. FURTHERMORE, AMD DOES NOT WARRANT OR MAKE ANY -REPRESENTATIONS REGARDING THE USE OR THE RESULTS OF THE USE OF THE SOFTWARE -OR RELATED DOCUMENTATION IN TERMS OF THEIR CORRECTNESS, ACCURACY, -RELIABILITY, CURRENTNESS, OR OTHERWISE. NO ORAL OR WRITTEN INFORMATION OR -ADVICE GIVEN BY AMD OR AMD'S AUTHORIZED REPRESENTATIVE SHALL CREATE A -WARRANTY OR IN ANY WAY INCREASE THE SCOPE OF THIS WARRANTY. SHOULD THE -SOFTWARE PROVE DEFECTIVE, YOU (AND NOT AMD OR AMD'S AUTHORIZED -REPRESENTATIVE) ASSUME THE ENTIRE COST OF ALL NECESSARY SERVICING, REPAIR OR -CORRECTION. THE SOFTWARE IS NOT INTENDED FOR USE IN MEDICAL, LIFE SAVING OR -LIFE SUSTAINING APPLICATIONS. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION -OF IMPLIED WARRANTIES, SO THE ABOVE EXCLUSION MAY NOT APPLY TO YOU. -8. Limitation of Liability. UNDER NO CIRCUMSTANCES INCLUDING NEGLIGENCE, -SHALL AMD, OR ITS DIRECTORS, OFFICERS, EMPLOYEES OR AGENTS ("AUTHORIZED -REPRESENTATIVES"), BE LIABLE TO YOU FOR ANY PUNITIVE, EXEMPLARY, DIRECT, -INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES (INCLUDING DAMAGES FOR -LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS -INFORMATION, AND THE LIKE) ARISING OUT OF THE USE, MISUSE OR INABILITY TO USE -THE SOFTWARE OR RELATED DOCUMENTATION, BREACH OR DEFAULT, INCLUDING THOSE -ARISING FROM INFRINGEMENT OR ALLEGED INFRINGEMENT OF ANY PATENT, TRADEMARK, -COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT, BY AMD, EVEN IF AMD OR AMD'S -AUTHORIZED REPRESENTATIVE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. SOME JURISDICTIONS DO NOT ALLOW THE LIMITATION OR EXCLUSION OF -LIABILITY FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE LIMITATION OR -EXCLUSION MAY NOT APPLY TO YOU. AMD will not be liable for: 1) loss of, or -damage to, your records or data; or 2) any damages claimed by you based on -any third party claim. In no event shall AMD's total liability to you for -all damages, losses, and causes of action (whether in contract, tort -(including negligence) or otherwise) exceed the amount paid by you for the -Software. -9. Export Restrictions. You shall adhere to all U.S. and other applicable -export laws, including but not limited to the U.S. Export Administration -Regulations (EAR), currently found at 15 C.F.R. Sections 730 through 744. -Further, pursuant to 15 C.F.R Section 740.6, You hereby certifies that, -except pursuant to a license granted by the United States Department of -Commerce Bureau of Industry and Security or as otherwise permitted pursuant -to a License Exception under the U.S. Export Administration Regulations -("EAR"), You will not (1) export, re-export or release to a national of a -country in Country Groups D:1 or E:2 any restricted technology, software, or -source code it receives from AMD, or (2) export to Country Groups D:1 or E:2 -the direct product of such technology or software, if such foreign produced -direct product is subject to national security controls as identified on the -Commerce Control List (currently found in Supplement 1 to Part 774 of EAR).ÿ -For the most current Country Group listings, or for additional information -about the EAR or Recipient's obligations under those regulations, please -refer to the U.S. Bureau of Industry and Security's website at -http://www.bis.doc.gov/.ÿ These export requirements shall survive any -expiration or termination of this Agreement. -10. Controlling Law and Severability. This Agreement will be governed by and -construed under the laws of the State of California without reference to its -conflicts of law principles. The rights and obligations under this Agreement -shall not be governed by the United Nations Convention on Contracts or the -International Sale of Goods, the application of which is expressly excluded. -Each party hereto submits to the jurisdiction of the state and federal courts -of Santa Clara County and the Northern District of California for the purpose -of all legal proceedings arising out of or relating to this Agreement or the -subject matter hereof. Each party waives any objection which it may have to -contest such forum. -11. Complete Agreement. This License constitutes the entire agreement -between the parties with respect to the use of the Software and the related -documentation, and supersedes all prior or contemporaneous understandings or -agreements, written or oral, regarding such subject matter. No amendment to -or modification of this License will be binding unless in writing and signed -by a duly authorized representative of AMD. \ No newline at end of file diff --git a/RTCP/Cobalt/clAmdFft/clAmdFft-README.txt b/RTCP/Cobalt/clAmdFft/clAmdFft-README.txt deleted file mode 100644 index 87b218b6dc872afeb3bded9e58f473d868bd2892..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/clAmdFft-README.txt +++ /dev/null @@ -1,193 +0,0 @@ -clAmdFft Readme - -Version: 1.8 -Release Date: September 2012 - -ChangeLog: - -____________ -Current version: -Fixed: - * Failures in real transforms seen on 7xxx series GPUs with certain - problem sizes involving powers of 3 and 5 - -Known Issues: - * Library may return invalid results on CPU devices. - -____________ -Version 1.8.276 (beta): -Fixed: - * Memory leaks affecting use cases where 'clAmdFftEnqueueTransform' is used in a loop - -____________ -Version 1.8.269 (beta): -New: - * clAmdFft now supports real-to-complex and complex-to-real transforms; - refer to documentation for details - * This release tested using the 12.4 Catalyst software suite - -Known Issues: - * Some degradation in performance of real transforms due to known - runtime/driver issues - * Failures in real transforms have been seen on 7xxx series GPUs with certain - problem sizes involving powers of 3 and 5 - -____________ -Version 1.6.244: -Fixed: - * Failures observed in v1.6.236 in backward transforms of certain power of 2 - (involving radix 4 and radix 8) problem sizes. - -____________ -Version 1.6.236: -New: - * Performance of the FFT library has been improved for Radix-2 1D and 2D transforms - * Support for R4XXX GPUs is deprecated and no longer tested - * Preview: Support for AMD Radeon™ HD7000 series GPUs - * This release tested using the 8.92 runtime driver and the 2.6 APP SDK -____________ -Version 1.4: -New: - * clAmdFft now supports transform lengths whose factors consist exclusively - of powers of 2, 3, and 5 - * clAmdFft supports double precision data types - * clAmdFft executes on OpenCL 1.0 compliant devices - * This release tested using the 8.872 runtime driver and the 2.5 APP SDK - * A helper bash script appmlEnv.sh has been added to the root installation - directory to assist in properly setting up a terminal environment to - execute clAmdFft samples - -Fixed: - * If the library is required to allocate a temporary buffer, and the user does - not specify a temporary buffer on the Enqueue call, the library will - allocate a temporary buffer internally and the lifetime of that temporary - buffer is managed by the lifetime of the FFT plan; deleting the plan will - release the buffer. - * Test failures on CPU device for 32-bit systems (Windows/Linux) - -Known Issues: - * Failures have been seen on graphics cards using R4550 (RV710) GPUs. - -____________ -Version 1.2: -New: - * Reduced the number of internal LDS bank conflicts for our 1D FFT transforms, - increasing performance. - * Padded reads/writes to global memory, decreasing bank conflicts and - increasing performance on 2D transforms. - * This release tested using the 8.841 runtime driver and the 2.4 APP SDK - -Fixed: - * Failures have been seen attempting to queue work on the second GPU device on - a multi GPU 5970 card on Linux. - -Known Issues: - * It is recommended that users query for and explicitely create an - intermediate buffer if clAmdFft requires one. If the library creates the - intermediate buffer internally, a race condition may occur on freeing the - buffer on lower end hardware. - * Failures have been seen on graphics cards using R4550 (RV710) GPUs. - * Test failures on CPU device for 32-bit systems (Windows/Linux) - * It is recommended that windows users uninstall previous version of clAmdFft - before installing newer versions. Otherwise, Add/Remove programs only - removes the latest version. Linux users can delete the install directory. - -____________ -Version 1.0: - * Initial release, available on all platforms - -Known Issues: - * Failures have been seen attempting to queue work on the second GPU device on - a multi GPU 5970 card on Linux. -_____________________ -Building the Samples: - -To install the Linux versions of clAmdFft, uncompress the initial download and - then execute the install script. - -For example: - tar -xf clAmdFft-${version}.tar.gz - - This installs three files into the local directory, one being an - executable bash script. - - sudo mkdir /opt/clAmdFft-${version} - - This pre-creates the install directory with proper permissions in /opt - if it is to be installed there (This is the default). - - ./install-clAmdFft-${version}.sh - - This prints an EULA and uncompresses files into the chosen install - directory. - - cd ${installDir}/bin64 - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OpenCLLibDir}:${clAmdFftLibDir} - - Export library dependencies to resolve all external linkages to the - client program. The user can create a bash script to help automate this - procedure. - - ./clAmdFft.Client -h - - Understand the command line options that are available to the user - through the sample client. - - ./clAmdFft.Client -iv - - Watch for the version strings to print out; watch for - 'Client Test *****PASS*****' to print out. - -The sample program does not ship with native build files. Instead, a CMake -file is shipped, and users generate a native build file for their system. - -For example: - cd ${installDir} - mkdir samplesBin/ - - This creates a sister directory to the samples directory that will house - the native makefiles and the generated files from the build. - - cd samplesBin/ - ccmake ../samples/ - - ccmake is a curses-based cmake program. It takes a parameter that - specifies the location of the source code to compile. - - Hit 'c' to configure for the platform; ensure that the dependencies to - external libraries are satisfied, including paths to 'ATI Stream SDK' - and 'Boost'. - - After dependencies are satisfied, hit 'c' again to finalize configure - step, then hit 'g' to generate makefile and exit ccmake. - - make help - - Look at the available options for make. - - make - - Build the sample client program. - - ./clAmdFft.Sample -iv - - Watch for the version strings to print out; watch for - 'Client Test *****PASS*****' to print out. -_______________________________________________________________________________ -(C) 2010,2011 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD -Arrow logo, ATI, the ATI logo, Radeon, FireStream, FireGL, Catalyst, and -combinations thereof are trademarks of Advanced Micro Devices, Inc. Microsoft -(R), Windows, and Windows Vista (R) are registered trademarks of Microsoft -Corporation in the U.S. and/or other jurisdictions. OpenCL and the OpenCL logo -are trademarks of Apple Inc. used by permission by Khronos. Other names are for -informational purposes only and may be trademarks of their respective owners. - -The contents of this document are provided in connection with Advanced Micro -Devices, Inc. ("AMD") products. AMD makes no representations or warranties with -respect to the accuracy or completeness of the contents of this publication and -reserves the right to make changes to specifications and product descriptions -at any time without notice. The information contained herein may be of a -preliminary or advance nature and is subject to change without notice. No -license, whether express, implied, arising by estoppel or otherwise, to any -intellectual property rights is granted by this publication. Except as set forth -in AMD's Standard Terms and Conditions of Sale, AMD assumes no liability -whatsoever, and disclaims any express or implied warranty, relating to its -products including, but not limited to, the implied warranty of -merchantability, fitness for a particular purpose, or infringement of any -intellectual property right. - -AMD's products are not designed, intended, authorized or warranted for use as -components in systems intended for surgical implant into the body, or in other -applications intended to support or sustain life, or in any other application -in which the failure of AMD's product could create a situation where personal -injury, death, or severe property or environmental damage may occur. AMD -reserves the right to discontinue or make changes to its products at any time -without notice. -_______________________________________________________________________________ diff --git a/RTCP/Cobalt/clAmdFft/clAmdFft1.8.291.tar.gz b/RTCP/Cobalt/clAmdFft/clAmdFft1.8.291.tar.gz deleted file mode 100644 index c5597c390b4227b9b394459bb6a4a53c1ef7181a..0000000000000000000000000000000000000000 Binary files a/RTCP/Cobalt/clAmdFft/clAmdFft1.8.291.tar.gz and /dev/null differ diff --git a/RTCP/Cobalt/clAmdFft/doc/clAmdFft.refman.pdf b/RTCP/Cobalt/clAmdFft/doc/clAmdFft.refman.pdf deleted file mode 100644 index d36e16e22290cbece432bc0ffbce8293f6760205..0000000000000000000000000000000000000000 Binary files a/RTCP/Cobalt/clAmdFft/doc/clAmdFft.refman.pdf and /dev/null differ diff --git a/RTCP/Cobalt/clAmdFft/include/clAmdFft.h b/RTCP/Cobalt/clAmdFft/include/clAmdFft.h deleted file mode 100644 index 73e65d0d349eeb24d7d27997d78abe43661a244a..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/include/clAmdFft.h +++ /dev/null @@ -1,573 +0,0 @@ -/*********************************************************************** -** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -***********************************************************************/ - -/*! @file clAmdFft.h - * clAmdFft.h defines all of the public interfaces and types that are meant to be used by clFFT clients - * This is the one public header file that should be consumed by clFFT clients. It is written to adhere to native "C" - * interfaces to make clAmdFft library as portable as possible; it should be callable from C, C++, .NET and Fortran, - * either with the proper linking or using wrapper classes. - * - */ - -#pragma once -#if !defined( CLAMDFFT_DOTH ) -#define CLAMDFFT_DOTH - -#if defined(__APPLE__) || defined(__MACOSX) - #include <OpenCL/cl.h> -#else - #include <CL/cl.h> -#endif - -#include "clAmdFft.version.h" - -/*! This preprocessor definition is the standard way of making exporting APIs - * from a DLL simpler. All files within this DLL are compiled with the CLAMDFFT_EXPORTS - * symbol defined on the command line. This symbol should not be defined on any project - * that uses this DLL. This way any other project whose source files include this file see - * clAmdFft functions as being imported from a DLL, whereas this DLL sees symbols - * defined with this macro as being exported. - */ -#if defined( _WIN32 ) - #if !defined( __cplusplus ) - #define inline __inline - #endif - - #if defined( CLAMDFFT_EXPORTS ) - #define CLAMDFFTAPI __declspec( dllexport ) - #else - #define CLAMDFFTAPI __declspec( dllimport ) - #endif -#else - #define CLAMDFFTAPI -#endif - -/* In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names - * with the string clAmdFft - */ - -/* All functions will return pre-defined error codes, and will NOT throw exceptions to the caller - */ - -/*! @brief clAmdFft error codes definition, incorporating OpenCL error definitions - * - * This enumeration is a superset of the OpenCL error codes. For example, CL_OUT_OF_HOST_MEMORY, - * which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY. The set of basic OpenCL - * error codes is extended to add extra values specific to the clAmdFft package. - */ -enum clAmdFftStatus_ -{ - CLFFT_INVALID_GLOBAL_WORK_SIZE = CL_INVALID_GLOBAL_WORK_SIZE, - CLFFT_INVALID_MIP_LEVEL = CL_INVALID_MIP_LEVEL, - CLFFT_INVALID_BUFFER_SIZE = CL_INVALID_BUFFER_SIZE, - CLFFT_INVALID_GL_OBJECT = CL_INVALID_GL_OBJECT, - CLFFT_INVALID_OPERATION = CL_INVALID_OPERATION, - CLFFT_INVALID_EVENT = CL_INVALID_EVENT, - CLFFT_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, - CLFFT_INVALID_GLOBAL_OFFSET = CL_INVALID_GLOBAL_OFFSET, - CLFFT_INVALID_WORK_ITEM_SIZE = CL_INVALID_WORK_ITEM_SIZE, - CLFFT_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, - CLFFT_INVALID_WORK_DIMENSION = CL_INVALID_WORK_DIMENSION, - CLFFT_INVALID_KERNEL_ARGS = CL_INVALID_KERNEL_ARGS, - CLFFT_INVALID_ARG_SIZE = CL_INVALID_ARG_SIZE, - CLFFT_INVALID_ARG_VALUE = CL_INVALID_ARG_VALUE, - CLFFT_INVALID_ARG_INDEX = CL_INVALID_ARG_INDEX, - CLFFT_INVALID_KERNEL = CL_INVALID_KERNEL, - CLFFT_INVALID_KERNEL_DEFINITION = CL_INVALID_KERNEL_DEFINITION, - CLFFT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, - CLFFT_INVALID_PROGRAM_EXECUTABLE = CL_INVALID_PROGRAM_EXECUTABLE, - CLFFT_INVALID_PROGRAM = CL_INVALID_PROGRAM, - CLFFT_INVALID_BUILD_OPTIONS = CL_INVALID_BUILD_OPTIONS, - CLFFT_INVALID_BINARY = CL_INVALID_BINARY, - CLFFT_INVALID_SAMPLER = CL_INVALID_SAMPLER, - CLFFT_INVALID_IMAGE_SIZE = CL_INVALID_IMAGE_SIZE, - CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, - CLFFT_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, - CLFFT_INVALID_HOST_PTR = CL_INVALID_HOST_PTR, - CLFFT_INVALID_COMMAND_QUEUE = CL_INVALID_COMMAND_QUEUE, - CLFFT_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, - CLFFT_INVALID_CONTEXT = CL_INVALID_CONTEXT, - CLFFT_INVALID_DEVICE = CL_INVALID_DEVICE, - CLFFT_INVALID_PLATFORM = CL_INVALID_PLATFORM, - CLFFT_INVALID_DEVICE_TYPE = CL_INVALID_DEVICE_TYPE, - CLFFT_INVALID_VALUE = CL_INVALID_VALUE, - CLFFT_MAP_FAILURE = CL_MAP_FAILURE, - CLFFT_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, - CLFFT_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED, - CLFFT_IMAGE_FORMAT_MISMATCH = CL_IMAGE_FORMAT_MISMATCH, - CLFFT_MEM_COPY_OVERLAP = CL_MEM_COPY_OVERLAP, - CLFFT_PROFILING_INFO_NOT_AVAILABLE = CL_PROFILING_INFO_NOT_AVAILABLE, - CLFFT_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, - CLFFT_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, - CLFFT_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE, - CLFFT_COMPILER_NOT_AVAILABLE = CL_COMPILER_NOT_AVAILABLE, - CLFFT_DEVICE_NOT_AVAILABLE = CL_DEVICE_NOT_AVAILABLE, - CLFFT_DEVICE_NOT_FOUND = CL_DEVICE_NOT_FOUND, - CLFFT_SUCCESS = CL_SUCCESS, - //-------------------------- Extended status codes for clAmdFft ---------------------------------------- - CLFFT_BUGCHECK = 4*1024, /*!< Bugcheck. */ - CLFFT_NOTIMPLEMENTED, /*!< Functionality is not implemented yet. */ - CLFFT_TRANSPOSED_NOTIMPLEMENTED, /*!< Transposed functionality is not implemented for this transformation. */ - CLFFT_FILE_NOT_FOUND, /*!< Tried to open an existing file on the host system, but failed. */ - CLFFT_FILE_CREATE_FAILURE, /*!< Tried to create a file on the host system, but failed. */ - CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ - CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ - CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ - CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clAmdFftStatus. */ -}; -typedef enum clAmdFftStatus_ clAmdFftStatus; - -/*! @brief The dimension of the input and output buffers that will be fed into all FFT transforms */ -typedef enum clAmdFftDim_ -{ - CLFFT_1D = 1, /*!< 1 Dimensional FFT transform (default). */ - CLFFT_2D, /*!< 2 Dimensional FFT transform. */ - CLFFT_3D, /*!< 3 Dimensional FFT transform. */ - ENDDIMENSION /*!< This value will always be last, and marks the length of clAmdFftDim. */ -} clAmdFftDim; - -/*! @brief What are the expected layout's of the complex numbers <p> - * <b> For Release 1.0,</b> only the CLFFT_COMPLEX_INTERLEAVED and CLFFT_COMPLEX_PLANAR formats are supported. - * The real and hermitian formats should be supported in a future release. - */ -typedef enum clAmdFftLayout_ -{ - CLFFT_COMPLEX_INTERLEAVED = 1, /*!< An array of complex numbers, with real and imaginary components together (default). */ - CLFFT_COMPLEX_PLANAR, /*!< Arrays of real componets and arrays of imaginary components that have been seperated out. */ - CLFFT_HERMITIAN_INTERLEAVED, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in same array. TODO: Document layout */ - CLFFT_HERMITIAN_PLANAR, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in separate arrays. TODO: Document layout */ - CLFFT_REAL, /*!< An array of real numbers, with no corresponding imaginary components. */ - ENDLAYOUT /*!< This value will always be last, and marks the length of clAmdFftLayout. */ -} clAmdFftLayout; - -/*! @brief What is the expected precision of each FFT. - * @ref DistanceStridesandPitches - */ -typedef enum clAmdFftPrecision_ -{ - CLFFT_SINGLE = 1, /*!< An array of complex numbers, with real and imaginary components as floats (default). */ - CLFFT_DOUBLE, /*!< An array of complex numbers, with real and imaginary components as doubles. */ - CLFFT_SINGLE_FAST, /*!< Faster implementation preferred. */ - CLFFT_DOUBLE_FAST, /*!< Faster implementation preferred. */ - ENDPRECISION /*!< This value will always be last, and marks the length of clAmdFftPrecision. */ -} clAmdFftPrecision; - -/*! @brief What is the expected direction of each FFT, time or the frequency domains */ -typedef enum clAmdFftDirection_ -{ - CLFFT_FORWARD = -1, /*!< FFT transform from the time to the frequency domain. */ - CLFFT_BACKWARD = 1, /*!< FFT transform from the frequency to the time domain. */ - CLFFT_MINUS = -1, /*!< Alias for the forward transform. */ - CLFFT_PLUS = 1, /*!< Alias for the backward transform. */ - ENDDIRECTION /*!< This value will always be last, and marks the length of clAmdFftDirection. */ -} clAmdFftDirection; - -/*! @brief Are the input buffers overwritten with the results */ -typedef enum clAmdFftResultLocation_ -{ - CLFFT_INPLACE = 1, /*!< The input and output buffers are the same (default). */ - CLFFT_OUTOFPLACE, /*!< Seperate input and output buffers. */ - ENDPLACE /*!< This value will always be last, and marks the length of clAmdFftPlaceness. */ -} clAmdFftResultLocation; - -/*! @brief whether the result will be returned in original order; only valid for dimensions greater than 1 */ -typedef enum clAmdFftResultTransposed_ { - CLFFT_NOTRANSPOSE = 1, /*!< The results are returned in the original preserved order (default) */ - CLFFT_TRANSPOSED, /*!< The result is transposed where transpose kernel is supported (possibly faster) */ - ENDTRANSPOSED /*!< This value will always be last, and marks the length of clAmdFftResultTransposed */ -} clAmdFftResultTransposed; - -/*! BitMasks to be used with clAmdFftSetupData.debugFlags */ -#define CLFFT_DUMP_PROGRAMS 0x1 - -/*! @brief Data structure that can be passed to clAmdFftSetup() to control the behavior of the FFT runtime - * @details This structure contains values that can be initialized before instantiation of the FFT runtime - * with ::clAmdFftSetup(). To initialize this structure, pass a pointer to a user struct to ::clAmdFftInitSetupData( ), - * which will clear the structure and set the version member variables to the current values. - */ -struct clAmdFftSetupData_ -{ - cl_uint major; /*!< Major version number of the project; signifies major API changes. */ - cl_uint minor; /*!< Minor version number of the project; minor API changes that could break backwards compatibility. */ - cl_uint patch; /*!< Patch version number of the project; Always incrementing number, signifies change over time. */ - - /*! Bitwise flags that control the behavior of library debug logic. */ - cl_ulong debugFlags; /*! This should be set to zero, except when debugging the clAmdFft library. - * <p> debugFlags can be set to CLFFT_DUMP_PROGRAMS, in which case the dynamically generated OpenCL kernels will - * be written to text files in the current working directory. These files will have a *.cl suffix. - */ -}; -typedef struct clAmdFftSetupData_ clAmdFftSetupData; - -/*! @brief An abstract handle to the object that represents the state of the FFT(s) */ -typedef size_t clAmdFftPlanHandle; - -#ifdef __cplusplus -extern "C" { -#endif - /*! @brief Initialize an clAmdFftSetupData struct for the client - * @details clAmdFftSetupData is passed to clAmdFftSetup to control behavior of the FFT runtime - * @param[out] setupData Data structure is cleared, initialized with version information and default values - * @return Enum describing error condition; superset of OpenCL error codes - */ - inline clAmdFftStatus clAmdFftInitSetupData( clAmdFftSetupData* setupData ) - { - setupData->major = clAmdFftVersionMajor; - setupData->minor = clAmdFftVersionMinor; - setupData->patch = clAmdFftVersionPatch; - setupData->debugFlags = 0; - - return CLFFT_SUCCESS; - } - - /*! @brief Initialize internal FFT resources. - * @details AMD's FFT implementation caches kernels, programs and buffers for its internal use. - * @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior - * and debug functionality - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetup( const clAmdFftSetupData* setupData ); - - /*! @brief Release all internal resources. - * @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftTeardown( ); - - /*! @brief Query the FFT library for version information - * @details Return the major, minor and patch version numbers associated with this FFT library - * @param[out] major Major functionality change - * @param[out] minor Minor functionality change - * @param[out] patch Bug fixes, documentation changes, no new features introduced - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch ); - - /*! @brief Create a plan object initialized entirely with default values. - * @details A plan is a repository of state for calculating FFT's. Allows the runtime to pre-calculate kernels, programs - * and buffers and associate them with buffers of specified dimensions. - * @param[out] plHandle Handle to the newly created plan - * @param[in] context Client is responsible for providing an OpenCL context for the plan - * @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array - * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftCreateDefaultPlan( clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, - const size_t* clLengths ); - - /*! @brief Create a copy of an existing plan. - * @details This API allows a client to create a new plan based upon an existing plan. This is a convenience function - * provided for quickly creating plans that are similar, but may differ slightly. - * @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle - * @param[in] new_context Client is responsible for providing a new context for the new plan - * @param[in] in_plHandle Handle to a plan to be copied, previously created - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftCopyPlan( clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle ); - - /*! @brief Prepare the plan for execution. - * @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that - * no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled. This optional function - * allows the client application to perform this function when the application is being initialized instead of on the first - * execution. - * At this point, the clAmdFft runtime will apply all implimented optimizations, possibly including - * running kernel experiments on the devices in the plan context. - * <p> Users should assume that this function will take a long time to execute. If a plan is not baked before being executed, - * users should assume that the first call to clAmdFftEnqueueTransform will take a long time to execute. - * <p> If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of - * the clAmdFftSetPlan____ functions), that will not be considered an error. Instead, the plan will revert back to - * the unbaked state, discarding the benefits of the baking operation. - * @param[in] plHandle Handle to a plan previously created - * @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want - * the runtime to run load experiments and only pre-calculate state information - * @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of - * the devices included in the plan context - * @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that - * an application can register and which will be called when the program executable has been built (successfully or unsuccessfully) - * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. - * @param[in] user_data Passed as an argument when pfn_notify is called. - * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftBakePlan( clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, - void (CL_CALLBACK *pfn_notify)(clAmdFftPlanHandle plHandle, void *user_data), void* user_data ); - - /*! @brief Release the resources of a plan. - * @details A plan may include kernels, programs and buffers associated with it that consume memory. When a plan - * is not needed anymore, the client should release the plan. - * @param[in,out] plHandle Handle to a plan previously created - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftDestroyPlan( clAmdFftPlanHandle* plHandle ); - - /*! @brief Retrieve the OpenCL context of a previously created plan. - * @details User should pass a reference to an cl_context variable, which will be changed to point to a - * context set in the specified plan. - * @param[in] plHandle Handle to a plan previously created - * @param[out] context Reference to user allocated cl_context, which will point to context set in plan - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanContext( const clAmdFftPlanHandle plHandle, cl_context* context ); - - /*! @brief Retrieve the floating point precision of the FFT data - * @details User should pass a reference to an clAmdFftPrecision variable, which will be set to the - * precision of the FFT complex data in the plan. - * @param[in] plHandle Handle to a plan previously created - * @param[out] precision Reference to user clAmdFftPrecision enum - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanPrecision( const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision ); - - /*! @brief Set the floating point precision of the FFT data - * @details Set the plan property which will be the precision of the FFT complex data in the plan. - * @param[in] plHandle Handle to a plan previously created - * @param[in] precision Reference to user clAmdFftPrecision enum <p> - * <b> For Release 1.0,</b> only CLFFT_SINGLE and CLFFT_SINGLE_FAST are supported. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanPrecision( clAmdFftPlanHandle plHandle, clAmdFftPrecision precision ); - - /*! @brief Retrieve the scaling factor that should be applied to the FFT data - * @details User should pass a reference to an cl_float variable, which will be set to the - * floating point scaling factor that will be multiplied across the FFT data. - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Which direction does the scaling factor apply to - * @param[out] scale Reference to user cl_float variable - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanScale( const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale ); - - /*! @brief Set the scaling factor that should be applied to the FFT data - * @details Set the plan property which will be the floating point scaling factor that will be - * multiplied across the FFT data. - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Which direction does the scaling factor apply to - * @param[in] scale Reference to user cl_float variable - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanScale( clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale ); - - /*! @brief Retrieve the number of discrete arrays that this plan can handle concurrently - * @details User should pass a reference to an cl_uint variable, which will be set to the - * number of discrete arrays (1D or 2D) that will be batched together for this plan - * @param[in] plHandle Handle to a plan previously created - * @param[out] batchSize How many discrete number of FFT's are to be performed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanBatchSize( const clAmdFftPlanHandle plHandle, size_t* batchSize ); - - /*! @brief Set the number of discrete arrays that this plan can handle concurrently - * @details Set the plan property which will be set to the number of discrete arrays (1D or 2D) - * that will be batched together for this plan - * @param[in] plHandle Handle to a plan previously created - * @param[in] batchSize How many discrete number of FFT's are to be performed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanBatchSize( clAmdFftPlanHandle plHandle, size_t batchSize ); - - /*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan - * @details Queries a plan object and retrieves the dimensionality that the plan is set for. A size is returned to - * help the client allocate the proper storage to hold the dimensions in a further call to clAmdFftGetPlanLength - * @param[in] plHandle Handle to a plan previously created - * @param[out] dim The dimensionality of the FFT's to be transformed - * @param[out] size Value used to allocate an array to hold the FFT dimensions. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDim( const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size ); - - /*! @brief Set the dimensionality of FFT's to be transformed by the plan - * @details Set the dimensionality of FFT's to be transformed by the plan - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimensionality of the FFT's to be transformed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDim( clAmdFftPlanHandle plHandle, const clAmdFftDim dim ); - - /*! @brief Retrieve the length of each dimension of the FFT - * @details User should pass a reference to a size_t array, which will be set to the - * length of each discrete dimension of the FFT - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the length parameters; describes how many elements are in the array - * @param[out] clLengths An array of lengths, of size 'dim'. Each array value describes the length of each dimension - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanLength( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths ); - - /*! @brief Set the length of each dimension of the FFT - * @details Set the plan property which will be the length of each discrete dimension of the FFT - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the length parameters; describes how many elements are in the array - * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions - * <p><b> For Release 1.0, </b> All lengths must be powers of 2. Non-power-of-two dimensions should be supported in a future release. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanLength( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths ); - - /*! @brief Retrieve the distance between consecutive elements for input buffers in a dimension. - * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely - * ignored - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanInStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Set the distance between consecutive elements for input buffers in a dimension. - * @details Set the plan properties which will be the distance between elements in a given dimension - * (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[in] clStrides An array of strides, of size 'dim'. - * See @ref DistanceStridesandPitches for details. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanInStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension. - * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely - * ignored - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenYsuch that successive elements in the second and third dimensions are stored contiguously. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanOutStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Set the distance between consecutive elements for output buffers in a dimension. - * @details Set the plan properties which will be the distance between elements in a given dimension - * (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. - * @sa clAmdFftSetPlanInStride - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanOutStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Retrieve the distance between Array objects - * @details Pitch is the distance between each discrete array object in an FFT array. This is only used - * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDimension (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. - * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) - * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output. - * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDistance( const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist ); - - /*! @brief Set the distance between Array objects - * @details Pitch is the distance between each discrete array object in an FFT array. This is only used - * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDimension (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. - * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) - * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output. - * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDistance( clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist ); - - /*! @brief Retrieve the expected layout of the input and output buffers - * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored - * in various layouts; this informs the FFT engine what layout to produce on output - * @param[in] plHandle Handle to a plan previously created - * @param[out] iLayout Indicates how the input buffers are laid out in memory - * @param[out] oLayout Indicates how the output buffers are laid out in memory - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetLayout( const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout ); - - /*! @brief Set the expected layout of the input and output buffers - * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored - * in various layouts; this informs the FFT engine what layout to produce on output - * @param[in] plHandle Handle to a plan previously created - * @param[in] iLayout Indicates how the input buffers are laid out in memory - * @param[in] oLayout Indicates how the output buffers are laid out in memory - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetLayout( clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout ); - - /*! @brief Retrieve whether the input buffers are going to be overwritten with results - * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the - * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers - * on the Enqueue call. - * @param[in] plHandle Handle to a plan previously created - * @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetResultLocation( const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness ); - - /*! @brief Set whether the input buffers are going to be overwritten with results - * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the - * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers - * on the Enqueue call. - * @param[in] plHandle Handle to a plan previously created - * @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetResultLocation( clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness ); - - /*! @brief Retrieve the final transpose setting of a muti-dimensional FFT - * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client - * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped - * for possible speed improvements - * @param[in] plHandle Handle to a plan previously created - * @param[out] transposed Parameter specifies whether the final transpose can be skipped - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanTransposeResult( const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed * transposed ); - - /*! @brief Set the final transpose setting of a muti-dimensional FFT - * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client - * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped - * for possible speed improvements - * @param[in] plHandle Handle to a plan previously created - * @param[in] transposed Parameter specifies whether the final transpose can be skipped - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanTransposeResult( clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed ); - - - /*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer - * @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold - * intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error - * is returned. If buffersize returns as 0, the runtime needs no temporary buffer. - * @param[in] plHandle Handle to a plan previously created - * @param[out] buffersize Size in bytes for intermediate buffer - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetTmpBufSize( const clAmdFftPlanHandle plHandle, size_t* buffersize ); - - /*! @brief Enqueue an FFT transform operation, and return immediately (non-blocking) - * @details This transform API is specific to the interleaved complex format, taking an input buffer with real and imaginary - * components paired together, and outputting the results into an output buffer in the same format - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Forwards or backwards transform - * @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents - * @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of - * the devices included in the plan context - * @param[in] numWaitEvents Specify the number of elements in the eventWaitList array - * @param[in] waitEvents Events that this transform should wait to complete before executing on the device - * @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed - * in commQueues. This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications - * when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least - * as many elements as specified in numQueuesAndEvents. - * @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime. If the transform - * is in place, the FFT results will overwrite the input buffers - * @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms. If the transform - * is in place, this parameter may be NULL or nullptr. It is completely ignored - * @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. If clTmpBuffers is NULL or nullptr, - * and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftEnqueueTransform( - clAmdFftPlanHandle plHandle, - clAmdFftDirection dir, - cl_uint numQueuesAndEvents, - cl_command_queue* commQueues, - cl_uint numWaitEvents, - const cl_event* waitEvents, - cl_event* outEvents, - cl_mem* inputBuffers, - cl_mem* outputBuffers, - cl_mem tmpBuffer - ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/RTCP/Cobalt/clAmdFft/include/clAmdFft.version.h b/RTCP/Cobalt/clAmdFft/include/clAmdFft.version.h deleted file mode 100644 index 4c3e04a0ea3baa5ad95af902099cec43b4385b34..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/include/clAmdFft.version.h +++ /dev/null @@ -1,9 +0,0 @@ -/*********************************************************************** -** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -***********************************************************************/ - -/* the configured version and settings for clAmdFft - */ -#define clAmdFftVersionMajor 1 -#define clAmdFftVersionMinor 8 -#define clAmdFftVersionPatch 291 diff --git a/RTCP/Cobalt/clAmdFft/install-clAmdFft-1.8.291.sh b/RTCP/Cobalt/clAmdFft/install-clAmdFft-1.8.291.sh deleted file mode 100755 index 5f251c66390923e0423d607a11f4e1040ead20f8..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/install-clAmdFft-1.8.291.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# -# Installation script - -# Grab the name of the bash script, which must have a certain format: install-${AppName}-${AppVersion}.sh -ScriptName=(${0//-/ }) -AppName="${ScriptName[1]}" - -# Parse this version number from the name of this script -AppVersionArray=(${ScriptName[2]//./ }) -AppVersion="${AppVersionArray[0]}.${AppVersionArray[1]}.${AppVersionArray[2]}" - -# Check for any switch arguments -doneswitches=0 -badswitch=0 -acceptlicense=0 -installdir="" -while test "${doneswitches}" = "0" -do - case "${1-}" in - -accept*) - acceptlicense=1 - shift;; - -installdir=*) - installdir=`echo ${1} | sed -e 's%.*=%%'` - shift;; - -*) - echo "${ScriptName}: unrecognised switch: ${1}" - badswitch=1 - exit - shift;; - *) - doneswitches=1;; - esac -done - -showLicense() -{ -if [ "${acceptlicense}" = "1" ]; then - echo "Warning: by installing this software you have accepted" - echo "the license agreement in ${AppName}-EULA.txt" - reply="accept" -else - more ${AppName}-EULA.txt - - reply="" - while [ "${reply}" != "accept" -a "${reply}" != "decline" ]; do - echo -e "[accept/decline]? : \c" - read reply - reply=`echo ${reply} | tr [:upper:] [:lower:]` - done -fi -} - -get_yes_no() -{ -reply="" -while [ "$reply" != "y" -a "$reply" != "n" ]; do - echo -e "$1 ? [y/n] : \c" - read reply - reply=`echo ${reply} | tr [:upper:] [:lower:]` -done -} - -echo -e " ${AppName}-${AppVersion} Installation " -echo -e " ===================================== " -echo -e "" -echo -e "This script will install ${AppName} version ${AppVersion}" - -showLicense - -if [ "${reply}" != "accept" ]; then - echo "Installation declined. ${AppName}-${AppVersion} not installed." - exit -fi - -echo -e "" -echo -e "Where do you want to install ${AppName}-${AppVersion}? Press return to use" -echo -e "the default location (/opt/${AppName}-${AppVersion}), or enter an alternative path." -echo -e "The directory will be created if it does not already exist." -if [ "${installdir}" != "" ]; then - INSTALLDIR=${installdir} -else - INSTALLDIR="" - while [ "${INSTALLDIR}" = "" ]; do - echo -e "> \c" - read ans - if [ $ans ] - then - case $ans in - *) INSTALLDIR=$ans ;; - esac - else - INSTALLDIR=/opt/${AppName}-${AppVersion} - fi - done -fi - -# Replace any ~ by ${HOME} otherwise you end up with a -# subdirectory named ~ (dangerous if you then try to remove it!) -INSTALLDIR=`echo ${INSTALLDIR} | sed -e "s%~%${HOME}%g"` - -echo -e "" -echo -e "Installing to : ${INSTALLDIR}" -echo -e "" - -if [ ! -d "${INSTALLDIR}" ] -then - mkdir -p "${INSTALLDIR}" - if [ $? -ne 0 ] - then - echo -e "***** Cannot create installation directory, installation failed *****" - exit - fi -fi - -# Extract everything from the compressed tar file -fromdir=$( pwd ) -cd "${INSTALLDIR}" -tar -xvf "${fromdir}/${AppName}-${AppVersion}-Linux.tar.gz" - -echo -e "" -echo -e "====== ${AppName}-${AppVersion} installation complete ======" diff --git a/RTCP/Cobalt/clAmdFft/samples/CMakeLists.txt b/RTCP/Cobalt/clAmdFft/samples/CMakeLists.txt deleted file mode 100644 index dcf1f3b8d95a9b31ab72cda1c542d365da35d8c0..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/CMakeLists.txt +++ /dev/null @@ -1,170 +0,0 @@ -############################################################################# -## Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -############################################################################# -cmake_minimum_required( VERSION 2.6 ) -project( clAmdFft.Sample ) - -# If AMDAPPSDKROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. -# Otherwise, create a sensible default that the user can change -if( DEFINED ENV{AMDAPPSDKROOT} ) - set( AMD_APP_SDK_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of the ATI Stream SDK" ) -else( ) - set( AMD_APP_SDK_ROOT "/Path/To/ATI_Stream_SDK" CACHE PATH "Modify this variable to point to the root of the ATI Stream SDK installation" ) -endif( ) - -# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. -# Otherwise, create a sensible default that the user can change -if( DEFINED ENV{BOOST_ROOT} ) - set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" ) -else( ) - if( UNIX ) - set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - else( ) - set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - endif() -endif( ) - -# Currently, linux has a problem outputing both narrow and wide characters, -# which happens in our client because openCL only supports narrow characters -if( WIN32 ) - option( UNICODE "Build with Unicode Support" ON ) - if( UNICODE ) - message( STATUS "UNICODE build" ) - endif( ) -else() - set( UNICODE OFF ) - message( STATUS "UNICODE feature disabled on linux" ) -endif() - -if( MSVC_IDE ) - set( BUILD64 ${CMAKE_CL_64} ) -else() - option( BUILD64 "Build a 64-bit product" ON ) - if( BUILD64 ) - message( STATUS "64-bit build" ) - endif( ) - - if( IS_DIRECTORY ${PROJECT_SOURCE_DIR}/library/test ) - option( CODE_COVERAGE "Build makefiles with code coverage instrumentation" OFF ) - if( CODE_COVERAGE ) - message( STATUS "Code coverage instrumentation on" ) - endif() - endif() -endif() - -# For linux, modify the global find property to help us find libraries like Boost in the correct paths -if( UNIX ) - if( BUILD64 ) - set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE ) - message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS: ${FIND_LIBRARY_USE_LIB64_PATHS}" ) - else() - set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE ) - message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS: ${FIND_LIBRARY_USE_LIB64_PATHS}" ) - endif() -endif() - -# Find the absolute path to the opencl library that we need to link too; the path depends on being 64bit or 32bit -if( BUILD64 ) - find_library( OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${AMD_APP_SDK_ROOT}/lib/ - ENV AMD_APP_SDK_ROOT - PATH_SUFFIXES x86_64 x86 - ) -else() - find_library( OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${AMD_APP_SDK_ROOT}/lib/ - ENV AMD_APP_SDK_ROOT - PATH_SUFFIXES x86 - ) -endif() -message( STATUS "OPENCL_LIBRARIES: ${OPENCL_LIBRARIES}" ) - -set( Boost_USE_MULTITHREADED ON ) -set( Boost_USE_STATIC_LIBS ON ) -set( Boost_DETAILED_FAILURE_MSG ON ) -set( Boost_DEBUG ON ) -set( Boost_ADDITIONAL_VERSIONS "1.44.0" "1.44" ) -# On linux, the boost installed in the system always appears to override any user boost installs -if( UNIX ) - set( Boost_NO_SYSTEM_PATHS TRUE ) -endif( ) -find_package( Boost 1.33.0 COMPONENTS program_options ) -message(STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}") - -# FFLAGS depend on the compiler, grab the compiler name from the path -get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE ) -# message( "C_COMPILER_NAME: " ${C_COMPILER_NAME} ) -# message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} ) - -# Set common compile and link options -if( C_COMPILER_NAME STREQUAL "cl" ) - # Following options for nMake - message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} ) - if( NOT MSVC_IDE ) - message( STATUS "Using an nMake environment to build" ) - - endif( ) - -elseif( C_COMPILER_NAME STREQUAL "gcc" ) - message( STATUS "Detected GNU fortran compiler." ) - # set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) - - if( BUILD64 ) - set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" ) - set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" ) - else( ) - set( CMAKE_CXX_FLAGS "-m32 ${CMAKE_CXX_FLAGS}" ) - set( CMAKE_C_FLAGS "-m32 ${CMAKE_C_FLAGS}" ) - endif( ) -else( ) - message( FATAL_ERROR "Compiler name not detected" ) -endif( ) - -# If UNICODE is defined, pass extra definitions into -if( UNICODE ) - add_definitions( "/DUNICODE /D_UNICODE" ) -endif( ) - -# Print out compiler flags for viewing/debug -message( STATUS "CMAKE_CXX_COMPILER flags: " ${CMAKE_CXX_FLAGS} ) -message( STATUS "CMAKE_CXX_COMPILER debug flags: " ${CMAKE_CXX_FLAGS_DEBUG} ) -message( STATUS "CMAKE_CXX_COMPILER release flags: " ${CMAKE_CXX_FLAGS_RELEASE} ) -message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ) -message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} ) - -include_directories( ${Boost_INCLUDE_DIRS} ${AMD_APP_SDK_ROOT}/include ${PROJECT_SOURCE_DIR}/../include ) - -# Set the OpenCL library include path depending on target platform -if( BUILD64 ) - if( WIN32 ) - link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64/import ) - elseif( UNIX ) - link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64 ) - endif() -else() - if( WIN32 ) - link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32/import ) - elseif( UNIX ) - link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32 ) - endif() -endif() - -add_executable( clAmdFft.Client - # sources follow - clAmdFft.client.cpp - clAmdFft.openCL.cpp - statisticalTimer.cpp - stdafx.cpp - clAmdFft.client.h - clAmdFft.openCL.h - statisticalTimer.h - stdafx.h - targetver.h - amd-unicode.h - ../include/clAmdFft.h ) - -target_link_libraries(clAmdFft.Client clAmdFft.Runtime ${Boost_LIBRARIES} ${OPENCL_LIBRARIES}) diff --git a/RTCP/Cobalt/clAmdFft/samples/amd-unicode.h b/RTCP/Cobalt/clAmdFft/samples/amd-unicode.h deleted file mode 100644 index 6aad7185c760729a32dd640560d739524fb5b145..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/amd-unicode.h +++ /dev/null @@ -1,46 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -#pragma once -#if !defined( amd_unicode_h ) -#define amd_unicode_h - -// Typedefs to support unicode and ansii compilation -#if defined( _UNICODE ) - typedef std::wstring tstring; - typedef std::wstringstream tstringstream; - typedef std::wifstream tifstream; - typedef std::wofstream tofstream; - typedef std::wfstream tfstream; - static std::wostream& tout = std::wcout; - static std::wostream& terr = std::wcerr; -#else - typedef std::string tstring; - typedef std::stringstream tstringstream; - typedef std::ifstream tifstream; - typedef std::ofstream tofstream; - typedef std::fstream tfstream; - static std::ostream& tout = std::cout; - static std::ostream& terr = std::cerr; -#endif - -// These macros help linux cope with the conventions of windows tchar.h file -#if defined( _WIN32 ) - #include <tchar.h> - #include <windows.h> -#else - #if defined( __GNUC__ ) - typedef char TCHAR; - typedef char _TCHAR; - #define _tmain main - - #if defined( UNICODE ) - #define _T(x) L ## x - #else - #define _T(x) x - #endif - #endif -#endif - -#endif \ No newline at end of file diff --git a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.cpp b/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.cpp deleted file mode 100644 index b5c6d5dea1cbf904282fe2b075329ba7b981e35c..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.cpp +++ /dev/null @@ -1,635 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// clAmdFft.client.cpp : Defines the entry point for the console application. -// - -#include "stdafx.h" -#include "../include/clAmdFft.h" -#include "clAmdFft.client.h" -#include "clAmdFft.openCL.h" -#include "statisticalTimer.h" -#include "amd-unicode.h" - -namespace po = boost::program_options; - -// This is used with the program_options class so that the user can type an integer on the command line -// and we store into an enum varaible -template<class _Elem, class _Traits> -std::basic_istream<_Elem, _Traits> & operator>> (std::basic_istream<_Elem, _Traits> & stream, clAmdFftLayout & layout) -{ - cl_uint tmp; - stream >> tmp; - layout = clAmdFftLayout(tmp); - return stream; -} - -// Format an unsigned number with comma thousands separator -// -template<typename T> // T could be 32-bit or 64-bit -std::basic_string<TCHAR> commatize (T number) { - static TCHAR scratch [8*sizeof(T)]; - register TCHAR * ptr = scratch + countOf(scratch); - *(--ptr) = 0; - for (int digits = 3; ; ) { - *(--ptr) = '0' + int (number % 10); - number /= 10; - if (0 == number) - break; - if (--digits <= 0) { - *(--ptr) = ','; - digits = 3; - } - } - return std::basic_string<TCHAR> (ptr); -} // end of commatize () - - -int _tmain( int argc, _TCHAR* argv[] ) -{ - // This helps with mixing output of both wide and narrow characters to the screen - std::ios::sync_with_stdio( false ); - - // Define MEMORYREPORT on windows platfroms to enable debug memory heap checking -#if defined( MEMORYREPORT ) && defined( _WIN32 ) - TCHAR logPath[ MAX_PATH ]; - ::GetCurrentDirectory( MAX_PATH, logPath ); - ::_tcscat_s( logPath, _T( "\\MemoryReport.txt") ); - - // We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output it's memory - // statistics on app shutdown - HANDLE hLogFile; - hLogFile = ::CreateFile( logPath, GENERIC_WRITE, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); - - ::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); - ::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); - ::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG ); - - ::_CrtSetReportFile( _CRT_ASSERT, hLogFile ); - ::_CrtSetReportFile( _CRT_ERROR, hLogFile ); - ::_CrtSetReportFile( _CRT_WARN, hLogFile ); - - int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); - tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF; - ::_CrtSetDbgFlag( tmp ); - - // By looking at the memory leak report that is generated by this debug heap, there is a number with - // {} brackets that indicates the incremental allocation number of that block. If you wish to set - // a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap - // will issue a bp on the request, allowing you to look at the call stack - // ::_CrtSetBreakAlloc( 1833 ); - -#endif /* MEMORYREPORT */ - - // OpenCL state - cl_context context; - cl_command_queue queue; - cl_mem clMemBuffersIn [ 2 ] = { NULL, NULL }; - cl_mem clMemBuffersOut[ 2 ] = { NULL, NULL }; - std::vector< cl_device_id > device_id; - cl_event outEvent = NULL; - cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; - cl_uint deviceGpuList = 0; // a bitmap set - - // FFT state - clAmdFftPlanHandle plHandle; - clAmdFftResultLocation place = CLFFT_INPLACE; - clAmdFftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED; - clAmdFftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED; - size_t clLengths[ 3 ]; - size_t clPadding[ 3 ] = {0, 0, 0, }; // *** TODO - size_t clStrides[ 4 ]; - cl_uint commandQueueFlags = 0; - size_t batchSize = 1; - - // Local Data - size_t buffSizeBytesIn = 0, buffSizeBytesOut = 0; - size_t fftVectorSize= 0, fftVectorSizePadded = 0, fftBatchSize = 0; - cl_uint nBuffersOut = 0; - cl_uint profileCount = 0; - clAmdFftDim dim = CLFFT_1D; - - // Initialize flags for FFT library - std::auto_ptr< clAmdFftSetupData > setupData( new clAmdFftSetupData ); - OPENCL_V_THROW( clAmdFftInitSetupData( setupData.get( ) ), - "clAmdFftInitSetupData failed" ); - - try - { - // Declare the supported options. - po::options_description desc( "clFFT client command line options" ); - desc.add_options() - ( "help,h", "produces this help message" ) - ( "version,v", "Print queryable version information from the clFFT library" ) - ( "clInfo,i", "Print queryable information of the OpenCL runtime" ) - ( "gpu,g", "Force instantiation of an OpenCL GPU device" ) - ( "gpu0", "Force instantiation of an OpenCL GPU device using gpu0" ) - ( "gpu1", "Force instantiation of an OpenCL GPU device using gpu1" ) - ( "gpu2", "Force instantiation of an OpenCL GPU device using gpu2" ) - ( "gpu3", "Force instantiation of an OpenCL GPU device using gpu3" ) - ( "cpu,c", "Force instantiation of an OpenCL CPU device" ) - ( "all,a", "Force instantiation of all OpenCL devices" ) - ( "outPlace,o", "Out of place FFT transform (default: in place)" ) - ( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" ) - ( "lenX,x", po::value< size_t >( &clLengths[ 0 ] )->default_value( 1024 ), "Specify the length of the 1st dimension of a test array" ) - ( "lenY,y", po::value< size_t >( &clLengths[ 1 ] )->default_value( 1 ), "Specify the length of the 2nd dimension of a test array" ) - ( "lenZ,z", po::value< size_t >( &clLengths[ 2 ] )->default_value( 1 ), "Specify the length of the 3rd dimension of a test array" ) - ( "batchSize,b", po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " ) - ( "profile,p", po::value< cl_uint >( &profileCount )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" ) - ( "inLayout", po::value< clAmdFftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar" ) - ( "outLayout", po::value< clAmdFftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar" ) - ; - - po::variables_map vm; - po::store( po::parse_command_line( argc, argv, desc ), vm ); - po::notify( vm ); - - if( vm.count( "version" ) ) - { - const int indent = countOf( "clFFT client API version: " ); - tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " ) - << clAmdFftVersionMajor << _T( "." ) - << clAmdFftVersionMinor << _T( "." ) - << clAmdFftVersionPatch << std::endl; - - cl_uint libMajor, libMinor, libPatch; - clAmdFftGetVersion( &libMajor, &libMinor, &libPatch ); - - tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " ) - << libMajor << _T( "." ) - << libMinor << _T( "." ) - << libPatch << std::endl << std::endl; - } - - if( vm.count( "help" ) ) - { - // This needs to be 'cout' as program-options does not support wcout yet - std::cout << desc << std::endl; - return 0; - } - - size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) - | ((vm.count( "gpu0" ) > 0) ? 1 : 0) - | ((vm.count( "gpu1" ) > 0) ? 1 : 0) - | ((vm.count( "gpu2" ) > 0) ? 1 : 0) - | ((vm.count( "gpu3" ) > 0) ? 1 : 0) - | ((vm.count( "cpu" ) > 0) ? 2 : 0) - | ((vm.count( "all" ) > 0) ? 4 : 0); - if ((mutex & (mutex-1)) != 0) { - terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl; - if (vm.count ( "gpu" ) > 0) terr << _T(" gpu,g Force instantiation of an OpenCL GPU device" ) << std::endl; - if (vm.count ( "gpu0" ) > 0) terr << _T(" gpu0 Force instantiation of an OpenCL GPU device using gpu0" ) << std::endl; - if (vm.count ( "gpu1" ) > 0) terr << _T(" gpu1 Force instantiation of an OpenCL GPU device using gpu1" ) << std::endl; - if (vm.count ( "gpu2" ) > 0) terr << _T(" gpu2 Force instantiation of an OpenCL GPU device using gpu2" ) << std::endl; - if (vm.count ( "gpu3" ) > 0) terr << _T(" gpu3 Force instantiation of an OpenCL GPU device using gpu3" ) << std::endl; - if (vm.count ( "cpu" ) > 0) terr << _T(" cpu,c Force instantiation of an OpenCL CPU device" ) << std::endl; - if (vm.count ( "all" ) > 0) terr << _T(" all,a Force instantiation of all OpenCL devices" ) << std::endl; - return 1; - } - - if( vm.count( "gpu" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - deviceGpuList = ~0; - } - if( vm.count( "gpu0" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - deviceGpuList |= 1; - } - if( vm.count( "gpu1" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - deviceGpuList |= 2; - } - if( vm.count( "gpu2" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - deviceGpuList |= 4; - } - if( vm.count( "gpu3" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - deviceGpuList |= 8; - } - - if( vm.count( "cpu" ) ) - { - deviceType = CL_DEVICE_TYPE_CPU; - } - - if( vm.count( "all" ) ) - { - deviceType = CL_DEVICE_TYPE_ALL; - } - - bool printInfo = false; - if( vm.count( "clInfo" ) ) - { - printInfo = true; - } - - if( vm.count( "outPlace" ) ) - { - place = CLFFT_OUTOFPLACE; - } - - if( profileCount > 1 ) - { - commandQueueFlags |= CL_QUEUE_PROFILING_ENABLE; - } - - if( vm.count( "dumpKernels" ) ) - { - setupData->debugFlags |= CLFFT_DUMP_PROGRAMS; - } - - // Our command line does not specify what dimension FFT we wish to transform; we decode - // this from the lengths that the user specifies for X, Y, Z. A length of one means that - // The user does not want that dimension. - - for (unsigned u = 0; u < countOf(clLengths); ++u) { - if (0 != clLengths[u]) continue; - clLengths[u] = 1; - } - - dim = CLFFT_1D; - if( clLengths[ 1 ] > 1 ) - { - dim = CLFFT_2D; - } - if( clLengths[ 2 ] > 1 ) - { - dim = CLFFT_3D; - } - - clStrides[ 0 ] = 1; - clStrides[ 1 ] = clStrides[ 0 ] * (clLengths[ 0 ] + clPadding[ 0 ]); - clStrides[ 2 ] = clStrides[ 1 ] * (clLengths[ 1 ] + clPadding[ 1 ]); - clStrides[ 3 ] = clStrides[ 2 ] * (clLengths[ 2 ] + clPadding[ 2 ]); - - fftVectorSize = clLengths[ 0 ] * clLengths[ 1 ] * clLengths[ 2 ]; - fftVectorSizePadded = clStrides[ 3]; - fftBatchSize = fftVectorSizePadded * batchSize; - - switch( outLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - nBuffersOut = 1; - buffSizeBytesOut = fftBatchSize * sizeof( std::complex< float > ); - break; - case CLFFT_COMPLEX_PLANAR: - nBuffersOut = 2; - buffSizeBytesOut = fftBatchSize * sizeof(float); - break; - } - - // Fill our input buffers depending on how we want - switch( inLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - { - // This call creates our openCL context and sets up our devices; expected to throw on error - buffSizeBytesIn = fftBatchSize * sizeof( std::complex< float > ); - - device_id = initializeCL( deviceType, deviceGpuList, context, printInfo ); - createOpenCLCommandQueue( context, - commandQueueFlags, queue, - device_id, - buffSizeBytesIn, 1, clMemBuffersIn, - buffSizeBytesOut, nBuffersOut, clMemBuffersOut); - - std::vector< std::complex< float > > input( fftBatchSize ); - - // impulse test case - for( cl_uint i = 0; i < fftBatchSize; ++i ) - { - input[ i ] = 1; - } - - OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &input[ 0 ], - 0, NULL, &outEvent ), - "clEnqueueWriteBuffer failed" ); - - //for( cl_uint i = 0; i < fftBatchSize; ++i ) - //{ - // input[ i ] = 1.23456f; - //} - - //OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytes, &input[ 0 ], - // 0, NULL, &outEvent ), - // "clEnqueueWriteBuffer failed" ); - } - break; - case CLFFT_COMPLEX_PLANAR: - { - // This call creates our openCL context and sets up our devices; expected to throw on error - buffSizeBytesIn = fftBatchSize * sizeof( float ); - - device_id = initializeCL( deviceType, deviceGpuList, context, printInfo ); - createOpenCLCommandQueue( context, - commandQueueFlags, queue, - device_id, - buffSizeBytesIn, 2, clMemBuffersIn, - buffSizeBytesOut, nBuffersOut, clMemBuffersOut); - - // Just initialize the input buffer to all 1's for now - std::vector< float > real( fftBatchSize ); - std::vector< float > imag( fftBatchSize ); - for( cl_uint i = 0; i < fftBatchSize; ++i ) - { - real[ i ] = 1; - imag[ i ] = 0; - } - - OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &real[ 0 ], - 0, NULL, &outEvent ), - "clEnqueueWriteBuffer failed" ); - OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 1 ], CL_TRUE, 0, buffSizeBytesIn, &imag[ 0 ], - 0, NULL, &outEvent ), - "clEnqueueWriteBuffer failed" ); - } - break; - default: - { - throw std::runtime_error( "Input layout format not yet supported" ); - } - break; - } - - } - catch( std::exception& e ) - { - terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl; - return 1; - } - - // Performance Data - StatisticalTimer& sTimer = StatisticalTimer::getInstance( ); - sTimer.Reserve( 3, profileCount ); - sTimer.setNormalize( true ); - StatisticalTimer::sTimerID clFFTID = sTimer.getUniqueID( "clFFT", 0 ); - - OPENCL_V_THROW( clAmdFftSetup( setupData.get( ) ), "clAmdFftSetup failed" ); - - OPENCL_V_THROW( clAmdFftCreateDefaultPlan( &plHandle, context, dim, clLengths ), "clAmdFftCreateDefaultPlan failed" ); - - // Default plan creates a plan that expects an inPlace transform with interleaved complex numbers - OPENCL_V_THROW( clAmdFftSetResultLocation( plHandle, place ), "clAmdFftSetResultLocation failed" ); - OPENCL_V_THROW( clAmdFftSetLayout( plHandle, inLayout, outLayout ), "clAmdFftSetLayout failed" ); - OPENCL_V_THROW( clAmdFftSetPlanBatchSize( plHandle, batchSize ), "clAmdFftSetPlanBatchSize failed" ); - - if ((clPadding[ 0 ] | clPadding[ 1 ] | clPadding[ 2 ]) != 0) { - OPENCL_V_THROW (clAmdFftSetPlanInStride ( plHandle, dim, clStrides ), "clAmdFftSetPlanInStride failed" ); - OPENCL_V_THROW (clAmdFftSetPlanOutStride ( plHandle, dim, clStrides ), "clAmdFftSetPlanOutStride failed" ); - OPENCL_V_THROW (clAmdFftSetPlanDistance ( plHandle, clStrides[ dim ], clStrides[ dim ]), "clAmdFftSetPlanDistance failed" ); - } - - OPENCL_V_THROW( clAmdFftBakePlan( plHandle, 1, &queue, NULL, NULL ), "clAmdFftBakePlan failed" ); - - //get the buffersize - size_t buffersize=0; - OPENCL_V_THROW( clAmdFftGetTmpBufSize(plHandle, &buffersize ), "clAmdFftGetTmpBufSize failed" ); - - //allocate the intermediate buffer - cl_mem clMedBuffer=NULL; - - if (buffersize) - { - cl_int medstatus; - clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); - OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" ); - } - - switch( inLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - case CLFFT_COMPLEX_PLANAR: - break; - default: - // Don't recognize input layout - return CLFFT_INVALID_ARG_VALUE; - } - - switch( outLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - case CLFFT_COMPLEX_PLANAR: - break; - default: - // Don't recognize output layout - return CLFFT_INVALID_ARG_VALUE; - } - - if (( place == CLFFT_INPLACE ) - && ( inLayout != outLayout )) { - switch( inLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - { - assert (CLFFT_COMPLEX_PLANAR == outLayout); - throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" ); - break; - } - case CLFFT_COMPLEX_PLANAR: - { - assert (CLFFT_COMPLEX_INTERLEAVED == outLayout); - throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" ); - break; - } - } - } - - // Loop as many times as the user specifies to average out the timings - // - cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &clMemBuffersOut[ 0 ]; - sTimer.Start(clFFTID); - for( cl_uint i = 0; i < profileCount; ++i ) - { - OPENCL_V_THROW( clAmdFftEnqueueTransform( plHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, &outEvent, - &clMemBuffersIn[ 0 ], BuffersOut, clMedBuffer ), - "clAmdFftEnqueueTransform failed" ); - } - OPENCL_V_THROW( clFinish( queue ), "clFinish failed" ); - sTimer.Stop(clFFTID); - - if( commandQueueFlags & CL_QUEUE_PROFILING_ENABLE ) - { - // Remove all timings that are outside of 3 stddev; we ignore outliers to get a more consistent result - sTimer.pruneOutliers( 3.0 ); - - // windows frequency count is by seconds - double kernelExecTimeNs = sTimer.getAverageTime( clFFTID ) * 1e9/profileCount; - double kernelExecGflops = 5 * fftBatchSize * (log(static_cast<float>(fftVectorSize))/log(2.0f)) / static_cast< double >( kernelExecTimeNs ); - - tout << _T( "FFT kernel execution time < ns >: " ) << commatize ((unsigned long long) kernelExecTimeNs) << std::endl; - tout << _T( "FFT kernel execution Gflops < BatchSize*5*N*log2( N ) >: " ) << kernelExecGflops << std::endl; - } - sTimer.Reset( ); - - // Read and check output data - // This check is not valid if the FFT is executed multiple times inplace. - // - if (( place == CLFFT_OUTOFPLACE ) - || ( profileCount == 1)) - { - bool checkflag= false; - switch( outLayout ) - { - case CLFFT_COMPLEX_INTERLEAVED: - { - std::vector< std::complex< float > > output( fftBatchSize ); - - if( place == CLFFT_INPLACE ) - { - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &output[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - } - else - { - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytesOut, &output[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - } - - //check output data - for( cl_uint i = 0; i < fftBatchSize; ++i ) - { - if (0 == (i % fftVectorSizePadded)) - { - if (output[i].real() != fftVectorSize) - { - checkflag = true; - break; - } - } - else - { - if (output[ i ].real() != 0) - { - checkflag = true; - break; - } - } - - if (output[ i ].imag() != 0) - { - checkflag = true; - break; - } - } - } - break; - case CLFFT_COMPLEX_PLANAR: - { - std::valarray< float > real( fftBatchSize ); - std::valarray< float > imag( fftBatchSize ); - - if( place == CLFFT_INPLACE ) - { - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &real[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 1 ], CL_TRUE, 0, buffSizeBytesIn, &imag[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - } - else - { - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytesOut, &real[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 1 ], CL_TRUE, 0, buffSizeBytesOut, &imag[ 0 ], - 0, NULL, NULL ), - "Reading the result buffer failed" ); - } - - // Check output data - // The output data might not be contiguous in the output buffer, if there - // is any padding in any dimension, so we need to access slices of the buffer. - // We treat the data buffers as arrays of 3D arrays in all cases. - // If this is a 1D test, then - // clLength[ 1 ] and clLength[ 2] will be 1. - // The first element of every 1D slice will be nonzero. - // If this is a 2D test, then - // clLength[ 2 ] will be a. - // The first elment of every 2D slice will be nonzero. - // If this is a 3D test, then - // The first element of every 3D slice will be nonzero. - // - for (unsigned ub = 0; ub < batchSize; ++ub) { - std::slice slice3D (ub * clStrides[ 3 ], clStrides[ 3 ], 1); - std::valarray<float> real3D (real[ slice3D ]); - for (unsigned uz = 0; uz < clLengths[2]; ++uz) { - std::slice slice2D (uz * clStrides[ 2 ], clStrides[ 2 ], 1); - std::valarray<float> real2D (real[ slice2D ]); - bool nzZ = (dim == CLFFT_3D) && (0 == uz); - for (unsigned uy = 0; uy < clLengths[1]; ++uy) { - std::slice slice1D (uy * clStrides[ 1], clStrides[ 1], 1); - std::valarray<float> real1D (real2D [ slice1D ]); - bool nzY = (nzZ || (dim == CLFFT_2D)) && (0 == uy); - for (unsigned ux = 0; ux < clLengths[0]; ++ux) { - bool nzX = (nzY || (dim == CLFFT_1D)) && (0 == ux); - float expected = nzX ? float (fftVectorSize) : 0.0f; - if (real1D[ux] != expected) - checkflag = true; - } - } - } - } - - ////check output data - //for( cl_uint i = 0; i < fftBatchSize; ++i ) - //{ - // if (0 == (i % fftVectorSizePadded)) - // { - // if (real[i] != fftVectorSize) - // { - // checkflag = true; - // break; - // } - // } - // else - // { - // if (real[i] != 0) - // { - // checkflag = true; - // break; - // } - // } - - // if (imag[i] != 0) - // { - // checkflag = true; - // break; - // } - //} - } - break; - default: - { - throw std::runtime_error( "Input layout format not yet supported" ); - } - break; - } - - if (checkflag) - { - std::cout << "\n\n\t\tClient Test *****FAIL*****" << std::endl; - } - else - { - std::cout << "\n\n\t\tClient Test *****PASS*****" << std::endl; - } - } - - OPENCL_V_THROW( clAmdFftDestroyPlan( &plHandle ), "clAmdFftDestroyPlan failed" ); - OPENCL_V_THROW( clAmdFftTeardown( ), "clAmdFftTeardown failed" ); - - cleanupCL( &context, &queue, countOf( clMemBuffersIn ), clMemBuffersIn, countOf( clMemBuffersOut ), clMemBuffersOut, &outEvent ); - - return 0; -} diff --git a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.h b/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.h deleted file mode 100644 index 8d4ac80bbcb78849b758e7698b145667368b1cb4..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.client.h +++ /dev/null @@ -1,13 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -#pragma once -#if !defined( CLIENT_H ) -#define CLIENT_H - -// Boost headers that we want to use -// #define BOOST_PROGRAM_OPTIONS_DYN_LINK -#include <boost/program_options.hpp> - -#endif diff --git a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.h b/RTCP/Cobalt/clAmdFft/samples/clAmdFft.h deleted file mode 100644 index 73e65d0d349eeb24d7d27997d78abe43661a244a..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.h +++ /dev/null @@ -1,573 +0,0 @@ -/*********************************************************************** -** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -***********************************************************************/ - -/*! @file clAmdFft.h - * clAmdFft.h defines all of the public interfaces and types that are meant to be used by clFFT clients - * This is the one public header file that should be consumed by clFFT clients. It is written to adhere to native "C" - * interfaces to make clAmdFft library as portable as possible; it should be callable from C, C++, .NET and Fortran, - * either with the proper linking or using wrapper classes. - * - */ - -#pragma once -#if !defined( CLAMDFFT_DOTH ) -#define CLAMDFFT_DOTH - -#if defined(__APPLE__) || defined(__MACOSX) - #include <OpenCL/cl.h> -#else - #include <CL/cl.h> -#endif - -#include "clAmdFft.version.h" - -/*! This preprocessor definition is the standard way of making exporting APIs - * from a DLL simpler. All files within this DLL are compiled with the CLAMDFFT_EXPORTS - * symbol defined on the command line. This symbol should not be defined on any project - * that uses this DLL. This way any other project whose source files include this file see - * clAmdFft functions as being imported from a DLL, whereas this DLL sees symbols - * defined with this macro as being exported. - */ -#if defined( _WIN32 ) - #if !defined( __cplusplus ) - #define inline __inline - #endif - - #if defined( CLAMDFFT_EXPORTS ) - #define CLAMDFFTAPI __declspec( dllexport ) - #else - #define CLAMDFFTAPI __declspec( dllimport ) - #endif -#else - #define CLAMDFFTAPI -#endif - -/* In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names - * with the string clAmdFft - */ - -/* All functions will return pre-defined error codes, and will NOT throw exceptions to the caller - */ - -/*! @brief clAmdFft error codes definition, incorporating OpenCL error definitions - * - * This enumeration is a superset of the OpenCL error codes. For example, CL_OUT_OF_HOST_MEMORY, - * which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY. The set of basic OpenCL - * error codes is extended to add extra values specific to the clAmdFft package. - */ -enum clAmdFftStatus_ -{ - CLFFT_INVALID_GLOBAL_WORK_SIZE = CL_INVALID_GLOBAL_WORK_SIZE, - CLFFT_INVALID_MIP_LEVEL = CL_INVALID_MIP_LEVEL, - CLFFT_INVALID_BUFFER_SIZE = CL_INVALID_BUFFER_SIZE, - CLFFT_INVALID_GL_OBJECT = CL_INVALID_GL_OBJECT, - CLFFT_INVALID_OPERATION = CL_INVALID_OPERATION, - CLFFT_INVALID_EVENT = CL_INVALID_EVENT, - CLFFT_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, - CLFFT_INVALID_GLOBAL_OFFSET = CL_INVALID_GLOBAL_OFFSET, - CLFFT_INVALID_WORK_ITEM_SIZE = CL_INVALID_WORK_ITEM_SIZE, - CLFFT_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, - CLFFT_INVALID_WORK_DIMENSION = CL_INVALID_WORK_DIMENSION, - CLFFT_INVALID_KERNEL_ARGS = CL_INVALID_KERNEL_ARGS, - CLFFT_INVALID_ARG_SIZE = CL_INVALID_ARG_SIZE, - CLFFT_INVALID_ARG_VALUE = CL_INVALID_ARG_VALUE, - CLFFT_INVALID_ARG_INDEX = CL_INVALID_ARG_INDEX, - CLFFT_INVALID_KERNEL = CL_INVALID_KERNEL, - CLFFT_INVALID_KERNEL_DEFINITION = CL_INVALID_KERNEL_DEFINITION, - CLFFT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, - CLFFT_INVALID_PROGRAM_EXECUTABLE = CL_INVALID_PROGRAM_EXECUTABLE, - CLFFT_INVALID_PROGRAM = CL_INVALID_PROGRAM, - CLFFT_INVALID_BUILD_OPTIONS = CL_INVALID_BUILD_OPTIONS, - CLFFT_INVALID_BINARY = CL_INVALID_BINARY, - CLFFT_INVALID_SAMPLER = CL_INVALID_SAMPLER, - CLFFT_INVALID_IMAGE_SIZE = CL_INVALID_IMAGE_SIZE, - CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, - CLFFT_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, - CLFFT_INVALID_HOST_PTR = CL_INVALID_HOST_PTR, - CLFFT_INVALID_COMMAND_QUEUE = CL_INVALID_COMMAND_QUEUE, - CLFFT_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, - CLFFT_INVALID_CONTEXT = CL_INVALID_CONTEXT, - CLFFT_INVALID_DEVICE = CL_INVALID_DEVICE, - CLFFT_INVALID_PLATFORM = CL_INVALID_PLATFORM, - CLFFT_INVALID_DEVICE_TYPE = CL_INVALID_DEVICE_TYPE, - CLFFT_INVALID_VALUE = CL_INVALID_VALUE, - CLFFT_MAP_FAILURE = CL_MAP_FAILURE, - CLFFT_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, - CLFFT_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED, - CLFFT_IMAGE_FORMAT_MISMATCH = CL_IMAGE_FORMAT_MISMATCH, - CLFFT_MEM_COPY_OVERLAP = CL_MEM_COPY_OVERLAP, - CLFFT_PROFILING_INFO_NOT_AVAILABLE = CL_PROFILING_INFO_NOT_AVAILABLE, - CLFFT_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, - CLFFT_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, - CLFFT_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE, - CLFFT_COMPILER_NOT_AVAILABLE = CL_COMPILER_NOT_AVAILABLE, - CLFFT_DEVICE_NOT_AVAILABLE = CL_DEVICE_NOT_AVAILABLE, - CLFFT_DEVICE_NOT_FOUND = CL_DEVICE_NOT_FOUND, - CLFFT_SUCCESS = CL_SUCCESS, - //-------------------------- Extended status codes for clAmdFft ---------------------------------------- - CLFFT_BUGCHECK = 4*1024, /*!< Bugcheck. */ - CLFFT_NOTIMPLEMENTED, /*!< Functionality is not implemented yet. */ - CLFFT_TRANSPOSED_NOTIMPLEMENTED, /*!< Transposed functionality is not implemented for this transformation. */ - CLFFT_FILE_NOT_FOUND, /*!< Tried to open an existing file on the host system, but failed. */ - CLFFT_FILE_CREATE_FAILURE, /*!< Tried to create a file on the host system, but failed. */ - CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ - CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ - CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ - CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clAmdFftStatus. */ -}; -typedef enum clAmdFftStatus_ clAmdFftStatus; - -/*! @brief The dimension of the input and output buffers that will be fed into all FFT transforms */ -typedef enum clAmdFftDim_ -{ - CLFFT_1D = 1, /*!< 1 Dimensional FFT transform (default). */ - CLFFT_2D, /*!< 2 Dimensional FFT transform. */ - CLFFT_3D, /*!< 3 Dimensional FFT transform. */ - ENDDIMENSION /*!< This value will always be last, and marks the length of clAmdFftDim. */ -} clAmdFftDim; - -/*! @brief What are the expected layout's of the complex numbers <p> - * <b> For Release 1.0,</b> only the CLFFT_COMPLEX_INTERLEAVED and CLFFT_COMPLEX_PLANAR formats are supported. - * The real and hermitian formats should be supported in a future release. - */ -typedef enum clAmdFftLayout_ -{ - CLFFT_COMPLEX_INTERLEAVED = 1, /*!< An array of complex numbers, with real and imaginary components together (default). */ - CLFFT_COMPLEX_PLANAR, /*!< Arrays of real componets and arrays of imaginary components that have been seperated out. */ - CLFFT_HERMITIAN_INTERLEAVED, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in same array. TODO: Document layout */ - CLFFT_HERMITIAN_PLANAR, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in separate arrays. TODO: Document layout */ - CLFFT_REAL, /*!< An array of real numbers, with no corresponding imaginary components. */ - ENDLAYOUT /*!< This value will always be last, and marks the length of clAmdFftLayout. */ -} clAmdFftLayout; - -/*! @brief What is the expected precision of each FFT. - * @ref DistanceStridesandPitches - */ -typedef enum clAmdFftPrecision_ -{ - CLFFT_SINGLE = 1, /*!< An array of complex numbers, with real and imaginary components as floats (default). */ - CLFFT_DOUBLE, /*!< An array of complex numbers, with real and imaginary components as doubles. */ - CLFFT_SINGLE_FAST, /*!< Faster implementation preferred. */ - CLFFT_DOUBLE_FAST, /*!< Faster implementation preferred. */ - ENDPRECISION /*!< This value will always be last, and marks the length of clAmdFftPrecision. */ -} clAmdFftPrecision; - -/*! @brief What is the expected direction of each FFT, time or the frequency domains */ -typedef enum clAmdFftDirection_ -{ - CLFFT_FORWARD = -1, /*!< FFT transform from the time to the frequency domain. */ - CLFFT_BACKWARD = 1, /*!< FFT transform from the frequency to the time domain. */ - CLFFT_MINUS = -1, /*!< Alias for the forward transform. */ - CLFFT_PLUS = 1, /*!< Alias for the backward transform. */ - ENDDIRECTION /*!< This value will always be last, and marks the length of clAmdFftDirection. */ -} clAmdFftDirection; - -/*! @brief Are the input buffers overwritten with the results */ -typedef enum clAmdFftResultLocation_ -{ - CLFFT_INPLACE = 1, /*!< The input and output buffers are the same (default). */ - CLFFT_OUTOFPLACE, /*!< Seperate input and output buffers. */ - ENDPLACE /*!< This value will always be last, and marks the length of clAmdFftPlaceness. */ -} clAmdFftResultLocation; - -/*! @brief whether the result will be returned in original order; only valid for dimensions greater than 1 */ -typedef enum clAmdFftResultTransposed_ { - CLFFT_NOTRANSPOSE = 1, /*!< The results are returned in the original preserved order (default) */ - CLFFT_TRANSPOSED, /*!< The result is transposed where transpose kernel is supported (possibly faster) */ - ENDTRANSPOSED /*!< This value will always be last, and marks the length of clAmdFftResultTransposed */ -} clAmdFftResultTransposed; - -/*! BitMasks to be used with clAmdFftSetupData.debugFlags */ -#define CLFFT_DUMP_PROGRAMS 0x1 - -/*! @brief Data structure that can be passed to clAmdFftSetup() to control the behavior of the FFT runtime - * @details This structure contains values that can be initialized before instantiation of the FFT runtime - * with ::clAmdFftSetup(). To initialize this structure, pass a pointer to a user struct to ::clAmdFftInitSetupData( ), - * which will clear the structure and set the version member variables to the current values. - */ -struct clAmdFftSetupData_ -{ - cl_uint major; /*!< Major version number of the project; signifies major API changes. */ - cl_uint minor; /*!< Minor version number of the project; minor API changes that could break backwards compatibility. */ - cl_uint patch; /*!< Patch version number of the project; Always incrementing number, signifies change over time. */ - - /*! Bitwise flags that control the behavior of library debug logic. */ - cl_ulong debugFlags; /*! This should be set to zero, except when debugging the clAmdFft library. - * <p> debugFlags can be set to CLFFT_DUMP_PROGRAMS, in which case the dynamically generated OpenCL kernels will - * be written to text files in the current working directory. These files will have a *.cl suffix. - */ -}; -typedef struct clAmdFftSetupData_ clAmdFftSetupData; - -/*! @brief An abstract handle to the object that represents the state of the FFT(s) */ -typedef size_t clAmdFftPlanHandle; - -#ifdef __cplusplus -extern "C" { -#endif - /*! @brief Initialize an clAmdFftSetupData struct for the client - * @details clAmdFftSetupData is passed to clAmdFftSetup to control behavior of the FFT runtime - * @param[out] setupData Data structure is cleared, initialized with version information and default values - * @return Enum describing error condition; superset of OpenCL error codes - */ - inline clAmdFftStatus clAmdFftInitSetupData( clAmdFftSetupData* setupData ) - { - setupData->major = clAmdFftVersionMajor; - setupData->minor = clAmdFftVersionMinor; - setupData->patch = clAmdFftVersionPatch; - setupData->debugFlags = 0; - - return CLFFT_SUCCESS; - } - - /*! @brief Initialize internal FFT resources. - * @details AMD's FFT implementation caches kernels, programs and buffers for its internal use. - * @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior - * and debug functionality - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetup( const clAmdFftSetupData* setupData ); - - /*! @brief Release all internal resources. - * @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftTeardown( ); - - /*! @brief Query the FFT library for version information - * @details Return the major, minor and patch version numbers associated with this FFT library - * @param[out] major Major functionality change - * @param[out] minor Minor functionality change - * @param[out] patch Bug fixes, documentation changes, no new features introduced - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch ); - - /*! @brief Create a plan object initialized entirely with default values. - * @details A plan is a repository of state for calculating FFT's. Allows the runtime to pre-calculate kernels, programs - * and buffers and associate them with buffers of specified dimensions. - * @param[out] plHandle Handle to the newly created plan - * @param[in] context Client is responsible for providing an OpenCL context for the plan - * @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array - * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftCreateDefaultPlan( clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, - const size_t* clLengths ); - - /*! @brief Create a copy of an existing plan. - * @details This API allows a client to create a new plan based upon an existing plan. This is a convenience function - * provided for quickly creating plans that are similar, but may differ slightly. - * @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle - * @param[in] new_context Client is responsible for providing a new context for the new plan - * @param[in] in_plHandle Handle to a plan to be copied, previously created - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftCopyPlan( clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle ); - - /*! @brief Prepare the plan for execution. - * @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that - * no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled. This optional function - * allows the client application to perform this function when the application is being initialized instead of on the first - * execution. - * At this point, the clAmdFft runtime will apply all implimented optimizations, possibly including - * running kernel experiments on the devices in the plan context. - * <p> Users should assume that this function will take a long time to execute. If a plan is not baked before being executed, - * users should assume that the first call to clAmdFftEnqueueTransform will take a long time to execute. - * <p> If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of - * the clAmdFftSetPlan____ functions), that will not be considered an error. Instead, the plan will revert back to - * the unbaked state, discarding the benefits of the baking operation. - * @param[in] plHandle Handle to a plan previously created - * @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want - * the runtime to run load experiments and only pre-calculate state information - * @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of - * the devices included in the plan context - * @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that - * an application can register and which will be called when the program executable has been built (successfully or unsuccessfully) - * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. - * @param[in] user_data Passed as an argument when pfn_notify is called. - * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftBakePlan( clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, - void (CL_CALLBACK *pfn_notify)(clAmdFftPlanHandle plHandle, void *user_data), void* user_data ); - - /*! @brief Release the resources of a plan. - * @details A plan may include kernels, programs and buffers associated with it that consume memory. When a plan - * is not needed anymore, the client should release the plan. - * @param[in,out] plHandle Handle to a plan previously created - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftDestroyPlan( clAmdFftPlanHandle* plHandle ); - - /*! @brief Retrieve the OpenCL context of a previously created plan. - * @details User should pass a reference to an cl_context variable, which will be changed to point to a - * context set in the specified plan. - * @param[in] plHandle Handle to a plan previously created - * @param[out] context Reference to user allocated cl_context, which will point to context set in plan - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanContext( const clAmdFftPlanHandle plHandle, cl_context* context ); - - /*! @brief Retrieve the floating point precision of the FFT data - * @details User should pass a reference to an clAmdFftPrecision variable, which will be set to the - * precision of the FFT complex data in the plan. - * @param[in] plHandle Handle to a plan previously created - * @param[out] precision Reference to user clAmdFftPrecision enum - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanPrecision( const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision ); - - /*! @brief Set the floating point precision of the FFT data - * @details Set the plan property which will be the precision of the FFT complex data in the plan. - * @param[in] plHandle Handle to a plan previously created - * @param[in] precision Reference to user clAmdFftPrecision enum <p> - * <b> For Release 1.0,</b> only CLFFT_SINGLE and CLFFT_SINGLE_FAST are supported. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanPrecision( clAmdFftPlanHandle plHandle, clAmdFftPrecision precision ); - - /*! @brief Retrieve the scaling factor that should be applied to the FFT data - * @details User should pass a reference to an cl_float variable, which will be set to the - * floating point scaling factor that will be multiplied across the FFT data. - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Which direction does the scaling factor apply to - * @param[out] scale Reference to user cl_float variable - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanScale( const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale ); - - /*! @brief Set the scaling factor that should be applied to the FFT data - * @details Set the plan property which will be the floating point scaling factor that will be - * multiplied across the FFT data. - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Which direction does the scaling factor apply to - * @param[in] scale Reference to user cl_float variable - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanScale( clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale ); - - /*! @brief Retrieve the number of discrete arrays that this plan can handle concurrently - * @details User should pass a reference to an cl_uint variable, which will be set to the - * number of discrete arrays (1D or 2D) that will be batched together for this plan - * @param[in] plHandle Handle to a plan previously created - * @param[out] batchSize How many discrete number of FFT's are to be performed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanBatchSize( const clAmdFftPlanHandle plHandle, size_t* batchSize ); - - /*! @brief Set the number of discrete arrays that this plan can handle concurrently - * @details Set the plan property which will be set to the number of discrete arrays (1D or 2D) - * that will be batched together for this plan - * @param[in] plHandle Handle to a plan previously created - * @param[in] batchSize How many discrete number of FFT's are to be performed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanBatchSize( clAmdFftPlanHandle plHandle, size_t batchSize ); - - /*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan - * @details Queries a plan object and retrieves the dimensionality that the plan is set for. A size is returned to - * help the client allocate the proper storage to hold the dimensions in a further call to clAmdFftGetPlanLength - * @param[in] plHandle Handle to a plan previously created - * @param[out] dim The dimensionality of the FFT's to be transformed - * @param[out] size Value used to allocate an array to hold the FFT dimensions. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDim( const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size ); - - /*! @brief Set the dimensionality of FFT's to be transformed by the plan - * @details Set the dimensionality of FFT's to be transformed by the plan - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimensionality of the FFT's to be transformed - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDim( clAmdFftPlanHandle plHandle, const clAmdFftDim dim ); - - /*! @brief Retrieve the length of each dimension of the FFT - * @details User should pass a reference to a size_t array, which will be set to the - * length of each discrete dimension of the FFT - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the length parameters; describes how many elements are in the array - * @param[out] clLengths An array of lengths, of size 'dim'. Each array value describes the length of each dimension - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanLength( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths ); - - /*! @brief Set the length of each dimension of the FFT - * @details Set the plan property which will be the length of each discrete dimension of the FFT - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the length parameters; describes how many elements are in the array - * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions - * <p><b> For Release 1.0, </b> All lengths must be powers of 2. Non-power-of-two dimensions should be supported in a future release. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanLength( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths ); - - /*! @brief Retrieve the distance between consecutive elements for input buffers in a dimension. - * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely - * ignored - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanInStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Set the distance between consecutive elements for input buffers in a dimension. - * @details Set the plan properties which will be the distance between elements in a given dimension - * (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[in] clStrides An array of strides, of size 'dim'. - * See @ref DistanceStridesandPitches for details. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanInStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension. - * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely - * ignored - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenYsuch that successive elements in the second and third dimensions are stored contiguously. - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanOutStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Set the distance between consecutive elements for output buffers in a dimension. - * @details Set the plan properties which will be the distance between elements in a given dimension - * (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array - * @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. - * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. - * @sa clAmdFftSetPlanInStride - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanOutStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); - - /*! @brief Retrieve the distance between Array objects - * @details Pitch is the distance between each discrete array object in an FFT array. This is only used - * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDimension (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. - * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) - * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output. - * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDistance( const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist ); - - /*! @brief Set the distance between Array objects - * @details Pitch is the distance between each discrete array object in an FFT array. This is only used - * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDimension (units are in terms of clAmdFftPrecision) - * @param[in] plHandle Handle to a plan previously created - * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. - * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) - * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output. - * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDistance( clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist ); - - /*! @brief Retrieve the expected layout of the input and output buffers - * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored - * in various layouts; this informs the FFT engine what layout to produce on output - * @param[in] plHandle Handle to a plan previously created - * @param[out] iLayout Indicates how the input buffers are laid out in memory - * @param[out] oLayout Indicates how the output buffers are laid out in memory - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetLayout( const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout ); - - /*! @brief Set the expected layout of the input and output buffers - * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored - * in various layouts; this informs the FFT engine what layout to produce on output - * @param[in] plHandle Handle to a plan previously created - * @param[in] iLayout Indicates how the input buffers are laid out in memory - * @param[in] oLayout Indicates how the output buffers are laid out in memory - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetLayout( clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout ); - - /*! @brief Retrieve whether the input buffers are going to be overwritten with results - * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the - * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers - * on the Enqueue call. - * @param[in] plHandle Handle to a plan previously created - * @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetResultLocation( const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness ); - - /*! @brief Set whether the input buffers are going to be overwritten with results - * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the - * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers - * on the Enqueue call. - * @param[in] plHandle Handle to a plan previously created - * @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetResultLocation( clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness ); - - /*! @brief Retrieve the final transpose setting of a muti-dimensional FFT - * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client - * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped - * for possible speed improvements - * @param[in] plHandle Handle to a plan previously created - * @param[out] transposed Parameter specifies whether the final transpose can be skipped - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanTransposeResult( const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed * transposed ); - - /*! @brief Set the final transpose setting of a muti-dimensional FFT - * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client - * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped - * for possible speed improvements - * @param[in] plHandle Handle to a plan previously created - * @param[in] transposed Parameter specifies whether the final transpose can be skipped - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanTransposeResult( clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed ); - - - /*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer - * @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold - * intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error - * is returned. If buffersize returns as 0, the runtime needs no temporary buffer. - * @param[in] plHandle Handle to a plan previously created - * @param[out] buffersize Size in bytes for intermediate buffer - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftGetTmpBufSize( const clAmdFftPlanHandle plHandle, size_t* buffersize ); - - /*! @brief Enqueue an FFT transform operation, and return immediately (non-blocking) - * @details This transform API is specific to the interleaved complex format, taking an input buffer with real and imaginary - * components paired together, and outputting the results into an output buffer in the same format - * @param[in] plHandle Handle to a plan previously created - * @param[in] dir Forwards or backwards transform - * @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents - * @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of - * the devices included in the plan context - * @param[in] numWaitEvents Specify the number of elements in the eventWaitList array - * @param[in] waitEvents Events that this transform should wait to complete before executing on the device - * @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed - * in commQueues. This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications - * when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least - * as many elements as specified in numQueuesAndEvents. - * @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime. If the transform - * is in place, the FFT results will overwrite the input buffers - * @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms. If the transform - * is in place, this parameter may be NULL or nullptr. It is completely ignored - * @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. If clTmpBuffers is NULL or nullptr, - * and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime. - * @return Enum describing error condition; superset of OpenCL error codes - */ - CLAMDFFTAPI clAmdFftStatus clAmdFftEnqueueTransform( - clAmdFftPlanHandle plHandle, - clAmdFftDirection dir, - cl_uint numQueuesAndEvents, - cl_command_queue* commQueues, - cl_uint numWaitEvents, - const cl_event* waitEvents, - cl_event* outEvents, - cl_mem* inputBuffers, - cl_mem* outputBuffers, - cl_mem tmpBuffer - ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.cpp b/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.cpp deleted file mode 100644 index 5cba77240f0a93dda5a945efe34957a1a814c67f..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.cpp +++ /dev/null @@ -1,522 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// clAmdFft.opencl.cpp : Provides functions to set up openCL -// - -#include "stdafx.h" -#include <stdexcept> -#include <iomanip> -#include <sstream> -#include <cstring> -#include <vector> -#include "clAmdFft.h" -#include "clAmdFft.openCL.h" - -void prettyPrintPlatformInfo( const cl_platform_id& pId ) -{ - size_t platformProfileSize = 0; - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, 0, NULL, &platformProfileSize ), - "Getting CL_PLATFORM_PROFILE Platform Info string size ( ::clGetPlatformInfo() )" ); - - std::vector< char > szPlatformProfile( platformProfileSize ); - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, platformProfileSize, &szPlatformProfile[ 0 ], NULL), - "Getting CL_PLATFORM_PROFILE Platform Info string ( ::clGetPlatformInfo() )" ); - - size_t platformVersionSize = 0; - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, 0, NULL, &platformVersionSize ), - "Getting CL_PLATFORM_VERSION Platform Info string size ( ::clGetPlatformInfo() )" ); - - std::vector< char > szPlatformVersion( platformVersionSize ); - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, platformVersionSize, &szPlatformVersion[ 0 ], NULL), - "Getting CL_PLATFORM_VERSION Platform Info string ( ::clGetPlatformInfo() )" ); - - size_t platformNameSize = 0; - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, 0, NULL, &platformNameSize ), - "Getting CL_PLATFORM_NAME Platform Info string size ( ::clGetPlatformInfo() )" ); - - std::vector< char > szPlatformName( platformNameSize ); - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, platformNameSize, &szPlatformName[ 0 ], NULL), - "Getting CL_PLATFORM_NAME Platform Info string ( ::clGetPlatformInfo() )" ); - - size_t vendorStringSize = 0; - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, 0, NULL, &vendorStringSize ), - "Getting CL_PLATFORM_VENDOR Platform Info string size ( ::clGetPlatformInfo() )" ); - - std::vector< char > szPlatformVendor( vendorStringSize ); - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, vendorStringSize, &szPlatformVendor[ 0 ], NULL), - "Getting CL_PLATFORM_VENDOR Platform Info string ( ::clGetPlatformInfo() )" ); - - size_t platformExtensionsSize = 0; - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, 0, NULL, &platformExtensionsSize ), - "Getting CL_PLATFORM_EXTENSIONS Platform Info string size ( ::clGetPlatformInfo() )" ); - - std::vector< char > szPlatformExtensions( platformExtensionsSize ); - OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, platformExtensionsSize, &szPlatformExtensions[ 0 ], NULL), - "Getting CL_PLATFORM_EXTENSIONS Platform Info string ( ::clGetPlatformInfo() )" ); - - const int indent = countOf( " CL_PLATFORM_EXTENSIONS: " ); - std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_PROFILE: " << &szPlatformProfile[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VERSION: " << &szPlatformVersion[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_NAME: " << &szPlatformName[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VENDOR: " << &szPlatformVendor[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_EXTENSIONS: " << &szPlatformExtensions[ 0 ] << std::endl; - std::cout << std::right << std::endl; -} - -void prettyPrintDeviceInfo( const cl_device_id& dId ) -{ - size_t deviceNameSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, 0, NULL, &deviceNameSize ), - "Getting CL_DEVICE_NAME Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szDeviceName( deviceNameSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, deviceNameSize, &szDeviceName[ 0 ], NULL ), - "Getting CL_DEVICE_NAME Platform Info string ( ::clGetDeviceInfo() )" ); - - size_t deviceVersionSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ), - "Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szDeviceVersion( deviceVersionSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ), - "Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); - - size_t driverVersionSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, 0, NULL, &driverVersionSize ), - "Getting CL_DRIVER_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szDriverVersion( driverVersionSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, driverVersionSize, &szDriverVersion[ 0 ], NULL ), - "Getting CL_DRIVER_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); - - size_t openCLVersionSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &openCLVersionSize ), - "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szOpenCLVersion( openCLVersionSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, openCLVersionSize, &szOpenCLVersion[ 0 ], NULL ), - "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); - - cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL ), - "Getting CL_DEVICE_TYPE device info ( ::clGetDeviceInfo() )" ); - - cl_uint devAddrBits = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_ADDRESS_BITS, sizeof( cl_uint ), &devAddrBits, NULL ), - "Getting CL_DEVICE_ADDRESS_BITS device info ( ::clGetDeviceInfo() )" ); - - cl_uint maxClockFreq = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof( cl_uint ), &maxClockFreq, NULL ), - "Getting CL_DEVICE_MAX_CLOCK_FREQUENCY device info ( ::clGetDeviceInfo() )" ); - - cl_bool devAvailable = CL_FALSE; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_AVAILABLE, sizeof( cl_bool ), &devAvailable, NULL ), - "Getting CL_DEVICE_AVAILABLE device info ( ::clGetDeviceInfo() )" ); - - cl_bool devCompAvailable = CL_FALSE; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_COMPILER_AVAILABLE, sizeof( cl_bool ), &devCompAvailable, NULL ), - "Getting CL_DEVICE_COMPILER_AVAILABLE device info ( ::clGetDeviceInfo() )" ); - - size_t devMaxWorkGroup = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size_t ), &devMaxWorkGroup, NULL ), - "Getting CL_DEVICE_MAX_WORK_GROUP_SIZE device info ( ::clGetDeviceInfo() )" ); - - cl_uint devMaxWorkItemDim = CL_FALSE; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &devMaxWorkItemDim, NULL ), - "Getting CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS device info ( ::clGetDeviceInfo() )" ); - - std::vector< size_t > devMaxWorkItemSizes( devMaxWorkItemDim ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( size_t )*devMaxWorkItemSizes.size( ), &devMaxWorkItemSizes[0], NULL), - "Getting CL_DEVICE_MAX_WORK_ITEM_SIZES device info ( ::clGetDeviceInfo() )" ); - - cl_bool deviceHostUnified = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( cl_bool ), &deviceHostUnified, NULL ), - "Getting CL_DEVICE_HOST_UNIFIED_MEMORY Platform Info string ( ::clGetDeviceInfo() )" ); - - cl_ulong devMaxConstantBuffer = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( cl_ulong ), &devMaxConstantBuffer, NULL ), - "Getting CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE device info ( ::clGetDeviceInfo() )" ); - - cl_ulong devLocalMemSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( cl_ulong ), &devLocalMemSize, NULL ), - "Getting CL_DEVICE_LOCAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" ); - - cl_ulong deviceGlobalMemSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( cl_ulong ), &deviceGlobalMemSize, NULL ), - "Getting CL_DEVICE_GLOBAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" ); - - cl_ulong deviceMaxMemAllocSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &deviceMaxMemAllocSize, NULL ), - "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" ); - - size_t deviceExtSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ), - "Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szDeviceExt( deviceExtSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ), - "Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )" ); - - const int indent = countOf( " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " ); - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_NAME: " << &szDeviceName[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_VERSION: " << &szDeviceVersion[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DRIVER_VERSION: " << &szDriverVersion[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_TYPE: " - << (CL_DEVICE_TYPE_DEFAULT & devType ? "default" : "") - << (CL_DEVICE_TYPE_CPU & devType ? "CPU" : "") - << (CL_DEVICE_TYPE_GPU & devType ? "GPU" : "") - << (CL_DEVICE_TYPE_ACCELERATOR & devType ? "Accelerator" : "") - << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CLOCK_FREQUENCY: " << maxClockFreq << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_ADDRESS_BITS: " << devAddrBits << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_AVAILABLE: " << ( devAvailable ? "TRUE": "FALSE") << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_COMPILER_AVAILABLE: " << ( devCompAvailable ? "TRUE": "FALSE") << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_OPENCL_C_VERSION: " << &szOpenCLVersion[ 0 ] << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_GROUP_SIZE: " << devMaxWorkGroup << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << devMaxWorkItemDim << std::endl; - for( cl_uint wis = 0; wis < devMaxWorkItemSizes.size( ); ++wis ) - { - std::stringstream dimString; - dimString << "Dimension[ " << wis << " ] "; - std::cout << std::right << std::setw( indent ) << dimString.str( ) << devMaxWorkItemSizes[wis] << std::endl; - } - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_HOST_UNIFIED_MEMORY: " << ( deviceHostUnified ? "TRUE": "FALSE") << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << devMaxConstantBuffer; - std::cout << " ( " << devMaxConstantBuffer / 1024 << " KB )" << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_LOCAL_MEM_SIZE: " << devLocalMemSize; - std::cout << " ( " << devLocalMemSize / 1024 << " KB )" << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_GLOBAL_MEM_SIZE: " << deviceGlobalMemSize; - std::cout << " ( " << deviceGlobalMemSize / 1048576 << " MB )" << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_MEM_ALLOC_SIZE: " << deviceMaxMemAllocSize; - std::cout << " ( " << deviceMaxMemAllocSize / 1048576 << " MB )" << std::endl; - std::cout << std::left << std::setw( indent ) << " CL_DEVICE_EXTENSIONS: " << &szDeviceExt[ 0 ] << std::endl; - - std::cout << std::right << std::endl; -} - -// Verify a failed condition; return true on fail -inline cl_bool OPENCL_V_FAIL( cl_int res ) -{ - if( res == CL_SUCCESS ) - return CL_FALSE; - else - return CL_TRUE; -} - -std::string prettyPrintclFFTStatus( const cl_int& status ) -{ - switch( status ) - { - case CLFFT_INVALID_GLOBAL_WORK_SIZE: - return "CLFFT_INVALID_GLOBAL_WORK_SIZE"; - case CLFFT_INVALID_MIP_LEVEL: - return "CLFFT_INVALID_MIP_LEVEL"; - case CLFFT_INVALID_BUFFER_SIZE: - return "CLFFT_INVALID_BUFFER_SIZE"; - case CLFFT_INVALID_GL_OBJECT: - return "CLFFT_INVALID_GL_OBJECT"; - case CLFFT_INVALID_OPERATION: - return "CLFFT_INVALID_OPERATION"; - case CLFFT_INVALID_EVENT: - return "CLFFT_INVALID_EVENT"; - case CLFFT_INVALID_EVENT_WAIT_LIST: - return "CLFFT_INVALID_EVENT_WAIT_LIST"; - case CLFFT_INVALID_GLOBAL_OFFSET: - return "CLFFT_INVALID_GLOBAL_OFFSET"; - case CLFFT_INVALID_WORK_ITEM_SIZE: - return "CLFFT_INVALID_WORK_ITEM_SIZE"; - case CLFFT_INVALID_WORK_GROUP_SIZE: - return "CLFFT_INVALID_WORK_GROUP_SIZE"; - case CLFFT_INVALID_WORK_DIMENSION: - return "CLFFT_INVALID_WORK_DIMENSION"; - case CLFFT_INVALID_KERNEL_ARGS: - return "CLFFT_INVALID_KERNEL_ARGS"; - case CLFFT_INVALID_ARG_SIZE: - return "CLFFT_INVALID_ARG_SIZE"; - case CLFFT_INVALID_ARG_VALUE: - return "CLFFT_INVALID_ARG_VALUE"; - case CLFFT_INVALID_ARG_INDEX: - return "CLFFT_INVALID_ARG_INDEX"; - case CLFFT_INVALID_KERNEL: - return "CLFFT_INVALID_KERNEL"; - case CLFFT_INVALID_KERNEL_DEFINITION: - return "CLFFT_INVALID_KERNEL_DEFINITION"; - case CLFFT_INVALID_KERNEL_NAME: - return "CLFFT_INVALID_KERNEL_NAME"; - case CLFFT_INVALID_PROGRAM_EXECUTABLE: - return "CLFFT_INVALID_PROGRAM_EXECUTABLE"; - case CLFFT_INVALID_PROGRAM: - return "CLFFT_INVALID_PROGRAM"; - case CLFFT_INVALID_BUILD_OPTIONS: - return "CLFFT_INVALID_BUILD_OPTIONS"; - case CLFFT_INVALID_BINARY: - return "CLFFT_INVALID_BINARY"; - case CLFFT_INVALID_SAMPLER: - return "CLFFT_INVALID_SAMPLER"; - case CLFFT_INVALID_IMAGE_SIZE: - return "CLFFT_INVALID_IMAGE_SIZE"; - case CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR"; - case CLFFT_INVALID_MEM_OBJECT: - return "CLFFT_INVALID_MEM_OBJECT"; - case CLFFT_INVALID_HOST_PTR: - return "CLFFT_INVALID_HOST_PTR"; - case CLFFT_INVALID_COMMAND_QUEUE: - return "CLFFT_INVALID_COMMAND_QUEUE"; - case CLFFT_INVALID_QUEUE_PROPERTIES: - return "CLFFT_INVALID_QUEUE_PROPERTIES"; - case CLFFT_INVALID_CONTEXT: - return "CLFFT_INVALID_CONTEXT"; - case CLFFT_INVALID_DEVICE: - return "CLFFT_INVALID_DEVICE"; - case CLFFT_INVALID_PLATFORM: - return "CLFFT_INVALID_PLATFORM"; - case CLFFT_INVALID_DEVICE_TYPE: - return "CLFFT_INVALID_DEVICE_TYPE"; - case CLFFT_INVALID_VALUE: - return "CLFFT_INVALID_VALUE"; - case CLFFT_MAP_FAILURE: - return "CLFFT_MAP_FAILURE"; - case CLFFT_BUILD_PROGRAM_FAILURE: - return "CLFFT_BUILD_PROGRAM_FAILURE"; - case CLFFT_IMAGE_FORMAT_NOT_SUPPORTED: - return "CLFFT_IMAGE_FORMAT_NOT_SUPPORTED"; - case CLFFT_IMAGE_FORMAT_MISMATCH: - return "CLFFT_IMAGE_FORMAT_MISMATCH"; - case CLFFT_MEM_COPY_OVERLAP: - return "CLFFT_MEM_COPY_OVERLAP"; - case CLFFT_PROFILING_INFO_NOT_AVAILABLE: - return "CLFFT_PROFILING_INFO_NOT_AVAILABLE"; - case CLFFT_OUT_OF_HOST_MEMORY: - return "CLFFT_OUT_OF_HOST_MEMORY"; - case CLFFT_OUT_OF_RESOURCES: - return "CLFFT_OUT_OF_RESOURCES"; - case CLFFT_MEM_OBJECT_ALLOCATION_FAILURE: - return "CLFFT_MEM_OBJECT_ALLOCATION_FAILURE"; - case CLFFT_COMPILER_NOT_AVAILABLE: - return "CLFFT_COMPILER_NOT_AVAILABLE"; - case CLFFT_DEVICE_NOT_AVAILABLE: - return "CLFFT_DEVICE_NOT_AVAILABLE"; - case CLFFT_DEVICE_NOT_FOUND: - return "CLFFT_DEVICE_NOT_FOUND"; - case CLFFT_SUCCESS: - return "CLFFT_SUCCESS"; - case CLFFT_NOTIMPLEMENTED: - return "CLFFT_NOTIMPLEMENTED"; - case CLFFT_FILE_NOT_FOUND: - return "CLFFT_FILE_NOT_FOUND"; - case CLFFT_FILE_CREATE_FAILURE: - return "CLFFT_FILE_CREATE_FAILURE"; - case CLFFT_VERSION_MISMATCH: - return "CLFFT_VERSION_MISMATCH"; - case CLFFT_INVALID_PLAN: - return "CLFFT_INVALID_PLAN"; - default: - return "Error code not defined"; - break; - } -} - -std::vector< cl_device_id > initializeCL( cl_device_type deviceType, - cl_uint deviceGpuList, - cl_context& context, - bool printclInfo ) -{ - cl_int status = 0; - - /* - * Have a look at the available platforms and pick either - * the AMD one if available or a reasonable default. - */ - - cl_uint numPlatforms = 0; - cl_platform_id platform = NULL; - OPENCL_V_THROW( ::clGetPlatformIDs( 0, NULL, &numPlatforms ), - "Getting number of platforms( ::clGetPlatformsIDs() )" ); - - if( numPlatforms > 0 ) - { - std::vector< cl_platform_id > platforms( numPlatforms ); - OPENCL_V_THROW( ::clGetPlatformIDs( numPlatforms, &platforms[ 0 ], NULL ), - "Getting Platform Id's ( ::clGetPlatformsIDs() )" ); - - // TODO: How should we determine what platform to choose? We are just defaulting to the last one reported, as we - // print out the info - for( unsigned int i=0; i < numPlatforms; ++i ) - { - if( printclInfo ) - { - std::cout << "OpenCL platform [ " << i << " ]:" << std::endl; - prettyPrintPlatformInfo( platforms[i] ); - } - - platform = platforms[i]; - } - } - - if( NULL == platform ) - { - throw std::runtime_error( "No appropriate OpenCL platform could be found" ); - } - - /* - * If we could find our platform, use it. Otherwise use just available platform. - */ - - // Get the device list for this type. - // - cl_uint num_devices = 0; - OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, 0, NULL, &num_devices ), - "Getting OpenCL devices ( ::clGetDeviceIDs() )" ); - if( 0 == num_devices ) - { - OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available"); - } - - std::vector< cl_device_id > deviceIDs( num_devices ); - OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, num_devices, &deviceIDs[0], NULL), - "Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )" ); - - if( (CL_DEVICE_TYPE_GPU == deviceType) && (~cl_uint(0) != deviceGpuList) ) - { - // The command line options specify to user certain gpu(s) - // - for( unsigned u = (unsigned) deviceIDs.size(); u-- > 0; ) - { - if( 0 != (deviceGpuList & (1<<u) ) ) - continue; - - // Remove this GPU from the list - deviceIDs[u] = deviceIDs.back(); - deviceIDs.pop_back(); - } - } - - if( 0 == deviceIDs.size( ) ) - { - OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available"); - } - - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; - - ///////////////////////////////////////////////////////////////// - // Create an OpenCL context - ///////////////////////////////////////////////////////////////// - context = clCreateContext( cps, - (cl_uint) deviceIDs.size(), - & deviceIDs[0], - NULL, - NULL, - &status); - OPENCL_V_THROW( status, "Creating Context ( ::clCreateContextFromType() )" ); - - /* First, get the size of device list data */ - size_t deviceListSize; - OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ), - "Getting device array size ( ::clGetContextInfo() )" ); - - ///////////////////////////////////////////////////////////////// - // Detect OpenCL devices - ///////////////////////////////////////////////////////////////// - std::vector< cl_device_id > devices( deviceListSize/sizeof( cl_device_id ) ); - - /* Now, get the device list data */ - OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, &devices[ 0 ], NULL ), - "Getting device array ( ::clGetContextInfo() )" ); - - if( printclInfo ) - { - cl_uint cContextDevices = 0; - - size_t deviceVersionSize = 0; - OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ), - "Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); - - std::vector< char > szDeviceVersion( deviceVersionSize ); - OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ), - "Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); - - char openclstr[11]="OpenCL 1.0"; - - if (!strncmp((const char*)&szDeviceVersion[ 0 ], openclstr, 10)) - { - cContextDevices = 1; - } - else - { - OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_NUM_DEVICES, sizeof( cContextDevices ), &cContextDevices, NULL ), - "Getting number of context devices ( ::clGetContextInfo() )" ); - } - - for( cl_uint i = 0; i < cContextDevices; ++i ) - { - std::cout << "OpenCL devices [ " << i << " ]:" << std::endl; - prettyPrintDeviceInfo( devices[i] ); - } - } - - return devices; -} - -int cleanupCL( cl_context* context, cl_command_queue* commandQueue, - const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent ) -{ - if( *outEvent != NULL ) - OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" ); - - releaseOpenCLMemBuffer( numBuffersIn, inputBuffer); - releaseOpenCLMemBuffer( numBuffersOut, outputBuffer); - - if( *commandQueue != NULL ) - OPENCL_V_THROW( clReleaseCommandQueue( *commandQueue ), "Error: In clReleaseCommandQueue\n" ); - - if( *context != NULL ) - OPENCL_V_THROW( clReleaseContext( *context ), "Error: In clReleaseContext\n" ); - - return 0; -} - -int createOpenCLMemoryBuffer( cl_context& context, const size_t bufferSizeBytes, const cl_uint numBuffers, cl_mem buffer[], cl_mem_flags accessibility) { - cl_int status = 0; - - for( cl_uint i = 0; i < numBuffers; ++i ) - { - buffer[ i ] = ::clCreateBuffer( context, accessibility, bufferSizeBytes, NULL, &status); - OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" ); - } - - return 0; -} - -int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[]) -{ - for( cl_uint i = 0; i < numBuffers; ++i ) - { - if( buffer[ i ] != NULL ) - OPENCL_V_THROW( clReleaseMemObject( buffer[ i ] ), "Error: In clReleaseMemObject\n" ); - } - - return 0; -} - -void createOpenCLCommandQueue( cl_context& context, - cl_uint commandQueueFlags, - cl_command_queue& commandQueue, - std::vector< cl_device_id > devices, - const size_t bufferSizeBytesIn, - const cl_uint numBuffersIn, - cl_mem clMemBufferIn[], - const size_t bufferSizeBytesOut, - const cl_uint numBuffersOut, - cl_mem clMemBufferOut[] ) -{ - cl_int status = 0; - commandQueue = ::clCreateCommandQueue( context, devices[0], commandQueueFlags, &status ); - OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" ); - - createOpenCLMemoryBuffer( context, bufferSizeBytesIn, numBuffersIn, clMemBufferIn, CL_MEM_READ_WRITE); - createOpenCLMemoryBuffer( context, bufferSizeBytesOut, numBuffersOut, clMemBufferOut, CL_MEM_READ_WRITE); -} - diff --git a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.h b/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.h deleted file mode 100644 index cb82e024b7ac0d442e42d5d6eac645f0b3fbf408..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clAmdFft.openCL.h +++ /dev/null @@ -1,97 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -#pragma once -#if !defined( AMD_OPENCL_H ) -#define AMD_OPENCL_H -#include <memory> -#include <stdexcept> -#include "amd-unicode.h" - -// Creating a portable defintion of countof -#if defined( _WIN32 ) - #define countOf _countof -#else - #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) -#endif - -/* - * \brief OpenCL related initialization (ripped from AMD stream sample code ) - * Create Context, Device list - * Load CL file, compile, link CL source - * Build program and kernel objects - */ -std::vector< cl_device_id > initializeCL( cl_device_type deviceType, - cl_uint deviceGpuList, - cl_context& context, - bool printclInfo ); - -/* - * \brief OpenCL memory buffer creation - */ -int createOpenCLMemoryBuffer( - cl_context& context, - const size_t bufferSizeBytes, - const cl_uint numBuffers, - cl_mem buffer[], - cl_mem_flags accessibility - ); - -/* - * \brief OpenCL command queue creation (ripped from AMD stream sample code ) - * Create Command Queue - * Create OpenCL memory buffer objects - */ -void createOpenCLCommandQueue( cl_context& context, - cl_uint commandQueueFlags, - cl_command_queue& commandQueue, - std::vector< cl_device_id > devices, - const size_t bufferSizeBytesIn, - const cl_uint numBuffersIn, - cl_mem clMemBufferIn[], - const size_t bufferSizeBytesOut, - const cl_uint numBuffersOut, - cl_mem clMemBufferOut[] ); - -/* - * \brief release OpenCL memory buffer - */ -int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] ); - -std::string prettyPrintclFFTStatus( const cl_int& status ); - -// This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition. -// If an error occurs, we throw. -// Note: std::runtime_error does not take unicode strings as input, so only strings supported -inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno ) -{ - switch( res ) - { - case CL_SUCCESS: /**< No error */ - break; - default: - { - std::stringstream tmp; - tmp << "OPENCL_V_THROWERROR< "; - tmp << prettyPrintclFFTStatus( res ); - tmp << " > ("; - tmp << lineno; - tmp << "): "; - tmp << msg; - std::string errorm (tmp.str()); - std::cout << errorm<< std::endl; - throw std::runtime_error( errorm ); - } - } - - return res; -} -#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__) - -/* - * \brief Release OpenCL resources (Context, Memory etc.) (ripped from AMD stream sample code ) - */ -int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent ); - -#endif diff --git a/RTCP/Cobalt/clAmdFft/samples/clMemcpy.cpp b/RTCP/Cobalt/clAmdFft/samples/clMemcpy.cpp deleted file mode 100644 index a1c63fa57807ebffb40a785d18b8caac7010d73b..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/clMemcpy.cpp +++ /dev/null @@ -1,998 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// clAmdFft.clMemcpy.cpp : OpenCL memory copy kernel generator -// -// -// -//////////////////////////////////////////////////////////////////////////////// - -// TODO: Add 2d/tiled memory copies. - -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include <CL/opencl.h> -#include <iostream> -#include <vector> -#include <time.h> - -#include <sstream> -#include <string> -using std::stringstream; - -#include <boost/program_options.hpp> -namespace po = boost::program_options; - -//#include "../statTimer/clAmdFft.statisticalTimer.extern.h" -//#include "../include/clAmdFft.sharedLibrary.h" - -#include "../../../common/statisticalTimer.h" - -#include "../../../common/amd-unicode.h" - -class clDataType{ -public: - virtual bool setSize(size_t size) = 0; // set size - virtual size_t getSize() = 0; // size in bytes - virtual size_t getTypeSize() = 0; // size of base type in bytes - virtual std::string getName() = 0; // get cl type name -}; - -class clFloat:public clDataType{ -public: - - clFloat() - { - clSize = 2; - } - - size_t getSize() - { - return clSize * sizeof(float); - } - - size_t getTypeSize() - { - return sizeof(float); - } - - std::string getName() - { - std::stringstream name; - name << "float"; - if(clSize > 1) - { - name << clSize; - } - - std::string ret = name.str().c_str(); - return ret; - } - - bool setSize(size_t size) - { - if(size < 0 || size > 16 || (size & (size - 1)) ) - { - return false; - } - else - { - clSize = size; - return true; - } - } - // The null kernel generator has its own special set of paramters - -private: - size_t clSize; -}; - -class clDouble:public clDataType{ -public: - - clDouble() - { - clSize = 1; - } - - size_t getSize() - { - return clSize * sizeof(double); - } - - size_t getTypeSize() - { - return sizeof(double); - } - - std::string getName() - { - std::stringstream name; - name << "double"; - if(clSize > 1) - { - name << clSize; - } - - std::string ret = name.str().c_str(); - return ret; - } - - bool setSize(size_t size) - { - if(size < 0 || size > 4 || (size & (size - 1))) - { - return false; - } - else - { - clSize = size; - return true; - } - } - // The null kernel generator has its own special set of paramters - -private: - size_t clSize; -}; - -// This is a helper function to query a device for it's caps and check whether a certain user supplied cap is present -// stolen from the clAmdRuntime library -bool checkDevExt( std::string cap, std::vector< cl_device_id >& devices ) -{ - for( size_t d = 0; d < devices.size( ); ++d) - { - size_t deviceExtSize = 0; - ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ), - "Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )"; - - std::vector< char > szDeviceExt( deviceExtSize ); - ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ), - "Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )"; - - std::string strDeviceExt = &szDeviceExt[ 0 ]; - - if( strDeviceExt.find( cap.c_str( ), 0 ) == std::string::npos ) - return 0; - } - - return true; -} - -#define INDENT " " - -// memcpy kernel generator, very simple -// - void GenerateMemcpyKernel (stringstream &ssn, const int registerCount, const int dumbyRegisterCount, const int workGroupSize , clDataType * clType, const bool useBarrier, int ldsPasses, const int dataItemCount, const int writeOnly, const int readOnly, const int memcpyOnly, const bool supportDoublePrecision) -{ -// kernel generator - dumb - - //std::stringstream ssn (std::stringstream::out); - static const bool first_choice = true; - - ssn << "//------------------------------\n" - "// !!!!!NULL Memcopy KERNEL!!!!\n\n"; - - // include double precision support - - if(supportDoublePrecision) - { - ssn<< "\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n\n"; - } - - // set the workgroup size to our specification, this will effect the number of wavefronts used - ssn << "__attribute__((reqd_work_group_size(" << workGroupSize << ",1,1)))\n" - << "__kernel void\n" - << "memcpy" << "(\n"; - - -// basically do inplace memcopy unless memcpyOnly is true, then do out of place - - if(!memcpyOnly) - { - ssn << INDENT "__global " << clType->getName() << " *gcomplx\n"; - ssn << ")\n{\n"; - } - else - { - ssn << INDENT "__global const " << clType->getName() << " *in,\n"; - ssn << INDENT "__global " << clType->getName() << " *out\n"; - ssn << ")\n{\n"; - - // a strict memcopy kernel does not require much, just code it here and return - - ssn << INDENT "int gid = get_global_id(0);\n"; - ssn << INDENT "out[gid] = in[gid];\n"; - ssn << INDENT "return;"; - ssn << "\n}\n"; - - return; - } - - - // create registers for kernel to use for memcopies - ssn << "\n" << clType->getName() << " R0"; - for(int i = 1; i < registerCount + dumbyRegisterCount; i++) - { - ssn << ",R" << i; - } - - ssn << ";\n"; - - // identifiers for local work item and global group id - ssn << "\nuint me = get_local_id(0);"; - ssn << "\nuint batch = get_group_id(0);"; - - ssn << "\nglobal "; - - // if read only kernel use const to disable read caching - if(writeOnly) - { - ssn << "const "; - } - - ssn << clType->getName() << "* gpc = gcomplx + me * " << registerCount << " + batch * " << registerCount * workGroupSize << ";"; - - if(ldsPasses > 0) - { - // allocate LDS - ssn << "\n__local " << clType->getName() << " ldsBuff[" << registerCount * workGroupSize << "];"; - ssn << "\n__local " << clType->getName() << "* lds = ldsBuff + me * " << registerCount << ";"; - } - - ssn << "\n"; - - // If write only kernel, don't read back regs to global memory - if(writeOnly || !readOnly) - { - // copy data from Global Memory to regs - for(int i = 0; i < registerCount; i++) - { - ssn << "\nR" << i << "= gpc[" << i << "];"; - } - } - - ssn << "\n"; - - // make number of LDS passes specified, copy from regs to lds back to regs - for(int j = 0; j < ldsPasses; j++) - { - - // copy data from regs to LDS - for(int i = 0; i < registerCount; i++) - { - - ssn << "\nlds[" << i << "] = R" << i << ";"; - } - - ssn << "\n"; - - // insert memory barrier - if(useBarrier == true) - { - ssn << "\nbarrier(CLK_LOCAL_MEM_FENCE);\n"; - } - - // copy data from LDS back to regs - for(int i = 0; i < registerCount; i++) - { - ssn << "\nR" << i << " = lds[" << (registerCount -1 ) - i << "];"; - } - - ssn << "\n"; - - } - - // if dumby registers are specified, just assign a value to them - // do some math - int rIndex = 0; - for(int i = registerCount; i < registerCount + dumbyRegisterCount; i++) - { - if( i == registerCount) - { - ssn << "\nR" << i << " = R0 * 3.1459;"; - // ssn << "\nR0 = R" << i << ";"; // write results to R0 to kee it from being optimized out - } - else - { - ssn << "\nR" << i << " = R" << i <<" + R" << i - 1 << " * 3.1459;"; - } - - ssn << "\nR" << rIndex <<" = R" << i << ";"; - rIndex ++; - if(rIndex >= registerCount) - { - rIndex = 0; - } - } - - ssn << "\n"; - - // if readonly or not a writeonly kernel copy registers back to global memory - if(readOnly || !writeOnly) - { - for(int i = 0; i < registerCount; i++) - { - ssn << "\ngpc[" << i << "] = R" << i << ";"; - } - } - ssn << "\n}\n"; -} - -// http://cottonvibes.blogspot.com/2011/01/dynamically-allocate-aligned-memory.html -// Alignment must be power of 2 (1,2,4,8,16...2^15) -void* aligned_malloc(size_t size, size_t alignment) { - assert(alignment <= 0x8000); - uintptr_t r = (uintptr_t)malloc(size + --alignment + 2); - uintptr_t o = (r + 2 + alignment) & ~(uintptr_t)alignment; - if (!r) return NULL; - ((uint16_t*)o)[-1] = (uint16_t)(o-r); - return (void*)o; -} - -void aligned_free(void* p) { - if (!p) return; - free((void*)((uintptr_t)p-((uint16_t*)p)[-1])); -} - - -int main(int argc, char** argv) -{ - int err; // error code returned from api calls - - size_t global; // global domain size for our calculation - size_t local; // local domain size for our calculation - cl_platform_id platform; - cl_device_id device_id; // compute device id - cl_uint platforms; - cl_context context; // compute context - cl_command_queue commands; // compute command queue - cl_program program; // compute program - cl_kernel kernel; // compute kernel - - cl_mem input; // device memory used for the input array - cl_mem output; // device memory used for the output array for strict memcopy kernel - - cl_device_type deviceType = CL_DEVICE_TYPE_GPU; // make the GPU the default device type - - int workgroupSize = 0; // workgroup size / number of work items per wavefront - int registerCount = 16; // registers allocated in kernels for memcopy operations - int dumbyRegisterCount = 16; // registers allocated, but not used for memcopies - int dataItemCount = 0; // total number of items (type float,float2,4) to copy to/from OpenCL device - int dataItemCountEnd = 0; // total number of items (type float,float2,4) to copy to/from OpenCL device - int ldsPasses = 1; // number of 'passes' copying data to/from LDS - clDataType * clType; // default float type to use - bool useBarrier = true; // include memory barrier in kernels - bool memcpyOnly = false; // if true, creates strict memcopy kernels, not registers allocated (in CL code) - bool writeOnly = false; // only perform write operations - bool readOnly = false; // only perform read operations. - bool bDisableOptimization = false; // disable OpenCL compiler optimizations if true - bool bDoublePrecision = false; - bool bZeroMemcopy = false; // if true, host memory is used by GPU - - cl_ulong start = 0; // profiling start and end times - cl_ulong end = 0; - - clFloat lFloat; - clDouble lDouble; - clType = &lFloat; // float is default - - try - { - // Declare the supported options. - po::options_description desc( "clMemcpy client command line options" ); - desc.add_options() - ( "help,h", "produces this help message" ) - ( "version,v", "Print out build date/version" ) - ( "gpu,g", "Force instantiation of an OpenCL GPU device" ) - ( "cpu,c", "Force instantiation of an OpenCL CPU device" ) - ( "float,f", po::value< int >(), "Float type to use in kernels, 1,2,4,8,16 (default: float2)" ) - ( "double,d", po::value< int >(), "Use double type to use in kernels, 1,2,4 (default: double 1)" ) - ( "regs,r", po::value< int >( ®isterCount )->default_value( 16 ), "Specify number of registers to use in kernels (default: 16)" ) - ( "dumbyRegs,q", po::value< int >( &dumbyRegisterCount )->default_value( 0 ), "Specify number 'dumby registers' to allocate in kernels" ) - ( "memcpyOnly,m", "Generate strict memcopy kernel (default: false)" ) - ( "itemCount,i", po::value< int >( &dataItemCount )->default_value( 0 ), "Number of items to transfer (default: max allocatable)" ) - ( "itemCountEnd,j", po::value< int >( &dataItemCountEnd )->default_value( 0 ), "End of item count, start at i go to j in powers of 2." ) - ( "ldsPasses,l", po::value< int >( &ldsPasses )->default_value( 1 ), "Number of 'passes' using LDS (default: 1, 0 = no LDS used)" ) - ( "barrier,b", po::value< bool >( &useBarrier )->default_value( true ), "Include memory barrier in kernel" ) - ( "writeOnly,x", "Write only kernels (default: false)" ) - ( "readOnly,y", "Read only kernels (default: false" ) - ( "disableOptimization,n", "Disable OpenCL compiler optimizations (default: false" ) - ( "zeroMemcopy,z", "Use zero memcopy kernels, only valid on APUs (default 0)" ) - ( "workgroupSize,w", po::value< int >( &workgroupSize )->default_value( 64 ), "Workgroup size (default 64)" ) - ; - - po::variables_map vm; - po::store( po::parse_command_line( argc, argv, desc ), vm ); - po::notify( vm ); - - stringstream str; - - if( vm.count( "version" ) ) - { - str << "clMemcopy version: " << __DATE__ << " " << __TIME__ <<std::endl; - std::cout << str.str(); - str.str() = ""; - return 0; - } - - if( vm.count( "help" ) ) - { - // This needs to be 'cout' as program-options does not support wcout yet - std::cout << desc << std::endl; - return 0; - } - - size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) - | ((vm.count( "cpu" ) > 0) ? 2 : 0); - if ((mutex & (mutex-1)) != 0) { - str << "You have selected mutually-exclusive OpenCL device options:" << std::endl; - if (vm.count ( "gpu" ) > 0) str << " gpu,g Force instantiation of an OpenCL GPU device" << std::endl; - if (vm.count ( "cpu" ) > 0) str << " cpu,c Force instantiation of an OpenCL CPU device" << std::endl; - { - std::cout << str.str(); - return 1; - } - } - - mutex = ((vm.count( "writeOnly" ) > 0) ? 1 : 0) - | ((vm.count( "readOnly" ) > 0) ? 2 : 0); - if ((mutex & (mutex-1)) != 0) { - str << "You have selected mutually-exclusive OpenCL device options:" << std::endl; - if (vm.count ( "writeOnly" ) > 0) str << " writeOnly,x Generate write only kernels" << std::endl; - if (vm.count ( "readOnly" ) > 0) str << " readOnly,y Generate read only kernels" << std::endl; - { - std::cout << str.str(); - return 1; - } - } - - if( vm.count( "gpu" ) ) - { - deviceType = CL_DEVICE_TYPE_GPU; - } - - if( vm.count( "cpu" ) ) - { - deviceType = CL_DEVICE_TYPE_CPU; - } - - if( vm.count( "writeOnly" ) ) - { - writeOnly = true; - } - - if( vm.count( "readOnly" ) ) - { - readOnly = true; - } - - if( vm.count( "zeroMemcopy" ) ) - { - bZeroMemcopy = true; - } - - int typeCount = 0; - - if( vm.count( "float" ) ) - { - if(!clType->setSize(vm["float"].as<int>())) - { - std::cout << "Float (float,-f) type must be 1,2,4,8, or 16."; - return 1; - } - typeCount ++; - } - - if( vm.count( "double" ) ) - { - clType = &lDouble; - if(!clType->setSize(vm["double"].as<int>())) - { - std::cout << "Double (double,-d) type must be 1, or 2."; - return 1; - } - bDoublePrecision = true; - typeCount ++; - } - - if(typeCount > 1) - { - std::cout << "Only one register type may be specified (Float,Double)."; - return 1; - } - - if( vm.count( "memcpyOnly" ) ) - { - memcpyOnly = true; - registerCount = 1; - } - - if( vm.count( "disableOptimization" ) ) - { - bDisableOptimization = true; - } - - if(workgroupSize < 1) - { - printf("Error: workgroup size can not be 0"); - return 1; - } - - // if the register count is < 1, it's a pure memcpy kernel - if(registerCount < 1) - { - registerCount = 1; - memcpyOnly = true; - } - - } - catch( std::exception& e ) - { - std::cout << "clMemcopy error condition reported:" << std::endl << e.what() << std::endl; - return 1; - } - - // enumerate platforms to see if anything is available. - // - err=clGetPlatformIDs(1, &platform, &platforms); - if (err != CL_SUCCESS) - { - printf("Error: Failed to get a platform.!\n"); - return EXIT_FAILURE; - } - - // Connect to a compute device - // - err = clGetDeviceIDs(platform, deviceType, 1, &device_id, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to create a device group!\n"); - return EXIT_FAILURE; - } - - // Create a compute context - // - context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); - if (!context) - { - printf("Error: Failed to create a compute context!\n"); - return EXIT_FAILURE; - } - - // Create a command commands - // - commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err); - if (!commands) - { - printf("Error: Failed to create a command commands!\n"); - return EXIT_FAILURE; - } - - // find how much global memory can safely be allocated - // - cl_ulong maxMemAlloc = 0; - err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE,sizeof(cl_ulong), &maxMemAlloc, NULL); - - if (err != CL_SUCCESS) - { - printf("Error: Failed to read MAX_MEM_ALLOC_SIZE from device!\n"); - return EXIT_FAILURE; - } - - // find how much local memory can safely be allocated - // - cl_ulong maxLocalMemAlloc = 0; - err = clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_SIZE,sizeof(cl_ulong), &maxLocalMemAlloc, NULL); - - if (err != CL_SUCCESS) - { - printf("Error: Failed to read CL_DEVICE_LOCAL_MEM_SIZE from device!\n"); - return EXIT_FAILURE; - } - - // check if double precision is supported - // If the user specifies double precision, check that the device supports double precision first - if( bDoublePrecision ) - { - std::vector< cl_device_id > dev; - dev.push_back(device_id); - - bool retAmdFp64 = checkDevExt( "cl_amd_fp64", dev ); - if( retAmdFp64 != true ) - { - // If AMD's extention is not supported, check for Khronos extention - bool retKhrFp64 = checkDevExt( "cl_khr_fp64", dev ); - if( retKhrFp64 != true ) - { - printf("Error: Device %d does not support double precission\n", device_id); - return EXIT_FAILURE; - } - } - } - - - do - { - - // generate a kernel - // - stringstream kernelSource; - - GenerateMemcpyKernel(kernelSource, registerCount, dumbyRegisterCount, workgroupSize, clType, useBarrier, ldsPasses, dataItemCount, writeOnly, readOnly, memcpyOnly, bDoublePrecision); - - if( !dataItemCountEnd ) // - { - printf("\n%s\n", kernelSource.str().c_str()); - } - // calculate how many data items we want to move, float1,2,4 - // - if(dataItemCount == 0) - { - if( memcpyOnly ) - { - maxMemAlloc /= 2; // need two buffers - } - dataItemCount = (int)(maxMemAlloc / (clType->getSize())); - dataItemCount /= registerCount * workgroupSize; - dataItemCount *= registerCount * workgroupSize; - } - - - // Fill our data set with random float values - // - - void* data = aligned_malloc(clType->getSize() * dataItemCount, 256); // original data set given to device - if(data == NULL) - { - printf("Error: Failed allcating host data buffer!\n"); - return EXIT_FAILURE; - } - - srand ( (unsigned int) time(NULL) ); - for(int i = 0; i < dataItemCount * clType->getSize(); i++) - { - *((char *)data + i) = rand() / (char)RAND_MAX; - } - - // Create the compute program from the source buffer - // - std::string stringKern = kernelSource.str(); - const char *charKern = stringKern.c_str(); - program = clCreateProgramWithSource(context, 1, (const char **) &charKern, NULL, &err); - if (!program) - { - printf("Error: Failed to create compute program!\n"); - return EXIT_FAILURE; - } - - // Build the program executable - // - if(bDisableOptimization) - { - err = clBuildProgram(program, 0, NULL, "-g -cl-opt-disable", NULL, NULL); - } - else - { - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); - } - - if (err != CL_SUCCESS) - { - size_t len; - char buffer[2048]; - - printf("Error: Failed to build program executable!\n"); - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); - printf("%s\n", buffer); - exit(1); - } - - // Create the compute kernel in the program we wish to run - // - kernel = clCreateKernel(program, "memcpy", &err); - if (!kernel || err != CL_SUCCESS) - { - printf("Error: Failed to create compute kernel!\n"); - exit(1); - } - /* - // Discover and load the timer module if present - void* timerLibHandle = LoadSharedLibrary( "lib", "clAmdFft.StatTimer", false ); - if( timerLibHandle == NULL ) - { - terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl; - } - - - // Timer module discovered and loaded successfully - // Initialize function pointers to call into the shared module - PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) ); - - // Create and initialize our timer class, if the external timer shared library loaded - baseStatTimer* timer = NULL; - */ - size_t writeTimer,readTimer,executeTimer = 0; - StatisticalTimer &timer = StatisticalTimer::getInstance(); - - - - // timer->setNormalize( true ); - timer.Reserve( 3, 1 ); - - writeTimer = timer.getUniqueID( "write", 0 ); - readTimer = timer.getUniqueID( "read", 1 ); - executeTimer = timer.getUniqueID( "execute", 2); - - - // Create the input and output arrays in device memory for our calculation - // - - cl_mem_flags memFlags = CL_MEM_READ_ONLY; - void *hostPtr = NULL; - void *hostPtrOut = NULL; // use to map point to output buffer for memcopy only kernels - - // this option will only work on APUs same physical memory is used by host and device - if(bZeroMemcopy) - { - memFlags |= CL_MEM_ALLOC_HOST_PTR; - // memFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; - } - - input = clCreateBuffer(context, memFlags, clType->getSize() * dataItemCount, NULL, NULL); - - if (!input) - { - printf("Error: Failed to allocate device memory!!\n"); - exit(1); - } - - if(memcpyOnly) - { - output = clCreateBuffer(context, memFlags, clType->getSize() * dataItemCount, NULL, NULL); - if (!output) - { - printf("Error: Failed to allocate device memory!\n"); - exit(1); - } - } - - - - if( bZeroMemcopy ) - { - // err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, NULL); //test - // if( err != CL_SUCCESS ) - // { - // printf("Error: Failed to copy host buffer to cl buffer (zero memcopy)!\n"); - // return EXIT_FAILURE; - // } - - hostPtr = clEnqueueMapBuffer(commands, input, CL_TRUE, CL_MAP_WRITE, 0, clType->getSize() * dataItemCount, 0, NULL, NULL, &err); - if( err != CL_SUCCESS ) - { - printf("Error: Failed to map host pointer to zero memcopy buffer!\n"); - return EXIT_FAILURE; - } - - if(memcpyOnly) - { - hostPtrOut = clEnqueueMapBuffer(commands, output, CL_TRUE, CL_MAP_WRITE, 0, clType->getSize() * dataItemCount, 0, NULL, NULL, &err); - if( err != CL_SUCCESS ) - { - printf("Error: Failed to map host pointer to zero memcopy buffer!\n"); - return EXIT_FAILURE; - } - } - - // start timing writing to buffer (device or zero mem copy) - timer.Start(writeTimer); - - memcpy( hostPtr, data, clType->getSize() * dataItemCount); - } - else - { - // start timing writing to buffer (device or zero mem copy) - timer.Start(writeTimer); - } - - // Write our data set into the input array in device memory - // - if( !bZeroMemcopy ) - { - cl_event eventKernelTiming; // for timing - - err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, &eventKernelTiming); - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - clFinish(commands); - - - clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_START, - sizeof(start), &start, NULL); - - clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_END, - sizeof(end), &end, NULL); - - cl_ulong diff = end-start; - - if( !dataItemCountEnd) - { - printf("\nbuffer write GPU timer %lld",diff); - } - } - - timer.Stop(writeTimer); - - // Set the arguments to our compute kernel - // - - err = 0; - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); - - if(memcpyOnly) - { - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); - } - - // err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); - // err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel arguments! %d\n", err); - exit(1); - } - - - // Get the maximum work group size for executing the kernel on the device - // - err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to retrieve kernel work group info! %d\n", err); - exit(1); - } - - // Execute the kernel over the entire range of our 1d input data set - // using the maximum number of work group items for this device - // - if(!memcpyOnly) - { - global = dataItemCount / registerCount; - } - else - { - global = dataItemCount; - } - - if(workgroupSize < local && workgroupSize != 0) - { - local = workgroupSize; - } - if(workgroupSize > local) - { - printf("Error: Max supported workgroup size is %d, requested was %d", (unsigned int)local, workgroupSize); - exit(1); - } - - cl_event eventKernelTiming; // for timing - - timer.Start(executeTimer); // measure kernel execution time - - err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, &eventKernelTiming); - - // Wait for the command commands to get serviced before reading back results - // - - //clWaitForEvents(1, &eventGlobal); - clFinish(commands); - - timer.Stop(executeTimer); // end of kernel execution - - clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_START, - sizeof(start), &start, NULL); - - clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_END, - sizeof(end), &end, NULL); - - clReleaseEvent(eventKernelTiming); - - if (err) - { - printf("Error: Failed to execute kernel!\n"); - return EXIT_FAILURE; - } - - timer.Start(readTimer); // measure time to read back from memory - - // Read back the results from the device to verify the output - // - - if( !bZeroMemcopy ) - { - err = clEnqueueReadBuffer( commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to read output array! %d\n", err); - exit(1); - } - } - - timer.Stop(readTimer); - - cl_ulong time = end - start; /* Convert nanoseconds to msecs */ - - // Calculate gflops - - cl_ulong dataTransferred = dataItemCount * clType->getSize(); - - int multiplier = 2; - if(readOnly || writeOnly) - { - multiplier = 1; - } - - int fftlen = (int)(local * registerCount * clType->getSize()/clType->getTypeSize()) / 2; - double gflops = (global/local) * 5 * fftlen * ( log( static_cast< double >( fftlen ) ) / log( 2.0 ) ) / time; - double MBps = (double)(multiplier * (double)(dataTransferred) / time); - - if( !dataItemCountEnd ) - { - printf("\nTicks= %ld\nTransfer= %ld bytes\nbandwidth= %lf GB/S", time , dataTransferred, MBps); - - if(!memcpyOnly) - { - printf("\nType = %s\nfftlen=%d\nGflops %lf\n",clType->getName().c_str(), fftlen, gflops); - } - } - // Shutdown and cleanup - // - - if(bZeroMemcopy) - { - err = clEnqueueUnmapMemObject(commands, input, hostPtr, 0, 0, 0); - if(memcpyOnly) - { - err = clEnqueueUnmapMemObject(commands, output, hostPtrOut, 0, 0, 0); - } - - if(err != CL_SUCCESS) - { - printf("Error: Failed to unmap memory objects!\n"); - return EXIT_FAILURE; - } - } - - clReleaseMemObject(input); - if( memcpyOnly ) - { - clReleaseMemObject(output); - } - - if( data ) - { - aligned_free( data ); - } - - printf("\n%10ld,\t%f,\t%f,\t%f,\t%f,",dataTransferred, timer.getMinimumTime(writeTimer) ,timer.getMinimumTime(executeTimer),timer.getMinimumTime(readTimer),timer.getMinimumTime(writeTimer) + timer.getMinimumTime(executeTimer) + timer.getMinimumTime(readTimer) ); - - clReleaseProgram(program); - clReleaseKernel(kernel); - - dataItemCount*= 2; -} while(dataItemCount <= dataItemCountEnd); - - - clReleaseCommandQueue(commands); - clReleaseContext(context); - - return 0; -} - diff --git a/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.cpp b/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.cpp deleted file mode 100644 index f9111f20069f499c230d9287924a55985da8258c..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.cpp +++ /dev/null @@ -1,328 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// StatTimer.cpp : Defines the exported functions for the DLL application. -// - -#include "stdafx.h" -#include <iostream> -#include <string> -#include <cassert> -#include <limits> -#include <functional> -#include "statisticalTimer.h" - -#if defined( __GNUC__ ) - #include <sys/time.h> -#endif - -// Functor object to help with accumulating values in vectors -template< typename T > -struct Accumulator: public std::unary_function< T, void > -{ - T acc; - - Accumulator( ): acc( 0 ) {} - void operator( )(T x) { acc += x; } -}; - -// Unary predicate used for remove_if() algorithm -// Currently, RangeType is expected to be a floating point type, and ValType an integer type -template< typename RangeType, typename ValType > -struct PruneRange -{ - RangeType lower, upper; - - PruneRange( RangeType mean, RangeType stdev ): lower( mean-stdev ), upper( mean+stdev ) {} - - bool operator( )( ValType val ) - { - // These comparisons can be susceptible to signed/unsigned casting problems - // This is why we cast ValType to RangeType, because RangeType should always be floating and signed - if( static_cast< RangeType >( val ) < lower ) - return true; - else if( static_cast< RangeType >( val ) > upper ) - return true; - - return false; - } -}; - -StatisticalTimer& -StatisticalTimer::getInstance( ) -{ - static StatisticalTimer timer; - return timer; -} - -StatisticalTimer::StatisticalTimer( ): nEvents( 0 ), nSamples( 0 ), normalize( true ) -{ -#if defined( _WIN32 ) - // OS call to get ticks per second2 - ::QueryPerformanceFrequency( reinterpret_cast<LARGE_INTEGER*>( &clkFrequency ) ); -#else - clkFrequency = 1000000; -#endif -} - -StatisticalTimer::~StatisticalTimer( ) -{} - -void -StatisticalTimer::Clear( ) -{ - labelID.clear( ); - clkStart.clear( ); - clkTicks.clear( ); -} - -void -StatisticalTimer::Reset( ) -{ - if( nEvents == 0 || nSamples == 0 ) - throw std::runtime_error( "StatisticalTimer::Reserve( ) was not called before Reset( )" ); - - clkStart.clear( ); - clkTicks.clear( ); - - clkStart.resize( nEvents ); - clkTicks.resize( nEvents ); - - for( unsigned int i = 0; i < nEvents; ++i ) - { - clkTicks.at( i ).reserve( nSamples ); - } - - return; -} - -// The caller can pre-allocate memory, to improve performance. -// nEvents is an approximate value for how many seperate events the caller will think -// they will need, and nSamples is a hint on how many samples we think we will take -// per event -void -StatisticalTimer::Reserve( unsigned int nEvents, unsigned int nSamples ) -{ - this->nEvents = std::max<unsigned int> (1, nEvents); - this->nSamples = std::max<unsigned int> (1, nSamples); - - Clear( ); - labelID.reserve( nEvents ); - - clkStart.resize( nEvents ); - clkTicks.resize( nEvents ); - - for( unsigned int i = 0; i < nEvents; ++i ) - { - clkTicks.at( i ).reserve( nSamples ); - } -} - -void -StatisticalTimer::setNormalize( bool norm ) -{ - normalize = norm; -} - -void -StatisticalTimer::Start( sTimerID id ) -{ -#if defined( _WIN32 ) - ::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &clkStart.at( id ) ) ); -#else - struct timeval s; - gettimeofday(&s, 0); - clkStart.at( id ) = (unsigned long long)s.tv_sec * 1000000 + (unsigned long long)s.tv_usec; -#endif -} - -void -StatisticalTimer::Stop( sTimerID id ) -{ - unsigned long long n; - -#if defined( _WIN32 ) - ::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &n ) ); -#else - struct timeval s; - gettimeofday(&s, 0); - n = (unsigned long long)s.tv_sec * 1000000 + (unsigned long long)s.tv_usec; -#endif - - n -= clkStart.at( id ); - clkStart.at( id ) = 0; - AddSample( id, n ); -} - -void -StatisticalTimer::AddSample( const sTimerID id, const unsigned long long n ) -{ - clkTicks.at( id ).push_back( n ); -} - -// This function's purpose is to provide a mapping from a 'friendly' human readable text string -// to an index into internal data structures. -StatisticalTimer::sTimerID -StatisticalTimer::getUniqueID( const std::string& label, unsigned int groupID ) -{ - // I expect labelID will hardly ever grow beyond 30, so it's not of any use - // to keep this sorted and do a binary search - - labelPair sItem = std::make_pair( label, groupID ); - - stringVector::iterator iter; - iter = std::find( labelID.begin(), labelID.end(), sItem ); - - if( iter != labelID.end( ) ) - return std::distance( labelID.begin( ), iter ); - - labelID.push_back( sItem ); - - return labelID.size( ) - 1; - -} - -double -StatisticalTimer::getMean( sTimerID id ) const -{ - if( clkTicks.empty( ) ) - return 0; - - size_t N = clkTicks.at( id ).size( ); - - Accumulator<unsigned long long> sum = std::for_each( clkTicks.at( id ).begin(), clkTicks.at( id ).end(), Accumulator<unsigned long long>() ); - - return static_cast<double>( sum.acc ) / N; -} - -double -StatisticalTimer::getVariance( sTimerID id ) const -{ - if( clkTicks.empty( ) ) - return 0; - - double mean = getMean( id ); - - size_t N = clkTicks.at( id ).size( ); - double sum = 0; - - for( unsigned int i = 0; i < N; ++i ) - { - double diff = clkTicks.at( id ).at( i ) - mean; - diff *= diff; - sum += diff; - } - - return sum / N; -} - -double -StatisticalTimer::getStdDev( sTimerID id ) const -{ - double variance = getVariance( id ); - - return sqrt( variance ); -} - -double -StatisticalTimer::getAverageTime( sTimerID id ) const -{ - if( normalize ) - return getMean( id ) / clkFrequency; - else - return getMean( id ); -} - -double -StatisticalTimer::getMinimumTime( sTimerID id ) const -{ - clkVector::const_iterator iter = std::min_element( clkTicks.at( id ).begin( ), clkTicks.at( id ).end( ) ); - - if( iter != clkTicks.at( id ).end( ) ) - { - if( normalize ) - return static_cast<double>( *iter ) / clkFrequency; - else - return static_cast<double>( *iter ); - } - else - return 0; -} - -unsigned int -StatisticalTimer::pruneOutliers( sTimerID id , double multiple ) -{ - if( clkTicks.empty( ) ) - return 0; - - double mean = getMean( id ); - double stdDev = getStdDev( id ); - - clkVector& clks = clkTicks.at( id ); - - // Look on p. 379, "The C++ Standard Library" - // std::remove_if does not actually erase, it only copies elements, it returns new 'logical' end - clkVector::iterator newEnd = std::remove_if( clks.begin( ), clks.end( ), PruneRange< double,unsigned long long >( mean, multiple*stdDev ) ); - - clkVector::difference_type dist = std::distance( newEnd, clks.end( ) ); - - if( dist != 0 ) - clks.erase( newEnd, clks.end( ) ); - - assert( dist < std::numeric_limits< unsigned int >::max( ) ); - - return static_cast< unsigned int >( dist ); -} - -unsigned int -StatisticalTimer::pruneOutliers( double multiple ) -{ - unsigned int tCount = 0; - - for( unsigned int l = 0; l < labelID.size( ); ++l ) - { - unsigned int lCount = pruneOutliers( l , multiple ); - std::clog << "\tStatisticalTimer:: Pruning " << lCount << " samples from " << labelID[l].first << std::endl; - tCount += lCount; - } - - return tCount; -} - -// Defining an output print operator -std::ostream& -operator<<( std::ostream& os, const StatisticalTimer& st ) -{ - if( st.clkTicks.empty( ) ) - return os; - - std::ios::fmtflags bckup = os.flags( ); - - for( unsigned int l = 0; l < st.labelID.size( ); ++l ) - { - unsigned long long min = 0; - StatisticalTimer::clkVector::const_iterator iter = std::min_element( st.clkTicks.at( l ).begin( ), st.clkTicks.at( l ).end( ) ); - - if( iter != st.clkTicks.at( l ).end( ) ) - min = *iter; - - os << st.labelID[l].first << ", " << st.labelID[l].second << std::fixed << std::endl; - os << "Min:," << min << std::endl; - os << "Mean:," << st.getMean( l ) << std::endl; - os << "StdDev:," << st.getStdDev( l ) << std::endl; - os << "AvgTime:," << st.getAverageTime( l ) << std::endl; - os << "MinTime:," << st.getMinimumTime( l ) << std::endl; - - for( unsigned int t = 0; t < st.clkTicks[l].size( ); ++t ) - { - os << st.clkTicks[l][t]<< ","; - } - os << "\n" << std::endl; - - } - - os.flags( bckup ); - - return os; -} diff --git a/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.h b/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.h deleted file mode 100644 index f7e38c0fac3d240aed5025bbefc8da76fee3db26..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/statisticalTimer.h +++ /dev/null @@ -1,157 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -#pragma once -#ifndef _STATISTICALTIMER_H_ -#define _STATISTICALTIMER_H_ -#include <iosfwd> -#include <vector> -#include <algorithm> - -/** - * \file clAmdFft.StatisticalTimer.h - * \brief A timer class that provides a cross platform timer for use - * in timing code progress with a high degree of accuracy. - * This class is implemented entirely in the header, to facilitate inclusion into multiple - * projects without needing to compile an object file for each project. - */ - -/** - * \class StatisticalTimer - * \brief Counter that provides a fairly accurate timing mechanism for both - * windows and linux. This timer is used extensively in all the samples. - */ - -class StatisticalTimer -{ - // Private typedefs - typedef std::vector< unsigned long long > clkVector; - typedef std::pair< std::string, unsigned int > labelPair; - typedef std::vector< labelPair > stringVector; - - // In order to calculate statistics <std. dev.>, we need to keep a history of our timings - stringVector labelID; - clkVector clkStart; - std::vector< clkVector > clkTicks; - - // How many clockticks in a second - unsigned long long clkFrequency; - - // Saved sizes for our vectors, used in Reset() to reallocate vectors - clkVector::size_type nEvents, nSamples; - - // This setting controls whether the Timer should convert samples into time by dividing by the - // clock frequency - bool normalize; - - /** - * \fn StatisticalTimer() - * \brief Constructor for StatisticalTimer that initializes the class - * This is private so that user code cannot create their own instantiation. Instead, you - * must go through getInstance( ) to get a reference to the class. - */ - StatisticalTimer( ); - - /** - * \fn ~StatisticalTimer() - * \brief Destructor for StatisticalTimer that cleans up the class - */ - ~StatisticalTimer( ); - - /** - * \fn StatisticalTimer(const StatisticalTimer& ) - * \brief Copy constructors do not make sense for a singleton, disallow copies - */ - StatisticalTimer( const StatisticalTimer& ); - - /** - * \fn operator=( const StatisticalTimer& ) - * \brief Assignment operator does not make sense for a singleton, disallow assignments - */ - StatisticalTimer& operator=( const StatisticalTimer& ); - - friend std::ostream& operator<<( std::ostream& os, const StatisticalTimer& s ); - -public: - // Public typedefs - typedef stringVector::difference_type sTimerID; - - /** - * \fn getInstance() - * \brief This returns a reference to the singleton timer. Guarantees only 1 timer class is ever - * instantiated within a compilable executable. - */ - static StatisticalTimer& getInstance( ); - - /** - * \fn void Start( sTimerID id ) - * \brief Start the timer - * \sa Stop(), Reset() - */ - void Start( sTimerID id ); - - /** - * \fn void Stop( sTimerID id ) - * \brief Stop the timer - * \sa Start(), Reset() - */ - void Stop( sTimerID id ); - - /** - * \fn void AddSample( const sTimerID id, const unsigned long long n ) - * \brief Explicitely add a timing sample into the class - */ - void AddSample( const sTimerID id, const unsigned long long n ); - - /** - * \fn void Reset(void) - * \brief Reset the timer to 0 - * \sa Start(), Stop() - */ - void Clear( ); - - /** - * \fn void Reset(void) - * \brief Reset the timer to 0 - * \sa Start(), Stop() - */ - void Reset( ); - - void Reserve( unsigned int nEvents, unsigned int nSamples ); - - sTimerID getUniqueID( const std::string& label, unsigned int groupID ); - - // Calculate the average/mean of data for a given event - void setNormalize( bool norm ); - - // Calculate the average/mean of data for a given event - double getMean( sTimerID id ) const; - - // Calculate the variance of data for a given event - // Variance - average of the squared differences between data points and the mean - double getVariance( sTimerID id ) const; - - // Sqrt of variance, also in units of the original data - double getStdDev( sTimerID id ) const; - - /** - * \fn double getAverageTime(sTimerID id) const - * \return Return the arithmetic mean of all the samples that have been saved - */ - double getAverageTime( sTimerID id ) const; - - /** - * \fn double getMinimumTime(sTimerID id) const - * \return Return the arithmetic min of all the samples that have been saved - */ - double getMinimumTime( sTimerID id ) const; - - // Using the stdDev of the entire population (of an id), eliminate those samples that fall - // outside some specified multiple of the stdDev. This assumes that the population - // form a gaussian curve. - unsigned int pruneOutliers( double multiple ); - unsigned int pruneOutliers( sTimerID id , double multiple ); -}; - -#endif // _STATISTICALTIMER_H_ diff --git a/RTCP/Cobalt/clAmdFft/samples/stdafx.cpp b/RTCP/Cobalt/clAmdFft/samples/stdafx.cpp deleted file mode 100644 index a4069dfcc0f47908ae7fb5fbfbeac3507e383a49..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/stdafx.cpp +++ /dev/null @@ -1,12 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// stdafx.cpp : source file that includes just the standard includes -// clAmdFft.pch will be the pre-compiled header -// stdafx.obj will contain the pre-compiled type information - -#include "stdafx.h" - -// TODO: reference any additional headers you need in STDAFX.H -// and not in this file diff --git a/RTCP/Cobalt/clAmdFft/samples/stdafx.h b/RTCP/Cobalt/clAmdFft/samples/stdafx.h deleted file mode 100644 index 4887dae7f60052252150a02518cf0c65c8318e7a..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/stdafx.h +++ /dev/null @@ -1,27 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -// stdafx.h : include file for standard system include files, -// or project specific include files that are used frequently, but -// are changed infrequently -// - -#pragma once - -#include "targetver.h" - -#include <iostream> -#include <stdexcept> -#include <iomanip> -#include <complex> -#include <valarray> -#include <stdarg.h> -#if defined( _WIN32 ) - #define NOMINMAX - #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers - - #include <tchar.h> - #include <windows.h> -#endif - diff --git a/RTCP/Cobalt/clAmdFft/samples/targetver.h b/RTCP/Cobalt/clAmdFft/samples/targetver.h deleted file mode 100644 index bf68fd6c48ba9919933b764c4db9119492f5f45c..0000000000000000000000000000000000000000 --- a/RTCP/Cobalt/clAmdFft/samples/targetver.h +++ /dev/null @@ -1,14 +0,0 @@ -//////////////////////////////////////////// -// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - -#pragma once - -// Including SDKDDKVer.h defines the highest available Windows platform. - -// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and -// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. - -#if defined( _WIN32 ) - #include <SDKDDKVer.h> -#endif