Skip to content
Snippets Groups Projects
Commit cb15ffc3 authored by John Romein's avatar John Romein
Browse files

bug 225:

Added little-endian int->float conversion routines, necessary to bypass
PPF bank.
parent 5d1afa84
No related branches found
No related tags found
No related merge requests found
......@@ -2079,6 +2079,7 @@ RTCP/CNProc/test/filterTestResult.ps -text
RTCP/CNProc/test/inversePPFTestOutput.dat -text
RTCP/CNProc/test/inversePPFTestResult.ps -text
RTCP/CNProc/test/tDedispersion.cc -text
RTCP/CNProc/test/tFIR_Asm.cc -text
RTCP/CNProc/test/tInversePPF.cc -text
RTCP/CNProc/test/tPencilBeamFormer.cc -text
RTCP/CNProc/test/tPencilBeamFormer.sh -text
......
......@@ -2509,4 +2509,270 @@ _fast_memcpy:
lfpdux 14,1,8
addi 1,1,16
blr
# little endian i16complex -> float
#
# Symbol: LOFAR::RTCP::_convert<std::complex<short> >(std::complex<float> *out,
#         const std::complex<short> *samples, unsigned count)
# Registers as used below: r3 = out, r4 = samples, r5 = count.
#
# Scheme: every 4-byte sample is loaded as one word and patched into a 16-byte
# stack scratch area that was pre-filled with sub_value. The first halfword of
# the sample goes to byte offset 6 and the second halfword to byte offset 14,
# both with a byte-reversed store (this is the little endian part). The scratch
# area is then reloaded as a pair of doubles, sub_value is subtracted from the
# pair, and the result is stored to out as two single-precision floats.
# NOTE(review): sub_value is defined outside this excerpt; presumably it is the
# bias constant that turns the patched bit pattern back into the sample value
# -- confirm against its definition.
#
# The code is software pipelined over two scratch areas (A at r1+0, B at
# r1+16), two samples per loop iteration, with four more samples handled by the
# prologue and epilogue. CTR is therefore count/2 - 2 and has to be non-zero:
# count must be at least 6, and an odd count leaves the final sample
# unconverted.
.global _ZN5LOFAR4RTCP8_convertISt7complexIsEEEvPS2_IfEPKT_j
_ZN5LOFAR4RTCP8_convertISt7complexIsEEEvPS2_IfEPKT_j:
# Reserve 32 bytes of stack for scratch areas A and B.
# NOTE(review): the stfpdx to r1+0 further down overwrites the back-chain word
# that this stwu stores; the frame is released with addi, so nothing here reads
# it back, but a stack walker would -- confirm this is acceptable.
stwu 1,-32(1)
lis 8,sub_value@ha # load sub_values
li 9,sub_value@l
lfpdx 0,8,9
# r12 = 0x00800080: XOR mask toggling bit 7 of the low-order byte of each
# halfword. After the byte-reversed halfword store that bit is the most
# significant bit of the stored halfword.
lis 12,0x0080
ori 12,12,0x0080
# Loop trip count = count/2 - 2 (see header).
srwi 5,5,1
subi 5,5,2
mtctr 5
# Pre-decrement out, because stfpsux adds the stride before it stores.
subi 3,3,8
# r8 = output stride, r9 = input stride, r10 and r11 = byte offsets of the
# patched halfwords inside a scratch area. r8 and r9 are reused here; the
# address of sub_value is no longer needed.
li 8,8
li 9,4
li 10,14
li 11,6
# r5 = address of scratch area B (the count now lives in CTR).
addi 5,1,16
stfpdx 0,0,1 # initialize int->fp conversion area
stfpdx 0,0,5
# Prologue: fill the pipeline with samples 0..3.
lwz 6,0(4)
lwzux 7,4,9
# Sample 0 -> scratch A.
xor 6,6,12
sthbrx 6,10,1
srwi 6,6,16
sthbrx 6,11,1
lwzux 6,4,9
# Sample 1 -> scratch B.
xor 7,7,12
sthbrx 7,10,5
srwi 7,7,16
sthbrx 7,11,5
# Pick up sample 0 from scratch A before it is overwritten.
lfpdx 1,0,1
lwzux 7,4,9
# Sample 2 -> scratch A.
xor 6,6,12
sthbrx 6,10,1
srwi 6,6,16
sthbrx 6,11,1
fpsub 1,1,0
lfpdx 2,0,5
# Steady state: each iteration loads two new samples, patches two samples into
# the scratch areas, and stores two finished results.
0:
lwzux 6,4,9
# Sample held in r7 -> scratch B.
xor 7,7,12
sthbrx 7,10,5
srwi 7,7,16
sthbrx 7,11,5
fpsub 2,2,0
stfpsux 1,3,8
lfpdx 1,0,1
lwzux 7,4,9
# Sample held in r6 -> scratch A.
xor 6,6,12
sthbrx 6,10,1
srwi 6,6,16
sthbrx 6,11,1
fpsub 1,1,0
stfpsux 2,3,8
lfpdx 2,0,5
bdnz 0b
# Epilogue: no more loads; drain the last sample in r7 and the four results
# that are still in flight.
xor 7,7,12
sthbrx 7,10,5
srwi 7,7,16
sthbrx 7,11,5
fpsub 2,2,0
stfpsux 1,3,8
lfpdx 1,0,1
fpsub 1,1,0
stfpsux 2,3,8
lfpdx 2,0,5
fpsub 2,2,0
stfpsux 1,3,8
stfpsux 2,3,8
# Release the scratch frame.
addi 1,1,32
blr
# i8complex -> float
#
# Symbol: LOFAR::RTCP::_convert<std::complex<signed char> >(std::complex<float> *out,
#         const std::complex<signed char> *samples, unsigned count)
# Registers as used below: r3 = out, r4 = samples, r5 = count.
#
# Table-driven conversion: each 2-byte sample is loaded as an unsigned
# halfword, multiplied by 8 and used as a byte offset into
# LOFAR::RTCP::_FIR_fp_table. The pair of single-precision floats found there
# is copied to out. Offsets up to 65535*8 can be generated.
# NOTE(review): the table is filled outside this excerpt and presumably has to
# be initialised for this sample type before the first call -- confirm.
#
# The loop is unrolled four times and software pipelined: eight samples are
# loaded by the prologue and eight results are stored by the epilogue. CTR is
# count/4 - 2 and has to be non-zero, so count must be at least 12; a remainder
# of count modulo 4 is left unconverted.
.global _ZN5LOFAR4RTCP8_convertISt7complexIaEEEvPS2_IfEPKT_j
_ZN5LOFAR4RTCP8_convertISt7complexIaEEEvPS2_IfEPKT_j:
# r11 = base address of the lookup table.
lis 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@ha
la 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@l(11)
# Loop trip count = count/4 - 2 (see header).
srwi 5,5,2
subi 5,5,2
mtctr 5
# Pre-decrement out, because stfpsux adds the stride before it stores.
subi 3,3,8
# r10 = output stride (bytes), r9 = input stride (bytes).
li 10,8
li 9,2
# Prologue: load samples 0..3 (r5 is free again, the count lives in CTR).
lhz 5,0(4)
lhzux 6,4,9
lhzux 7,4,9
lhzux 8,4,9
# Look up samples 0..3 while loading samples 4..7 into the same registers.
slwi 5,5,3
lfpsx 0,11,5
lhzux 5,4,9
slwi 6,6,3
lfpsx 1,11,6
lhzux 6,4,9
slwi 7,7,3
lfpsx 2,11,7
lhzux 7,4,9
slwi 8,8,3
lfpsx 3,11,8
lhzux 8,4,9
# Steady state, four lanes per iteration: scale the pending index, store the
# previous lookup result, look up the pending sample, load the next sample.
0:
slwi 5,5,3
stfpsux 0,3,10
lfpsx 0,11,5
lhzux 5,4,9
slwi 6,6,3
stfpsux 1,3,10
lfpsx 1,11,6
lhzux 6,4,9
slwi 7,7,3
stfpsux 2,3,10
lfpsx 2,11,7
lhzux 7,4,9
slwi 8,8,3
stfpsux 3,3,10
lfpsx 3,11,8
lhzux 8,4,9
bdnz 0b
# Epilogue: same as the loop body but without further loads ...
slwi 5,5,3
stfpsux 0,3,10
lfpsx 0,11,5
slwi 6,6,3
stfpsux 1,3,10
lfpsx 1,11,6
slwi 7,7,3
stfpsux 2,3,10
lfpsx 2,11,7
slwi 8,8,3
stfpsux 3,3,10
lfpsx 3,11,8
# ... followed by the stores of the final four lookup results.
stfpsux 0,3,10
stfpsux 1,3,10
stfpsux 2,3,10
stfpsux 3,3,10
blr
# i4complex -> float
#
# Symbol: LOFAR::RTCP::_convert<LOFAR::TYPES::i4complex>(std::complex<float> *out,
#         const LOFAR::TYPES::i4complex *samples, unsigned count)
# Registers as used below: r3 = out, r4 = samples, r5 = count.
#
# Table-driven conversion: each 1-byte sample is loaded as an unsigned byte,
# multiplied by 8 and used as a byte offset into LOFAR::RTCP::_FIR_fp_table.
# The pair of single-precision floats found there is copied to out. Only the
# first 256 eight-byte entries of the table can be reached.
# NOTE(review): the table is filled outside this excerpt and presumably has to
# be initialised for this sample type before the first call -- confirm.
#
# The loop is unrolled four times and software pipelined: eight samples are
# loaded by the prologue and eight results are stored by the epilogue. CTR is
# count/4 - 2 and has to be non-zero, so count must be at least 12; a remainder
# of count modulo 4 is left unconverted.
.global _ZN5LOFAR4RTCP8_convertINS_5TYPES9i4complexEEEvPSt7complexIfEPKT_j
_ZN5LOFAR4RTCP8_convertINS_5TYPES9i4complexEEEvPSt7complexIfEPKT_j:
# r11 = base address of the lookup table.
lis 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@ha
la 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@l(11)
# Loop trip count = count/4 - 2 (see header).
srwi 5,5,2
subi 5,5,2
mtctr 5
# Pre-decrement out, because stfpsux adds the stride before it stores.
subi 3,3,8
# r10 = output stride (bytes), r9 = input stride (bytes).
li 10,8
li 9,1
# Prologue: load samples 0..3 (r5 is free again, the count lives in CTR).
lbz 5,0(4)
lbzux 6,4,9
lbzux 7,4,9
lbzux 8,4,9
# Look up samples 0..3 while loading samples 4..7 into the same registers.
slwi 5,5,3
lfpsx 0,11,5
lbzux 5,4,9
slwi 6,6,3
lfpsx 1,11,6
lbzux 6,4,9
slwi 7,7,3
lfpsx 2,11,7
lbzux 7,4,9
slwi 8,8,3
lfpsx 3,11,8
lbzux 8,4,9
# Steady state, four lanes per iteration: scale the pending index, store the
# previous lookup result, look up the pending sample, load the next sample.
0:
slwi 5,5,3
stfpsux 0,3,10
lfpsx 0,11,5
lbzux 5,4,9
slwi 6,6,3
stfpsux 1,3,10
lfpsx 1,11,6
lbzux 6,4,9
slwi 7,7,3
stfpsux 2,3,10
lfpsx 2,11,7
lbzux 7,4,9
slwi 8,8,3
stfpsux 3,3,10
lfpsx 3,11,8
lbzux 8,4,9
bdnz 0b
# Epilogue: same as the loop body but without further loads ...
slwi 5,5,3
stfpsux 0,3,10
lfpsx 0,11,5
slwi 6,6,3
stfpsux 1,3,10
lfpsx 1,11,6
slwi 7,7,3
stfpsux 2,3,10
lfpsx 2,11,7
slwi 8,8,3
stfpsux 3,3,10
lfpsx 3,11,8
# ... followed by the stores of the final four lookup results.
stfpsux 0,3,10
stfpsux 1,3,10
stfpsux 2,3,10
stfpsux 3,3,10
blr
#endif
......@@ -22,6 +22,7 @@
#define LOFAR_CNPROC_FIR_ASM_H
#if defined HAVE_BGP
#include <Common/lofar_complex.h>
#include <Interface/Config.h>
namespace LOFAR {
......@@ -38,6 +39,8 @@ template <typename SAMPLE_TYPE> extern void _filter(unsigned nrChannels,
fcomplex out[],
int nr_samples_div_16);
// Converts `count` integer complex samples from `samples` into single-precision
// complex values in `out`.  Only declared here: the instantiations for
// std::complex<short> (little-endian input), std::complex<signed char> and
// i4complex are hand-written assembly routines exported under the matching
// mangled names.  Those routines are pipelined and assume a minimum, suitably
// rounded `count` (at least 6 and even for 16-bit samples; at least 12 and a
// multiple of 4 for the 8-bit and 4-bit variants).
template <typename SAMPLE_TYPE> extern void _convert(fcomplex out[], const SAMPLE_TYPE samples[], unsigned count);
extern "C" {
void _transpose_4x8(fcomplex *out,
const fcomplex *in,
......
......@@ -38,13 +38,13 @@ template <typename SAMPLE_TYPE> class PPF: boost::noncopyable
void computeFlags(unsigned stat, const SubbandMetaData *metaData, FilteredData *);
void filter(unsigned stat, double centerFrequency, const SubbandMetaData *metaData, const TransposedData<SAMPLE_TYPE> *, FilteredData *);
private:
void init_fft(), destroy_fft();
#if !defined PPF_C_IMPLEMENTATION
void initConstantTable();
static void initConstantTable();
#endif
private:
void init_fft(), destroy_fft();
#if defined PPF_C_IMPLEMENTATION
fcomplex phaseShift(unsigned time, unsigned chan, double baseFrequency, double delayAtBegin, double delayAfterEnd) const;
#else
......
......@@ -7,6 +7,7 @@ include_directories(${PACKAGE_SOURCE_DIR}/src)
lofar_add_test(tCN_Processing tCN_Processing.cc)
lofar_add_test(tBeamForming tBeamForming.cc)
lofar_add_test(tDedispersion tDedispersion.cc)
lofar_add_test(tFIR_Asm tFIR_Asm.cc)
lofar_add_test(tPencilBeamFormer tPencilBeamFormer.cc)
lofar_add_test(tStokes tStokes.cc)
lofar_add_test(tInversePPF tInversePPF.cc)
......
#include <lofar_config.h>
#include <FIR_Asm.h>
#include <PPF.h>
#include <Common/Timer.h>
#include <iostream>
#if defined HAVE_BGP
using namespace LOFAR;
using namespace LOFAR::RTCP;
using namespace LOFAR::TYPES;
#endif
#define SIZE 131072
#if defined HAVE_BGP
// Times one _convert() call over SIZE samples with an NSTimer named
// `timerName`, then prints the first three and the last converted values.
// Nothing is checked; the output is meant for inspection.
template <typename SAMPLE_TYPE>
static void convertAndPrint(const char *timerName, fcomplex *converted, const SAMPLE_TYPE *samples)
{
  NSTimer timer(timerName, true);

  timer.start();
  _convert(converted, samples, SIZE);
  timer.stop();

  std::cout << converted[0] << ' ' << converted[1] << ' ' << converted[2] << ' ' << converted[SIZE - 1] << std::endl;
}
#endif


// Exercises the assembly int->float conversion routines for 16-, 8- and 4-bit
// complex samples.  Without HAVE_BGP the routines do not exist and the test is
// a no-op that reports success.
int main()
{
#if defined HAVE_BGP
  // 16-bit samples; all but the first three and the last stay zero-initialized.
  {
    i16complex samples[SIZE] = {
      makei16complex(0x0100, 0x0200),
      makei16complex(0x0300, 0x0400),
      makei16complex(0x0500, 0x0600),
    };
    samples[SIZE - 1] = makei16complex(0x0700, 0x0801);

    fcomplex converted[SIZE];
    convertAndPrint("little endian i16complex -> float", converted, samples);
  }

  // 8-bit samples; the constant table is set up first for this sample type.
  {
    PPF<i8complex>::initConstantTable();

    i8complex samples[SIZE] = {
      makei8complex(1, 2),
      makei8complex(3, 4),
      makei8complex(5, 6),
    };
    samples[SIZE - 1] = makei8complex(7, 8);

    fcomplex converted[SIZE];
    convertAndPrint("little endian i8complex -> float", converted, samples);
  }

  // 4-bit samples; the constant table is set up again for this sample type.
  {
    PPF<i4complex>::initConstantTable();

    i4complex samples[SIZE] = {
      makei4complex(0.5, 1.5),
      makei4complex(2.5, 3.5),
      makei4complex(4.5, 5.5),
    };
    samples[SIZE - 1] = makei4complex(-1.5, -0.5);

    fcomplex converted[SIZE];
    convertAndPrint("little endian i4complex -> float", converted, samples);
  }
#endif

  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment