diff --git a/.gitattributes b/.gitattributes index e40112c0240cf6c903b0fde3cfa2a0a7989d423d..9f2d52e8779857a76068115eefa45d1c29cf609a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1221,6 +1221,7 @@ MAC/bootstrap -text svneol=native#application/octet-stream MAC/lofarconf.in -text svneol=native#application/octet-stream RTCP/BGLProc/bootstrap -text RTCP/BGLProc/src/BGL_Processing.machinefile -text +RTCP/CNProc/src/CN_Processing.machinefile -text RTCP/IONProc/bootstrap -text RTCP/Interface/bootstrap -text RTCP/Run/bootstrap -text diff --git a/RTCP/CNProc/CNProc.spec.in b/RTCP/CNProc/CNProc.spec.in new file mode 100644 index 0000000000000000000000000000000000000000..a5585c238297fdaa6eee06c1e6a860f43aabbc9b --- /dev/null +++ b/RTCP/CNProc/CNProc.spec.in @@ -0,0 +1,160 @@ +# -*- Mode:rpm-spec -*- +# CNProc.spec.in +# + +############################################################################## +# +# Preamble +# +############################################################################## + +Summary: CNProc is ... brief description ... + +%define release @RPM_RELEASE@ +%define version @VERSION@ +%define pkgname @PACKAGE@ +%define pkgdir %{pkgname}-%{version}-%{release} +%define prefix /opt/lofar +%define configure_args @RPM_CONFIGURE_ARGS@ +##define build_kernel_version @BUILD_KERNEL_VERSION@ + +Name: %{pkgname} +Version: %{version} +Release: %{release} +Copyright: LGPL +Group: Application/System +Source: %{pkgname}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{pkgdir}-root +URL: http://www.astron.nl +Prefix: %{prefix} +BuildArchitectures: i386 # Target platforms, i.e., i586 +##Requires: Common = 1.2 ## define dependent packages here +Packager: %{packager} +Distribution: The LOFAR project +Vendor: ASTRON + +AutoReqProv: no + +%description + +CNProc ... more detailed description ... 
+ +############################################################################## +# +# prep +# +############################################################################## +%prep +echo $prefix + +# create the build directory, untar the source +%setup + +############################################################################## +# +# build +# +############################################################################## +%build +./configure %{configure_args} --prefix=%{prefix} && make + +############################################################################## +# +# install +# +############################################################################## +%install +# To make things work with BUILDROOT +if [ "$RPM_BUILD_ROOT" != "%{_tmppath}/%{pkgdir}-root" ] +then + echo + echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + echo @ @ + echo @ RPM_BUILD_ROOT is not what I expected. Please clean it yourself. @ + echo @ @ + echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + echo +else + echo Cleaning RPM_BUILD_ROOT: "$RPM_BUILD_ROOT" + rm -rf "$RPM_BUILD_ROOT" +fi +mkdir -p $RPM_BUILD_ROOT%{prefix} +make DESTDIR="$RPM_BUILD_ROOT" install + +#uninstall + +############################################################################## +# +# verify +# +############################################################################## +#verify + +############################################################################## +# +# clean +# +############################################################################## +%clean +# Call me paranoid, but I do not want to be responsible for nuking +# someone's harddrive! +if [ "$RPM_BUILD_ROOT" != "%{_tmppath}/%{pkgdir}-root" ] +then + echo + echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + echo @ @ + echo @ RPM_BUILD_ROOT is not what I expected. Please clean it yourself. 
@ + echo @ @ + echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + echo +else + echo Cleaning RPM_BUILD_ROOT: "$RPM_BUILD_ROOT" + rm -rf "$RPM_BUILD_ROOT" +fi + +############################################################################## +# +# files +# +############################################################################## + +# empty 'files' means all distributed files +%files +%defattr(-, root, root) +%{prefix} + +# Your application file list goes here +# %{prefix}/lib/lib*.so* + +# Documentation +# doc COPYING ChangeLog README AUTHORS NEWS +# doc doc/* + +# link the module to the correct path +%post + +# before uninstall +%preun + +# after uninstall +%postun + +############################################################################## +# +# package devel +# +############################################################################## + +#package devel +#Summary: Development files for %{pkgname} +#Group: Applications/System +#description devel +#Development files for %{pkgname}. 
+ +#files devel + +# Your development files go here +# Programmers documentation goes here +#doc doc + +# end of file diff --git a/RTCP/CNProc/Makefile.am b/RTCP/CNProc/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..5e566a0ff45eb92349149f0bca9fadfa39ae0f61 --- /dev/null +++ b/RTCP/CNProc/Makefile.am @@ -0,0 +1,16 @@ +SUBDIRS=src test + +ACLOCAL_AMFLAGS = -I $(top_srcdir)/autoconf_share + +pkgextdir = $(prefix)/config/$(PACKAGE) +pkgext_DATA = pkgext pkgextcppflags pkgextcxxflags pkgextldflags + +DISTCHECK_CONFIGURE_FLAGS=\ + --with-common=$(prefix) + +EXTRA_DIST = \ + Makefile.common \ + CNProc.spec \ + autoconf_share/compiletool + +include $(top_srcdir)/Makefile.common diff --git a/RTCP/CNProc/configure.in b/RTCP/CNProc/configure.in new file mode 100644 index 0000000000000000000000000000000000000000..4dc8a7078435a134adb840e3dd912286857fbc90 --- /dev/null +++ b/RTCP/CNProc/configure.in @@ -0,0 +1,79 @@ +dnl +dnl Process this file with autoconf to produce a configure script. +dnl +AC_INIT +dnl AC_CONFIG_AUX_DIR(config) +dnl AM_CONFIG_HEADER(config/config.h) +AM_CONFIG_HEADER(config.h) +AM_INIT_AUTOMAKE(CNProc, 1.0, no-define) +AM_PROG_AS(gcc) + +dnl Initialize for LOFAR (may set compilers) +lofar_INIT + +dnl Checks for programs. +AC_PROG_AWK +AC_PROG_YACC +AC_PROG_CC +AC_PROG_CXX +AM_PROG_LEX +AC_PROG_INSTALL +AC_PROG_LN_S +AC_DISABLE_SHARED +AC_PROG_LIBTOOL + +dnl Checks for libraries. + +dnl dnl Replace `main' with a function in -lfl: +dnl AC_CHECK_LIB(fl, main) +dnl dnl Replace `main' with a function in -lcosev_r: +dnl AC_CHECK_LIB(cosev_r, main) +dnl dnl Replace `main' with a function in -lcosnm_r: +dnl AC_CHECK_LIB(cosnm_r, main) +dnl dnl Replace `main' with a function in -lorb_r: +dnl AC_CHECK_LIB(orb_r, main) +dnl dnl Replace `main' with a function in -lpthread: +dnl AC_CHECK_LIB(pthread, main) +dnl dnl Replace `main' with a function in -lvport_r: +dnl AC_CHECK_LIB(vport_r, main) + +dnl Checks for header files. 
+AC_HEADER_STDC +AC_CHECK_HEADERS(unistd.h) + +dnl Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_TYPE_SIZE_T + +dnl Checks for library functions. +AC_FUNC_VPRINTF + +dnl +dnl Check for LOFAR specific things +dnl +lofar_GENERAL +dnl lofar_BLITZ +lofar_MPI +lofar_FFTW3F(0) +lofar_FFTW2(0) + +lofar_INTERNAL(LCS/Common,Common,,1,Common/LofarTypedefs.h,,) +lofar_INTERNAL(LCS/Stream,Stream,,1,Stream/Stream.h,,) +dnl lofar_INTERNAL(LCS/ACC/APS,APS,,1,APS/ParameterSet.h,,) +dnl lofar_INTERNAL(LCS/ACC/PLC,PLC,,1,PLC/ACCmain.h,,) +dnl lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,1,tinyCEP/TinyDataManager.h,,) +lofar_INTERNAL(RTCP/Interface,Interface,,1,Interface/Config.h,,) +lofar_EXTERNAL(boost,1,boost/multi_array.hpp,"") +lofar_EXTERNAL(mass,0,"",,/opt/mass) +lofar_EXTERNAL(zoid,0,zoid_api.h,"",,-I/cephome/romein/projects/zoid/zoid/lofar,,"-L/cephome/romein/projects/zoid/glibc-build-zoid -L/cephome/romein/projects/zoid/zoid/lofar",-llofar_blrts) +lofar_EXTERNAL(fcnp,0,fcnp_cn.h,fcnp_cn,/cephome/romein/packages/fcnp) + +dnl +dnl Output Makefiles +dnl +AC_OUTPUT( +src/Makefile +test/Makefile +Makefile +CNProc.spec +) diff --git a/RTCP/CNProc/package.dox b/RTCP/CNProc/package.dox new file mode 100644 index 0000000000000000000000000000000000000000..2685e772ac36c3643cb0231e0e1271bafb9dd29f --- /dev/null +++ b/RTCP/CNProc/package.dox @@ -0,0 +1,2 @@ +// \ingroup RTCP +// \defgroup CNProc CNProc Description diff --git a/RTCP/CNProc/scripts/CorrAppl.sh b/RTCP/CNProc/scripts/CorrAppl.sh new file mode 100755 index 0000000000000000000000000000000000000000..f67cf6c85cc4efb091c5418b6e2e61fc58957a9f --- /dev/null +++ b/RTCP/CNProc/scripts/CorrAppl.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# +# CorrAppl: a start/stop/status script for swlevel +# +# Copyright (C) 2007 +# ASTRON (Netherlands Foundation for Research in Astronomy) +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +# +# This program is free software; you can redistribute it and/or modify +# it 
under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Syntax: CorrAppl start|stop|status +# +# $Id$ +# + +# +# SyntaxError msg +# +SyntaxError() +{ + Msg=$1 + + [ -z "${Msg}" ] || echo "ERROR: ${Msg}" + echo "" + echo "Syntax: $(basename $0) start | stop | status" + echo "" + exit 1 +} + +# +# Start the program when it exists +# +start_prog() +{ + # put here your code to start your program + echo 'start_prog()' +} + +# +# Stop the program when it is running +# +stop_prog() +{ + # put here your code to stop your program + killall ApplController + ps -ef | grep -v grep | grep -v ACDaemon[^\ ] | grep ACDaemon 2>&1 >/dev/null + if [ $? -ne 0 ]; then + if [ -f ../etc/ACD.admin ]; then + rm ../etc/ACD.admin + fi + fi + echo 'Freeing CorrAppl (5 minutes)' + ssh $USER@bglsn /opt/lofar/bin/stopBGL.py +} + +# +# show status of program +# +# arg1 = levelnr +# +status_prog() +{ + levelnr=$1 + + # put here code to figure out the status of your program and + # fill the variables prog and pid with the right information + + # e.g. + prog=CorrAppl + + # this line should be left in, it shows the status in the right format + #echo ${levelnr} ${prog} ${pid} | awk '{ printf "%s : %-25s %s\n", $1, $2, $3 }' + echo ${levelnr} ${prog} `ssh $USER@bglsn /opt/lofar/bin/stopBGL.py --status=true` | awk '{ printf "%s : %-25s %s\n", $1, $2, $3 }' +} + +# +# MAIN +# + +# when no argument is given show syntax error. 
+if [ -z "$1" ]; then + SyntaxError +fi + +# first power down to this level +case $1 in + start) start_prog + ;; + stop) stop_prog + ;; + status) status_prog $2 + ;; + *) SyntaxError + ;; +esac diff --git a/RTCP/CNProc/scripts/prepare_CS1_BGL_Processing.py b/RTCP/CNProc/scripts/prepare_CS1_BGL_Processing.py new file mode 100755 index 0000000000000000000000000000000000000000..6cdbbcb6cd3e72d8ef03e9a8692907b78e5a952e --- /dev/null +++ b/RTCP/CNProc/scripts/prepare_CS1_BGL_Processing.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python + +import math +import time +import datetime +import os +import sys +import copy + +class CS1_Parset(object): + + def __init__(self): + self.stationList = list() + self.parameters = dict() + + def readFromFile(self, fileName): + lastline = '' + for line in open(fileName, 'r').readlines(): + lastline = lastline + line.split('#')[0] + lastline = lastline.rstrip() + if len(lastline) > 0 and lastline[-1] == '\\': + lastline = lastline[:-1] + elif '=' in lastline: + key, value = lastline.split('=') + self.parameters[key.strip()] = value.strip() + lastline = '' + + def writeToFile(self, fileName): + outf = open(fileName, 'w') + items = self.parameters.items() + items.sort() + for key, value in items: + outf.write(key + ' = ' + str(value) + '\n') + outf.close() + + def __contains__(self, key): + return key in self.parameters + + def __setitem__(self, key, value): + self.parameters[key] = value + + def __getitem__(self, key): + return self.parameters[key] + + def getInt32Vector(self, key): + ln = self.parameters[key] + ln_tmp = ln.split('[') + line = ln_tmp[1].split(']') + return [int(lp) for lp in line[0].split(',')] + + def getInt32(self, key): + return int(self.parameters[key]) + + def getStringVector(self, key): + line = self.parameters[key] + line.strip('[').strip(']') + return line.split(',') + + def getString(self, key): + return self.parameters[key] + + def getFloat(self, key): + return float(self.parameters[key]) + + def getBool(self, key): + 
return self.parameters[key] == 'true' + +class ClusterSlave(object): + def __init__(self, intName, extIP): + self.intName = intName + self.extIP = extIP + def getIntName(self): + return self.intName + def getExtIP(self): + return self.extIP + +class ClusterFEN(object): + def __init__(self, name, address, slaves = list()): + self.slaves = slaves + #Host.__init__(self, name, address) + def getSlaves(self, number = None): + return self.slaves[0:number] + def setSlaves(self, slaves): + self.slaves = slaves + def setSlavesByPattern(self, intNamePattern, extIPPattern, numberRange): + self.slaves = list() + for number in numberRange: + self.slaves.append(ClusterSlave(intNamePattern % number, extIPPattern % number)) + +def parseStationList(): + """ + pattern = '^CS010_dipole0|CS010_dipole4|CS010_dipole8|CS010_dipole12| \ + CS008_dipole0|CS008_dipole4|CS008_dipole8|CS008_dipole12| \ + CS001_dipole0|CS001_dipole4|CS001_dipole8|CS001_dipole12| \ + CS016_dipole0|CS016_dipole4|CS016_dipole8|CS016_dipole12$' + print 'pattern = ' + str(re.search(pattern, 'CS010_dipole8')) + """ + +def getInputNodes(stationList, parset): + inputNodelist = list() + + for s in stationList: + s = s.strip(" ") + s = s.strip("[ ]") + s = s.strip("'") + name = parset.getString('PIC.Core.' + s + '.port') + name=name.split(":") + name=name[0].strip("lii") + inputNodelist.append(int(name)) + + return inputNodelist + +if __name__ == '__main__': + + # create the parset + parset = CS1_Parset() + stationList = list() + + if os.path.exists("../share/Correlator.parset"): + + parset.readFromFile('../share/Correlator.parset') + + #read keys from parset file: OLAP.parset + if os.path.exists("OLAP.parset"): + parset.readFromFile('OLAP.parset') + else: + print 'file OLAP.parset does not exist!' + sys.exit(0) + + ''' + #set start/stop time + if parset.getString('OLAP.OLAP_Conn.station_Input_Transport') == 'NULL': + # Read from memory! 
+ parset['Observation.startTime'] = datetime.datetime.fromtimestamp(1) + else: + #start=int(time.asctime(time.gmtime()))+ 90 + start=int(time.time() + 90) + #parset['Observation.startTime'] = datetime.datetime.fromtimestamp(start) + parset['Observation.startTime'] = datetime.datetime.utcfromtimestamp(start) + + duration = 300 + + parset['Observation.stopTime'] = datetime.datetime.utcfromtimestamp(start + duration) + ''' + + if parset.getString('OLAP.OLAP_Conn.input_BGLProc_Transport') == 'Null': + parset['OLAP.OLAP_Conn.input_BGLProc_Transport'] = 'NULL' + + if parset.getString('OLAP.OLAP_Conn.station_Input_Transport') == 'Null': + parset['OLAP.OLAP_Conn.station_Input_Transport'] = 'NULL' + + if parset.getString('OLAP.OLAP_Conn.BGLProc_Storage_Transport') == 'Null': + parset['OLAP.OLAP_Conn.BGLProc_Storage_Transport'] = 'NULL' + + if not parset.getBool('OLAP.BGLProc.useZoid'): # override CS1.parset + print 'ZOID!!!!' + parset['OLAP.IONProc.useScatter'] = 'false' + parset['OLAP.IONProc.useGather'] = 'false' + parset['OLAP.BGLProc.nodesPerPset'] = 8 + parset['OLAP.IONProc.maxConcurrentComm'] = 2 + + BGLPartition = ('R000_128_0', 'R000_128_0Z')[parset.getBool('OLAP.BGLProc.useZoid')] + parset['CorrAppl.Correlator.partition'] = BGLPartition + + if parset.getBool('OLAP.IONProc.useGather'): + print 'useGather!!!!' 
+ #parset['OLAP.IONProc.integrationSteps'] = integrationTime + parset['OLAP.StorageProc.integrationSteps'] = 1 + else: + parset['OLAP.IONProc.integrationSteps'] = 1 + #parset['OLAP.StorageProc.integrationSteps'] = integrationTime + + if parset.getInt32('Observation.sampleClock') == 160: + parset['OLAP.BGLProc.integrationSteps'] = 608 + elif parset.getInt32('Observation.sampleClock') == 200: + parset['OLAP.BGLProc.integrationSteps'] = 768 + + #get the stations + stationList = parset.getStringVector('OLAP.storageStationNames') + parset['OLAP.nrRSPboards'] = len(stationList) + + #create input cluster objects + liifen = ClusterFEN(name = 'liifen', address = '129.125.99.51') + liifen.setSlavesByPattern('lii%03d', '10.162.0.%d', [1,2,3,4,5,6,7,8,9,10,11,12]) + + #set keys 'Input.InputNodes' and 'Input.OutputNodes' + nSubbands = len(parset.getInt32Vector('Observation.subbandList')) + nSubbandsPerCell = parset.getInt32('OLAP.subbandsPerPset') * parset.getInt32('OLAP.BGLProc.psetsPerCell') + nCells = float(nSubbands) / float(nSubbandsPerCell) + if not nSubbands % nSubbandsPerCell == 0: + raise Exception('Not a integer number of compute cells (nSubbands = %d and nSubbandsPerCell = %d)' % (nSubbands, nSubbandsPerCell)) + nCells = int(nCells) + host = copy.deepcopy(liifen) + slaves = host.getSlaves() + + inputNodes = getInputNodes(stationList, parset) + outputNodes = range(1, nCells + 1) + allNodes = inputNodes + [node for node in outputNodes if not node in inputNodes] + + inputIndices = range(len(inputNodes)) + outputIndices = [allNodes.index(node) for node in outputNodes] + + newslaves = [slaves[ind - 1] for ind in allNodes] + host.setSlaves(newslaves) + noProcesses = len(newslaves) + + parset['Input.InputNodes'] = inputIndices + parset['Input.OutputNodes'] = outputIndices + + bglprocIPs = [newslaves[j].getExtIP() for j in outputIndices] + parset['OLAP.OLAP_Conn.input_BGLProc_ServerHosts'] = '[' + ','.join(bglprocIPs) + ']' + + #create output cluster objects + listfen = 
ClusterFEN(name = 'listfen', address = '129.125.99.50') + listfen.setSlavesByPattern('list%03d', '10.181.0.%d', [1,2]) + + #set key 'Connections.BGLProc_Storage.ServerHosts' + nSubbandsPerCellStorage = parset.getInt32('OLAP.subbandsPerPset') + nPsetsPerStorage = parset.getInt32('OLAP.psetsPerStorage') + if not nSubbands % (nSubbandsPerCellStorage * nPsetsPerStorage) == 0: + raise Exception('Not a integer number of subbands per storage node!') + + noProcessesStorage = nSubbands / (nSubbandsPerCellStorage * nPsetsPerStorage) + host = copy.deepcopy(listfen) + + storageIPs = [s.getExtIP() for s in host.getSlaves(noProcessesStorage)] + parset['OLAP.OLAP_Conn.BGLProc_Storage_ServerHosts'] = '[' + ','.join(storageIPs) + ']' + + parset.writeToFile('./CS1_BGL_Processing.parset') + + else: + print 'file ../share/Correlator.parset does not exist!' + sys.exit(0) + diff --git a/RTCP/CNProc/scripts/startBGL.sh b/RTCP/CNProc/scripts/startBGL.sh new file mode 100755 index 0000000000000000000000000000000000000000..0f398e635bfc53df227b1d737f3f7acfe2498d5d --- /dev/null +++ b/RTCP/CNProc/scripts/startBGL.sh @@ -0,0 +1,25 @@ +# startBGL.sh jobName partition executable workingDir paramfile noNodes +# +# jobName +# partition +# executable executable file (should be in a place that is readable from BG/L) +# workingDir directory for output files (should be readable by BG/L) +# parameterfile jobName.ps +# noNodes number of nodes of the partition to use +# +# start the job and stores the jobID in jobName.jobID +# +# all ACC processes expect to be started with "ACC" as first parameter + +# start process + +if [ -f ../share/Correlator.parset ] +then + echo "../share/Correlator.parset file exist" +else + echo "Sorry, ../share/Correlator.parset file does not exist" +fi + +./prepare_$3.py + +cd $4; mpirun -partition $2 -mode VN -label -cwd $4 $4/$3 $4/CS1_BGL_Processing.parset 5422 >> ../log/$3.log 2>&1 & diff --git a/RTCP/CNProc/scripts/stopBGL.py b/RTCP/CNProc/scripts/stopBGL.py new file mode 
100755 index 0000000000000000000000000000000000000000..01e3ba80736f9f997f285f481f2e5ae15ccfc3df --- /dev/null +++ b/RTCP/CNProc/scripts/stopBGL.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python + +import socket +import os +import sys +import time +from optparse import OptionParser + +host = 'localhost' +port = 32031 +replyformat = 0 + +class switch(object): + def __init__(self, value): + self.value = value + self.fall = False + + def __iter__(self): + """Return the match method once, then stop""" + yield self.match + raise StopIteration + + def match(self, *args): + """Indicate whether or not to enter a case suite""" + if self.fall or not args: + return True + elif self.value in args: # changed for v1.5, see below + self.fall = True + return True + else: + return False + +################################################################ +# mmcs_cmd(szSocket, szCmd) +# +# mmcs_cmd -- send an mmcs command and gather and check the response. +# inputs: +# param0 -- remote tcp port to send command to. +# param1 -- command string +# outputs: +# return values in a list. +# + +def mmcs_cmd (socket, szCmd): + lines = list() + socket.send(szCmd) # execute the command. + if replyformat == 0: + results = socket.recv(1024) # read the result... + lines.append(results.rstrip("\n")) # get rid of lf at end. + else: + file = socket.makefile() + for line in file: + if line.find('\0') >= 0: + break + lines.append(line.rstrip("\n")) # get rid of lf at end. 
+ return lines + +def set_replyformat(socket, rformat): + results = mmcs_cmd(socket, 'replyformat ' + str(rformat) + '\n') + if results[0] != 'OK': + print 'set replyformat:' + str(rformat) + ' ...failed' + global replyformat + replyformat = rformat + +def list_jobs(socket): + set_replyformat(socket, 1) + return mmcs_cmd(socket, 'list_jobs\n') + +def list_blocks(socket): + set_replyformat(socket, 1) + return mmcs_cmd(socket, 'list_blocks\n') + +def jobId(socket): + results = list_jobs(socket) + for line in results: + if line.find(options.blockid) >= 0: + return line.split()[0] + +def free_block(socket): + set_replyformat(socket, 0) + results = mmcs_cmd(socket, 'free ' + options.blockid + '\n'); + if results[0] != 'OK': + print 'free \'%s\' ' % options.blockid + ' ...failed' + +def killjob(socket): + set_replyformat(socket, 0) + jobid = jobId(socket) + results = mmcs_cmd(socket, 'killjob ' + options.blockid + ' ' + jobid + '\n'); + if results[0] != 'OK': + print 'killjob ' + options.blockid + jobid + ' ...failed' + +def partition_exist(socket): + results = list_blocks(socket) + for line in results: + if line.find(options.blockid) >= 0 and line.find(options.user) >= 0: + return True + return False + +def block_status(socket): + set_replyformat(socket, 1) + results = list_blocks(socket) + for line in results: + if line.find(options.blockid) >= 0: + return line.split()[1] + +def job_status(socket): + set_replyformat(socket, 1) + results = list_jobs(socket) + for line in results: + if line.find(options.blockid) >= 0: + return line.split()[1] + +def show_block_status(socket): + set_replyformat(socket, 1) + results = list_jobs(socket) + for line in results: + if line.find(options.blockid) >= 0: + print line + + +# +# Start of mainline +# +if __name__ == '__main__': + + parser = OptionParser() + + parser.add_option('--user' , dest='user' , default=os.environ.get('USER', 'default'), type='string', help='username [%default]') + parser.add_option('--blockid' , dest='blockid' 
, default='R000_128_0Z' , type='string', help='name of the blockid [%default]') + parser.add_option('--status' , dest='status' , default='false' , type='string', help='Show status of the blockid ') + + # parse the options + (options, args) = parser.parse_args() + + remote = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + remote.connect((host, port)) + + results = mmcs_cmd(remote, 'set_username ' + options.user + '\n'); + if results[0] != 'OK': + print 'set_username ' + options.user + ' ...failed' + + if options.status == 'true': + if partition_exist(remote): + print 'UP' + else: + print 'DOWN' + sys.exit(0) + + partitionExist = partition_exist(remote) + bl_stat = '' + job_stat = '' + while (partitionExist): + status_block = block_status(remote) + if status_block != bl_stat: + print 'Block %s' % options.blockid + ' status: ' + str(status_block) + bl_stat = status_block + for case in switch(status_block): + if case('A'): + free_block(remote) + break + if case('B'): + free_block(remote) + break + if case('D'): + break + if case('T'): + break + if case('I'): + status_job = job_status(remote) + if status_job != job_stat: + print 'Job status: ' + str(status_job) + job_stat = status_job + for case in switch(status_job): + if case('S'): + killjob(remote) + break + if case('R'): + killjob(remote) + break + if case('D'): + break + break + if case(): # default, could also just omit condition or 'if True' + print "something else!" 
+ # No need to break here, it'll stop anyway + + time.sleep(2) + partitionExist = partition_exist(remote) + + remote.close() + + sys.exit(0) diff --git a/RTCP/CNProc/scripts/stopBGL.sh b/RTCP/CNProc/scripts/stopBGL.sh new file mode 100755 index 0000000000000000000000000000000000000000..926c370ed88085ac9c55611e2a81451d542aafc2 --- /dev/null +++ b/RTCP/CNProc/scripts/stopBGL.sh @@ -0,0 +1,7 @@ +# stopAP.sh partition jobName +# +# partition BG/L partition the job is running on +# jobName The name of the job +# + +ssh $USER@bglsn /opt/lofar/bin/stopBGL.py --blockid=$1 diff --git a/RTCP/CNProc/scripts/swlevel.conf b/RTCP/CNProc/scripts/swlevel.conf new file mode 100644 index 0000000000000000000000000000000000000000..9ffd28f7b36c8cf3eb6c473c2c8fe6f09e68aaba --- /dev/null +++ b/RTCP/CNProc/scripts/swlevel.conf @@ -0,0 +1,11 @@ +# +# swlevel.conf +# +# Table to manage the programs that should be started and stopped +# level : up : down : root : mpi : program +# + +1:u:d:::ACDaemon +6::d:::ApplController +6::d:::CorrAppl + diff --git a/RTCP/CNProc/src/AsyncCommunication.cc b/RTCP/CNProc/src/AsyncCommunication.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f5a0e78ad278011f6946d77ee01f903da0c2317 --- /dev/null +++ b/RTCP/CNProc/src/AsyncCommunication.cc @@ -0,0 +1,209 @@ +//# Always <lofar_config.h> first! 
+#include <lofar_config.h> + +#include <AsyncCommunication.h> + +#include <Common/Timer.h> + +#include <cassert> +#include <map> +#include <iostream> +#include <vector> + +#define USE_TIMING 0 + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MPI + +/* Stores the communicator and starts both handle counters at zero. */ +AsyncCommunication::AsyncCommunication(MPI_Comm comm) +{ + itsCommunicator = comm; + itsCurrentReadHandle = 0; + itsCurrentWriteHandle = 0; +} + + +AsyncCommunication::~AsyncCommunication() +{ +} + +/* Posts a non-blocking receive of size bytes from rank source and returns the handle under which the request is stored. */ +int AsyncCommunication::asyncRead(void* buf, unsigned size, unsigned source, int tag) +{ + AsyncRequest* req = new AsyncRequest(); + + int res = MPI_Irecv(buf, size, MPI_BYTE, source, tag, itsCommunicator, &req->mpiReq); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Irecv() failed" << std::endl; + exit(1); + } + + req->buf = buf; + req->size = size; + req->rank = source; + req->tag = tag; + + int handle = itsCurrentReadHandle++; + itsReadHandleMap[handle] = req; + + return handle; +} + +/* Posts a non-blocking send of size bytes to rank dest and returns the handle under which the request is stored. */ +int AsyncCommunication::asyncWrite(const void* buf, unsigned size, unsigned dest, int tag) +{ + AsyncRequest* req = new AsyncRequest(); + + /* MPI_Isend is called with a non-const buffer pointer here, hence the cast. */ + int res = MPI_Isend(const_cast<void*>(buf), size, MPI_BYTE, dest, tag, itsCommunicator, &req->mpiReq); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Isend() failed" << std::endl; + exit(1); + } + + req->buf = const_cast<void*>(buf); + req->size = size; + req->rank = dest; + req->tag = tag; + + int handle = itsCurrentWriteHandle++; + itsWriteHandleMap[handle] = req; + + return handle; +} + + +/* Blocks until the read identified by handle has completed, then releases its bookkeeping entry. */ +void AsyncCommunication::waitForRead(int handle) +{ + /* find() instead of operator[]: operator[] would insert a NULL entry for an unknown handle, which MPI_Wait below would then dereference. */ + std::map<int, AsyncRequest*>::iterator it = itsReadHandleMap.find(handle); + if (it == itsReadHandleMap.end()) { + std::cerr << "waitForRead(): unknown handle " << handle << std::endl; + exit(1); + } + + AsyncRequest* req = it->second; + MPI_Status status; + + int res = MPI_Wait(&req->mpiReq, &status); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Wait() failed" << std::endl; + exit(1); + } + + // done, now remove from map, and free req + itsReadHandleMap.erase(it); + delete req; +} + +/* Blocks until the write identified by handle has completed, then releases its bookkeeping entry. */ +void AsyncCommunication::waitForWrite(int handle) +{ + /* find() instead of operator[], for the same reason as in waitForRead(). */ + std::map<int, AsyncRequest*>::iterator it = itsWriteHandleMap.find(handle); + if (it == itsWriteHandleMap.end()) { + std::cerr << "waitForWrite(): unknown handle " << handle << std::endl; + exit(1); + } + + AsyncRequest* req = it->second; + MPI_Status status; + + 
int res = MPI_Wait(&req->mpiReq, &status); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Wait() failed" << std::endl; + exit(1); + } + + // done, now remove from map, and free req + itsWriteHandleMap.erase(it); + delete req; +} + +/* Blocks until any outstanding read has completed, reports its buffer, size, source rank and tag through the reference arguments, releases its bookkeeping entry and returns its handle. */ +int AsyncCommunication::waitForAnyRead(void*& buf, unsigned& size, unsigned& source, int& tag) +{ + MPI_Status status; + int count = itsReadHandleMap.size(); + + /* With nothing outstanding MPI_Waitany reports index MPI_UNDEFINED, which must never be used to index mapping. */ + if (count == 0) { + std::cerr << "waitForAnyRead() called without outstanding reads" << std::endl; + exit(1); + } + + /* std::vector instead of variable-length arrays, which are not standard C++. */ + std::vector<MPI_Request> reqs(count); + std::vector<int> mapping(count); + + int i = 0; + for (std::map<int, AsyncRequest*>::const_iterator it = itsReadHandleMap.begin(); it != itsReadHandleMap.end(); ++it) { + reqs[i] = it->second->mpiReq; + mapping[i] = it->first; + i++; + } + + NSTimer waitAnyTimer("MPI_Waitany", USE_TIMING); + waitAnyTimer.start(); + int index = MPI_UNDEFINED; + int res = MPI_Waitany(count, &reqs[0], &index, &status); + waitAnyTimer.stop(); + + if (res != MPI_SUCCESS || index == MPI_UNDEFINED) { + std::cerr << "MPI_Waitany() failed" << std::endl; + exit(1); + } + + int handle = mapping[index]; + AsyncRequest* req = itsReadHandleMap[handle]; + + buf = req->buf; + size = req->size; + source = req->rank; + tag = req->tag; + + itsReadHandleMap.erase(handle); + delete req; + return handle; +} + + +/* Blocks until every outstanding read has completed, frees all read requests and restarts read handle numbering at zero. */ +void AsyncCommunication::waitForAllReads() +{ + int count = itsReadHandleMap.size(); + + /* Nothing to wait for when no reads are outstanding; the cleanup below still runs. */ + if (count > 0) { + std::vector<MPI_Request> reqs(count); + std::vector<MPI_Status> status(count); + + int i = 0; + for (std::map<int, AsyncRequest*>::const_iterator it = itsReadHandleMap.begin(); it != itsReadHandleMap.end(); ++it) { + reqs[i] = it->second->mpiReq; + i++; + } + + int res = MPI_Waitall(count, &reqs[0], &status[0]); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Waitall() failed" << std::endl; + exit(1); + } + } + + for (std::map<int, AsyncRequest*>::const_iterator it = itsReadHandleMap.begin(); it != itsReadHandleMap.end(); ++it) { + delete it->second; + } + itsReadHandleMap.clear(); + itsCurrentReadHandle = 0; +} + + +/* Blocks until every outstanding write has completed, frees all write requests and restarts write handle numbering at zero. */ +void 
AsyncCommunication::waitForAllWrites() +{ + int count = itsWriteHandleMap.size(); + + /* Nothing to wait for when no writes are outstanding; the cleanup below still runs. */ + if (count > 0) { + std::vector<MPI_Request> reqs(count); + std::vector<MPI_Status> status(count); + + int i = 0; + for (std::map<int, AsyncRequest*>::const_iterator it = itsWriteHandleMap.begin(); it != itsWriteHandleMap.end(); ++it) { + reqs[i] = it->second->mpiReq; + i++; + } + + int res = MPI_Waitall(count, &reqs[0], &status[0]); + if (res != MPI_SUCCESS) { + std::cerr << "MPI_Waitall() failed" << std::endl; + exit(1); + } + } + + for (std::map<int, AsyncRequest*>::const_iterator it = itsWriteHandleMap.begin(); it != itsWriteHandleMap.end(); ++it) { + delete it->second; + } + itsWriteHandleMap.clear(); + itsCurrentWriteHandle = 0; +} + + +#endif // HAVE_MPI + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/AsyncCommunication.h b/RTCP/CNProc/src/AsyncCommunication.h new file mode 100644 index 0000000000000000000000000000000000000000..00da4443beb74005b49adb4b31bcdd260245cc9f --- /dev/null +++ b/RTCP/CNProc/src/AsyncCommunication.h @@ -0,0 +1,59 @@ +#ifndef LOFAR_CNPROC_ASYNC_COMMUNICATION_H +#define LOFAR_CNPROC_ASYNC_COMMUNICATION_H + +#if defined HAVE_MPI +#define MPICH_IGNORE_CXX_SEEK +#include <mpi.h> +#endif + +#include <map> + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MPI + +class AsyncRequest { +public: + MPI_Request mpiReq; + void* buf; + unsigned size; + unsigned rank; + int tag; +}; + +class AsyncCommunication { + public: + AsyncCommunication(MPI_Comm communicator = MPI_COMM_WORLD); + ~AsyncCommunication(); + + // returns handle to this read + int asyncRead(void* buf, unsigned size, unsigned source, int tag); + + // returns handle to this write + int asyncWrite(const void* buf, unsigned size, unsigned dest, int tag); + + void waitForRead(int handle); + void waitForWrite(int handle); + + // returns the handle of the read that was done.
+ int waitForAnyRead(void*& buf, unsigned& size, unsigned& source, int& tag); + + void waitForAllReads(); + void waitForAllWrites(); + +private: + + MPI_Comm itsCommunicator; + int itsCurrentReadHandle; + int itsCurrentWriteHandle; + std::map<int, AsyncRequest*> itsReadHandleMap; + std::map<int, AsyncRequest*> itsWriteHandleMap; +}; + +#endif // defined HAVE_MPI + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/AsyncTranspose.cc b/RTCP/CNProc/src/AsyncTranspose.cc new file mode 100644 index 0000000000000000000000000000000000000000..02b41e1ed3a0cea6355054ce36de51f572b35f56 --- /dev/null +++ b/RTCP/CNProc/src/AsyncTranspose.cc @@ -0,0 +1,123 @@ +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +#include <AsyncTranspose.h> + +#include <Interface/CN_Mapping.h> +#include <Interface/PrintVector.h> + +#include <cassert> + + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MPI + +#define MAX_TAG 100000 // The maximum tag we use to represent a data message. + // Higher tags are metadata. 
+
+// Marker stored in itsGroupNumber when our rank is not found among the cores of our pset.
+static const unsigned NO_GROUP = static_cast<unsigned>(-1);
+
+template <typename SAMPLE_TYPE> AsyncTranspose<SAMPLE_TYPE>::AsyncTranspose(bool isTransposeInput, bool isTransposeOutput, unsigned nrCoresPerPset,
+    const LocationInfo &locationInfo,
+    const std::vector<unsigned> &inputPsets,
+    const std::vector<unsigned> &outputPsets, unsigned nrSamplesToCNProc)
+:
+  itsIsTransposeInput(isTransposeInput),
+  itsIsTransposeOutput(isTransposeOutput),
+  itsInputPsets(inputPsets),
+  itsOutputPsets(outputPsets),
+  itsLocationInfo(locationInfo)
+{
+  itsGroupNumber = NO_GROUP; // replaced below by the core index that remaps onto our rank
+  for(unsigned core = 0; core < nrCoresPerPset; core++) {
+    unsigned rank = locationInfo.remapOnTree(locationInfo.psetNumber(), core);
+    if(rank == locationInfo.rank()) {
+      itsGroupNumber = core;
+      break;
+    }
+  }
+
+  for(unsigned i=0; i<inputPsets.size(); i++) {
+    unsigned rank = locationInfo.remapOnTree(inputPsets[i], itsGroupNumber);
+    itsRankToPsetIndex[rank] = i; // rank of our group's core in input pset i
+  }
+
+  itsMessageSize = InputData<SAMPLE_TYPE>::requiredSize(1, nrSamplesToCNProc);
+  dataHandles.resize(inputPsets.size());
+  metaDataHandles.resize(inputPsets.size());
+  itsAsyncComm = new AsyncCommunication();
+}
+
+template <typename SAMPLE_TYPE> AsyncTranspose<SAMPLE_TYPE>::~AsyncTranspose()
+{
+  delete itsAsyncComm;
+}
+
+// Post one receive for the samples and one for the metadata of every input pset.
+template <typename SAMPLE_TYPE> void AsyncTranspose<SAMPLE_TYPE>::postAllReceives(TransposedData<SAMPLE_TYPE> *transposedData)
+{
+  assert(itsGroupNumber != NO_GROUP); // remapOnTree() needs a real core number
+  for(unsigned i=0; i<itsInputPsets.size(); i++) {
+    void* buf = (void*) transposedData->samples[i].origin();
+    unsigned rank = itsLocationInfo.remapOnTree(itsInputPsets[i], itsGroupNumber); // TODO cache this? maybe in locationInfo itself?
+    assert(rank < MAX_TAG); // a larger data tag would be taken for metadata in waitForAnyReceive()
+    dataHandles[i] = itsAsyncComm->asyncRead(buf, itsMessageSize, rank, rank);
+    metaDataHandles[i] = itsAsyncComm->asyncRead(&transposedData->metaData[i], sizeof(SubbandMetaData), rank, rank + MAX_TAG);
+  }
+}
+
+// returns station number (= pset index)
+template <typename SAMPLE_TYPE> unsigned AsyncTranspose<SAMPLE_TYPE>::waitForAnyReceive()
+{
+  void* buf;
+  unsigned size, source;
+  int tag;
+
+  while(true) {
+    // This read could return either a data message, or a meta data message.
+    itsAsyncComm->waitForAnyRead(buf, size, source, tag);
+
+    // source is the real rank, calc pset index
+    unsigned psetIndex = itsRankToPsetIndex[source];
+
+    // -1 marks a part as received; the pset is complete once both parts are in.
+    bool isData = tag < MAX_TAG;
+    int &arrived = isData ? dataHandles[psetIndex] : metaDataHandles[psetIndex];
+    int &other   = isData ? metaDataHandles[psetIndex] : dataHandles[psetIndex];
+
+    arrived = -1;
+    if(other == -1)
+      return psetIndex;
+  }
+}
+
+// Post the sends of the samples and the metadata for output pset number outputPsetNr.
+template <typename SAMPLE_TYPE> void AsyncTranspose<SAMPLE_TYPE>::asyncSend(unsigned outputPsetNr, const InputData<SAMPLE_TYPE> *inputData)
+{
+  assert(itsGroupNumber != NO_GROUP); // remapOnTree() needs a real core number
+  unsigned pset = itsOutputPsets[outputPsetNr];
+  unsigned rank = itsLocationInfo.remapOnTree(pset, itsGroupNumber);
+  int tag = itsLocationInfo.rank();
+  assert(tag < MAX_TAG); // the receiver tells data from metadata by tag < MAX_TAG
+
+  itsAsyncComm->asyncWrite(inputData->samples[outputPsetNr].origin(), itsMessageSize, rank, tag);
+  itsAsyncComm->asyncWrite(&inputData->metaData[outputPsetNr], sizeof(SubbandMetaData), rank, tag + MAX_TAG);
+}
+
+template <typename SAMPLE_TYPE> void AsyncTranspose<SAMPLE_TYPE>::waitForAllSends()
+{
+  // this includes the metadata writes...
+  itsAsyncComm->waitForAllWrites();
+}
+
+template class AsyncTranspose<i4complex>;
+template class AsyncTranspose<i8complex>;
+template class AsyncTranspose<i16complex>;
+
+#endif // MPI
+
+} // namespace RTCP
+} // namespace LOFAR
diff --git a/RTCP/CNProc/src/AsyncTranspose.h b/RTCP/CNProc/src/AsyncTranspose.h
new file mode 100644
index 0000000000000000000000000000000000000000..f32dd0e9e96ff684dc5f2e402f86e355fb873b56
--- /dev/null
+++ b/RTCP/CNProc/src/AsyncTranspose.h
@@ -0,0 +1,90 @@
+#ifndef LOFAR_CNPROC_ASYNC_TRANSPOSE_H
+#define LOFAR_CNPROC_ASYNC_TRANSPOSE_H
+
+#include <AsyncCommunication.h>
+#include <InputData.h>
+#include <LocationInfo.h>
+#include <TransposedData.h>
+#include <Interface/SubbandMetaData.h>
+
+#if defined HAVE_MPI
+#define MPICH_IGNORE_CXX_SEEK
+#include <mpi.h>
+#endif
+
+#if defined HAVE_BGL
+#include <bglpersonality.h>
+#endif
+
+#include <map>
+#include <vector>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+#if defined HAVE_MPI
+
+// Nodes in input psets read outputPsets.size subbands from their I/O node (one by one).
+// Cores communicate with the same logical core number in another pset
+// (due to an extra mapping, this is not the physical core number).
+
+// # sends = size outputPsets (= nrSubbands) on the input nodes.
+// # recvs = size inputPsets (= nrStations) on the output nodes.
+// Only the output nodes are actually calculating (filtering and correlating).
+
+template <typename SAMPLE_TYPE> class AsyncTranspose
+{
+  public:
+    // inputPsets, outputPsets and the LocationInfo are kept by reference,
+    // so they must stay alive for as long as this object is used.
+    AsyncTranspose(bool isTransposeInput, bool isTransposeOutput, unsigned nrCoresPerPset, const LocationInfo &,
+                   const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets, unsigned nrSamplesToCNProc);
+
+    ~AsyncTranspose();
+
+    // Post all async receives for the transpose.
+    void postAllReceives(TransposedData<SAMPLE_TYPE> *transposedData);
+
+    // Wait for a data message. Returns the station number where the message originates.
+    unsigned waitForAnyReceive();
+
+    // Asynchronously send a subband.
+    void asyncSend(unsigned outputPsetNr, const InputData<SAMPLE_TYPE> *inputData);
+
+    // Make sure all async sends have finished.
+    void waitForAllSends();
+
+  private:
+
+    bool itsIsTransposeInput, itsIsTransposeOutput;
+
+    // the size of a data message
+    unsigned itsMessageSize;
+
+    // A mapping that tells us, if we receive a message from a source,
+    // to which pset that source belongs.
+    std::map<unsigned, unsigned> itsRankToPsetIndex;
+
+    AsyncCommunication* itsAsyncComm;
+    const std::vector<unsigned> &itsInputPsets;
+    const std::vector<unsigned> &itsOutputPsets;
+    const LocationInfo &itsLocationInfo;
+
+    // Two vectors that contain the handles to the asynchronous reads.
+    // The vectors are indexed by the inputPset index.
+    // The value is -1 if the read finished.
+    std::vector<int> dataHandles;
+    std::vector<int> metaDataHandles;
+
+    // The number of the transpose group we belong to.
+    // The cores with the same index in a pset together form a group.
+    unsigned itsGroupNumber;
+};
+
+#endif // defined HAVE_MPI
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/CNProc/src/BandPass.cc b/RTCP/CNProc/src/BandPass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..abb20967866ed8db815bed5f0221c281356964c2
--- /dev/null
+++ b/RTCP/CNProc/src/BandPass.cc
@@ -0,0 +1,2152 @@
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h> + +#include <BandPass.h> +#include <Interface/AlignedStdAllocator.h> +#include <Interface/Config.h> + +#include <vector> + +#if defined HAVE_FFTW3 +#include <fftw3.h> +#elif defined HAVE_FFTW2 +#include <fftw.h> +#else +#error Should have FFTW3 or FFTW2 installed +#endif + +#include <complex> // FIXME +#include <iostream> // FIXME + +typedef std::complex<float> fcomplex; + + +namespace LOFAR { +namespace RTCP { + + +const float BandPass::stationFilterConstants[65536] = +{ + 36, 36, 35, 35, 34, 33, 32, 31, + 29, 28, 26, 25, 23, 21, 20, 18, + 17, 15, 14, 12, 11, 10, 9, 9, + 8, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 8, 8, 8, 9, 9, 9, + 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 15, 15, + 15, 15, 15, 15, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 20, + 20, 20, 20, 20, 20, 20, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, + 22, 22, 22, 22, 22, 22, 23, 23, + 23, 23, 23, 23, 23, 24, 24, 24, + 24, 24, 24, 25, 25, 25, 25, 25, + 25, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 28, + 28, 28, 28, 28, 28, 29, 29, 29, + 29, 29, 29, 30, 30, 30, 30, 30, + 31, 31, 31, 31, 31, 31, 32, 32, + 32, 32, 32, 32, 33, 33, 33, 33, + 33, 33, 34, 34, 34, 34, 34, 35, + 35, 35, 35, 35, 36, 36, 36, 36, + 36, 37, 37, 37, 37, 37, 37, 38, + 38, 38, 38, 38, 39, 39, 39, 39, + 39, 40, 40, 40, 40, 40, 41, 41, + 41, 41, 42, 42, 42, 42, 42, 43, + 43, 43, 43, 43, 44, 44, 44, 44, + 44, 45, 45, 45, 45, 46, 46, 46, + 46, 46, 47, 47, 47, 47, 48, 48, + 48, 48, 49, 49, 49, 49, 50, 50, + 50, 50, 50, 51, 51, 51, 51, 52, + 52, 52, 52, 53, 53, 53, 53, 54, + 54, 54, 54, 55, 55, 55, 55, 56, + 56, 56, 56, 57, 57, 57, 57, 58, + 58, 58, 58, 59, 59, 59, 59, 60, + 60, 60, 60, 61, 61, 61, 62, 62, + 62, 62, 63, 63, 
63, 63, 64, 64, + 64, 65, 65, 65, 65, 66, 66, 66, + 67, 67, 67, 67, 68, 68, 68, 69, + 69, 69, 69, 70, 70, 70, 71, 71, + 71, 71, 72, 72, 72, 73, 73, 73, + 74, 74, 74, 74, 75, 75, 75, 76, + 76, 76, 77, 77, 77, 77, 78, 78, + 78, 79, 79, 79, 80, 80, 80, 81, + 81, 81, 82, 82, 82, 83, 83, 83, + 84, 84, 84, 85, 85, 85, 85, 86, + 86, 86, 87, 87, 87, 88, 88, 88, + 89, 89, 89, 90, 90, 90, 91, 91, + 92, 92, 92, 93, 93, 93, 94, 94, + 94, 95, 95, 95, 96, 96, 96, 97, + 97, 97, 98, 98, 99, 99, 99, 100, + 100, 100, 101, 101, 101, 102, 102, 103, + 103, 103, 104, 104, 104, 105, 105, 105, + 106, 106, 107, 107, 107, 108, 108, 108, + 109, 109, 110, 110, 110, 111, 111, 112, + 112, 112, 113, 113, 113, 114, 114, 115, + 115, 115, 116, 116, 117, 117, 117, 118, + 118, 119, 119, 119, 120, 120, 121, 121, + 121, 122, 122, 123, 123, 123, 124, 124, + 125, 125, 125, 126, 126, 127, 127, 127, + 128, 128, 129, 129, 130, 130, 130, 131, + 131, 132, 132, 132, 133, 133, 134, 134, + 135, 135, 135, 136, 136, 137, 137, 138, + 138, 138, 139, 139, 140, 140, 141, 141, + 141, 142, 142, 143, 143, 144, 144, 145, + 145, 145, 146, 146, 147, 147, 148, 148, + 149, 149, 149, 150, 150, 151, 151, 152, + 152, 153, 153, 153, 154, 154, 155, 155, + 156, 156, 157, 157, 158, 158, 159, 159, + 159, 160, 160, 161, 161, 162, 162, 163, + 163, 164, 164, 165, 165, 165, 166, 166, + 167, 167, 168, 168, 169, 169, 170, 170, + 171, 171, 172, 172, 173, 173, 174, 174, + 174, 175, 175, 176, 176, 177, 177, 178, + 178, 179, 179, 180, 180, 181, 181, 182, + 182, 183, 183, 184, 184, 185, 185, 186, + 186, 187, 187, 188, 188, 189, 189, 190, + 190, 191, 191, 192, 192, 193, 193, 194, + 194, 195, 195, 196, 196, 197, 197, 198, + 198, 199, 199, 200, 200, 201, 201, 202, + 202, 203, 203, 204, 204, 205, 205, 206, + 206, 207, 207, 208, 208, 209, 209, 210, + 210, 211, 211, 212, 212, 213, 213, 214, + 214, 215, 215, 216, 217, 217, 218, 218, + 219, 219, 220, 220, 221, 221, 222, 222, + 223, 223, 224, 224, 225, 225, 226, 226, + 227, 227, 228, 229, 229, 230, 230, 
231, + 231, 232, 232, 233, 233, 234, 234, 235, + 235, 236, 236, 237, 237, 238, 239, 239, + 240, 240, 241, 241, 242, 242, 243, 243, + 244, 244, 245, 245, 246, 247, 247, 248, + 248, 249, 249, 250, 250, 251, 251, 252, + 252, 253, 253, 254, 255, 255, 256, 256, + 257, 257, 258, 258, 259, 259, 260, 260, + 261, 261, 262, 263, 263, 264, 264, 265, + 265, 266, 266, 267, 267, 268, 268, 269, + 270, 270, 271, 271, 272, 272, 273, 273, + 274, 274, 275, 275, 276, 277, 277, 278, + 278, 279, 279, 280, 280, 281, 281, 282, + 282, 283, 283, 284, 285, 285, 286, 286, + 287, 287, 288, 288, 289, 289, 290, 290, + 291, 291, 292, 293, 293, 294, 294, 295, + 295, 296, 296, 297, 297, 298, 298, 299, + 299, 300, 300, 301, 302, 302, 303, 303, + 304, 304, 305, 305, 306, 306, 307, 307, + 308, 308, 309, 309, 310, 311, 311, 312, + 312, 313, 313, 314, 314, 315, 315, 316, + 316, 317, 317, 318, 318, 319, 319, 320, + 320, 321, 321, 322, 322, 323, 323, 324, + 325, 325, 326, 326, 327, 327, 328, 328, + 329, 329, 330, 330, 331, 331, 332, 332, + 333, 333, 334, 334, 335, 335, 336, 336, + 337, 337, 338, 338, 339, 339, 340, 340, + 341, 341, 342, 342, 343, 343, 344, 344, + 345, 345, 346, 346, 347, 347, 348, 348, + 348, 349, 349, 350, 350, 351, 351, 352, + 352, 353, 353, 354, 354, 355, 355, 356, + 356, 357, 357, 358, 358, 359, 359, 359, + 360, 360, 361, 361, 362, 362, 363, 363, + 364, 364, 365, 365, 365, 366, 366, 367, + 367, 368, 368, 369, 369, 370, 370, 370, + 371, 371, 372, 372, 373, 373, 373, 374, + 374, 375, 375, 376, 376, 377, 377, 377, + 378, 378, 379, 379, 380, 380, 380, 381, + 381, 382, 382, 382, 383, 383, 384, 384, + 385, 385, 385, 386, 386, 387, 387, 387, + 388, 388, 389, 389, 389, 390, 390, 391, + 391, 391, 392, 392, 393, 393, 393, 394, + 394, 394, 395, 395, 396, 396, 396, 397, + 397, 397, 398, 398, 399, 399, 399, 400, + 400, 400, 401, 401, 401, 402, 402, 402, + 403, 403, 404, 404, 404, 405, 405, 405, + 406, 406, 406, 407, 407, 407, 408, 408, + 408, 409, 409, 409, 409, 410, 410, 410, + 411, 411, 411, 
412, 412, 412, 413, 413, + 413, 414, 414, 414, 414, 415, 415, 415, + 416, 416, 416, 416, 417, 417, 417, 418, + 418, 418, 418, 419, 419, 419, 419, 420, + 420, 420, 420, 421, 421, 421, 421, 422, + 422, 422, 422, 423, 423, 423, 423, 424, + 424, 424, 424, 425, 425, 425, 425, 425, + 426, 426, 426, 426, 427, 427, 427, 427, + 427, 428, 428, 428, 428, 428, 429, 429, + 429, 429, 429, 429, 430, 430, 430, 430, + 430, 430, 431, 431, 431, 431, 431, 431, + 432, 432, 432, 432, 432, 432, 432, 433, + 433, 433, 433, 433, 433, 433, 434, 434, + 434, 434, 434, 434, 434, 434, 434, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 434, 434, 434, 434, 434, 434, 434, + 434, 433, 433, 433, 433, 433, 433, 433, + 432, 432, 432, 432, 432, 432, 432, 431, + 431, 431, 431, 431, 430, 430, 430, 430, + 430, 429, 429, 429, 429, 429, 428, 428, + 428, 428, 428, 427, 427, 427, 427, 426, + 426, 426, 426, 425, 425, 425, 425, 424, + 424, 424, 424, 423, 423, 423, 423, 422, + 422, 422, 421, 421, 421, 420, 420, 420, + 420, 419, 419, 419, 418, 418, 418, 417, + 417, 417, 416, 416, 416, 415, 415, 415, + 414, 414, 413, 413, 413, 412, 412, 412, + 411, 411, 410, 410, 410, 409, 409, 409, + 408, 408, 407, 407, 406, 406, 406, 405, + 405, 404, 404, 403, 403, 403, 402, 402, + 401, 401, 400, 400, 399, 399, 398, 398, + 398, 397, 397, 396, 396, 395, 395, 394, + 394, 393, 393, 392, 392, 391, 391, 390, + 389, 389, 388, 388, 387, 387, 386, 386, + 385, 385, 384, 383, 383, 382, 382, 381, + 381, 380, 379, 379, 378, 378, 377, 376, + 376, 375, 375, 374, 373, 373, 372, 372, + 371, 370, 370, 369, 368, 368, 367, 367, 
+ 366, 365, 365, 364, 363, 363, 362, 361, + 361, 360, 359, 358, 358, 357, 356, 356, + 355, 354, 354, 353, 352, 351, 351, 350, + 349, 349, 348, 347, 346, 346, 345, 344, + 343, 343, 342, 341, 340, 339, 339, 338, + 337, 336, 336, 335, 334, 333, 332, 332, + 331, 330, 329, 328, 328, 327, 326, 325, + 324, 323, 323, 322, 321, 320, 319, 318, + 318, 317, 316, 315, 314, 313, 312, 311, + 311, 310, 309, 308, 307, 306, 305, 304, + 303, 302, 302, 301, 300, 299, 298, 297, + 296, 295, 294, 293, 292, 291, 290, 289, + 288, 288, 287, 286, 285, 284, 283, 282, + 281, 280, 279, 278, 277, 276, 275, 274, + 273, 272, 271, 270, 269, 268, 267, 266, + 265, 264, 262, 261, 260, 259, 258, 257, + 256, 255, 254, 253, 252, 251, 250, 249, + 248, 247, 245, 244, 243, 242, 241, 240, + 239, 238, 237, 236, 234, 233, 232, 231, + 230, 229, 228, 227, 225, 224, 223, 222, + 221, 220, 218, 217, 216, 215, 214, 213, + 211, 210, 209, 208, 207, 206, 204, 203, + 202, 201, 200, 198, 197, 196, 195, 193, + 192, 191, 190, 189, 187, 186, 185, 184, + 182, 181, 180, 179, 177, 176, 175, 174, + 172, 171, 170, 168, 167, 166, 165, 163, + 162, 161, 159, 158, 157, 155, 154, 153, + 152, 150, 149, 148, 146, 145, 144, 142, + 141, 140, 138, 137, 136, 134, 133, 132, + 130, 129, 127, 126, 125, 123, 122, 121, + 119, 118, 116, 115, 114, 112, 111, 109, + 108, 107, 105, 104, 102, 101, 100, 98, + 97, 95, 94, 93, 91, 90, 88, 87, + 85, 84, 82, 81, 80, 78, 77, 75, + 74, 72, 71, 69, 68, 66, 65, 64, + 62, 61, 59, 58, 56, 55, 53, 52, + 50, 49, 47, 46, 44, 43, 41, 40, + 38, 37, 35, 34, 32, 30, 29, 27, + 26, 24, 23, 21, 20, 18, 17, 15, + 14, 12, 10, 9, 7, 6, 4, 3, + 1, 0, -2, -4, -5, -7, -8, -10, + -11, -13, -15, -16, -18, -19, -21, -23, + -24, -26, -27, -29, -31, -32, -34, -35, + -37, -39, -40, -42, -43, -45, -47, -48, + -50, -52, -53, -55, -57, -58, -60, -61, + -63, -65, -66, -68, -70, -71, -73, -75, + -76, -78, -80, -81, -83, -84, -86, -88, + -89, -91, -93, -94, -96, -98, -99, -101, + -103, -105, -106, -108, -110, -111, -113, -115, + -116, 
-118, -120, -121, -123, -125, -126, -128, + -130, -132, -133, -135, -137, -138, -140, -142, + -143, -145, -147, -149, -150, -152, -154, -155, + -157, -159, -161, -162, -164, -166, -167, -169, + -171, -173, -174, -176, -178, -180, -181, -183, + -185, -186, -188, -190, -192, -193, -195, -197, + -199, -200, -202, -204, -206, -207, -209, -211, + -213, -214, -216, -218, -219, -221, -223, -225, + -226, -228, -230, -232, -233, -235, -237, -239, + -240, -242, -244, -246, -247, -249, -251, -253, + -255, -256, -258, -260, -262, -263, -265, -267, + -269, -270, -272, -274, -276, -277, -279, -281, + -283, -284, -286, -288, -290, -291, -293, -295, + -297, -299, -300, -302, -304, -306, -307, -309, + -311, -313, -314, -316, -318, -320, -321, -323, + -325, -327, -328, -330, -332, -334, -336, -337, + -339, -341, -343, -344, -346, -348, -350, -351, + -353, -355, -357, -358, -360, -362, -364, -365, + -367, -369, -371, -372, -374, -376, -378, -379, + -381, -383, -385, -386, -388, -390, -392, -393, + -395, -397, -399, -400, -402, -404, -406, -407, + -409, -411, -413, -414, -416, -418, -419, -421, + -423, -425, -426, -428, -430, -432, -433, -435, + -437, -438, -440, -442, -444, -445, -447, -449, + -450, -452, -454, -456, -457, -459, -461, -462, + -464, -466, -468, -469, -471, -473, -474, -476, + -478, -479, -481, -483, -485, -486, -488, -490, + -491, -493, -495, -496, -498, -500, -501, -503, + -505, -506, -508, -510, -511, -513, -515, -516, + -518, -520, -521, -523, -525, -526, -528, -530, + -531, -533, -534, -536, -538, -539, -541, -543, + -544, -546, -547, -549, -551, -552, -554, -556, + -557, -559, -560, -562, -564, -565, -567, -568, + -570, -572, -573, -575, -576, -578, -579, -581, + -583, -584, -586, -587, -589, -590, -592, -594, + -595, -597, -598, -600, -601, -603, -604, -606, + -607, -609, -610, -612, -614, -615, -617, -618, + -620, -621, -623, -624, -626, -627, -629, -630, + -632, -633, -635, -636, -638, -639, -640, -642, + -643, -645, -646, -648, -649, -651, -652, -654, + -655, 
-656, -658, -659, -661, -662, -664, -665, + -666, -668, -669, -671, -672, -673, -675, -676, + -678, -679, -680, -682, -683, -685, -686, -687, + -689, -690, -691, -693, -694, -695, -697, -698, + -699, -701, -702, -703, -705, -706, -707, -709, + -710, -711, -713, -714, -715, -716, -718, -719, + -720, -722, -723, -724, -725, -727, -728, -729, + -730, -732, -733, -734, -735, -736, -738, -739, + -740, -741, -743, -744, -745, -746, -747, -748, + -750, -751, -752, -753, -754, -755, -757, -758, + -759, -760, -761, -762, -763, -765, -766, -767, + -768, -769, -770, -771, -772, -773, -774, -776, + -777, -778, -779, -780, -781, -782, -783, -784, + -785, -786, -787, -788, -789, -790, -791, -792, + -793, -794, -795, -796, -797, -798, -799, -800, + -801, -802, -803, -804, -805, -806, -807, -808, + -808, -809, -810, -811, -812, -813, -814, -815, + -816, -816, -817, -818, -819, -820, -821, -822, + -822, -823, -824, -825, -826, -827, -827, -828, + -829, -830, -831, -831, -832, -833, -834, -834, + -835, -836, -837, -837, -838, -839, -840, -840, + -841, -842, -842, -843, -844, -844, -845, -846, + -846, -847, -848, -848, -849, -850, -850, -851, + -852, -852, -853, -853, -854, -855, -855, -856, + -856, -857, -857, -858, -859, -859, -860, -860, + -861, -861, -862, -862, -863, -863, -864, -864, + -865, -865, -866, -866, -867, -867, -867, -868, + -868, -869, -869, -870, -870, -870, -871, -871, + -871, -872, -872, -873, -873, -873, -874, -874, + -874, -875, -875, -875, -876, -876, -876, -876, + -877, -877, -877, -878, -878, -878, -878, -879, + -879, -879, -879, -879, -880, -880, -880, -880, + -880, -881, -881, -881, -881, -881, -881, -881, + -882, -882, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -883, -883, -883, -883, + -883, -883, -883, -883, -883, -883, -883, -883, + -883, -883, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -881, -881, + -881, -881, -881, -881, -880, -880, -880, -880, + -880, -879, -879, -879, -879, -879, -878, -878, + -878, 
-878, -877, -877, -877, -876, -876, -876, + -876, -875, -875, -875, -874, -874, -874, -873, + -873, -872, -872, -872, -871, -871, -870, -870, + -870, -869, -869, -868, -868, -867, -867, -866, + -866, -865, -865, -864, -864, -863, -863, -862, + -862, -861, -861, -860, -860, -859, -858, -858, + -857, -857, -856, -855, -855, -854, -854, -853, + -852, -852, -851, -850, -850, -849, -848, -847, + -847, -846, -845, -845, -844, -843, -842, -842, + -841, -840, -839, -838, -838, -837, -836, -835, + -834, -833, -833, -832, -831, -830, -829, -828, + -827, -827, -826, -825, -824, -823, -822, -821, + -820, -819, -818, -817, -816, -815, -814, -813, + -812, -811, -810, -809, -808, -807, -806, -805, + -804, -803, -802, -801, -800, -798, -797, -796, + -795, -794, -793, -792, -791, -789, -788, -787, + -786, -785, -783, -782, -781, -780, -779, -777, + -776, -775, -774, -772, -771, -770, -769, -767, + -766, -765, -763, -762, -761, -759, -758, -757, + -755, -754, -753, -751, -750, -748, -747, -746, + -744, -743, -741, -740, -738, -737, -735, -734, + -733, -731, -730, -728, -727, -725, -724, -722, + -720, -719, -717, -716, -714, -713, -711, -710, + -708, -706, -705, -703, -702, -700, -698, -697, + -695, -693, -692, -690, -688, -687, -685, -683, + -682, -680, -678, -676, -675, -673, -671, -669, + -668, -666, -664, -662, -661, -659, -657, -655, + -653, -651, -650, -648, -646, -644, -642, -640, + -639, -637, -635, -633, -631, -629, -627, -625, + -623, -621, -619, -617, -615, -614, -612, -610, + -608, -606, -604, -602, -600, -598, -596, -594, + -592, -589, -587, -585, -583, -581, -579, -577, + -575, -573, -571, -569, -567, -564, -562, -560, + -558, -556, -554, -552, -549, -547, -545, -543, + -541, -538, -536, -534, -532, -530, -527, -525, + -523, -521, -518, -516, -514, -512, -509, -507, + -505, -502, -500, -498, -495, -493, -491, -488, + -486, -484, -481, -479, -477, -474, -472, -469, + -467, -465, -462, -460, -457, -455, -453, -450, + -448, -445, -443, -440, -438, -435, -433, -430, + -428, 
-425, -423, -420, -418, -415, -413, -410, + -408, -405, -403, -400, -398, -395, -392, -390, + -387, -385, -382, -380, -377, -374, -372, -369, + -366, -364, -361, -359, -356, -353, -351, -348, + -345, -343, -340, -337, -334, -332, -329, -326, + -324, -321, -318, -315, -313, -310, -307, -305, + -302, -299, -296, -293, -291, -288, -285, -282, + -280, -277, -274, -271, -268, -266, -263, -260, + -257, -254, -251, -248, -246, -243, -240, -237, + -234, -231, -228, -226, -223, -220, -217, -214, + -211, -208, -205, -202, -199, -196, -194, -191, + -188, -185, -182, -179, -176, -173, -170, -167, + -164, -161, -158, -155, -152, -149, -146, -143, + -140, -137, -134, -131, -128, -125, -122, -119, + -116, -113, -110, -107, -104, -100, -97, -94, + -91, -88, -85, -82, -79, -76, -73, -70, + -67, -63, -60, -57, -54, -51, -48, -45, + -42, -39, -35, -32, -29, -26, -23, -20, + -16, -13, -10, -7, -4, -1, 2, 6, + 9, 12, 15, 18, 22, 25, 28, 31, + 34, 38, 41, 44, 47, 50, 54, 57, + 60, 63, 67, 70, 73, 76, 79, 83, + 86, 89, 92, 96, 99, 102, 105, 109, + 112, 115, 118, 122, 125, 128, 132, 135, + 138, 141, 145, 148, 151, 154, 158, 161, + 164, 168, 171, 174, 178, 181, 184, 187, + 191, 194, 197, 201, 204, 207, 211, 214, + 217, 221, 224, 227, 230, 234, 237, 240, + 244, 247, 250, 254, 257, 260, 264, 267, + 270, 274, 277, 280, 284, 287, 290, 294, + 297, 300, 304, 307, 310, 314, 317, 320, + 324, 327, 330, 334, 337, 341, 344, 347, + 351, 354, 357, 361, 364, 367, 371, 374, + 377, 381, 384, 387, 391, 394, 397, 401, + 404, 407, 411, 414, 417, 421, 424, 427, + 431, 434, 437, 441, 444, 448, 451, 454, + 458, 461, 464, 468, 471, 474, 478, 481, + 484, 488, 491, 494, 498, 501, 504, 507, + 511, 514, 517, 521, 524, 527, 531, 534, + 537, 541, 544, 547, 551, 554, 557, 560, + 564, 567, 570, 574, 577, 580, 584, 587, + 590, 593, 597, 600, 603, 606, 610, 613, + 616, 620, 623, 626, 629, 633, 636, 639, + 642, 646, 649, 652, 655, 659, 662, 665, + 668, 671, 675, 678, 681, 684, 688, 691, + 694, 697, 700, 704, 707, 710, 713, 
716, + 720, 723, 726, 729, 732, 735, 739, 742, + 745, 748, 751, 754, 758, 761, 764, 767, + 770, 773, 776, 779, 783, 786, 789, 792, + 795, 798, 801, 804, 807, 810, 813, 817, + 820, 823, 826, 829, 832, 835, 838, 841, + 844, 847, 850, 853, 856, 859, 862, 865, + 868, 871, 874, 877, 880, 883, 886, 889, + 892, 895, 898, 901, 904, 907, 910, 913, + 915, 918, 921, 924, 927, 930, 933, 936, + 939, 942, 944, 947, 950, 953, 956, 959, + 961, 964, 967, 970, 973, 976, 978, 981, + 984, 987, 990, 992, 995, 998, 1001, 1003, + 1006, 1009, 1012, 1014, 1017, 1020, 1022, 1025, + 1028, 1030, 1033, 1036, 1038, 1041, 1044, 1046, + 1049, 1052, 1054, 1057, 1060, 1062, 1065, 1067, + 1070, 1073, 1075, 1078, 1080, 1083, 1085, 1088, + 1090, 1093, 1095, 1098, 1100, 1103, 1105, 1108, + 1110, 1113, 1115, 1118, 1120, 1123, 1125, 1128, + 1130, 1132, 1135, 1137, 1140, 1142, 1144, 1147, + 1149, 1151, 1154, 1156, 1158, 1161, 1163, 1165, + 1168, 1170, 1172, 1174, 1177, 1179, 1181, 1183, + 1186, 1188, 1190, 1192, 1195, 1197, 1199, 1201, + 1203, 1205, 1208, 1210, 1212, 1214, 1216, 1218, + 1220, 1222, 1224, 1227, 1229, 1231, 1233, 1235, + 1237, 1239, 1241, 1243, 1245, 1247, 1249, 1251, + 1253, 1255, 1257, 1258, 1260, 1262, 1264, 1266, + 1268, 1270, 1272, 1274, 1275, 1277, 1279, 1281, + 1283, 1284, 1286, 1288, 1290, 1292, 1293, 1295, + 1297, 1299, 1300, 1302, 1304, 1305, 1307, 1309, + 1310, 1312, 1314, 1315, 1317, 1318, 1320, 1322, + 1323, 1325, 1326, 1328, 1329, 1331, 1332, 1334, + 1335, 1337, 1338, 1340, 1341, 1343, 1344, 1346, + 1347, 1348, 1350, 1351, 1353, 1354, 1355, 1357, + 1358, 1359, 1361, 1362, 1363, 1364, 1366, 1367, + 1368, 1369, 1371, 1372, 1373, 1374, 1375, 1377, + 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1386, + 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, + 1395, 1396, 1397, 1398, 1399, 1400, 1400, 1401, + 1402, 1403, 1404, 1405, 1406, 1406, 1407, 1408, + 1409, 1410, 1410, 1411, 1412, 1413, 1413, 1414, + 1415, 1416, 1416, 1417, 1418, 1418, 1419, 1420, + 1420, 1421, 1421, 1422, 1422, 1423, 
1424, 1424, + 1425, 1425, 1426, 1426, 1427, 1427, 1427, 1428, + 1428, 1429, 1429, 1429, 1430, 1430, 1431, 1431, + 1431, 1432, 1432, 1432, 1432, 1433, 1433, 1433, + 1433, 1434, 1434, 1434, 1434, 1434, 1434, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1434, 1434, 1434, + 1434, 1434, 1434, 1433, 1433, 1433, 1433, 1432, + 1432, 1432, 1432, 1431, 1431, 1431, 1430, 1430, + 1429, 1429, 1429, 1428, 1428, 1427, 1427, 1426, + 1426, 1425, 1425, 1424, 1424, 1423, 1423, 1422, + 1422, 1421, 1420, 1420, 1419, 1419, 1418, 1417, + 1416, 1416, 1415, 1414, 1414, 1413, 1412, 1411, + 1410, 1410, 1409, 1408, 1407, 1406, 1405, 1405, + 1404, 1403, 1402, 1401, 1400, 1399, 1398, 1397, + 1396, 1395, 1394, 1393, 1392, 1391, 1390, 1389, + 1387, 1386, 1385, 1384, 1383, 1382, 1380, 1379, + 1378, 1377, 1376, 1374, 1373, 1372, 1370, 1369, + 1368, 1367, 1365, 1364, 1362, 1361, 1360, 1358, + 1357, 1355, 1354, 1352, 1351, 1349, 1348, 1346, + 1345, 1343, 1342, 1340, 1339, 1337, 1335, 1334, + 1332, 1330, 1329, 1327, 1325, 1324, 1322, 1320, + 1319, 1317, 1315, 1313, 1311, 1310, 1308, 1306, + 1304, 1302, 1300, 1298, 1297, 1295, 1293, 1291, + 1289, 1287, 1285, 1283, 1281, 1279, 1277, 1275, + 1273, 1271, 1268, 1266, 1264, 1262, 1260, 1258, + 1256, 1253, 1251, 1249, 1247, 1245, 1242, 1240, + 1238, 1236, 1233, 1231, 1229, 1226, 1224, 1222, + 1219, 1217, 1214, 1212, 1210, 1207, 1205, 1202, + 1200, 1197, 1195, 1192, 1190, 1187, 1185, 1182, + 1179, 1177, 1174, 1172, 1169, 1166, 1164, 1161, + 1158, 1156, 1153, 1150, 1148, 1145, 1142, 1139, + 1136, 1134, 1131, 1128, 1125, 1122, 1119, 1117, + 1114, 1111, 1108, 1105, 1102, 1099, 1096, 1093, + 1090, 1087, 1084, 1081, 1078, 1075, 1072, 1069, + 1066, 1063, 1060, 1057, 1053, 1050, 1047, 1044, + 1041, 1038, 1034, 1031, 1028, 1025, 1021, 1018, + 1015, 1012, 1008, 1005, 1002, 998, 995, 992, + 988, 985, 981, 978, 975, 971, 968, 964, + 961, 957, 954, 950, 947, 943, 940, 936, + 933, 
929, 926, 922, 918, 915, 911, 908, + 904, 900, 897, 893, 889, 885, 882, 878, + 874, 871, 867, 863, 859, 855, 852, 848, + 844, 840, 836, 832, 829, 825, 821, 817, + 813, 809, 805, 801, 797, 793, 789, 785, + 781, 777, 773, 769, 765, 761, 757, 753, + 749, 745, 741, 737, 732, 728, 724, 720, + 716, 712, 707, 703, 699, 695, 691, 686, + 682, 678, 674, 669, 665, 661, 656, 652, + 648, 643, 639, 635, 630, 626, 622, 617, + 613, 608, 604, 600, 595, 591, 586, 582, + 577, 573, 568, 564, 559, 555, 550, 546, + 541, 537, 532, 528, 523, 518, 514, 509, + 505, 500, 495, 491, 486, 481, 477, 472, + 467, 463, 458, 453, 448, 444, 439, 434, + 429, 425, 420, 415, 410, 406, 401, 396, + 391, 386, 382, 377, 372, 367, 362, 357, + 352, 347, 343, 338, 333, 328, 323, 318, + 313, 308, 303, 298, 293, 288, 283, 278, + 273, 268, 263, 258, 253, 248, 243, 238, + 233, 228, 223, 218, 213, 208, 203, 198, + 193, 187, 182, 177, 172, 167, 162, 157, + 152, 146, 141, 136, 131, 126, 121, 115, + 110, 105, 100, 95, 89, 84, 79, 74, + 68, 63, 58, 53, 47, 42, 37, 32, + 26, 21, 16, 10, 5, 0, -5, -11, + -16, -21, -27, -32, -37, -43, -48, -53, + -59, -64, -70, -75, -80, -86, -91, -96, + -102, -107, -113, -118, -123, -129, -134, -140, + -145, -150, -156, -161, -167, -172, -178, -183, + -188, -194, -199, -205, -210, -216, -221, -227, + -232, -238, -243, -248, -254, -259, -265, -270, + -276, -281, -287, -292, -298, -303, -309, -314, + -320, -325, -331, -336, -342, -347, -353, -358, + -364, -369, -375, -381, -386, -392, -397, -403, + -408, -414, -419, -425, -430, -436, -441, -447, + -452, -458, -464, -469, -475, -480, -486, -491, + -497, -502, -508, -513, -519, -524, -530, -536, + -541, -547, -552, -558, -563, -569, -574, -580, + -585, -591, -597, -602, -608, -613, -619, -624, + -630, -635, -641, -646, -652, -658, -663, -669, + -674, -680, -685, -691, -696, -702, -707, -713, + -718, -724, -729, -735, -740, -746, -751, -757, + -762, -768, -773, -779, -784, -790, -795, -801, + -806, -812, -817, -823, -828, -834, -839, -845, + 
-850, -856, -861, -867, -872, -877, -883, -888, + -894, -899, -905, -910, -916, -921, -926, -932, + -937, -943, -948, -953, -959, -964, -970, -975, + -980, -986, -991, -997, -1002, -1007, -1013, -1018, + -1023, -1029, -1034, -1039, -1045, -1050, -1055, -1061, + -1066, -1071, -1077, -1082, -1087, -1092, -1098, -1103, + -1108, -1114, -1119, -1124, -1129, -1135, -1140, -1145, + -1150, -1155, -1161, -1166, -1171, -1176, -1181, -1187, + -1192, -1197, -1202, -1207, -1212, -1218, -1223, -1228, + -1233, -1238, -1243, -1248, -1253, -1258, -1264, -1269, + -1274, -1279, -1284, -1289, -1294, -1299, -1304, -1309, + -1314, -1319, -1324, -1329, -1334, -1339, -1344, -1349, + -1354, -1359, -1364, -1369, -1374, -1378, -1383, -1388, + -1393, -1398, -1403, -1408, -1413, -1417, -1422, -1427, + -1432, -1437, -1441, -1446, -1451, -1456, -1461, -1465, + -1470, -1475, -1480, -1484, -1489, -1494, -1498, -1503, + -1508, -1512, -1517, -1522, -1526, -1531, -1535, -1540, + -1545, -1549, -1554, -1558, -1563, -1567, -1572, -1577, + -1581, -1586, -1590, -1594, -1599, -1603, -1608, -1612, + -1617, -1621, -1625, -1630, -1634, -1639, -1643, -1647, + -1652, -1656, -1660, -1665, -1669, -1673, -1677, -1682, + -1686, -1690, -1694, -1699, -1703, -1707, -1711, -1715, + -1719, -1724, -1728, -1732, -1736, -1740, -1744, -1748, + -1752, -1756, -1760, -1764, -1768, -1772, -1776, -1780, + -1784, -1788, -1792, -1796, -1800, -1804, -1807, -1811, + -1815, -1819, -1823, -1827, -1830, -1834, -1838, -1842, + -1845, -1849, -1853, -1857, -1860, -1864, -1868, -1871, + -1875, -1878, -1882, -1886, -1889, -1893, -1896, -1900, + -1903, -1907, -1910, -1914, -1917, -1921, -1924, -1927, + -1931, -1934, -1938, -1941, -1944, -1948, -1951, -1954, + -1957, -1961, -1964, -1967, -1970, -1974, -1977, -1980, + -1983, -1986, -1989, -1992, -1996, -1999, -2002, -2005, + -2008, -2011, -2014, -2017, -2020, -2023, -2026, -2029, + -2031, -2034, -2037, -2040, -2043, -2046, -2049, -2051, + -2054, -2057, -2060, -2062, -2065, -2068, -2070, -2073, 
+ -2076, -2078, -2081, -2083, -2086, -2089, -2091, -2094, + -2096, -2099, -2101, -2104, -2106, -2108, -2111, -2113, + -2116, -2118, -2120, -2122, -2125, -2127, -2129, -2132, + -2134, -2136, -2138, -2140, -2142, -2145, -2147, -2149, + -2151, -2153, -2155, -2157, -2159, -2161, -2163, -2165, + -2167, -2169, -2171, -2173, -2174, -2176, -2178, -2180, + -2182, -2184, -2185, -2187, -2189, -2190, -2192, -2194, + -2195, -2197, -2199, -2200, -2202, -2203, -2205, -2206, + -2208, -2209, -2211, -2212, -2214, -2215, -2216, -2218, + -2219, -2220, -2222, -2223, -2224, -2225, -2227, -2228, + -2229, -2230, -2231, -2232, -2233, -2235, -2236, -2237, + -2238, -2239, -2240, -2241, -2242, -2242, -2243, -2244, + -2245, -2246, -2247, -2248, -2248, -2249, -2250, -2251, + -2251, -2252, -2253, -2253, -2254, -2254, -2255, -2256, + -2256, -2257, -2257, -2258, -2258, -2259, -2259, -2259, + -2260, -2260, -2260, -2261, -2261, -2261, -2261, -2262, + -2262, -2262, -2262, -2262, -2263, -2263, -2263, -2263, + -2263, -2263, -2263, -2263, -2263, -2263, -2263, -2263, + -2262, -2262, -2262, -2262, -2262, -2261, -2261, -2261, + -2261, -2260, -2260, -2260, -2259, -2259, -2258, -2258, + -2257, -2257, -2256, -2256, -2255, -2255, -2254, -2254, + -2253, -2252, -2252, -2251, -2250, -2249, -2249, -2248, + -2247, -2246, -2245, -2244, -2244, -2243, -2242, -2241, + -2240, -2239, -2238, -2237, -2236, -2234, -2233, -2232, + -2231, -2230, -2229, -2227, -2226, -2225, -2224, -2222, + -2221, -2220, -2218, -2217, -2215, -2214, -2212, -2211, + -2209, -2208, -2206, -2205, -2203, -2202, -2200, -2198, + -2197, -2195, -2193, -2191, -2190, -2188, -2186, -2184, + -2182, -2180, -2179, -2177, -2175, -2173, -2171, -2169, + -2167, -2165, -2162, -2160, -2158, -2156, -2154, -2152, + -2150, -2147, -2145, -2143, -2140, -2138, -2136, -2133, + -2131, -2129, -2126, -2124, -2121, -2119, -2116, -2114, + -2111, -2108, -2106, -2103, -2101, -2098, -2095, -2092, + -2090, -2087, -2084, -2081, -2079, -2076, -2073, -2070, + -2067, -2064, -2061, 
-2058, -2055, -2052, -2049, -2046, + -2043, -2040, -2037, -2033, -2030, -2027, -2024, -2021, + -2017, -2014, -2011, -2007, -2004, -2001, -1997, -1994, + -1990, -1987, -1983, -1980, -1976, -1973, -1969, -1966, + -1962, -1958, -1955, -1951, -1947, -1944, -1940, -1936, + -1932, -1928, -1925, -1921, -1917, -1913, -1909, -1905, + -1901, -1897, -1893, -1889, -1885, -1881, -1877, -1873, + -1869, -1864, -1860, -1856, -1852, -1848, -1843, -1839, + -1835, -1830, -1826, -1822, -1817, -1813, -1809, -1804, + -1800, -1795, -1791, -1786, -1782, -1777, -1772, -1768, + -1763, -1758, -1754, -1749, -1744, -1740, -1735, -1730, + -1725, -1720, -1716, -1711, -1706, -1701, -1696, -1691, + -1686, -1681, -1676, -1671, -1666, -1661, -1656, -1651, + -1646, -1640, -1635, -1630, -1625, -1620, -1614, -1609, + -1604, -1599, -1593, -1588, -1582, -1577, -1572, -1566, + -1561, -1555, -1550, -1544, -1539, -1533, -1528, -1522, + -1516, -1511, -1505, -1500, -1494, -1488, -1482, -1477, + -1471, -1465, -1459, -1454, -1448, -1442, -1436, -1430, + -1424, -1418, -1412, -1406, -1400, -1394, -1388, -1382, + -1376, -1370, -1364, -1358, -1352, -1346, -1339, -1333, + -1327, -1321, -1315, -1308, -1302, -1296, -1289, -1283, + -1277, -1270, -1264, -1258, -1251, -1245, -1238, -1232, + -1225, -1219, -1212, -1206, -1199, -1193, -1186, -1179, + -1173, -1166, -1159, -1153, -1146, -1139, -1133, -1126, + -1119, -1112, -1105, -1099, -1092, -1085, -1078, -1071, + -1064, -1057, -1050, -1044, -1037, -1030, -1023, -1016, + -1009, -1001, -994, -987, -980, -973, -966, -959, + -952, -945, -937, -930, -923, -916, -908, -901, + -894, -887, -879, -872, -865, -857, -850, -842, + -835, -828, -820, -813, -805, -798, -790, -783, + -775, -768, -760, -753, -745, -738, -730, -722, + -715, -707, -699, -692, -684, -676, -669, -661, + -653, -645, -638, -630, -622, -614, -606, -599, + -591, -583, -575, -567, -559, -551, -543, -536, + -528, -520, -512, -504, -496, -488, -480, -472, + -464, -456, -447, -439, -431, -423, -415, -407, + -399, 
-391, -382, -374, -366, -358, -350, -342, + -333, -325, -317, -309, -300, -292, -284, -275, + -267, -259, -250, -242, -234, -225, -217, -209, + -200, -192, -183, -175, -167, -158, -150, -141, + -133, -124, -116, -107, -99, -90, -82, -73, + -65, -56, -48, -39, -30, -22, -13, -5, + 4, 12, 21, 30, 38, 47, 56, 64, + 73, 82, 90, 99, 108, 116, 125, 134, + 143, 151, 160, 169, 178, 186, 195, 204, + 213, 221, 230, 239, 248, 257, 265, 274, + 283, 292, 301, 310, 318, 327, 336, 345, + 354, 363, 372, 381, 389, 398, 407, 416, + 425, 434, 443, 452, 461, 470, 479, 488, + 496, 505, 514, 523, 532, 541, 550, 559, + 568, 577, 586, 595, 604, 613, 622, 631, + 640, 649, 658, 667, 676, 685, 694, 703, + 712, 721, 730, 739, 748, 757, 766, 775, + 784, 793, 802, 811, 820, 829, 838, 847, + 856, 865, 874, 883, 892, 901, 910, 920, + 929, 938, 947, 956, 965, 974, 983, 992, + 1001, 1010, 1019, 1028, 1037, 1046, 1055, 1064, + 1073, 1082, 1091, 1100, 1109, 1118, 1127, 1136, + 1145, 1154, 1163, 1172, 1181, 1190, 1199, 1208, + 1217, 1226, 1235, 1244, 1253, 1262, 1271, 1280, + 1289, 1298, 1307, 1316, 1324, 1333, 1342, 1351, + 1360, 1369, 1378, 1387, 1396, 1405, 1414, 1423, + 1431, 1440, 1449, 1458, 1467, 1476, 1485, 1493, + 1502, 1511, 1520, 1529, 1538, 1546, 1555, 1564, + 1573, 1582, 1590, 1599, 1608, 1617, 1625, 1634, + 1643, 1652, 1660, 1669, 1678, 1687, 1695, 1704, + 1713, 1721, 1730, 1739, 1747, 1756, 1765, 1773, + 1782, 1790, 1799, 1808, 1816, 1825, 1833, 1842, + 1850, 1859, 1867, 1876, 1884, 1893, 1901, 1910, + 1918, 1927, 1935, 1944, 1952, 1961, 1969, 1977, + 1986, 1994, 2002, 2011, 2019, 2027, 2036, 2044, + 2052, 2061, 2069, 2077, 2085, 2094, 2102, 2110, + 2118, 2127, 2135, 2143, 2151, 2159, 2167, 2175, + 2183, 2192, 2200, 2208, 2216, 2224, 2232, 2240, + 2248, 2256, 2264, 2272, 2280, 2288, 2296, 2303, + 2311, 2319, 2327, 2335, 2343, 2351, 2358, 2366, + 2374, 2382, 2389, 2397, 2405, 2412, 2420, 2428, + 2435, 2443, 2451, 2458, 2466, 2473, 2481, 2489, + 2496, 2504, 2511, 2518, 2526, 2533, 2541, 
2548, + 2556, 2563, 2570, 2578, 2585, 2592, 2599, 2607, + 2614, 2621, 2628, 2636, 2643, 2650, 2657, 2664, + 2671, 2678, 2685, 2692, 2700, 2707, 2714, 2720, + 2727, 2734, 2741, 2748, 2755, 2762, 2769, 2776, + 2782, 2789, 2796, 2803, 2809, 2816, 2823, 2829, + 2836, 2843, 2849, 2856, 2862, 2869, 2875, 2882, + 2888, 2895, 2901, 2908, 2914, 2920, 2927, 2933, + 2939, 2945, 2952, 2958, 2964, 2970, 2977, 2983, + 2989, 2995, 3001, 3007, 3013, 3019, 3025, 3031, + 3037, 3043, 3049, 3055, 3060, 3066, 3072, 3078, + 3084, 3089, 3095, 3101, 3106, 3112, 3118, 3123, + 3129, 3134, 3140, 3145, 3151, 3156, 3162, 3167, + 3172, 3178, 3183, 3188, 3193, 3199, 3204, 3209, + 3214, 3219, 3224, 3230, 3235, 3240, 3245, 3250, + 3255, 3260, 3264, 3269, 3274, 3279, 3284, 3289, + 3293, 3298, 3303, 3307, 3312, 3317, 3321, 3326, + 3330, 3335, 3339, 3344, 3348, 3352, 3357, 3361, + 3365, 3370, 3374, 3378, 3382, 3387, 3391, 3395, + 3399, 3403, 3407, 3411, 3415, 3419, 3423, 3427, + 3430, 3434, 3438, 3442, 3446, 3449, 3453, 3457, + 3460, 3464, 3467, 3471, 3474, 3478, 3481, 3485, + 3488, 3491, 3495, 3498, 3501, 3505, 3508, 3511, + 3514, 3517, 3520, 3523, 3526, 3529, 3532, 3535, + 3538, 3541, 3544, 3547, 3549, 3552, 3555, 3557, + 3560, 3563, 3565, 3568, 3570, 3573, 3575, 3578, + 3580, 3583, 3585, 3587, 3589, 3592, 3594, 3596, + 3598, 3600, 3602, 3604, 3606, 3608, 3610, 3612, + 3614, 3616, 3618, 3620, 3621, 3623, 3625, 3626, + 3628, 3630, 3631, 3633, 3634, 3635, 3637, 3638, + 3640, 3641, 3642, 3643, 3645, 3646, 3647, 3648, + 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, + 3657, 3657, 3658, 3659, 3659, 3660, 3661, 3661, + 3662, 3662, 3663, 3663, 3664, 3664, 3664, 3665, + 3665, 3665, 3665, 3665, 3665, 3666, 3666, 3666, + 3666, 3666, 3665, 3665, 3665, 3665, 3665, 3664, + 3664, 3664, 3663, 3663, 3663, 3662, 3662, 3661, + 3660, 3660, 3659, 3658, 3658, 3657, 3656, 3655, + 3654, 3653, 3652, 3651, 3650, 3649, 3648, 3647, + 3646, 3645, 3644, 3642, 3641, 3640, 3638, 3637, + 3635, 3634, 3632, 3631, 3629, 3627, 3626, 
3624, + 3622, 3621, 3619, 3617, 3615, 3613, 3611, 3609, + 3607, 3605, 3603, 3601, 3598, 3596, 3594, 3592, + 3589, 3587, 3585, 3582, 3580, 3577, 3575, 3572, + 3569, 3567, 3564, 3561, 3558, 3556, 3553, 3550, + 3547, 3544, 3541, 3538, 3535, 3532, 3528, 3525, + 3522, 3519, 3515, 3512, 3509, 3505, 3502, 3498, + 3495, 3491, 3488, 3484, 3480, 3477, 3473, 3469, + 3465, 3461, 3458, 3454, 3450, 3446, 3442, 3437, + 3433, 3429, 3425, 3421, 3416, 3412, 3408, 3403, + 3399, 3395, 3390, 3385, 3381, 3376, 3372, 3367, + 3362, 3357, 3353, 3348, 3343, 3338, 3333, 3328, + 3323, 3318, 3313, 3308, 3303, 3297, 3292, 3287, + 3282, 3276, 3271, 3265, 3260, 3254, 3249, 3243, + 3238, 3232, 3226, 3221, 3215, 3209, 3203, 3197, + 3191, 3185, 3179, 3173, 3167, 3161, 3155, 3149, + 3143, 3136, 3130, 3124, 3117, 3111, 3105, 3098, + 3092, 3085, 3079, 3072, 3065, 3059, 3052, 3045, + 3038, 3031, 3025, 3018, 3011, 3004, 2997, 2990, + 2983, 2975, 2968, 2961, 2954, 2947, 2939, 2932, + 2925, 2917, 2910, 2902, 2895, 2887, 2879, 2872, + 2864, 2856, 2849, 2841, 2833, 2825, 2817, 2809, + 2801, 2793, 2785, 2777, 2769, 2761, 2753, 2745, + 2737, 2728, 2720, 2712, 2703, 2695, 2686, 2678, + 2669, 2661, 2652, 2644, 2635, 2626, 2617, 2609, + 2600, 2591, 2582, 2573, 2564, 2555, 2546, 2537, + 2528, 2519, 2510, 2501, 2491, 2482, 2473, 2464, + 2454, 2445, 2435, 2426, 2417, 2407, 2397, 2388, + 2378, 2369, 2359, 2349, 2339, 2330, 2320, 2310, + 2300, 2290, 2280, 2270, 2260, 2250, 2240, 2230, + 2220, 2209, 2199, 2189, 2179, 2168, 2158, 2148, + 2137, 2127, 2116, 2106, 2095, 2085, 2074, 2063, + 2053, 2042, 2031, 2021, 2010, 1999, 1988, 1977, + 1966, 1955, 1944, 1933, 1922, 1911, 1900, 1889, + 1878, 1867, 1855, 1844, 1833, 1821, 1810, 1799, + 1787, 1776, 1764, 1753, 1741, 1730, 1718, 1707, + 1695, 1683, 1671, 1660, 1648, 1636, 1624, 1612, + 1601, 1589, 1577, 1565, 1553, 1541, 1529, 1517, + 1504, 1492, 1480, 1468, 1456, 1443, 1431, 1419, + 1407, 1394, 1382, 1369, 1357, 1344, 1332, 1319, + 1307, 1294, 1282, 1269, 1256, 1244, 1231, 
1218, + 1205, 1193, 1180, 1167, 1154, 1141, 1128, 1115, + 1102, 1089, 1076, 1063, 1050, 1037, 1024, 1011, + 998, 984, 971, 958, 945, 931, 918, 905, + 891, 878, 864, 851, 838, 824, 811, 797, + 783, 770, 756, 743, 729, 715, 702, 688, + 674, 660, 647, 633, 619, 605, 591, 577, + 563, 549, 535, 521, 507, 493, 479, 465, + 451, 437, 423, 409, 395, 380, 366, 352, + 338, 323, 309, 295, 280, 266, 252, 237, + 223, 209, 194, 180, 165, 151, 136, 122, + 107, 92, 78, 63, 49, 34, 19, 5, + -10, -25, -40, -54, -69, -84, -99, -114, + -128, -143, -158, -173, -188, -203, -218, -233, + -248, -263, -278, -293, -308, -323, -338, -353, + -368, -383, -398, -413, -428, -443, -459, -474, + -489, -504, -519, -535, -550, -565, -580, -596, + -611, -626, -642, -657, -672, -688, -703, -718, + -734, -749, -765, -780, -795, -811, -826, -842, + -857, -873, -888, -904, -919, -935, -950, -966, + -981, -997, -1013, -1028, -1044, -1059, -1075, -1091, + -1106, -1122, -1137, -1153, -1169, -1184, -1200, -1216, + -1231, -1247, -1263, -1278, -1294, -1310, -1326, -1341, + -1357, -1373, -1389, -1404, -1420, -1436, -1452, -1467, + -1483, -1499, -1515, -1531, -1546, -1562, -1578, -1594, + -1610, -1625, -1641, -1657, -1673, -1689, -1705, -1720, + -1736, -1752, -1768, -1784, -1800, -1815, -1831, -1847, + -1863, -1879, -1895, -1911, -1926, -1942, -1958, -1974, + -1990, -2006, -2022, -2037, -2053, -2069, -2085, -2101, + -2117, -2133, -2148, -2164, -2180, -2196, -2212, -2228, + -2244, -2259, -2275, -2291, -2307, -2323, -2339, -2354, + -2370, -2386, -2402, -2418, -2433, -2449, -2465, -2481, + -2497, -2512, -2528, -2544, -2560, -2576, -2591, -2607, + -2623, -2639, -2654, -2670, -2686, -2701, -2717, -2733, + -2749, -2764, -2780, -2796, -2811, -2827, -2843, -2858, + -2874, -2890, -2905, -2921, -2936, -2952, -2968, -2983, + -2999, -3014, -3030, -3045, -3061, -3076, -3092, -3107, + -3123, -3138, -3154, -3169, -3185, -3200, -3216, -3231, + -3246, -3262, -3277, -3293, -3308, -3323, -3339, -3354, + -3369, -3385, -3400, -3415, 
-3430, -3446, -3461, -3476, + -3491, -3506, -3521, -3537, -3552, -3567, -3582, -3597, + -3612, -3627, -3642, -3657, -3672, -3687, -3702, -3717, + -3732, -3747, -3762, -3777, -3792, -3807, -3821, -3836, + -3851, -3866, -3881, -3895, -3910, -3925, -3939, -3954, + -3969, -3983, -3998, -4012, -4027, -4042, -4056, -4071, + -4085, -4100, -4114, -4128, -4143, -4157, -4172, -4186, + -4200, -4215, -4229, -4243, -4257, -4271, -4286, -4300, + -4314, -4328, -4342, -4356, -4370, -4384, -4398, -4412, + -4426, -4440, -4454, -4468, -4482, -4495, -4509, -4523, + -4537, -4550, -4564, -4578, -4591, -4605, -4619, -4632, + -4646, -4659, -4673, -4686, -4699, -4713, -4726, -4740, + -4753, -4766, -4779, -4793, -4806, -4819, -4832, -4845, + -4858, -4871, -4884, -4897, -4910, -4923, -4936, -4949, + -4962, -4974, -4987, -5000, -5013, -5025, -5038, -5051, + -5063, -5076, -5088, -5101, -5113, -5125, -5138, -5150, + -5162, -5175, -5187, -5199, -5211, -5224, -5236, -5248, + -5260, -5272, -5284, -5296, -5308, -5319, -5331, -5343, + -5355, -5366, -5378, -5390, -5401, -5413, -5424, -5436, + -5447, -5459, -5470, -5482, -5493, -5504, -5515, -5527, + -5538, -5549, -5560, -5571, -5582, -5593, -5604, -5615, + -5626, -5636, -5647, -5658, -5668, -5679, -5690, -5700, + -5711, -5721, -5732, -5742, -5752, -5763, -5773, -5783, + -5793, -5804, -5814, -5824, -5834, -5844, -5854, -5863, + -5873, -5883, -5893, -5903, -5912, -5922, -5931, -5941, + -5950, -5960, -5969, -5979, -5988, -5997, -6006, -6016, + -6025, -6034, -6043, -6052, -6061, -6070, -6078, -6087, + -6096, -6105, -6113, -6122, -6131, -6139, -6148, -6156, + -6164, -6173, -6181, -6189, -6197, -6205, -6214, -6222, + -6230, -6238, -6245, -6253, -6261, -6269, -6276, -6284, + -6292, -6299, -6307, -6314, -6321, -6329, -6336, -6343, + -6350, -6358, -6365, -6372, -6379, -6386, -6392, -6399, + -6406, -6413, -6419, -6426, -6433, -6439, -6445, -6452, + -6458, -6464, -6471, -6477, -6483, -6489, -6495, -6501, + -6507, -6513, -6519, -6524, -6530, -6536, -6541, -6547, 
+ -6552, -6558, -6563, -6568, -6573, -6579, -6584, -6589, + -6594, -6599, -6604, -6608, -6613, -6618, -6623, -6627, + -6632, -6636, -6641, -6645, -6649, -6654, -6658, -6662, + -6666, -6670, -6674, -6678, -6682, -6685, -6689, -6693, + -6696, -6700, -6703, -6707, -6710, -6714, -6717, -6720, + -6723, -6726, -6729, -6732, -6735, -6738, -6741, -6743, + -6746, -6748, -6751, -6753, -6756, -6758, -6760, -6763, + -6765, -6767, -6769, -6771, -6773, -6774, -6776, -6778, + -6779, -6781, -6783, -6784, -6785, -6787, -6788, -6789, + -6790, -6791, -6792, -6793, -6794, -6795, -6795, -6796, + -6797, -6797, -6798, -6798, -6798, -6799, -6799, -6799, + -6799, -6799, -6799, -6799, -6799, -6798, -6798, -6798, + -6797, -6797, -6796, -6795, -6795, -6794, -6793, -6792, + -6791, -6790, -6789, -6788, -6786, -6785, -6784, -6782, + -6781, -6779, -6777, -6776, -6774, -6772, -6770, -6768, + -6766, -6764, -6761, -6759, -6757, -6754, -6752, -6749, + -6747, -6744, -6741, -6738, -6735, -6732, -6729, -6726, + -6723, -6719, -6716, -6713, -6709, -6706, -6702, -6698, + -6694, -6691, -6687, -6683, -6679, -6674, -6670, -6666, + -6662, -6657, -6653, -6648, -6643, -6639, -6634, -6629, + -6624, -6619, -6614, -6609, -6604, -6598, -6593, -6588, + -6582, -6576, -6571, -6565, -6559, -6553, -6547, -6541, + -6535, -6529, -6523, -6517, -6510, -6504, -6497, -6491, + -6484, -6477, -6470, -6463, -6456, -6449, -6442, -6435, + -6428, -6420, -6413, -6405, -6398, -6390, -6382, -6374, + -6367, -6359, -6351, -6343, -6334, -6326, -6318, -6309, + -6301, -6292, -6284, -6275, -6266, -6257, -6248, -6239, + -6230, -6221, -6212, -6202, -6193, -6184, -6174, -6164, + -6155, -6145, -6135, -6125, -6115, -6105, -6095, -6085, + -6074, -6064, -6054, -6043, -6032, -6022, -6011, -6000, + -5989, -5978, -5967, -5956, -5945, -5933, -5922, -5911, + -5899, -5887, -5876, -5864, -5852, -5840, -5828, -5816, + -5804, -5792, -5779, -5767, -5755, -5742, -5729, -5717, + -5704, -5691, -5678, -5665, -5652, -5639, -5626, -5612, + -5599, -5585, -5572, 
-5558, -5545, -5531, -5517, -5503, + -5489, -5475, -5461, -5447, -5432, -5418, -5403, -5389, + -5374, -5359, -5345, -5330, -5315, -5300, -5285, -5269, + -5254, -5239, -5223, -5208, -5192, -5177, -5161, -5145, + -5129, -5113, -5097, -5081, -5065, -5049, -5032, -5016, + -4999, -4983, -4966, -4950, -4933, -4916, -4899, -4882, + -4865, -4847, -4830, -4813, -4795, -4778, -4760, -4743, + -4725, -4707, -4689, -4671, -4653, -4635, -4617, -4598, + -4580, -4562, -4543, -4524, -4506, -4487, -4468, -4449, + -4430, -4411, -4392, -4373, -4354, -4334, -4315, -4295, + -4276, -4256, -4236, -4216, -4196, -4176, -4156, -4136, + -4116, -4096, -4075, -4055, -4034, -4014, -3993, -3972, + -3951, -3930, -3909, -3888, -3867, -3846, -3825, -3803, + -3782, -3760, -3739, -3717, -3695, -3673, -3651, -3629, + -3607, -3585, -3563, -3541, -3518, -3496, -3473, -3451, + -3428, -3405, -3383, -3360, -3337, -3314, -3290, -3267, + -3244, -3221, -3197, -3174, -3150, -3126, -3103, -3079, + -3055, -3031, -3007, -2983, -2959, -2934, -2910, -2886, + -2861, -2837, -2812, -2787, -2763, -2738, -2713, -2688, + -2663, -2638, -2612, -2587, -2562, -2536, -2511, -2485, + -2459, -2434, -2408, -2382, -2356, -2330, -2304, -2278, + -2251, -2225, -2199, -2172, -2146, -2119, -2092, -2066, + -2039, -2012, -1985, -1958, -1931, -1904, -1876, -1849, + -1822, -1794, -1767, -1739, -1711, -1683, -1656, -1628, + -1600, -1572, -1544, -1515, -1487, -1459, -1430, -1402, + -1373, -1345, -1316, -1287, -1258, -1230, -1201, -1172, + -1142, -1113, -1084, -1055, -1025, -996, -966, -937, + -907, -877, -848, -818, -788, -758, -728, -698, + -667, -637, -607, -576, -546, -515, -485, -454, + -423, -392, -362, -331, -300, -268, -237, -206, + -175, -143, -112, -81, -49, -17, 14, 46, + 78, 110, 142, 174, 206, 238, 270, 302, + 335, 367, 399, 432, 465, 497, 530, 563, + 596, 628, 661, 694, 728, 761, 794, 827, + 861, 894, 927, 961, 995, 1028, 1062, 1096, + 1130, 1163, 1197, 1231, 1266, 1300, 1334, 1368, + 1402, 1437, 1471, 1506, 1540, 1575, 1610, 
1644, + 1679, 1714, 1749, 1784, 1819, 1854, 1889, 1925, + 1960, 1995, 2031, 2066, 2101, 2137, 2173, 2208, + 2244, 2280, 2316, 2352, 2388, 2424, 2460, 2496, + 2532, 2568, 2604, 2641, 2677, 2714, 2750, 2787, + 2823, 2860, 2897, 2934, 2970, 3007, 3044, 3081, + 3118, 3155, 3193, 3230, 3267, 3304, 3342, 3379, + 3417, 3454, 3492, 3529, 3567, 3605, 3643, 3681, + 3718, 3756, 3794, 3832, 3870, 3909, 3947, 3985, + 4023, 4062, 4100, 4138, 4177, 4216, 4254, 4293, + 4331, 4370, 4409, 4448, 4487, 4526, 4564, 4603, + 4643, 4682, 4721, 4760, 4799, 4839, 4878, 4917, + 4957, 4996, 5036, 5075, 5115, 5154, 5194, 5234, + 5274, 5313, 5353, 5393, 5433, 5473, 5513, 5553, + 5593, 5634, 5674, 5714, 5754, 5795, 5835, 5875, + 5916, 5956, 5997, 6037, 6078, 6119, 6159, 6200, + 6241, 6282, 6323, 6364, 6404, 6445, 6486, 6527, + 6569, 6610, 6651, 6692, 6733, 6774, 6816, 6857, + 6899, 6940, 6981, 7023, 7064, 7106, 7148, 7189, + 7231, 7272, 7314, 7356, 7398, 7440, 7481, 7523, + 7565, 7607, 7649, 7691, 7733, 7775, 7818, 7860, + 7902, 7944, 7986, 8029, 8071, 8113, 8156, 8198, + 8240, 8283, 8325, 8368, 8410, 8453, 8496, 8538, + 8581, 8624, 8666, 8709, 8752, 8795, 8837, 8880, + 8923, 8966, 9009, 9052, 9095, 9138, 9181, 9224, + 9267, 9310, 9353, 9396, 9440, 9483, 9526, 9569, + 9613, 9656, 9699, 9743, 9786, 9829, 9873, 9916, + 9960, 10003, 10046, 10090, 10133, 10177, 10221, 10264, + 10308, 10351, 10395, 10439, 10482, 10526, 10570, 10614, + 10657, 10701, 10745, 10789, 10832, 10876, 10920, 10964, + 11008, 11052, 11096, 11140, 11184, 11228, 11272, 11316, + 11360, 11404, 11448, 11492, 11536, 11580, 11624, 11668, + 11712, 11756, 11800, 11844, 11889, 11933, 11977, 12021, + 12065, 12110, 12154, 12198, 12242, 12286, 12331, 12375, + 12419, 12464, 12508, 12552, 12596, 12641, 12685, 12729, + 12774, 12818, 12862, 12907, 12951, 12995, 13040, 13084, + 13129, 13173, 13217, 13262, 13306, 13351, 13395, 13439, + 13484, 13528, 13573, 13617, 13661, 13706, 13750, 13795, + 13839, 13883, 13928, 13972, 14017, 14061, 14106, 
14150, + 14194, 14239, 14283, 14328, 14372, 14417, 14461, 14505, + 14550, 14594, 14639, 14683, 14727, 14772, 14816, 14861, + 14905, 14949, 14994, 15038, 15082, 15127, 15171, 15215, + 15260, 15304, 15348, 15393, 15437, 15481, 15526, 15570, + 15614, 15658, 15703, 15747, 15791, 15835, 15880, 15924, + 15968, 16012, 16056, 16100, 16145, 16189, 16233, 16277, + 16321, 16365, 16409, 16453, 16497, 16541, 16585, 16629, + 16673, 16717, 16761, 16805, 16849, 16893, 16937, 16981, + 17025, 17069, 17113, 17156, 17200, 17244, 17288, 17332, + 17375, 17419, 17463, 17506, 17550, 17594, 17637, 17681, + 17725, 17768, 17812, 17855, 17899, 17942, 17986, 18029, + 18072, 18116, 18159, 18203, 18246, 18289, 18332, 18376, + 18419, 18462, 18505, 18549, 18592, 18635, 18678, 18721, + 18764, 18807, 18850, 18893, 18936, 18979, 19022, 19064, + 19107, 19150, 19193, 19236, 19278, 19321, 19364, 19406, + 19449, 19491, 19534, 19576, 19619, 19661, 19704, 19746, + 19788, 19831, 19873, 19915, 19957, 20000, 20042, 20084, + 20126, 20168, 20210, 20252, 20294, 20336, 20378, 20420, + 20461, 20503, 20545, 20587, 20628, 20670, 20712, 20753, + 20795, 20836, 20878, 20919, 20960, 21002, 21043, 21084, + 21125, 21167, 21208, 21249, 21290, 21331, 21372, 21413, + 21454, 21495, 21535, 21576, 21617, 21658, 21698, 21739, + 21779, 21820, 21860, 21901, 21941, 21982, 22022, 22062, + 22102, 22142, 22183, 22223, 22263, 22303, 22343, 22383, + 22422, 22462, 22502, 22542, 22581, 22621, 22660, 22700, + 22739, 22779, 22818, 22858, 22897, 22936, 22975, 23014, + 23053, 23092, 23131, 23170, 23209, 23248, 23287, 23326, + 23364, 23403, 23441, 23480, 23518, 23557, 23595, 23633, + 23672, 23710, 23748, 23786, 23824, 23862, 23900, 23938, + 23976, 24013, 24051, 24089, 24126, 24164, 24201, 24239, + 24276, 24313, 24351, 24388, 24425, 24462, 24499, 24536, + 24573, 24610, 24647, 24683, 24720, 24756, 24793, 24830, + 24866, 24902, 24939, 24975, 25011, 25047, 25083, 25119, + 25155, 25191, 25227, 25262, 25298, 25334, 25369, 25405, + 25440, 25476, 
25511, 25546, 25581, 25616, 25651, 25686, + 25721, 25756, 25791, 25826, 25860, 25895, 25929, 25964, + 25998, 26033, 26067, 26101, 26135, 26169, 26203, 26237, + 26271, 26305, 26338, 26372, 26405, 26439, 26472, 26506, + 26539, 26572, 26605, 26638, 26671, 26704, 26737, 26770, + 26803, 26835, 26868, 26900, 26933, 26965, 26997, 27030, + 27062, 27094, 27126, 27158, 27189, 27221, 27253, 27285, + 27316, 27348, 27379, 27410, 27441, 27473, 27504, 27535, + 27566, 27597, 27627, 27658, 27689, 27719, 27750, 27780, + 27810, 27841, 27871, 27901, 27931, 27961, 27991, 28020, + 28050, 28080, 28109, 28139, 28168, 28197, 28227, 28256, + 28285, 28314, 28343, 28371, 28400, 28429, 28457, 28486, + 28514, 28543, 28571, 28599, 28627, 28655, 28683, 28711, + 28738, 28766, 28794, 28821, 28849, 28876, 28903, 28930, + 28957, 28984, 29011, 29038, 29065, 29092, 29118, 29145, + 29171, 29197, 29223, 29250, 29276, 29302, 29327, 29353, + 29379, 29405, 29430, 29456, 29481, 29506, 29531, 29556, + 29581, 29606, 29631, 29656, 29681, 29705, 29730, 29754, + 29778, 29802, 29827, 29851, 29874, 29898, 29922, 29946, + 29969, 29993, 30016, 30039, 30063, 30086, 30109, 30132, + 30155, 30177, 30200, 30223, 30245, 30267, 30290, 30312, + 30334, 30356, 30378, 30400, 30422, 30443, 30465, 30486, + 30508, 30529, 30550, 30571, 30592, 30613, 30634, 30654, + 30675, 30696, 30716, 30736, 30756, 30777, 30797, 30817, + 30836, 30856, 30876, 30895, 30915, 30934, 30953, 30973, + 30992, 31011, 31030, 31048, 31067, 31086, 31104, 31123, + 31141, 31159, 31177, 31195, 31213, 31231, 31249, 31266, + 31284, 31301, 31318, 31336, 31353, 31370, 31387, 31404, + 31420, 31437, 31453, 31470, 31486, 31502, 31519, 31535, + 31551, 31566, 31582, 31598, 31613, 31629, 31644, 31659, + 31674, 31689, 31704, 31719, 31734, 31749, 31763, 31777, + 31792, 31806, 31820, 31834, 31848, 31862, 31876, 31889, + 31903, 31916, 31929, 31943, 31956, 31969, 31982, 31994, + 32007, 32020, 32032, 32045, 32057, 32069, 32081, 32093, + 32105, 32117, 32128, 32140, 32151, 32163, 
32174, 32185, + 32196, 32207, 32218, 32229, 32239, 32250, 32260, 32270, + 32281, 32291, 32301, 32311, 32320, 32330, 32340, 32349, + 32358, 32368, 32377, 32386, 32395, 32404, 32412, 32421, + 32430, 32438, 32446, 32455, 32463, 32471, 32479, 32486, + 32494, 32502, 32509, 32516, 32524, 32531, 32538, 32545, + 32552, 32558, 32565, 32572, 32578, 32584, 32590, 32597, + 32603, 32608, 32614, 32620, 32625, 32631, 32636, 32642, + 32647, 32652, 32657, 32661, 32666, 32671, 32675, 32680, + 32684, 32688, 32692, 32696, 32700, 32704, 32707, 32711, + 32714, 32718, 32721, 32724, 32727, 32730, 32733, 32735, + 32738, 32740, 32743, 32745, 32747, 32749, 32751, 32753, + 32755, 32756, 32758, 32759, 32760, 32762, 32763, 32764, + 32764, 32765, 32766, 32766, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32766, 32766, 32765, 32764, + 32764, 32763, 32762, 32760, 32759, 32758, 32756, 32755, + 32753, 32751, 32749, 32747, 32745, 32743, 32740, 32738, + 32735, 32733, 32730, 32727, 32724, 32721, 32718, 32714, + 32711, 32707, 32704, 32700, 32696, 32692, 32688, 32684, + 32680, 32675, 32671, 32666, 32661, 32657, 32652, 32647, + 32642, 32636, 32631, 32625, 32620, 32614, 32608, 32603, + 32597, 32590, 32584, 32578, 32572, 32565, 32558, 32552, + 32545, 32538, 32531, 32524, 32516, 32509, 32502, 32494, + 32486, 32479, 32471, 32463, 32455, 32446, 32438, 32430, + 32421, 32412, 32404, 32395, 32386, 32377, 32368, 32358, + 32349, 32340, 32330, 32320, 32311, 32301, 32291, 32281, + 32270, 32260, 32250, 32239, 32229, 32218, 32207, 32196, + 32185, 32174, 32163, 32151, 32140, 32128, 32117, 32105, + 32093, 32081, 32069, 32057, 32045, 32032, 32020, 32007, + 31994, 31982, 31969, 31956, 31943, 31929, 31916, 31903, + 31889, 31876, 31862, 31848, 31834, 31820, 31806, 31792, + 31777, 31763, 31749, 31734, 31719, 31704, 31689, 31674, + 31659, 31644, 31629, 31613, 31598, 31582, 31566, 31551, + 31535, 31519, 31502, 31486, 31470, 31453, 31437, 31420, + 31404, 31387, 31370, 31353, 31336, 31318, 31301, 31284, + 31266, 
31249, 31231, 31213, 31195, 31177, 31159, 31141, + 31123, 31104, 31086, 31067, 31048, 31030, 31011, 30992, + 30973, 30953, 30934, 30915, 30895, 30876, 30856, 30836, + 30817, 30797, 30777, 30756, 30736, 30716, 30696, 30675, + 30654, 30634, 30613, 30592, 30571, 30550, 30529, 30508, + 30486, 30465, 30443, 30422, 30400, 30378, 30356, 30334, + 30312, 30290, 30267, 30245, 30223, 30200, 30177, 30155, + 30132, 30109, 30086, 30063, 30039, 30016, 29993, 29969, + 29946, 29922, 29898, 29874, 29851, 29827, 29802, 29778, + 29754, 29730, 29705, 29681, 29656, 29631, 29606, 29581, + 29556, 29531, 29506, 29481, 29456, 29430, 29405, 29379, + 29353, 29327, 29302, 29276, 29250, 29223, 29197, 29171, + 29145, 29118, 29092, 29065, 29038, 29011, 28984, 28957, + 28930, 28903, 28876, 28849, 28821, 28794, 28766, 28738, + 28711, 28683, 28655, 28627, 28599, 28571, 28543, 28514, + 28486, 28457, 28429, 28400, 28371, 28343, 28314, 28285, + 28256, 28227, 28197, 28168, 28139, 28109, 28080, 28050, + 28020, 27991, 27961, 27931, 27901, 27871, 27841, 27810, + 27780, 27750, 27719, 27689, 27658, 27627, 27597, 27566, + 27535, 27504, 27473, 27441, 27410, 27379, 27348, 27316, + 27285, 27253, 27221, 27189, 27158, 27126, 27094, 27062, + 27030, 26997, 26965, 26933, 26900, 26868, 26835, 26803, + 26770, 26737, 26704, 26671, 26638, 26605, 26572, 26539, + 26506, 26472, 26439, 26405, 26372, 26338, 26305, 26271, + 26237, 26203, 26169, 26135, 26101, 26067, 26033, 25998, + 25964, 25929, 25895, 25860, 25826, 25791, 25756, 25721, + 25686, 25651, 25616, 25581, 25546, 25511, 25476, 25440, + 25405, 25369, 25334, 25298, 25262, 25227, 25191, 25155, + 25119, 25083, 25047, 25011, 24975, 24939, 24902, 24866, + 24830, 24793, 24756, 24720, 24683, 24647, 24610, 24573, + 24536, 24499, 24462, 24425, 24388, 24351, 24313, 24276, + 24239, 24201, 24164, 24126, 24089, 24051, 24013, 23976, + 23938, 23900, 23862, 23824, 23786, 23748, 23710, 23672, + 23633, 23595, 23557, 23518, 23480, 23441, 23403, 23364, + 23326, 23287, 23248, 23209, 23170, 
23131, 23092, 23053, + 23014, 22975, 22936, 22897, 22858, 22818, 22779, 22739, + 22700, 22660, 22621, 22581, 22542, 22502, 22462, 22422, + 22383, 22343, 22303, 22263, 22223, 22183, 22142, 22102, + 22062, 22022, 21982, 21941, 21901, 21860, 21820, 21779, + 21739, 21698, 21658, 21617, 21576, 21535, 21495, 21454, + 21413, 21372, 21331, 21290, 21249, 21208, 21167, 21125, + 21084, 21043, 21002, 20960, 20919, 20878, 20836, 20795, + 20753, 20712, 20670, 20628, 20587, 20545, 20503, 20461, + 20420, 20378, 20336, 20294, 20252, 20210, 20168, 20126, + 20084, 20042, 20000, 19957, 19915, 19873, 19831, 19788, + 19746, 19704, 19661, 19619, 19576, 19534, 19491, 19449, + 19406, 19364, 19321, 19278, 19236, 19193, 19150, 19107, + 19064, 19022, 18979, 18936, 18893, 18850, 18807, 18764, + 18721, 18678, 18635, 18592, 18549, 18505, 18462, 18419, + 18376, 18332, 18289, 18246, 18203, 18159, 18116, 18072, + 18029, 17986, 17942, 17899, 17855, 17812, 17768, 17725, + 17681, 17637, 17594, 17550, 17506, 17463, 17419, 17375, + 17332, 17288, 17244, 17200, 17156, 17113, 17069, 17025, + 16981, 16937, 16893, 16849, 16805, 16761, 16717, 16673, + 16629, 16585, 16541, 16497, 16453, 16409, 16365, 16321, + 16277, 16233, 16189, 16145, 16100, 16056, 16012, 15968, + 15924, 15880, 15835, 15791, 15747, 15703, 15658, 15614, + 15570, 15526, 15481, 15437, 15393, 15348, 15304, 15260, + 15215, 15171, 15127, 15082, 15038, 14994, 14949, 14905, + 14861, 14816, 14772, 14727, 14683, 14639, 14594, 14550, + 14505, 14461, 14417, 14372, 14328, 14283, 14239, 14194, + 14150, 14106, 14061, 14017, 13972, 13928, 13883, 13839, + 13795, 13750, 13706, 13661, 13617, 13573, 13528, 13484, + 13439, 13395, 13351, 13306, 13262, 13217, 13173, 13129, + 13084, 13040, 12995, 12951, 12907, 12862, 12818, 12774, + 12729, 12685, 12641, 12596, 12552, 12508, 12464, 12419, + 12375, 12331, 12286, 12242, 12198, 12154, 12110, 12065, + 12021, 11977, 11933, 11889, 11844, 11800, 11756, 11712, + 11668, 11624, 11580, 11536, 11492, 11448, 11404, 11360, + 
11316, 11272, 11228, 11184, 11140, 11096, 11052, 11008, + 10964, 10920, 10876, 10832, 10789, 10745, 10701, 10657, + 10614, 10570, 10526, 10482, 10439, 10395, 10351, 10308, + 10264, 10221, 10177, 10133, 10090, 10046, 10003, 9960, + 9916, 9873, 9829, 9786, 9743, 9699, 9656, 9613, + 9569, 9526, 9483, 9440, 9396, 9353, 9310, 9267, + 9224, 9181, 9138, 9095, 9052, 9009, 8966, 8923, + 8880, 8837, 8795, 8752, 8709, 8666, 8624, 8581, + 8538, 8496, 8453, 8410, 8368, 8325, 8283, 8240, + 8198, 8156, 8113, 8071, 8029, 7986, 7944, 7902, + 7860, 7818, 7775, 7733, 7691, 7649, 7607, 7565, + 7523, 7481, 7440, 7398, 7356, 7314, 7272, 7231, + 7189, 7148, 7106, 7064, 7023, 6981, 6940, 6899, + 6857, 6816, 6774, 6733, 6692, 6651, 6610, 6569, + 6527, 6486, 6445, 6404, 6364, 6323, 6282, 6241, + 6200, 6159, 6119, 6078, 6037, 5997, 5956, 5916, + 5875, 5835, 5795, 5754, 5714, 5674, 5634, 5593, + 5553, 5513, 5473, 5433, 5393, 5353, 5313, 5274, + 5234, 5194, 5154, 5115, 5075, 5036, 4996, 4957, + 4917, 4878, 4839, 4799, 4760, 4721, 4682, 4643, + 4603, 4564, 4526, 4487, 4448, 4409, 4370, 4331, + 4293, 4254, 4216, 4177, 4138, 4100, 4062, 4023, + 3985, 3947, 3909, 3870, 3832, 3794, 3756, 3718, + 3681, 3643, 3605, 3567, 3529, 3492, 3454, 3417, + 3379, 3342, 3304, 3267, 3230, 3193, 3155, 3118, + 3081, 3044, 3007, 2970, 2934, 2897, 2860, 2823, + 2787, 2750, 2714, 2677, 2641, 2604, 2568, 2532, + 2496, 2460, 2424, 2388, 2352, 2316, 2280, 2244, + 2208, 2173, 2137, 2101, 2066, 2031, 1995, 1960, + 1925, 1889, 1854, 1819, 1784, 1749, 1714, 1679, + 1644, 1610, 1575, 1540, 1506, 1471, 1437, 1402, + 1368, 1334, 1300, 1266, 1231, 1197, 1163, 1130, + 1096, 1062, 1028, 995, 961, 927, 894, 861, + 827, 794, 761, 728, 694, 661, 628, 596, + 563, 530, 497, 465, 432, 399, 367, 335, + 302, 270, 238, 206, 174, 142, 110, 78, + 46, 14, -17, -49, -81, -112, -143, -175, + -206, -237, -268, -300, -331, -362, -392, -423, + -454, -485, -515, -546, -576, -607, -637, -667, + -698, -728, -758, -788, -818, -848, -877, -907, + -937, 
-966, -996, -1025, -1055, -1084, -1113, -1142, + -1172, -1201, -1230, -1258, -1287, -1316, -1345, -1373, + -1402, -1430, -1459, -1487, -1515, -1544, -1572, -1600, + -1628, -1656, -1683, -1711, -1739, -1767, -1794, -1822, + -1849, -1876, -1904, -1931, -1958, -1985, -2012, -2039, + -2066, -2092, -2119, -2146, -2172, -2199, -2225, -2251, + -2278, -2304, -2330, -2356, -2382, -2408, -2434, -2459, + -2485, -2511, -2536, -2562, -2587, -2612, -2638, -2663, + -2688, -2713, -2738, -2763, -2787, -2812, -2837, -2861, + -2886, -2910, -2934, -2959, -2983, -3007, -3031, -3055, + -3079, -3103, -3126, -3150, -3174, -3197, -3221, -3244, + -3267, -3290, -3314, -3337, -3360, -3383, -3405, -3428, + -3451, -3473, -3496, -3518, -3541, -3563, -3585, -3607, + -3629, -3651, -3673, -3695, -3717, -3739, -3760, -3782, + -3803, -3825, -3846, -3867, -3888, -3909, -3930, -3951, + -3972, -3993, -4014, -4034, -4055, -4075, -4096, -4116, + -4136, -4156, -4176, -4196, -4216, -4236, -4256, -4276, + -4295, -4315, -4334, -4354, -4373, -4392, -4411, -4430, + -4449, -4468, -4487, -4506, -4524, -4543, -4562, -4580, + -4598, -4617, -4635, -4653, -4671, -4689, -4707, -4725, + -4743, -4760, -4778, -4795, -4813, -4830, -4847, -4865, + -4882, -4899, -4916, -4933, -4950, -4966, -4983, -4999, + -5016, -5032, -5049, -5065, -5081, -5097, -5113, -5129, + -5145, -5161, -5177, -5192, -5208, -5223, -5239, -5254, + -5269, -5285, -5300, -5315, -5330, -5345, -5359, -5374, + -5389, -5403, -5418, -5432, -5447, -5461, -5475, -5489, + -5503, -5517, -5531, -5545, -5558, -5572, -5585, -5599, + -5612, -5626, -5639, -5652, -5665, -5678, -5691, -5704, + -5717, -5729, -5742, -5755, -5767, -5779, -5792, -5804, + -5816, -5828, -5840, -5852, -5864, -5876, -5887, -5899, + -5911, -5922, -5933, -5945, -5956, -5967, -5978, -5989, + -6000, -6011, -6022, -6032, -6043, -6054, -6064, -6074, + -6085, -6095, -6105, -6115, -6125, -6135, -6145, -6155, + -6164, -6174, -6184, -6193, -6202, -6212, -6221, -6230, + -6239, -6248, -6257, -6266, -6275, 
-6284, -6292, -6301, + -6309, -6318, -6326, -6334, -6343, -6351, -6359, -6367, + -6374, -6382, -6390, -6398, -6405, -6413, -6420, -6428, + -6435, -6442, -6449, -6456, -6463, -6470, -6477, -6484, + -6491, -6497, -6504, -6510, -6517, -6523, -6529, -6535, + -6541, -6547, -6553, -6559, -6565, -6571, -6576, -6582, + -6588, -6593, -6598, -6604, -6609, -6614, -6619, -6624, + -6629, -6634, -6639, -6643, -6648, -6653, -6657, -6662, + -6666, -6670, -6674, -6679, -6683, -6687, -6691, -6694, + -6698, -6702, -6706, -6709, -6713, -6716, -6719, -6723, + -6726, -6729, -6732, -6735, -6738, -6741, -6744, -6747, + -6749, -6752, -6754, -6757, -6759, -6761, -6764, -6766, + -6768, -6770, -6772, -6774, -6776, -6777, -6779, -6781, + -6782, -6784, -6785, -6786, -6788, -6789, -6790, -6791, + -6792, -6793, -6794, -6795, -6795, -6796, -6797, -6797, + -6798, -6798, -6798, -6799, -6799, -6799, -6799, -6799, + -6799, -6799, -6799, -6798, -6798, -6798, -6797, -6797, + -6796, -6795, -6795, -6794, -6793, -6792, -6791, -6790, + -6789, -6788, -6787, -6785, -6784, -6783, -6781, -6779, + -6778, -6776, -6774, -6773, -6771, -6769, -6767, -6765, + -6763, -6760, -6758, -6756, -6753, -6751, -6748, -6746, + -6743, -6741, -6738, -6735, -6732, -6729, -6726, -6723, + -6720, -6717, -6714, -6710, -6707, -6703, -6700, -6696, + -6693, -6689, -6685, -6682, -6678, -6674, -6670, -6666, + -6662, -6658, -6654, -6649, -6645, -6641, -6636, -6632, + -6627, -6623, -6618, -6613, -6608, -6604, -6599, -6594, + -6589, -6584, -6579, -6573, -6568, -6563, -6558, -6552, + -6547, -6541, -6536, -6530, -6524, -6519, -6513, -6507, + -6501, -6495, -6489, -6483, -6477, -6471, -6464, -6458, + -6452, -6445, -6439, -6433, -6426, -6419, -6413, -6406, + -6399, -6392, -6386, -6379, -6372, -6365, -6358, -6350, + -6343, -6336, -6329, -6321, -6314, -6307, -6299, -6292, + -6284, -6276, -6269, -6261, -6253, -6245, -6238, -6230, + -6222, -6214, -6205, -6197, -6189, -6181, -6173, -6164, + -6156, -6148, -6139, -6131, -6122, -6113, -6105, -6096, + 
-6087, -6078, -6070, -6061, -6052, -6043, -6034, -6025, + -6016, -6006, -5997, -5988, -5979, -5969, -5960, -5950, + -5941, -5931, -5922, -5912, -5903, -5893, -5883, -5873, + -5863, -5854, -5844, -5834, -5824, -5814, -5804, -5793, + -5783, -5773, -5763, -5752, -5742, -5732, -5721, -5711, + -5700, -5690, -5679, -5668, -5658, -5647, -5636, -5626, + -5615, -5604, -5593, -5582, -5571, -5560, -5549, -5538, + -5527, -5515, -5504, -5493, -5482, -5470, -5459, -5447, + -5436, -5424, -5413, -5401, -5390, -5378, -5366, -5355, + -5343, -5331, -5319, -5308, -5296, -5284, -5272, -5260, + -5248, -5236, -5224, -5211, -5199, -5187, -5175, -5162, + -5150, -5138, -5125, -5113, -5101, -5088, -5076, -5063, + -5051, -5038, -5025, -5013, -5000, -4987, -4974, -4962, + -4949, -4936, -4923, -4910, -4897, -4884, -4871, -4858, + -4845, -4832, -4819, -4806, -4793, -4779, -4766, -4753, + -4740, -4726, -4713, -4699, -4686, -4673, -4659, -4646, + -4632, -4619, -4605, -4591, -4578, -4564, -4550, -4537, + -4523, -4509, -4495, -4482, -4468, -4454, -4440, -4426, + -4412, -4398, -4384, -4370, -4356, -4342, -4328, -4314, + -4300, -4286, -4271, -4257, -4243, -4229, -4215, -4200, + -4186, -4172, -4157, -4143, -4128, -4114, -4100, -4085, + -4071, -4056, -4042, -4027, -4012, -3998, -3983, -3969, + -3954, -3939, -3925, -3910, -3895, -3881, -3866, -3851, + -3836, -3821, -3807, -3792, -3777, -3762, -3747, -3732, + -3717, -3702, -3687, -3672, -3657, -3642, -3627, -3612, + -3597, -3582, -3567, -3552, -3537, -3521, -3506, -3491, + -3476, -3461, -3446, -3430, -3415, -3400, -3385, -3369, + -3354, -3339, -3323, -3308, -3293, -3277, -3262, -3246, + -3231, -3216, -3200, -3185, -3169, -3154, -3138, -3123, + -3107, -3092, -3076, -3061, -3045, -3030, -3014, -2999, + -2983, -2968, -2952, -2936, -2921, -2905, -2890, -2874, + -2858, -2843, -2827, -2811, -2796, -2780, -2764, -2749, + -2733, -2717, -2701, -2686, -2670, -2654, -2639, -2623, + -2607, -2591, -2576, -2560, -2544, -2528, -2512, -2497, + -2481, -2465, -2449, -2433, 
-2418, -2402, -2386, -2370, + -2354, -2339, -2323, -2307, -2291, -2275, -2259, -2244, + -2228, -2212, -2196, -2180, -2164, -2148, -2133, -2117, + -2101, -2085, -2069, -2053, -2037, -2022, -2006, -1990, + -1974, -1958, -1942, -1926, -1911, -1895, -1879, -1863, + -1847, -1831, -1815, -1800, -1784, -1768, -1752, -1736, + -1720, -1705, -1689, -1673, -1657, -1641, -1625, -1610, + -1594, -1578, -1562, -1546, -1531, -1515, -1499, -1483, + -1467, -1452, -1436, -1420, -1404, -1389, -1373, -1357, + -1341, -1326, -1310, -1294, -1278, -1263, -1247, -1231, + -1216, -1200, -1184, -1169, -1153, -1137, -1122, -1106, + -1091, -1075, -1059, -1044, -1028, -1013, -997, -981, + -966, -950, -935, -919, -904, -888, -873, -857, + -842, -826, -811, -795, -780, -765, -749, -734, + -718, -703, -688, -672, -657, -642, -626, -611, + -596, -580, -565, -550, -535, -519, -504, -489, + -474, -459, -443, -428, -413, -398, -383, -368, + -353, -338, -323, -308, -293, -278, -263, -248, + -233, -218, -203, -188, -173, -158, -143, -128, + -114, -99, -84, -69, -54, -40, -25, -10, + 5, 19, 34, 49, 63, 78, 92, 107, + 122, 136, 151, 165, 180, 194, 209, 223, + 237, 252, 266, 280, 295, 309, 323, 338, + 352, 366, 380, 395, 409, 423, 437, 451, + 465, 479, 493, 507, 521, 535, 549, 563, + 577, 591, 605, 619, 633, 647, 660, 674, + 688, 702, 715, 729, 743, 756, 770, 783, + 797, 811, 824, 838, 851, 864, 878, 891, + 905, 918, 931, 945, 958, 971, 984, 998, + 1011, 1024, 1037, 1050, 1063, 1076, 1089, 1102, + 1115, 1128, 1141, 1154, 1167, 1180, 1193, 1205, + 1218, 1231, 1244, 1256, 1269, 1282, 1294, 1307, + 1319, 1332, 1344, 1357, 1369, 1382, 1394, 1407, + 1419, 1431, 1443, 1456, 1468, 1480, 1492, 1504, + 1517, 1529, 1541, 1553, 1565, 1577, 1589, 1601, + 1612, 1624, 1636, 1648, 1660, 1671, 1683, 1695, + 1707, 1718, 1730, 1741, 1753, 1764, 1776, 1787, + 1799, 1810, 1821, 1833, 1844, 1855, 1867, 1878, + 1889, 1900, 1911, 1922, 1933, 1944, 1955, 1966, + 1977, 1988, 1999, 2010, 2021, 2031, 2042, 2053, + 2063, 2074, 2085, 
2095, 2106, 2116, 2127, 2137, + 2148, 2158, 2168, 2179, 2189, 2199, 2209, 2220, + 2230, 2240, 2250, 2260, 2270, 2280, 2290, 2300, + 2310, 2320, 2330, 2339, 2349, 2359, 2369, 2378, + 2388, 2397, 2407, 2417, 2426, 2435, 2445, 2454, + 2464, 2473, 2482, 2491, 2501, 2510, 2519, 2528, + 2537, 2546, 2555, 2564, 2573, 2582, 2591, 2600, + 2609, 2617, 2626, 2635, 2644, 2652, 2661, 2669, + 2678, 2686, 2695, 2703, 2712, 2720, 2728, 2737, + 2745, 2753, 2761, 2769, 2777, 2785, 2793, 2801, + 2809, 2817, 2825, 2833, 2841, 2849, 2856, 2864, + 2872, 2879, 2887, 2895, 2902, 2910, 2917, 2925, + 2932, 2939, 2947, 2954, 2961, 2968, 2975, 2983, + 2990, 2997, 3004, 3011, 3018, 3025, 3031, 3038, + 3045, 3052, 3059, 3065, 3072, 3079, 3085, 3092, + 3098, 3105, 3111, 3117, 3124, 3130, 3136, 3143, + 3149, 3155, 3161, 3167, 3173, 3179, 3185, 3191, + 3197, 3203, 3209, 3215, 3221, 3226, 3232, 3238, + 3243, 3249, 3254, 3260, 3265, 3271, 3276, 3282, + 3287, 3292, 3297, 3303, 3308, 3313, 3318, 3323, + 3328, 3333, 3338, 3343, 3348, 3353, 3357, 3362, + 3367, 3372, 3376, 3381, 3385, 3390, 3395, 3399, + 3403, 3408, 3412, 3416, 3421, 3425, 3429, 3433, + 3437, 3442, 3446, 3450, 3454, 3458, 3461, 3465, + 3469, 3473, 3477, 3480, 3484, 3488, 3491, 3495, + 3498, 3502, 3505, 3509, 3512, 3515, 3519, 3522, + 3525, 3528, 3532, 3535, 3538, 3541, 3544, 3547, + 3550, 3553, 3556, 3558, 3561, 3564, 3567, 3569, + 3572, 3575, 3577, 3580, 3582, 3585, 3587, 3589, + 3592, 3594, 3596, 3598, 3601, 3603, 3605, 3607, + 3609, 3611, 3613, 3615, 3617, 3619, 3621, 3622, + 3624, 3626, 3627, 3629, 3631, 3632, 3634, 3635, + 3637, 3638, 3640, 3641, 3642, 3644, 3645, 3646, + 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, + 3655, 3656, 3657, 3658, 3658, 3659, 3660, 3660, + 3661, 3662, 3662, 3663, 3663, 3663, 3664, 3664, + 3664, 3665, 3665, 3665, 3665, 3665, 3666, 3666, + 3666, 3666, 3666, 3665, 3665, 3665, 3665, 3665, + 3665, 3664, 3664, 3664, 3663, 3663, 3662, 3662, + 3661, 3661, 3660, 3659, 3659, 3658, 3657, 3657, + 3656, 3655, 3654, 
3653, 3652, 3651, 3650, 3649, + 3648, 3647, 3646, 3645, 3643, 3642, 3641, 3640, + 3638, 3637, 3635, 3634, 3633, 3631, 3630, 3628, + 3626, 3625, 3623, 3621, 3620, 3618, 3616, 3614, + 3612, 3610, 3608, 3606, 3604, 3602, 3600, 3598, + 3596, 3594, 3592, 3589, 3587, 3585, 3583, 3580, + 3578, 3575, 3573, 3570, 3568, 3565, 3563, 3560, + 3557, 3555, 3552, 3549, 3547, 3544, 3541, 3538, + 3535, 3532, 3529, 3526, 3523, 3520, 3517, 3514, + 3511, 3508, 3505, 3501, 3498, 3495, 3491, 3488, + 3485, 3481, 3478, 3474, 3471, 3467, 3464, 3460, + 3457, 3453, 3449, 3446, 3442, 3438, 3434, 3430, + 3427, 3423, 3419, 3415, 3411, 3407, 3403, 3399, + 3395, 3391, 3387, 3382, 3378, 3374, 3370, 3365, + 3361, 3357, 3352, 3348, 3344, 3339, 3335, 3330, + 3326, 3321, 3317, 3312, 3307, 3303, 3298, 3293, + 3289, 3284, 3279, 3274, 3269, 3264, 3260, 3255, + 3250, 3245, 3240, 3235, 3230, 3224, 3219, 3214, + 3209, 3204, 3199, 3193, 3188, 3183, 3178, 3172, + 3167, 3162, 3156, 3151, 3145, 3140, 3134, 3129, + 3123, 3118, 3112, 3106, 3101, 3095, 3089, 3084, + 3078, 3072, 3066, 3060, 3055, 3049, 3043, 3037, + 3031, 3025, 3019, 3013, 3007, 3001, 2995, 2989, + 2983, 2977, 2970, 2964, 2958, 2952, 2945, 2939, + 2933, 2927, 2920, 2914, 2908, 2901, 2895, 2888, + 2882, 2875, 2869, 2862, 2856, 2849, 2843, 2836, + 2829, 2823, 2816, 2809, 2803, 2796, 2789, 2782, + 2776, 2769, 2762, 2755, 2748, 2741, 2734, 2727, + 2720, 2714, 2707, 2700, 2692, 2685, 2678, 2671, + 2664, 2657, 2650, 2643, 2636, 2628, 2621, 2614, + 2607, 2599, 2592, 2585, 2578, 2570, 2563, 2556, + 2548, 2541, 2533, 2526, 2518, 2511, 2504, 2496, + 2489, 2481, 2473, 2466, 2458, 2451, 2443, 2435, + 2428, 2420, 2412, 2405, 2397, 2389, 2382, 2374, + 2366, 2358, 2351, 2343, 2335, 2327, 2319, 2311, + 2303, 2296, 2288, 2280, 2272, 2264, 2256, 2248, + 2240, 2232, 2224, 2216, 2208, 2200, 2192, 2183, + 2175, 2167, 2159, 2151, 2143, 2135, 2127, 2118, + 2110, 2102, 2094, 2085, 2077, 2069, 2061, 2052, + 2044, 2036, 2027, 2019, 2011, 2002, 1994, 1986, + 1977, 1969, 1961, 
1952, 1944, 1935, 1927, 1918, + 1910, 1901, 1893, 1884, 1876, 1867, 1859, 1850, + 1842, 1833, 1825, 1816, 1808, 1799, 1790, 1782, + 1773, 1765, 1756, 1747, 1739, 1730, 1721, 1713, + 1704, 1695, 1687, 1678, 1669, 1660, 1652, 1643, + 1634, 1625, 1617, 1608, 1599, 1590, 1582, 1573, + 1564, 1555, 1546, 1538, 1529, 1520, 1511, 1502, + 1493, 1485, 1476, 1467, 1458, 1449, 1440, 1431, + 1423, 1414, 1405, 1396, 1387, 1378, 1369, 1360, + 1351, 1342, 1333, 1324, 1316, 1307, 1298, 1289, + 1280, 1271, 1262, 1253, 1244, 1235, 1226, 1217, + 1208, 1199, 1190, 1181, 1172, 1163, 1154, 1145, + 1136, 1127, 1118, 1109, 1100, 1091, 1082, 1073, + 1064, 1055, 1046, 1037, 1028, 1019, 1010, 1001, + 992, 983, 974, 965, 956, 947, 938, 929, + 920, 910, 901, 892, 883, 874, 865, 856, + 847, 838, 829, 820, 811, 802, 793, 784, + 775, 766, 757, 748, 739, 730, 721, 712, + 703, 694, 685, 676, 667, 658, 649, 640, + 631, 622, 613, 604, 595, 586, 577, 568, + 559, 550, 541, 532, 523, 514, 505, 496, + 488, 479, 470, 461, 452, 443, 434, 425, + 416, 407, 398, 389, 381, 372, 363, 354, + 345, 336, 327, 318, 310, 301, 292, 283, + 274, 265, 257, 248, 239, 230, 221, 213, + 204, 195, 186, 178, 169, 160, 151, 143, + 134, 125, 116, 108, 99, 90, 82, 73, + 64, 56, 47, 38, 30, 21, 12, 4, + -5, -13, -22, -30, -39, -48, -56, -65, + -73, -82, -90, -99, -107, -116, -124, -133, + -141, -150, -158, -167, -175, -183, -192, -200, + -209, -217, -225, -234, -242, -250, -259, -267, + -275, -284, -292, -300, -309, -317, -325, -333, + -342, -350, -358, -366, -374, -382, -391, -399, + -407, -415, -423, -431, -439, -447, -456, -464, + -472, -480, -488, -496, -504, -512, -520, -528, + -536, -543, -551, -559, -567, -575, -583, -591, + -599, -606, -614, -622, -630, -638, -645, -653, + -661, -669, -676, -684, -692, -699, -707, -715, + -722, -730, -738, -745, -753, -760, -768, -775, + -783, -790, -798, -805, -813, -820, -828, -835, + -842, -850, -857, -865, -872, -879, -887, -894, + -901, -908, -916, -923, -930, -937, -945, -952, + -959, 
-966, -973, -980, -987, -994, -1001, -1009, + -1016, -1023, -1030, -1037, -1044, -1050, -1057, -1064, + -1071, -1078, -1085, -1092, -1099, -1105, -1112, -1119, + -1126, -1133, -1139, -1146, -1153, -1159, -1166, -1173, + -1179, -1186, -1193, -1199, -1206, -1212, -1219, -1225, + -1232, -1238, -1245, -1251, -1258, -1264, -1270, -1277, + -1283, -1289, -1296, -1302, -1308, -1315, -1321, -1327, + -1333, -1339, -1346, -1352, -1358, -1364, -1370, -1376, + -1382, -1388, -1394, -1400, -1406, -1412, -1418, -1424, + -1430, -1436, -1442, -1448, -1454, -1459, -1465, -1471, + -1477, -1482, -1488, -1494, -1500, -1505, -1511, -1516, + -1522, -1528, -1533, -1539, -1544, -1550, -1555, -1561, + -1566, -1572, -1577, -1582, -1588, -1593, -1599, -1604, + -1609, -1614, -1620, -1625, -1630, -1635, -1640, -1646, + -1651, -1656, -1661, -1666, -1671, -1676, -1681, -1686, + -1691, -1696, -1701, -1706, -1711, -1716, -1720, -1725, + -1730, -1735, -1740, -1744, -1749, -1754, -1758, -1763, + -1768, -1772, -1777, -1782, -1786, -1791, -1795, -1800, + -1804, -1809, -1813, -1817, -1822, -1826, -1830, -1835, + -1839, -1843, -1848, -1852, -1856, -1860, -1864, -1869, + -1873, -1877, -1881, -1885, -1889, -1893, -1897, -1901, + -1905, -1909, -1913, -1917, -1921, -1925, -1928, -1932, + -1936, -1940, -1944, -1947, -1951, -1955, -1958, -1962, + -1966, -1969, -1973, -1976, -1980, -1983, -1987, -1990, + -1994, -1997, -2001, -2004, -2007, -2011, -2014, -2017, + -2021, -2024, -2027, -2030, -2033, -2037, -2040, -2043, + -2046, -2049, -2052, -2055, -2058, -2061, -2064, -2067, + -2070, -2073, -2076, -2079, -2081, -2084, -2087, -2090, + -2092, -2095, -2098, -2101, -2103, -2106, -2108, -2111, + -2114, -2116, -2119, -2121, -2124, -2126, -2129, -2131, + -2133, -2136, -2138, -2140, -2143, -2145, -2147, -2150, + -2152, -2154, -2156, -2158, -2160, -2162, -2165, -2167, + -2169, -2171, -2173, -2175, -2177, -2179, -2180, -2182, + -2184, -2186, -2188, -2190, -2191, -2193, -2195, -2197, + -2198, -2200, -2202, -2203, -2205, 
-2206, -2208, -2209, + -2211, -2212, -2214, -2215, -2217, -2218, -2220, -2221, + -2222, -2224, -2225, -2226, -2227, -2229, -2230, -2231, + -2232, -2233, -2234, -2236, -2237, -2238, -2239, -2240, + -2241, -2242, -2243, -2244, -2244, -2245, -2246, -2247, + -2248, -2249, -2249, -2250, -2251, -2252, -2252, -2253, + -2254, -2254, -2255, -2255, -2256, -2256, -2257, -2257, + -2258, -2258, -2259, -2259, -2260, -2260, -2260, -2261, + -2261, -2261, -2261, -2262, -2262, -2262, -2262, -2262, + -2263, -2263, -2263, -2263, -2263, -2263, -2263, -2263, + -2263, -2263, -2263, -2263, -2262, -2262, -2262, -2262, + -2262, -2261, -2261, -2261, -2261, -2260, -2260, -2260, + -2259, -2259, -2259, -2258, -2258, -2257, -2257, -2256, + -2256, -2255, -2254, -2254, -2253, -2253, -2252, -2251, + -2251, -2250, -2249, -2248, -2248, -2247, -2246, -2245, + -2244, -2243, -2242, -2242, -2241, -2240, -2239, -2238, + -2237, -2236, -2235, -2233, -2232, -2231, -2230, -2229, + -2228, -2227, -2225, -2224, -2223, -2222, -2220, -2219, + -2218, -2216, -2215, -2214, -2212, -2211, -2209, -2208, + -2206, -2205, -2203, -2202, -2200, -2199, -2197, -2195, + -2194, -2192, -2190, -2189, -2187, -2185, -2184, -2182, + -2180, -2178, -2176, -2174, -2173, -2171, -2169, -2167, + -2165, -2163, -2161, -2159, -2157, -2155, -2153, -2151, + -2149, -2147, -2145, -2142, -2140, -2138, -2136, -2134, + -2132, -2129, -2127, -2125, -2122, -2120, -2118, -2116, + -2113, -2111, -2108, -2106, -2104, -2101, -2099, -2096, + -2094, -2091, -2089, -2086, -2083, -2081, -2078, -2076, + -2073, -2070, -2068, -2065, -2062, -2060, -2057, -2054, + -2051, -2049, -2046, -2043, -2040, -2037, -2034, -2031, + -2029, -2026, -2023, -2020, -2017, -2014, -2011, -2008, + -2005, -2002, -1999, -1996, -1992, -1989, -1986, -1983, + -1980, -1977, -1974, -1970, -1967, -1964, -1961, -1957, + -1954, -1951, -1948, -1944, -1941, -1938, -1934, -1931, + -1927, -1924, -1921, -1917, -1914, -1910, -1907, -1903, + -1900, -1896, -1893, -1889, -1886, -1882, -1878, -1875, + 
-1871, -1868, -1864, -1860, -1857, -1853, -1849, -1845, + -1842, -1838, -1834, -1830, -1827, -1823, -1819, -1815, + -1811, -1807, -1804, -1800, -1796, -1792, -1788, -1784, + -1780, -1776, -1772, -1768, -1764, -1760, -1756, -1752, + -1748, -1744, -1740, -1736, -1732, -1728, -1724, -1719, + -1715, -1711, -1707, -1703, -1699, -1694, -1690, -1686, + -1682, -1677, -1673, -1669, -1665, -1660, -1656, -1652, + -1647, -1643, -1639, -1634, -1630, -1625, -1621, -1617, + -1612, -1608, -1603, -1599, -1594, -1590, -1586, -1581, + -1577, -1572, -1567, -1563, -1558, -1554, -1549, -1545, + -1540, -1535, -1531, -1526, -1522, -1517, -1512, -1508, + -1503, -1498, -1494, -1489, -1484, -1480, -1475, -1470, + -1465, -1461, -1456, -1451, -1446, -1441, -1437, -1432, + -1427, -1422, -1417, -1413, -1408, -1403, -1398, -1393, + -1388, -1383, -1378, -1374, -1369, -1364, -1359, -1354, + -1349, -1344, -1339, -1334, -1329, -1324, -1319, -1314, + -1309, -1304, -1299, -1294, -1289, -1284, -1279, -1274, + -1269, -1264, -1258, -1253, -1248, -1243, -1238, -1233, + -1228, -1223, -1218, -1212, -1207, -1202, -1197, -1192, + -1187, -1181, -1176, -1171, -1166, -1161, -1155, -1150, + -1145, -1140, -1135, -1129, -1124, -1119, -1114, -1108, + -1103, -1098, -1092, -1087, -1082, -1077, -1071, -1066, + -1061, -1055, -1050, -1045, -1039, -1034, -1029, -1023, + -1018, -1013, -1007, -1002, -997, -991, -986, -980, + -975, -970, -964, -959, -953, -948, -943, -937, + -932, -926, -921, -916, -910, -905, -899, -894, + -888, -883, -877, -872, -867, -861, -856, -850, + -845, -839, -834, -828, -823, -817, -812, -806, + -801, -795, -790, -784, -779, -773, -768, -762, + -757, -751, -746, -740, -735, -729, -724, -718, + -713, -707, -702, -696, -691, -685, -680, -674, + -669, -663, -658, -652, -646, -641, -635, -630, + -624, -619, -613, -608, -602, -597, -591, -585, + -580, -574, -569, -563, -558, -552, -547, -541, + -536, -530, -524, -519, -513, -508, -502, -497, + -491, -486, -480, -475, -469, -464, -458, -452, + -447, -441, 
-436, -430, -425, -419, -414, -408, + -403, -397, -392, -386, -381, -375, -369, -364, + -358, -353, -347, -342, -336, -331, -325, -320, + -314, -309, -303, -298, -292, -287, -281, -276, + -270, -265, -259, -254, -248, -243, -238, -232, + -227, -221, -216, -210, -205, -199, -194, -188, + -183, -178, -172, -167, -161, -156, -150, -145, + -140, -134, -129, -123, -118, -113, -107, -102, + -96, -91, -86, -80, -75, -70, -64, -59, + -53, -48, -43, -37, -32, -27, -21, -16, + -11, -5, 0, 5, 10, 16, 21, 26, + 32, 37, 42, 47, 53, 58, 63, 68, + 74, 79, 84, 89, 95, 100, 105, 110, + 115, 121, 126, 131, 136, 141, 146, 152, + 157, 162, 167, 172, 177, 182, 187, 193, + 198, 203, 208, 213, 218, 223, 228, 233, + 238, 243, 248, 253, 258, 263, 268, 273, + 278, 283, 288, 293, 298, 303, 308, 313, + 318, 323, 328, 333, 338, 343, 347, 352, + 357, 362, 367, 372, 377, 382, 386, 391, + 396, 401, 406, 410, 415, 420, 425, 429, + 434, 439, 444, 448, 453, 458, 463, 467, + 472, 477, 481, 486, 491, 495, 500, 505, + 509, 514, 518, 523, 528, 532, 537, 541, + 546, 550, 555, 559, 564, 568, 573, 577, + 582, 586, 591, 595, 600, 604, 608, 613, + 617, 622, 626, 630, 635, 639, 643, 648, + 652, 656, 661, 665, 669, 674, 678, 682, + 686, 691, 695, 699, 703, 707, 712, 716, + 720, 724, 728, 732, 737, 741, 745, 749, + 753, 757, 761, 765, 769, 773, 777, 781, + 785, 789, 793, 797, 801, 805, 809, 813, + 817, 821, 825, 829, 832, 836, 840, 844, + 848, 852, 855, 859, 863, 867, 871, 874, + 878, 882, 885, 889, 893, 897, 900, 904, + 908, 911, 915, 918, 922, 926, 929, 933, + 936, 940, 943, 947, 950, 954, 957, 961, + 964, 968, 971, 975, 978, 981, 985, 988, + 992, 995, 998, 1002, 1005, 1008, 1012, 1015, + 1018, 1021, 1025, 1028, 1031, 1034, 1038, 1041, + 1044, 1047, 1050, 1053, 1057, 1060, 1063, 1066, + 1069, 1072, 1075, 1078, 1081, 1084, 1087, 1090, + 1093, 1096, 1099, 1102, 1105, 1108, 1111, 1114, + 1117, 1119, 1122, 1125, 1128, 1131, 1134, 1136, + 1139, 1142, 1145, 1148, 1150, 1153, 1156, 1158, + 1161, 1164, 1166, 1169, 
1172, 1174, 1177, 1179, + 1182, 1185, 1187, 1190, 1192, 1195, 1197, 1200, + 1202, 1205, 1207, 1210, 1212, 1214, 1217, 1219, + 1222, 1224, 1226, 1229, 1231, 1233, 1236, 1238, + 1240, 1242, 1245, 1247, 1249, 1251, 1253, 1256, + 1258, 1260, 1262, 1264, 1266, 1268, 1271, 1273, + 1275, 1277, 1279, 1281, 1283, 1285, 1287, 1289, + 1291, 1293, 1295, 1297, 1298, 1300, 1302, 1304, + 1306, 1308, 1310, 1311, 1313, 1315, 1317, 1319, + 1320, 1322, 1324, 1325, 1327, 1329, 1330, 1332, + 1334, 1335, 1337, 1339, 1340, 1342, 1343, 1345, + 1346, 1348, 1349, 1351, 1352, 1354, 1355, 1357, + 1358, 1360, 1361, 1362, 1364, 1365, 1367, 1368, + 1369, 1370, 1372, 1373, 1374, 1376, 1377, 1378, + 1379, 1380, 1382, 1383, 1384, 1385, 1386, 1387, + 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, + 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, + 1405, 1405, 1406, 1407, 1408, 1409, 1410, 1410, + 1411, 1412, 1413, 1414, 1414, 1415, 1416, 1416, + 1417, 1418, 1419, 1419, 1420, 1420, 1421, 1422, + 1422, 1423, 1423, 1424, 1424, 1425, 1425, 1426, + 1426, 1427, 1427, 1428, 1428, 1429, 1429, 1429, + 1430, 1430, 1431, 1431, 1431, 1432, 1432, 1432, + 1432, 1433, 1433, 1433, 1433, 1434, 1434, 1434, + 1434, 1434, 1434, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1434, 1434, 1434, 1434, 1434, 1434, 1433, + 1433, 1433, 1433, 1432, 1432, 1432, 1432, 1431, + 1431, 1431, 1430, 1430, 1429, 1429, 1429, 1428, + 1428, 1427, 1427, 1427, 1426, 1426, 1425, 1425, + 1424, 1424, 1423, 1422, 1422, 1421, 1421, 1420, + 1420, 1419, 1418, 1418, 1417, 1416, 1416, 1415, + 1414, 1413, 1413, 1412, 1411, 1410, 1410, 1409, + 1408, 1407, 1406, 1406, 1405, 1404, 1403, 1402, + 1401, 1400, 1400, 1399, 1398, 1397, 1396, 1395, + 1394, 1393, 1392, 1391, 1390, 1389, 1388, 1387, + 1386, 1384, 1383, 1382, 1381, 1380, 1379, 1378, + 1377, 1375, 1374, 1373, 1372, 1371, 1369, 1368, + 1367, 1366, 1364, 1363, 1362, 1361, 1359, 1358, + 1357, 1355, 1354, 1353, 
1351, 1350, 1348, 1347, + 1346, 1344, 1343, 1341, 1340, 1338, 1337, 1335, + 1334, 1332, 1331, 1329, 1328, 1326, 1325, 1323, + 1322, 1320, 1318, 1317, 1315, 1314, 1312, 1310, + 1309, 1307, 1305, 1304, 1302, 1300, 1299, 1297, + 1295, 1293, 1292, 1290, 1288, 1286, 1284, 1283, + 1281, 1279, 1277, 1275, 1274, 1272, 1270, 1268, + 1266, 1264, 1262, 1260, 1258, 1257, 1255, 1253, + 1251, 1249, 1247, 1245, 1243, 1241, 1239, 1237, + 1235, 1233, 1231, 1229, 1227, 1224, 1222, 1220, + 1218, 1216, 1214, 1212, 1210, 1208, 1205, 1203, + 1201, 1199, 1197, 1195, 1192, 1190, 1188, 1186, + 1183, 1181, 1179, 1177, 1174, 1172, 1170, 1168, + 1165, 1163, 1161, 1158, 1156, 1154, 1151, 1149, + 1147, 1144, 1142, 1140, 1137, 1135, 1132, 1130, + 1128, 1125, 1123, 1120, 1118, 1115, 1113, 1110, + 1108, 1105, 1103, 1100, 1098, 1095, 1093, 1090, + 1088, 1085, 1083, 1080, 1078, 1075, 1073, 1070, + 1067, 1065, 1062, 1060, 1057, 1054, 1052, 1049, + 1046, 1044, 1041, 1038, 1036, 1033, 1030, 1028, + 1025, 1022, 1020, 1017, 1014, 1012, 1009, 1006, + 1003, 1001, 998, 995, 992, 990, 987, 984, + 981, 978, 976, 973, 970, 967, 964, 961, + 959, 956, 953, 950, 947, 944, 942, 939, + 936, 933, 930, 927, 924, 921, 918, 915, + 913, 910, 907, 904, 901, 898, 895, 892, + 889, 886, 883, 880, 877, 874, 871, 868, + 865, 862, 859, 856, 853, 850, 847, 844, + 841, 838, 835, 832, 829, 826, 823, 820, + 817, 813, 810, 807, 804, 801, 798, 795, + 792, 789, 786, 783, 779, 776, 773, 770, + 767, 764, 761, 758, 754, 751, 748, 745, + 742, 739, 735, 732, 729, 726, 723, 720, + 716, 713, 710, 707, 704, 700, 697, 694, + 691, 688, 684, 681, 678, 675, 671, 668, + 665, 662, 659, 655, 652, 649, 646, 642, + 639, 636, 633, 629, 626, 623, 620, 616, + 613, 610, 606, 603, 600, 597, 593, 590, + 587, 584, 580, 577, 574, 570, 567, 564, + 560, 557, 554, 551, 547, 544, 541, 537, + 534, 531, 527, 524, 521, 517, 514, 511, + 507, 504, 501, 498, 494, 491, 488, 484, + 481, 478, 474, 471, 468, 464, 461, 458, + 454, 451, 448, 444, 441, 437, 434, 431, + 427, 
424, 421, 417, 414, 411, 407, 404, + 401, 397, 394, 391, 387, 384, 381, 377, + 374, 371, 367, 364, 361, 357, 354, 351, + 347, 344, 341, 337, 334, 330, 327, 324, + 320, 317, 314, 310, 307, 304, 300, 297, + 294, 290, 287, 284, 280, 277, 274, 270, + 267, 264, 260, 257, 254, 250, 247, 244, + 240, 237, 234, 230, 227, 224, 221, 217, + 214, 211, 207, 204, 201, 197, 194, 191, + 187, 184, 181, 178, 174, 171, 168, 164, + 161, 158, 154, 151, 148, 145, 141, 138, + 135, 132, 128, 125, 122, 118, 115, 112, + 109, 105, 102, 99, 96, 92, 89, 86, + 83, 79, 76, 73, 70, 67, 63, 60, + 57, 54, 50, 47, 44, 41, 38, 34, + 31, 28, 25, 22, 18, 15, 12, 9, + 6, 2, -1, -4, -7, -10, -13, -16, + -20, -23, -26, -29, -32, -35, -39, -42, + -45, -48, -51, -54, -57, -60, -63, -67, + -70, -73, -76, -79, -82, -85, -88, -91, + -94, -97, -100, -104, -107, -110, -113, -116, + -119, -122, -125, -128, -131, -134, -137, -140, + -143, -146, -149, -152, -155, -158, -161, -164, + -167, -170, -173, -176, -179, -182, -185, -188, + -191, -194, -196, -199, -202, -205, -208, -211, + -214, -217, -220, -223, -226, -228, -231, -234, + -237, -240, -243, -246, -248, -251, -254, -257, + -260, -263, -266, -268, -271, -274, -277, -280, + -282, -285, -288, -291, -293, -296, -299, -302, + -305, -307, -310, -313, -315, -318, -321, -324, + -326, -329, -332, -334, -337, -340, -343, -345, + -348, -351, -353, -356, -359, -361, -364, -366, + -369, -372, -374, -377, -380, -382, -385, -387, + -390, -392, -395, -398, -400, -403, -405, -408, + -410, -413, -415, -418, -420, -423, -425, -428, + -430, -433, -435, -438, -440, -443, -445, -448, + -450, -453, -455, -457, -460, -462, -465, -467, + -469, -472, -474, -477, -479, -481, -484, -486, + -488, -491, -493, -495, -498, -500, -502, -505, + -507, -509, -512, -514, -516, -518, -521, -523, + -525, -527, -530, -532, -534, -536, -538, -541, + -543, -545, -547, -549, -552, -554, -556, -558, + -560, -562, -564, -567, -569, -571, -573, -575, + -577, -579, -581, -583, -585, -587, -589, -592, + 
-594, -596, -598, -600, -602, -604, -606, -608, + -610, -612, -614, -615, -617, -619, -621, -623, + -625, -627, -629, -631, -633, -635, -637, -639, + -640, -642, -644, -646, -648, -650, -651, -653, + -655, -657, -659, -661, -662, -664, -666, -668, + -669, -671, -673, -675, -676, -678, -680, -682, + -683, -685, -687, -688, -690, -692, -693, -695, + -697, -698, -700, -702, -703, -705, -706, -708, + -710, -711, -713, -714, -716, -717, -719, -720, + -722, -724, -725, -727, -728, -730, -731, -733, + -734, -735, -737, -738, -740, -741, -743, -744, + -746, -747, -748, -750, -751, -753, -754, -755, + -757, -758, -759, -761, -762, -763, -765, -766, + -767, -769, -770, -771, -772, -774, -775, -776, + -777, -779, -780, -781, -782, -783, -785, -786, + -787, -788, -789, -791, -792, -793, -794, -795, + -796, -797, -798, -800, -801, -802, -803, -804, + -805, -806, -807, -808, -809, -810, -811, -812, + -813, -814, -815, -816, -817, -818, -819, -820, + -821, -822, -823, -824, -825, -826, -827, -827, + -828, -829, -830, -831, -832, -833, -833, -834, + -835, -836, -837, -838, -838, -839, -840, -841, + -842, -842, -843, -844, -845, -845, -846, -847, + -847, -848, -849, -850, -850, -851, -852, -852, + -853, -854, -854, -855, -855, -856, -857, -857, + -858, -858, -859, -860, -860, -861, -861, -862, + -862, -863, -863, -864, -864, -865, -865, -866, + -866, -867, -867, -868, -868, -869, -869, -870, + -870, -870, -871, -871, -872, -872, -872, -873, + -873, -874, -874, -874, -875, -875, -875, -876, + -876, -876, -876, -877, -877, -877, -878, -878, + -878, -878, -879, -879, -879, -879, -879, -880, + -880, -880, -880, -880, -881, -881, -881, -881, + -881, -881, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -883, -883, + -883, -883, -883, -883, -883, -883, -883, -883, + -883, -883, -883, -883, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -882, -882, + -881, -881, -881, -881, -881, -881, -881, -880, + -880, -880, -880, -880, -879, -879, -879, -879, + 
-879, -878, -878, -878, -878, -877, -877, -877, + -876, -876, -876, -876, -875, -875, -875, -874, + -874, -874, -873, -873, -873, -872, -872, -871, + -871, -871, -870, -870, -870, -869, -869, -868, + -868, -867, -867, -867, -866, -866, -865, -865, + -864, -864, -863, -863, -862, -862, -861, -861, + -860, -860, -859, -859, -858, -857, -857, -856, + -856, -855, -855, -854, -853, -853, -852, -852, + -851, -850, -850, -849, -848, -848, -847, -846, + -846, -845, -844, -844, -843, -842, -842, -841, + -840, -840, -839, -838, -837, -837, -836, -835, + -834, -834, -833, -832, -831, -831, -830, -829, + -828, -827, -827, -826, -825, -824, -823, -822, + -822, -821, -820, -819, -818, -817, -816, -816, + -815, -814, -813, -812, -811, -810, -809, -808, + -808, -807, -806, -805, -804, -803, -802, -801, + -800, -799, -798, -797, -796, -795, -794, -793, + -792, -791, -790, -789, -788, -787, -786, -785, + -784, -783, -782, -781, -780, -779, -778, -777, + -776, -774, -773, -772, -771, -770, -769, -768, + -767, -766, -765, -763, -762, -761, -760, -759, + -758, -757, -755, -754, -753, -752, -751, -750, + -748, -747, -746, -745, -744, -743, -741, -740, + -739, -738, -736, -735, -734, -733, -732, -730, + -729, -728, -727, -725, -724, -723, -722, -720, + -719, -718, -716, -715, -714, -713, -711, -710, + -709, -707, -706, -705, -703, -702, -701, -699, + -698, -697, -695, -694, -693, -691, -690, -689, + -687, -686, -685, -683, -682, -680, -679, -678, + -676, -675, -673, -672, -671, -669, -668, -666, + -665, -664, -662, -661, -659, -658, -656, -655, + -654, -652, -651, -649, -648, -646, -645, -643, + -642, -640, -639, -638, -636, -635, -633, -632, + -630, -629, -627, -626, -624, -623, -621, -620, + -618, -617, -615, -614, -612, -610, -609, -607, + -606, -604, -603, -601, -600, -598, -597, -595, + -594, -592, -590, -589, -587, -586, -584, -583, + -581, -579, -578, -576, -575, -573, -572, -570, + -568, -567, -565, -564, -562, -560, -559, -557, + -556, -554, -552, -551, -549, -547, -546, -544, + 
-543, -541, -539, -538, -536, -534, -533, -531, + -530, -528, -526, -525, -523, -521, -520, -518, + -516, -515, -513, -511, -510, -508, -506, -505, + -503, -501, -500, -498, -496, -495, -493, -491, + -490, -488, -486, -485, -483, -481, -479, -478, + -476, -474, -473, -471, -469, -468, -466, -464, + -462, -461, -459, -457, -456, -454, -452, -450, + -449, -447, -445, -444, -442, -440, -438, -437, + -435, -433, -432, -430, -428, -426, -425, -423, + -421, -419, -418, -416, -414, -413, -411, -409, + -407, -406, -404, -402, -400, -399, -397, -395, + -393, -392, -390, -388, -386, -385, -383, -381, + -379, -378, -376, -374, -372, -371, -369, -367, + -365, -364, -362, -360, -358, -357, -355, -353, + -351, -350, -348, -346, -344, -343, -341, -339, + -337, -336, -334, -332, -330, -328, -327, -325, + -323, -321, -320, -318, -316, -314, -313, -311, + -309, -307, -306, -304, -302, -300, -299, -297, + -295, -293, -291, -290, -288, -286, -284, -283, + -281, -279, -277, -276, -274, -272, -270, -269, + -267, -265, -263, -262, -260, -258, -256, -255, + -253, -251, -249, -247, -246, -244, -242, -240, + -239, -237, -235, -233, -232, -230, -228, -226, + -225, -223, -221, -219, -218, -216, -214, -213, + -211, -209, -207, -206, -204, -202, -200, -199, + -197, -195, -193, -192, -190, -188, -186, -185, + -183, -181, -180, -178, -176, -174, -173, -171, + -169, -167, -166, -164, -162, -161, -159, -157, + -155, -154, -152, -150, -149, -147, -145, -143, + -142, -140, -138, -137, -135, -133, -132, -130, + -128, -126, -125, -123, -121, -120, -118, -116, + -115, -113, -111, -110, -108, -106, -105, -103, + -101, -99, -98, -96, -94, -93, -91, -89, + -88, -86, -84, -83, -81, -80, -78, -76, + -75, -73, -71, -70, -68, -66, -65, -63, + -61, -60, -58, -57, -55, -53, -52, -50, + -48, -47, -45, -43, -42, -40, -39, -37, + -35, -34, -32, -31, -29, -27, -26, -24, + -23, -21, -19, -18, -16, -15, -13, -11, + -10, -8, -7, -5, -4, -2, 0, 1, + 3, 4, 6, 7, 9, 10, 12, 14, + 15, 17, 18, 20, 21, 23, 24, 26, + 27, 29, 
30, 32, 34, 35, 37, 38, + 40, 41, 43, 44, 46, 47, 49, 50, + 52, 53, 55, 56, 58, 59, 61, 62, + 64, 65, 66, 68, 69, 71, 72, 74, + 75, 77, 78, 80, 81, 82, 84, 85, + 87, 88, 90, 91, 93, 94, 95, 97, + 98, 100, 101, 102, 104, 105, 107, 108, + 109, 111, 112, 114, 115, 116, 118, 119, + 121, 122, 123, 125, 126, 127, 129, 130, + 132, 133, 134, 136, 137, 138, 140, 141, + 142, 144, 145, 146, 148, 149, 150, 152, + 153, 154, 155, 157, 158, 159, 161, 162, + 163, 165, 166, 167, 168, 170, 171, 172, + 174, 175, 176, 177, 179, 180, 181, 182, + 184, 185, 186, 187, 189, 190, 191, 192, + 193, 195, 196, 197, 198, 200, 201, 202, + 203, 204, 206, 207, 208, 209, 210, 211, + 213, 214, 215, 216, 217, 218, 220, 221, + 222, 223, 224, 225, 227, 228, 229, 230, + 231, 232, 233, 234, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 247, 248, + 249, 250, 251, 252, 253, 254, 255, 256, + 257, 258, 259, 260, 261, 262, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, 276, 277, 278, 279, 280, 281, + 282, 283, 284, 285, 286, 287, 288, 288, + 289, 290, 291, 292, 293, 294, 295, 296, + 297, 298, 299, 300, 301, 302, 302, 303, + 304, 305, 306, 307, 308, 309, 310, 311, + 311, 312, 313, 314, 315, 316, 317, 318, + 318, 319, 320, 321, 322, 323, 323, 324, + 325, 326, 327, 328, 328, 329, 330, 331, + 332, 332, 333, 334, 335, 336, 336, 337, + 338, 339, 339, 340, 341, 342, 343, 343, + 344, 345, 346, 346, 347, 348, 349, 349, + 350, 351, 351, 352, 353, 354, 354, 355, + 356, 356, 357, 358, 358, 359, 360, 361, + 361, 362, 363, 363, 364, 365, 365, 366, + 367, 367, 368, 368, 369, 370, 370, 371, + 372, 372, 373, 373, 374, 375, 375, 376, + 376, 377, 378, 378, 379, 379, 380, 381, + 381, 382, 382, 383, 383, 384, 385, 385, + 386, 386, 387, 387, 388, 388, 389, 389, + 390, 391, 391, 392, 392, 393, 393, 394, + 394, 395, 395, 396, 396, 397, 397, 398, + 398, 398, 399, 399, 400, 400, 401, 401, + 402, 402, 403, 403, 403, 404, 404, 405, + 405, 406, 406, 406, 407, 407, 408, 408, + 409, 409, 409, 410, 410, 410, 411, 411, 
+ 412, 412, 412, 413, 413, 413, 414, 414, + 415, 415, 415, 416, 416, 416, 417, 417, + 417, 418, 418, 418, 419, 419, 419, 420, + 420, 420, 420, 421, 421, 421, 422, 422, + 422, 423, 423, 423, 423, 424, 424, 424, + 424, 425, 425, 425, 425, 426, 426, 426, + 426, 427, 427, 427, 427, 428, 428, 428, + 428, 428, 429, 429, 429, 429, 429, 430, + 430, 430, 430, 430, 431, 431, 431, 431, + 431, 432, 432, 432, 432, 432, 432, 432, + 433, 433, 433, 433, 433, 433, 433, 434, + 434, 434, 434, 434, 434, 434, 434, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 434, 434, 434, 434, 434, 434, 434, + 434, 434, 433, 433, 433, 433, 433, 433, + 433, 432, 432, 432, 432, 432, 432, 432, + 431, 431, 431, 431, 431, 431, 430, 430, + 430, 430, 430, 430, 429, 429, 429, 429, + 429, 429, 428, 428, 428, 428, 428, 427, + 427, 427, 427, 427, 426, 426, 426, 426, + 425, 425, 425, 425, 425, 424, 424, 424, + 424, 423, 423, 423, 423, 422, 422, 422, + 422, 421, 421, 421, 421, 420, 420, 420, + 420, 419, 419, 419, 419, 418, 418, 418, + 418, 417, 417, 417, 416, 416, 416, 416, + 415, 415, 415, 414, 414, 414, 414, 413, + 413, 413, 412, 412, 412, 411, 411, 411, + 410, 410, 410, 409, 409, 409, 409, 408, + 408, 408, 407, 407, 407, 406, 406, 406, + 405, 405, 405, 404, 404, 404, 403, 403, + 402, 402, 402, 401, 401, 401, 400, 400, + 400, 399, 399, 399, 398, 398, 397, 397, + 397, 396, 396, 396, 395, 395, 394, 394, + 394, 393, 393, 393, 392, 392, 391, 391, + 391, 390, 390, 389, 389, 389, 388, 388, + 387, 387, 387, 386, 386, 385, 385, 385, + 384, 384, 383, 383, 382, 382, 382, 381, + 381, 380, 380, 380, 
379, 379, 378, 378, + 377, 377, 377, 376, 376, 375, 375, 374, + 374, 373, 373, 373, 372, 372, 371, 371, + 370, 370, 370, 369, 369, 368, 368, 367, + 367, 366, 366, 365, 365, 365, 364, 364, + 363, 363, 362, 362, 361, 361, 360, 360, + 359, 359, 359, 358, 358, 357, 357, 356, + 356, 355, 355, 354, 354, 353, 353, 352, + 352, 351, 351, 350, 350, 349, 349, 348, + 348, 348, 347, 347, 346, 346, 345, 345, + 344, 344, 343, 343, 342, 342, 341, 341, + 340, 340, 339, 339, 338, 338, 337, 337, + 336, 336, 335, 335, 334, 334, 333, 333, + 332, 332, 331, 331, 330, 330, 329, 329, + 328, 328, 327, 327, 326, 326, 325, 325, + 324, 323, 323, 322, 322, 321, 321, 320, + 320, 319, 319, 318, 318, 317, 317, 316, + 316, 315, 315, 314, 314, 313, 313, 312, + 312, 311, 311, 310, 309, 309, 308, 308, + 307, 307, 306, 306, 305, 305, 304, 304, + 303, 303, 302, 302, 301, 300, 300, 299, + 299, 298, 298, 297, 297, 296, 296, 295, + 295, 294, 294, 293, 293, 292, 291, 291, + 290, 290, 289, 289, 288, 288, 287, 287, + 286, 286, 285, 285, 284, 283, 283, 282, + 282, 281, 281, 280, 280, 279, 279, 278, + 278, 277, 277, 276, 275, 275, 274, 274, + 273, 273, 272, 272, 271, 271, 270, 270, + 269, 268, 268, 267, 267, 266, 266, 265, + 265, 264, 264, 263, 263, 262, 261, 261, + 260, 260, 259, 259, 258, 258, 257, 257, + 256, 256, 255, 255, 254, 253, 253, 252, + 252, 251, 251, 250, 250, 249, 249, 248, + 248, 247, 247, 246, 245, 245, 244, 244, + 243, 243, 242, 242, 241, 241, 240, 240, + 239, 239, 238, 237, 237, 236, 236, 235, + 235, 234, 234, 233, 233, 232, 232, 231, + 231, 230, 230, 229, 229, 228, 227, 227, + 226, 226, 225, 225, 224, 224, 223, 223, + 222, 222, 221, 221, 220, 220, 219, 219, + 218, 218, 217, 217, 216, 215, 215, 214, + 214, 213, 213, 212, 212, 211, 211, 210, + 210, 209, 209, 208, 208, 207, 207, 206, + 206, 205, 205, 204, 204, 203, 203, 202, + 202, 201, 201, 200, 200, 199, 199, 198, + 198, 197, 197, 196, 196, 195, 195, 194, + 194, 193, 193, 192, 192, 191, 191, 190, + 190, 189, 189, 188, 188, 187, 187, 186, + 
186, 185, 185, 184, 184, 183, 183, 182, + 182, 181, 181, 180, 180, 179, 179, 178, + 178, 177, 177, 176, 176, 175, 175, 174, + 174, 174, 173, 173, 172, 172, 171, 171, + 170, 170, 169, 169, 168, 168, 167, 167, + 166, 166, 165, 165, 165, 164, 164, 163, + 163, 162, 162, 161, 161, 160, 160, 159, + 159, 159, 158, 158, 157, 157, 156, 156, + 155, 155, 154, 154, 153, 153, 153, 152, + 152, 151, 151, 150, 150, 149, 149, 149, + 148, 148, 147, 147, 146, 146, 145, 145, + 145, 144, 144, 143, 143, 142, 142, 141, + 141, 141, 140, 140, 139, 139, 138, 138, + 138, 137, 137, 136, 136, 135, 135, 135, + 134, 134, 133, 133, 132, 132, 132, 131, + 131, 130, 130, 130, 129, 129, 128, 128, + 127, 127, 127, 126, 126, 125, 125, 125, + 124, 124, 123, 123, 123, 122, 122, 121, + 121, 121, 120, 120, 119, 119, 119, 118, + 118, 117, 117, 117, 116, 116, 115, 115, + 115, 114, 114, 113, 113, 113, 112, 112, + 112, 111, 111, 110, 110, 110, 109, 109, + 108, 108, 108, 107, 107, 107, 106, 106, + 105, 105, 105, 104, 104, 104, 103, 103, + 103, 102, 102, 101, 101, 101, 100, 100, + 100, 99, 99, 99, 98, 98, 97, 97, + 97, 96, 96, 96, 95, 95, 95, 94, + 94, 94, 93, 93, 93, 92, 92, 92, + 91, 91, 90, 90, 90, 89, 89, 89, + 88, 88, 88, 87, 87, 87, 86, 86, + 86, 85, 85, 85, 85, 84, 84, 84, + 83, 83, 83, 82, 82, 82, 81, 81, + 81, 80, 80, 80, 79, 79, 79, 78, + 78, 78, 77, 77, 77, 77, 76, 76, + 76, 75, 75, 75, 74, 74, 74, 74, + 73, 73, 73, 72, 72, 72, 71, 71, + 71, 71, 70, 70, 70, 69, 69, 69, + 69, 68, 68, 68, 67, 67, 67, 67, + 66, 66, 66, 65, 65, 65, 65, 64, + 64, 64, 63, 63, 63, 63, 62, 62, + 62, 62, 61, 61, 61, 60, 60, 60, + 60, 59, 59, 59, 59, 58, 58, 58, + 58, 57, 57, 57, 57, 56, 56, 56, + 56, 55, 55, 55, 55, 54, 54, 54, + 54, 53, 53, 53, 53, 52, 52, 52, + 52, 51, 51, 51, 51, 50, 50, 50, + 50, 50, 49, 49, 49, 49, 48, 48, + 48, 48, 47, 47, 47, 47, 46, 46, + 46, 46, 46, 45, 45, 45, 45, 44, + 44, 44, 44, 44, 43, 43, 43, 43, + 43, 42, 42, 42, 42, 42, 41, 41, + 41, 41, 40, 40, 40, 40, 40, 39, + 39, 39, 39, 39, 38, 38, 38, 
38, + 38, 37, 37, 37, 37, 37, 37, 36, + 36, 36, 36, 36, 35, 35, 35, 35, + 35, 34, 34, 34, 34, 34, 33, 33, + 33, 33, 33, 33, 32, 32, 32, 32, + 32, 32, 31, 31, 31, 31, 31, 31, + 30, 30, 30, 30, 30, 29, 29, 29, + 29, 29, 29, 28, 28, 28, 28, 28, + 28, 27, 27, 27, 27, 27, 27, 27, + 26, 26, 26, 26, 26, 26, 26, 25, + 25, 25, 25, 25, 25, 24, 24, 24, + 24, 24, 24, 23, 23, 23, 23, 23, + 23, 23, 22, 22, 22, 22, 22, 22, + 22, 22, 21, 21, 21, 21, 21, 21, + 21, 21, 20, 20, 20, 20, 20, 20, + 20, 19, 19, 19, 19, 19, 19, 19, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 16, 16, 16, 16, 16, + 16, 16, 15, 15, 15, 15, 15, 15, + 15, 15, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 13, 13, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 10, 10, 10, 10, + 9, 9, 9, 8, 8, 8, 7, 7, + 7, 7, 7, 7, 7, 7, 8, 8, + 9, 9, 10, 11, 12, 14, 15, 17, + 18, 20, 21, 23, 25, 26, 28, 29, + 31, 32, 33, 34, 35, 35, 36, 36, +}; + + +BandPass::BandPass(bool correct, unsigned nrChannels) +: + factors(new float[nrChannels]) +{ + if (correct) + computeCorrectionFactors(nrChannels); + else + for (unsigned i = 0; i < nrChannels; i ++) + factors[i] = 1.0; +} + + +void BandPass::computeCorrectionFactors(unsigned nrChannels) +{ + // This is the square of the bandpass, since the correlator multiplies two + // bandpasses. 
The following matlab functions are used: + + // f=fftshift(fft(Coeffs16384Kaiser_quant,262144)) + // m=f(131073-128:131073+127) + // r=f(131073-128+256:131073+127+256) + // l=f(131073-128-256:131073+127-256) + // plot(2^50./(abs(m).^2+abs(l).^2+abs(r).^2)) + + // it is not worth to use the more complex R2C FFTW method + std::vector<fcomplex, AlignedStdAllocator<fcomplex, 16> > in(262144, 0.0), out(262144); + + for (unsigned i = 0; i < 16384; i ++) + in[i] = stationFilterConstants[i]; + +#if defined HAVE_FFTW3 + fftwf_plan plan = fftwf_plan_dft_1d(262144, reinterpret_cast<fftwf_complex *>(&in[0]), reinterpret_cast<fftwf_complex *>(&out[0]), FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_execute(plan); + fftwf_destroy_plan(plan); +#elif defined HAVE_FFTW2 + fftw_plan plan = fftw_create_plan(262144, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_one(plan, reinterpret_cast<fftw_complex *>(&in[0]), reinterpret_cast<fftw_complex *>(&out[0])); + fftw_destroy_plan(plan); +#else +#error need FFTW2 or FFTW3 +#endif + + for (unsigned i = 0; i < nrChannels; i ++) { + fcomplex m = out[(i - nrChannels / 2) % 262144U]; + fcomplex l = out[(i - 3 * nrChannels / 2) % 262144U]; + fcomplex r = out[i + nrChannels / 2]; + + factors[i] = pow(2, 50) / abs(m * m + l * l + r * r); + } +} + + +BandPass::~BandPass() +{ + delete [] factors; +} + +} // namespace RTCP +} // namespace LOFAR + + +#if 0 +int main() +{ + LOFAR::RTCP::BandPass bandpass; + const float *f = bandpass.correctionFactors(256); + + for (unsigned i = 0; i < 256; i ++) + std::clog << i << ' ' << f[i] << std::endl; + + return 0; +} +#endif diff --git a/RTCP/CNProc/src/BandPass.h b/RTCP/CNProc/src/BandPass.h new file mode 100644 index 0000000000000000000000000000000000000000..7e04b54c95c4fc89d492102f3b687ee128ae1e56 --- /dev/null +++ b/RTCP/CNProc/src/BandPass.h @@ -0,0 +1,32 @@ +#ifndef LOFAR_CNPROC_BANDPASS_H +#define LOFAR_CNPROC_BANDPASS_H + + +namespace LOFAR { +namespace RTCP { + +class BandPass { + public: + BandPass(bool correct, unsigned 
nrChannels); + ~BandPass(); + + const float *correctionFactors() const; + + private: + void computeCorrectionFactors(unsigned nrChannels); + + static const float stationFilterConstants[65536]; + + float *factors; +}; + + +inline const float *BandPass::correctionFactors() const +{ + return factors; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/CNProc.log_prop b/RTCP/CNProc/src/CNProc.log_prop new file mode 100644 index 0000000000000000000000000000000000000000..c8288c9751c7b475e3d48b75a94ce6a8c2018978 --- /dev/null +++ b/RTCP/CNProc/src/CNProc.log_prop @@ -0,0 +1,14 @@ +# Configure the rootLogger +log4cplus.rootLogger=INFO, STDOUT +log4cplus.LCS.Common=INFO, STDOUT +log4cplus.logger.TRC=TRACE, NOLOG +log4cplus.logger.TRC.additivity=false +log4cplus.additivity=false +# Define the STDOUT appender +log4cplus.appender.STDOUT=log4cplus::ConsoleAppender +log4cplus.appender.STDOUT.Threshhold=TRACE2 +log4cplus.appender.STDOUT.layout=log4cplus::PatternLayout +log4cplus.appender.STDOUT.layout.ConversionPattern=%-5p [%x]%c{3} - %m%n +log4cplus.appender.STDOUT.logToStdErr=false +log4cplus.appender.STDOUT.ImmediateFlush=true +log4cplus.appender.NOLOG=log4cplus::NullAppender diff --git a/RTCP/CNProc/src/CN_Processing.cc b/RTCP/CNProc/src/CN_Processing.cc new file mode 100644 index 0000000000000000000000000000000000000000..7f63f17fbf0e3dbb07fddb9cfb007583b3f9122e --- /dev/null +++ b/RTCP/CNProc/src/CN_Processing.cc @@ -0,0 +1,525 @@ +//# CN_Processing.cc: Blue Gene processing for 1 second of sampled data +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. 
+//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +//# Includes +#include <CN_Processing.h> +#include <CorrelatorAsm.h> +#include <FIR_Asm.h> + +#include <Common/Timer.h> +#include <Interface/CN_Configuration.h> +#include <Interface/CN_Mapping.h> + +#include <cassert> +#include <complex> +#include <cmath> +#include <iomanip> +#include <iostream> +#include <map> + +#if defined HAVE_BGP +#include <common/bgp_personality_inlines.h> +#include <spi/kernel_interface.h> +#endif + +#if defined HAVE_ZOID && (defined HAVE_BGL || defined HAVE_BGP) +extern "C" { +#include <lofar.h> +} + +#endif + +#if (defined HAVE_BGP || defined HAVE_BGL) +#define LOG_CONDITION (itsLocationInfo.rankInPset() == 0) +//#define LOG_CONDITION (itsLocationInfo.rank() == 0) +//#define LOG_CONDITION 1 +#else +#define LOG_CONDITION 1 +#endif + +namespace LOFAR { +namespace RTCP { + +#if !defined HAVE_MASS + +inline static dcomplex cosisin(double x) +{ + return makedcomplex(cos(x), sin(x)); +} + +#endif + + +//static NSTimer transposeTimer("transpose()", true); // Unused --Rob +static NSTimer computeTimer("computing", true); +static NSTimer totalProcessingTimer("global total processing", true); + + +CN_Processing_Base::~CN_Processing_Base() +{ +} + + +template <typename SAMPLE_TYPE> CN_Processing<SAMPLE_TYPE>::CN_Processing(Stream *str, const LocationInfo &locationInfo) +: + itsStream(str), + itsLocationInfo(locationInfo), + itsInputData(0), + itsTransposedData(0), + itsFilteredData(0), + 
itsCorrelatedData(0), +#if defined HAVE_BGL || defined HAVE_BGP + itsDoAsyncCommunication(false), + itsTranspose(0), + itsAsyncTranspose(0), +#endif + itsPPF(0), + itsCorrelator(0) +{ + memset(itsArenas, 0, sizeof itsArenas); + +// #if defined HAVE_BGL +// getPersonality(); +// #endif + +#if defined HAVE_ZOID && (defined HAVE_BGL || defined HAVE_BGP) + initIONode(); +#endif +} + + +template <typename SAMPLE_TYPE> CN_Processing<SAMPLE_TYPE>::~CN_Processing() +{ +} + + +#if 0 + //#if defined HAVE_BGL + +struct Location { + unsigned pset, rankInPset; +}; + + +template <typename SAMPLE_TYPE> void CN_Processing<SAMPLE_TYPE>::getPersonality() +{ + int retval = rts_get_personality(&itsPersonality, sizeof itsPersonality); + assert(retval == 0); + + if (itsLocationInfo.rank() == 0) + std::clog << "topology = (" + << itsPersonality.getXsize() << ',' + << itsPersonality.getYsize() << ',' + << itsPersonality.getZsize() << "), torus wraparound = (" + << (itsPersonality.isTorusX() ? 'T' : 'F') << ',' + << (itsPersonality.isTorusY() ? 'T' : 'F') << ',' + << (itsPersonality.isTorusZ() ? 'T' : 'F') << ')' + << std::endl; + + itsRankInPset = itsPersonality.rankInPset() + itsPersonality.numNodesInPset() * (itsLocationInfo.rank() / itsPersonality.numComputeNodes()); + + Location myLocation = { + itsPersonality.getPsetNum(), itsRankInPset + }; + + std::vector<Location> allLocations(itsLocationInfo.nrNodes()); + + MPI_Gather(&myLocation, 2, MPI_INT, &allLocations[0], 2, MPI_INT, 0, MPI_COMM_WORLD); + + if (itsLocationInfo.rank() == 0) { + unsigned nrCoresPerPset = itsPersonality.numNodesInPset() * (itsPersonality.isVirtualNodeMode() ? 
#if defined HAVE_ZOID && (defined HAVE_BGL || defined HAVE_BGP)

// Initialize the I/O node of this pset by forwarding the original command
// line to lofar_init().  Only the core with rank 0 within its pset does the
// work; on every other core this is a no-op.
//
// NOTE(review): CN_Processing.h declares this member only under
// "HAVE_ZOID && HAVE_BGL"; confirm that the guards agree for HAVE_BGP builds.
template <typename SAMPLE_TYPE> void CN_Processing<SAMPLE_TYPE>::initIONode() const
{
  // one of the compute cores in each Pset has to initialize its I/O node

  if (itsLocationInfo.rankInPset() == 0) {
    // length of every argument, including its terminating '\0'
    std::vector<size_t> lengths;

    for (int arg = 0; original_argv[arg] != 0; arg ++) {
      std::clog << "adding arg " << original_argv[arg] << std::endl;
      lengths.push_back(strlen(original_argv[arg]) + 1);
    }

    std::clog << "calling lofar_init(..., ..., " << lengths.size() << ")" << std::endl;

    // taking the address of element 0 of an empty vector is undefined
    lofar_init(original_argv, lengths.empty() ? 0 : &lengths[0], lengths.size());
  }
}

#endif
std::clog << "node " << itsLocationInfo.rank() << " filters and correlates subbands "; + + unsigned sb = itsCurrentSubband; + + do { + std::clog << (sb == itsCurrentSubband ? '[' : ',') << sb; + + if ((sb += itsSubbandIncrement) >= itsLastSubband) + sb -= itsLastSubband - itsFirstSubband; + + } while (sb != itsCurrentSubband); + + std::clog << ']' << std::endl; +} + +#endif // HAVE_MPI + + +template <typename SAMPLE_TYPE> void CN_Processing<SAMPLE_TYPE>::preprocess(CN_Configuration &configuration) +{ + //checkConsistency(parset); TODO + +// #if defined HAVE_BGL +// unsigned usedCoresPerPset = configuration.nrUsedCoresPerPset(); +// unsigned myPset = itsPersonality.getPsetNum(); +// unsigned myCore = CN_Mapping::reverseMapCoreOnPset(itsRankInPset, myPset); +#if defined HAVE_BGL || HAVE_BGP + unsigned usedCoresPerPset = configuration.nrUsedCoresPerPset(); + unsigned myPset = itsLocationInfo.psetNumber(); + unsigned myCore = CN_Mapping::reverseMapCoreOnPset(itsLocationInfo.rankInPset(), myPset); +#else + unsigned usedCoresPerPset = 1; + unsigned myPset = 0; + unsigned myCore = 0; +#endif + std::vector<unsigned> &inputPsets = configuration.inputPsets(); + std::vector<unsigned> &outputPsets = configuration.outputPsets(); + std::vector<unsigned> &tabList = configuration.tabList(); + +#if defined HAVE_BGP || defined HAVE_BGL + if(!itsDoAsyncCommunication) { + Transpose<SAMPLE_TYPE>::getMPIgroups(usedCoresPerPset, itsLocationInfo, inputPsets, outputPsets); + } +#endif + + std::vector<unsigned>::const_iterator inputPsetIndex = std::find(inputPsets.begin(), inputPsets.end(), myPset); + std::vector<unsigned>::const_iterator outputPsetIndex = std::find(outputPsets.begin(), outputPsets.end(), myPset); + + itsIsTransposeInput = inputPsetIndex != inputPsets.end(); + itsIsTransposeOutput = outputPsetIndex != outputPsets.end(); + + itsNrStations = configuration.nrStations(); + itsOutputPsetSize = outputPsets.size(); + unsigned nrBaselines = itsNrStations * (itsNrStations + 1) / 2; 
+ unsigned nrChannels = configuration.nrChannelsPerSubband(); + unsigned nrSamplesPerIntegration = configuration.nrSamplesPerIntegration(); + unsigned nrSamplesToCNProc = configuration.nrSamplesToCNProc(); + + // Each phase (e.g., transpose, PPF, correlator) reads from an input data + // set and writes to an output data set. To save memory, two memory buffers + // are used, and consecutive phases alternately use one of them as input + // buffer and the other as output buffer. + // Since each buffer (arena) in used multiple times, we use multiple + // Allocators for a single arena, but the Allocators are hidden in the + // implementations of InputData, TransposedData, etc. + + size_t inputDataSize = itsIsTransposeInput ? InputData<SAMPLE_TYPE>::requiredSize(outputPsets.size(), nrSamplesToCNProc) : 0; + size_t transposedDataSize = itsIsTransposeOutput ? TransposedData<SAMPLE_TYPE>::requiredSize(itsNrStations, nrSamplesToCNProc) : 0; + size_t filteredDataSize = itsIsTransposeOutput ? FilteredData::requiredSize(itsNrStations, nrChannels, nrSamplesPerIntegration) : 0; + size_t correlatedDataSize = itsIsTransposeOutput ? 
CorrelatedData::requiredSize(nrBaselines, nrChannels) : 0; + + itsArenas[0] = new MallocedArena(inputDataSize, 32); + itsArenas[1] = new MallocedArena(std::max(transposedDataSize, correlatedDataSize), 32); + itsArenas[2] = new MallocedArena(filteredDataSize, 32); + + if (itsIsTransposeInput) { + itsInputData = new InputData<SAMPLE_TYPE>(*itsArenas[0], outputPsets.size(), nrSamplesToCNProc); + } + + if (itsIsTransposeOutput) { + unsigned nrSubbandsPerPset = configuration.nrSubbandsPerPset(); + unsigned logicalNode = usedCoresPerPset * (outputPsetIndex - outputPsets.begin()) + myCore; + // TODO: logicalNode assumes output psets are consecutively numbered + + itsCenterFrequencies = configuration.refFreqs(); + itsFirstSubband = (logicalNode / usedCoresPerPset) * nrSubbandsPerPset; + itsLastSubband = itsFirstSubband + nrSubbandsPerPset; + itsCurrentSubband = itsFirstSubband + logicalNode % usedCoresPerPset % nrSubbandsPerPset; + itsSubbandIncrement = usedCoresPerPset % nrSubbandsPerPset; + +#if defined HAVE_MPI + printSubbandList(); +#endif // HAVE_MPI + + itsTransposedData = new TransposedData<SAMPLE_TYPE>(*itsArenas[1], itsNrStations, nrSamplesToCNProc); + itsFilteredData = new FilteredData(*itsArenas[2], itsNrStations, nrChannels, nrSamplesPerIntegration); + itsCorrelatedData = new CorrelatedData(*itsArenas[1], nrBaselines, nrChannels); + + itsPPF = new PPF<SAMPLE_TYPE>(itsNrStations, nrChannels, nrSamplesPerIntegration, configuration.sampleRate() / nrChannels, configuration.delayCompensation()); + itsCorrelator = new Correlator(itsNrStations, nrChannels, nrSamplesPerIntegration, configuration.correctBandPass()); + } + +#if defined HAVE_MPI + if (itsIsTransposeInput || itsIsTransposeOutput) { + if(itsDoAsyncCommunication) { + itsAsyncTranspose = new AsyncTranspose<SAMPLE_TYPE>(itsIsTransposeInput, itsIsTransposeOutput, + usedCoresPerPset, itsLocationInfo, inputPsets, outputPsets, nrSamplesToCNProc); + } else { + itsTranspose = new 
Transpose<SAMPLE_TYPE>(itsIsTransposeInput, itsIsTransposeOutput, myCore); + itsTranspose->setupTransposeParams(itsLocationInfo, inputPsets, outputPsets, itsInputData, itsTransposedData); + } + } +#endif // HAVE_MPI +} + + +template <typename SAMPLE_TYPE> void CN_Processing<SAMPLE_TYPE>::process() +{ + totalProcessingTimer.start(); + NSTimer totalTimer("total processing", LOG_CONDITION); + totalTimer.start(); + +#if defined HAVE_MPI + if(itsDoAsyncCommunication) { + if (itsIsTransposeInput) { + itsInputData->readMetaData(itsStream); // sync read the meta data + } + + if(itsIsTransposeOutput) { + NSTimer postAsyncReceives("post async receives", LOG_CONDITION); + postAsyncReceives.start(); + itsAsyncTranspose->postAllReceives(itsTransposedData); + postAsyncReceives.stop(); + } + } +#endif // HAVE_MPI + + if (itsIsTransposeInput) { +#if defined HAVE_MPI + if (LOG_CONDITION) + std::clog << std::setprecision(12) << "core " << itsLocationInfo.rank() << ": start reading at " << MPI_Wtime() << '\n'; +#endif // HAVE_MPI + + static NSTimer readTimer("receive timer", true); + +#if defined HAVE_MPI + if(itsDoAsyncCommunication) { + NSTimer asyncSendTimer("async send", LOG_CONDITION); + + for(unsigned i=0; i<itsOutputPsetSize; i++) { + readTimer.start(); + itsInputData->readOne(itsStream); // Synchronously read 1 subband from my IO node. + readTimer.stop(); + asyncSendTimer.start(); + itsAsyncTranspose->asyncSend(i, itsInputData); // Asynchronously send one subband to another pset. 
+ asyncSendTimer.stop(); + } + } else { // Synchronous + readTimer.start(); + itsInputData->read(itsStream); + readTimer.stop(); + } +#else // NO MPI + readTimer.start(); + itsInputData->read(itsStream); + readTimer.stop(); +#endif + } // itsIsTransposeInput + +#if defined HAVE_MPI + if(!itsDoAsyncCommunication) { + if (itsIsTransposeInput || itsIsTransposeOutput) { + if (LOG_CONDITION) { + std::clog << std::setprecision(12) << "core " << itsLocationInfo.rank() << ": start transpose at " << MPI_Wtime() << '\n'; + } +#if 0 + MPI_Barrier(itsTransposeGroup); + MPI_Barrier(itsTransposeGroup); +#endif + + NSTimer transposeTimer("one transpose", LOG_CONDITION); + transposeTimer.start(); + itsTranspose->transpose(itsInputData, itsTransposedData); + itsTranspose->transposeMetaData(itsInputData, itsTransposedData); + transposeTimer.stop(); + } + } +#endif // HAVE_MPI + + if (itsIsTransposeOutput) { +#if defined HAVE_MPI + if (LOG_CONDITION) + std::clog << std::setprecision(12) << "core " << itsLocationInfo.rank() << ": start processing at " << MPI_Wtime() << '\n'; + + if(itsDoAsyncCommunication) { + NSTimer asyncReceiveTimer("wait for any async receive", LOG_CONDITION); + + for (unsigned i = 0; i < itsNrStations; i ++) { + asyncReceiveTimer.start(); + unsigned stat = itsAsyncTranspose->waitForAnyReceive(); + asyncReceiveTimer.stop(); + + computeTimer.start(); + itsPPF->computeFlags(stat, itsTransposedData, itsFilteredData); + itsPPF->filter(stat, itsCenterFrequencies[itsCurrentSubband], itsTransposedData, itsFilteredData); + computeTimer.stop(); + } + } else { + for (unsigned stat = 0; stat < itsNrStations; stat ++) { + computeTimer.start(); + itsPPF->computeFlags(stat, itsTransposedData, itsFilteredData); + itsPPF->filter(stat, itsCenterFrequencies[itsCurrentSubband], itsTransposedData, itsFilteredData); + computeTimer.stop(); + } + } +#else // NO MPI + for (unsigned stat = 0; stat < itsNrStations; stat ++) { + computeTimer.start(); + itsPPF->computeFlags(stat, 
itsTransposedData, itsFilteredData); + itsPPF->filter(stat, itsCenterFrequencies[itsCurrentSubband], itsTransposedData, itsFilteredData); + computeTimer.stop(); + } +#endif // HAVE_MPI + + computeTimer.start(); + itsCorrelator->computeFlagsAndCentroids(itsFilteredData, itsCorrelatedData); + itsCorrelator->correlate(itsFilteredData, itsCorrelatedData); + computeTimer.stop(); + +#if defined HAVE_MPI + if (LOG_CONDITION) + std::clog << std::setprecision(12) << "core " << itsLocationInfo.rank() << ": start writing at " << MPI_Wtime() << '\n'; +#endif // HAVE_MPI + + static NSTimer writeTimer("send timer", true); + writeTimer.start(); + itsCorrelatedData->write(itsStream); + writeTimer.stop(); + +#if defined HAVE_MPI + if(itsDoAsyncCommunication && itsIsTransposeInput) { + NSTimer waitAsyncSendTimer("wait for all async sends", LOG_CONDITION); + waitAsyncSendTimer.start(); + itsAsyncTranspose->waitForAllSends(); + waitAsyncSendTimer.stop(); + } +#endif + } // itsIsTransposeOutput + +#if defined HAVE_MPI + if (itsIsTransposeInput || itsIsTransposeOutput) { + if (LOG_CONDITION) { + std::clog << std::setprecision(12) << "core " << itsLocationInfo.rank() << ": start idling at " << MPI_Wtime() << '\n'; + } + } +#endif // HAVE_MPI + +#if 0 + static unsigned count = 0; + + if (itsLocationInfo.rank() == 5 && ++ count == 9) + for (double time = MPI_Wtime() + 4.0; MPI_Wtime() < time;) + ; +#endif + + if ((itsCurrentSubband += itsSubbandIncrement) >= itsLastSubband) { + itsCurrentSubband -= itsLastSubband - itsFirstSubband; + } + + totalTimer.stop(); + totalProcessingTimer.stop(); +} + + +template <typename SAMPLE_TYPE> void CN_Processing<SAMPLE_TYPE>::postprocess() +{ + if (itsIsTransposeInput) { + delete itsInputData; + } + + if (itsIsTransposeInput || itsIsTransposeOutput) { +#if defined HAVE_MPI + if(itsDoAsyncCommunication) { + delete itsAsyncTranspose; + } else { + delete itsTranspose; + } +#endif // HAVE_MPI + } + + if (itsIsTransposeOutput) { + delete itsTransposedData; + 
delete itsPPF; + delete itsFilteredData; + delete itsCorrelator; + delete itsCorrelatedData; + + delete itsArenas[0]; + delete itsArenas[1]; + delete itsArenas[2]; + } +} + + +template class CN_Processing<i4complex>; +template class CN_Processing<i8complex>; +template class CN_Processing<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/CN_Processing.h b/RTCP/CNProc/src/CN_Processing.h new file mode 100644 index 0000000000000000000000000000000000000000..ac9ae8a3f6d502165332cd2891a9c68a8e7f8cc7 --- /dev/null +++ b/RTCP/CNProc/src/CN_Processing.h @@ -0,0 +1,130 @@ +//# CN_Processing.h: polyphase filter and correlator +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id$
+
+#ifndef LOFAR_CNPROC_CN_PROCESSING_H
+#define LOFAR_CNPROC_CN_PROCESSING_H
+
+// C_IMPLEMENTATION is defined when neither HAVE_BGL nor HAVE_BGP is defined;
+// change the leading 0 to 1 to force it on those platforms as well.
+#if 0 || !(defined HAVE_BGL || defined HAVE_BGP)
+#define C_IMPLEMENTATION
+#endif
+
+#include <Stream/Stream.h>
+#include <Interface/Config.h>
+#if 0
+#include <Interface/Parset.h>
+#else
+#include <Interface/CN_Configuration.h>
+#endif
+
+#include <Interface/Allocator.h>
+
+#include <InputData.h>
+#include <FilteredData.h>
+#include <TransposedData.h>
+#include <Interface/CorrelatedData.h>
+
+#include <Transpose.h>
+#include <AsyncTranspose.h>
+#include <PPF.h>
+#include <Correlator.h>
+
+#include <LocationInfo.h>
+
+#if defined HAVE_BGL
+#include <bglpersonality.h>
+#include <rts.h>
+#endif
+
+
+
+namespace LOFAR {
+namespace RTCP {
+
+
+// Sample-type independent interface of CN_Processing: three pure virtual
+// steps (preprocess, process, postprocess), so that a caller can drive any
+// CN_Processing<SAMPLE_TYPE> instantiation through a single base pointer.
+class CN_Processing_Base // untemplated helper class
+{
+  public:
+    virtual ~CN_Processing_Base();
+
+    virtual void preprocess(CN_Configuration &) = 0;
+    virtual void process() = 0;
+    virtual void postprocess() = 0;
+};
+
+
+// Processing on one core for samples of type SAMPLE_TYPE (explicitly
+// instantiated for i4complex, i8complex and i16complex in CN_Processing.cc).
+// The Stream and the LocationInfo passed to the constructor are stored as a
+// pointer and a reference respectively.
+// NOTE(review): the class holds raw pointers that postprocess() deletes;
+// copying an instance would therefore be unsafe -- confirm that no caller
+// copies it.
+template <typename SAMPLE_TYPE> class CN_Processing : public CN_Processing_Base
+{
+  public:
+			CN_Processing(Stream *, const LocationInfo &);
+			~CN_Processing();
+
+    virtual void	preprocess(CN_Configuration &);
+    virtual void	process();
+    virtual void	postprocess();
+
+  private:
+#if 0
+    void		checkConsistency(Parset *) const;
+#endif
+
+#if defined HAVE_BGL
+    void		getPersonality();
+#endif
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+    void		initIONode() const;
+#endif
+
+#if defined HAVE_MPI
+    void		printSubbandList() const;
+#endif
+
+    unsigned		itsNrStations;
+    unsigned		itsOutputPsetSize;
+    Stream		*itsStream;
+    const LocationInfo	&itsLocationInfo;
+    std::vector<double> itsCenterFrequencies;
+    // process() handles itsCurrentSubband and then advances it by
+    // itsSubbandIncrement, wrapping around within [itsFirstSubband, itsLastSubband)
+    unsigned		itsFirstSubband, itsCurrentSubband, itsLastSubband, itsSubbandIncrement;
+    // role flags; they select which buffers and stages are used and freed
+    bool		itsIsTransposeInput, itsIsTransposeOutput;
+
+    // raw pointers below are released in postprocess()
+    Arena		*itsArenas[3];
+    InputData<SAMPLE_TYPE>	*itsInputData;
+    TransposedData<SAMPLE_TYPE>	*itsTransposedData;
+    FilteredData	*itsFilteredData;
+    CorrelatedData	*itsCorrelatedData;
+
+#if defined HAVE_MPI
+    // only one of the two transpose helpers is deleted in postprocess(),
+    // selected by itsDoAsyncCommunication
+    bool		itsDoAsyncCommunication;
+    Transpose<SAMPLE_TYPE>	*itsTranspose;
+    AsyncTranspose<SAMPLE_TYPE>	*itsAsyncTranspose;
+#endif
+    PPF<SAMPLE_TYPE>	*itsPPF;
+    Correlator		*itsCorrelator;
+
+#if defined HAVE_BGL
+    CNPersonality	itsPersonality;
+    unsigned		itsRankInPset; // core number, not node number!
+#endif
+};
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/CNProc/src/CN_Processing.machinefile b/RTCP/CNProc/src/CN_Processing.machinefile
new file mode 100644
index 0000000000000000000000000000000000000000..74ba1990dfbc677166a90c79bc26579bebf6d288
--- /dev/null
+++ b/RTCP/CNProc/src/CN_Processing.machinefile
@@ -0,0 +1,7 @@
+lofar13
+lofar14
+lofar4
+lofar5
+lofar6
+lofar7
+lofar8
diff --git a/RTCP/CNProc/src/CN_Processing_main.cc b/RTCP/CNProc/src/CN_Processing_main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b9f4e4eb39c3f0bc561637fd81c4867062a33ec4
--- /dev/null
+++ b/RTCP/CNProc/src/CN_Processing_main.cc
@@ -0,0 +1,180 @@
+//# CN_Processing_main.cc:
+//#
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#include <Common/Exception.h>
+#include <Interface/CN_Command.h>
+#include <Interface/CN_Configuration.h>
+#include <FCNP_ClientStream.h>
+#include <Stream/FileStream.h>
+#include <Stream/NullStream.h>
+#include <Stream/SocketStream.h>
+#include <CNProc/LocationInfo.h>
+#include <CNProc/CN_Processing.h>
+#include <CNProc/Package__Version.h>
+
+#include <boost/lexical_cast.hpp>
+#include <execinfo.h>
+
+#if defined HAVE_MPI
+#define MPICH_IGNORE_CXX_SEEK
+#include <mpi.h>
+#endif
+
+#if defined HAVE_FCNP && defined HAVE_BGP
+#include <fcnp_cn.h>
+#endif
+
+// if exceptions are not caught, an attempt is made to create a backtrace
+// from the place where the exception is thrown.
+#define CATCH_EXCEPTIONS
+
+
+using namespace LOFAR;
+using namespace LOFAR::RTCP;
+
+
+#if !defined CATCH_EXCEPTIONS
+
+// Terminate handler (installed by main() when CATCH_EXCEPTIONS is not
+// defined): prints the call stack to std::cerr and aborts.
+void terminate_with_backtrace()
+{
+  std::cerr << "terminate_with_backtrace()" << std::endl;
+
+  void *buffer[100];
+  int  nptrs     = backtrace(buffer, 100);
+  char **strings = backtrace_symbols(buffer, nptrs);
+
+  // backtrace_symbols() returns NULL when it cannot allocate the symbol
+  // table; skip the listing in that case instead of dereferencing NULL
+  if (strings != 0)
+    for (int i = 0; i < nptrs; i ++)
+      std::cerr << i << ": " << strings[i] << std::endl;
+
+  free(strings);
+  abort();
+}
+
+#endif
+
+
+// Entry point.  Connects to the ION through the Stream type selected at
+// compile time and then executes commands read from that stream until STOP:
+//   PREPROCESS   reads a CN_Configuration, creates the CN_Processing
+//                instantiation matching its number of bits per sample
+//                (4, 8 or 16) and calls preprocess() on it;
+//   PROCESS      calls process() on the current object;
+//   POSTPROCESS  calls postprocess() and destroys the current object.
+// An unsupported sample size, or PROCESS/POSTPROCESS without a preceding
+// PREPROCESS, raises std::runtime_error instead of dereferencing a null
+// pointer.  With CATCH_EXCEPTIONS defined, exceptions are reported on
+// std::cerr and the exit status is 1; otherwise the status is 0.
+int main(int argc, char **argv)
+{
+  std::clog.rdbuf(std::cout.rdbuf());
+
+#if !defined CATCH_EXCEPTIONS
+  std::set_terminate(terminate_with_backtrace);
+#endif
+
+#if defined CATCH_EXCEPTIONS
+  try {
+#endif
+
+#if defined HAVE_MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    LocationInfo locationInfo;
+
+    if (locationInfo.rank() == 0) {
+      std::string type = "brief";
+      Version::show<CNProcVersion> (std::cout, "CNProc", type);
+    }
+
+    std::clog << "creating connection to ION ..." << std::endl;
+
+    Stream *ionStream;
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+    ionStream = new ZoidClientStream;
+#elif 1 && defined HAVE_FCNP && defined HAVE_BGP
+    std::vector<unsigned> psetDimensions(3);
+
+    psetDimensions[0] = 4;
+    psetDimensions[1] = 2;
+    psetDimensions[2] = 2;
+
+    FCNP_CN::init(psetDimensions);
+    ionStream = new FCNP_ClientStream;
+#elif 0
+    ionStream = new NullStream;
+#elif 0
+    usleep(10000 * locationInfo.rankInPset()); // do not connect all at the same time
+
+    ionStream = new SocketStream("127.0.0.1", 5000 + locationInfo.rankInPset(), SocketStream::TCP, SocketStream::Client);
+#else
+    throw std::runtime_error("unknown Stream type between ION and CN");
+#endif
+
+    std::clog << "connection successful" << std::endl;
+
+    CN_Configuration   configuration;
+    CN_Processing_Base *proc = 0;
+    CN_Command         command;
+
+    do {
+      command.read(ionStream);
+
+      switch (command.value()) {
+        case CN_Command::PREPROCESS :
+          configuration.read(ionStream);
+
+          // NOTE(review): an object left over from a PREPROCESS that was never
+          // followed by POSTPROCESS is overwritten (and leaked) here, as before.
+          switch (configuration.nrBitsPerSample()) {
+            case 4:
+              proc = new CN_Processing<i4complex>(ionStream, locationInfo);
+              break;
+
+            case 8:
+              proc = new CN_Processing<i8complex>(ionStream, locationInfo);
+              break;
+
+            case 16:
+              proc = new CN_Processing<i16complex>(ionStream, locationInfo);
+              break;
+
+            default:
+              // without this, proc would stay null (or stale) and be used below
+              throw std::runtime_error("unsupported number of bits per sample: " + boost::lexical_cast<std::string>(configuration.nrBitsPerSample()));
+          }
+
+          proc->preprocess(configuration);
+          break;
+
+        case CN_Command::PROCESS :
+          if (proc == 0)
+            throw std::runtime_error("PROCESS command received before PREPROCESS");
+
+          proc->process();
+          break;
+
+        case CN_Command::POSTPROCESS :
+          if (proc == 0)
+            throw std::runtime_error("POSTPROCESS command received before PREPROCESS");
+
+          proc->postprocess();
+          delete proc;
+          proc = 0;
+          break;
+
+        case CN_Command::STOP :
+          break;
+
+        default :
+          std::cerr << "Bad command!" << std::endl;
+          abort();
+      }
+    } while (command.value() != CN_Command::STOP);
+
+    delete ionStream;
+
+#if defined HAVE_MPI
+    MPI_Finalize();
+#endif
+
+    return 0;
+#if defined CATCH_EXCEPTIONS
+  } catch (Exception &ex) {
+    std::cerr << "Uncaught Exception: " << ex.what() << std::endl;
+    return 1;
+  } catch (std::exception &ex) {
+    std::cerr << "Uncaught exception: " << ex.what() << std::endl;
+    return 1;
+  }
+#endif
+}
diff --git a/RTCP/CNProc/src/Correlator.cc b/RTCP/CNProc/src/Correlator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2cda88f4b67dcc0e748d9267fb57574e539c50ca
--- /dev/null
+++ b/RTCP/CNProc/src/Correlator.cc
@@ -0,0 +1,273 @@
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#include <Common/Timer.h>
+
+#include <Correlator.h>
+#include <CorrelatorAsm.h>
+
+#include <map>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+
+static NSTimer computeFlagsTimer("Correlator::computeFlags()", true);
+static NSTimer correlateTimer("Correlator::correlate()", true);
+static NSTimer weightTimer("Correlator::weight()", true);
+
+
+// Sets up a correlator for nrStations stations; the number of baselines is
+// nrStations * (nrStations + 1) / 2, i.e. all pairs with station1 <= station2.
+// Precomputes the weight table: entry 0 is 0 and entry i is 1.0e-6 / i for
+// 1 <= i <= nrSamplesPerIntegration.
+Correlator::Correlator(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration, bool correctBandPass)
+:
+  itsNrStations(nrStations),
+  itsNrBaselines(nrStations * (nrStations + 1) / 2),
+  itsNrChannels(nrChannels),
+  itsNrSamplesPerIntegration(nrSamplesPerIntegration),
+  itsCorrelationWeights(new float[nrSamplesPerIntegration + 1]),
+  itsBandPass(correctBandPass, nrChannels)
+{
+  itsCorrelationWeights[0] = 0.0;
+
+  for (unsigned i = 1; i <= nrSamplesPerIntegration; i ++)
+    itsCorrelationWeights[i] = 1.0e-6 / i;
+}
+
+
+// Frees the weight table allocated by the constructor.
+Correlator::~Correlator()
+{
+  delete [] itsCorrelationWeights;
+}
+
+
+#if 1
+
+// Stores in nrValidSamples the number of samples not covered by any range in
+// flags, and returns sq / (2 * nrValidSamples), where sq is
+// itsNrSamplesPerIntegration squared minus (end*end - begin*begin) for every
+// flagged range; returns .5 when no valid sample is left.
+double Correlator::computeCentroidAndValidSamples(const SparseSet<unsigned> &flags, unsigned &nrValidSamples) const
+{
+  unsigned sq	     = itsNrSamplesPerIntegration * itsNrSamplesPerIntegration;
+  unsigned nrSamples = itsNrSamplesPerIntegration;
+
+  for
(SparseSet<unsigned>::const_iterator it = flags.getRanges().begin(); it != flags.getRanges().end(); it ++) {
+    // (end - begin) * (end + begin) == end^2 - begin^2, which is the sum of
+    // (2t + 1) over the flagged samples t in [begin, end)
+    sq	      -= (it->end - it->begin) * (it->end + it->begin);
+    nrSamples -= (it->end - it->begin);
+  }
+
+  nrValidSamples = nrSamples;
+  // sq / (2 * nrSamples) is the mean of (t + 0.5) over the remaining samples
+  return nrSamples > 0 ? (double) sq / (double) (2 * nrSamples) : .5;
+}
+
+
+// For every baseline (stat1 <= stat2): combines the flags of both stations,
+// stores the resulting centroid, and stores the number of valid samples for
+// channels 1 and up; channel 0 always gets 0 valid samples.
+void Correlator::computeFlagsAndCentroids(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  computeFlagsTimer.start();
+
+  for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+    for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) {
+      unsigned nrValidSamples;
+      unsigned bl = baseline(stat1, stat2);
+
+      correlatedData->centroids[bl] = computeCentroidAndValidSamples(filteredData->flags[stat1] | filteredData->flags[stat2], nrValidSamples);
+      correlatedData->nrValidSamples[bl][0] = 0; // channel 0 does not contain valid data
+
+      for (unsigned ch = 1; ch < itsNrChannels; ch ++)
+        correlatedData->nrValidSamples[bl][ch] = nrValidSamples;
+    }
+  }
+
+  computeFlagsTimer.stop();
+}
+
+#else
+
+// Disabled alternative (the enclosing #if is 1): as above, but without
+// computing centroids.
+void Correlator::computeFlags(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  computeFlagsTimer.start();
+
+  for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+    for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) {
+      unsigned bl             = baseline(stat1, stat2);
+      unsigned nrValidSamples = itsNrSamplesPerIntegration - (filteredData->flags[stat1] | filteredData->flags[stat2]).count();
+
+      correlatedData->nrValidSamples[bl][0] = 0; // channel 0 does not contain valid data
+
+      for (unsigned ch = 1; ch < itsNrChannels; ch ++)
+        correlatedData->nrValidSamples[bl][ch] = nrValidSamples;
+    }
+  }
+
+  computeFlagsTimer.stop();
+}
+
+#endif
+
+
+// Computes the visibilities of all baselines and channels from filteredData
+// and writes them to correlatedData->visibilities.  It reads
+// correlatedData->nrValidSamples for the weighting, so
+// computeFlagsAndCentroids() has to be called first.
+// Two compile-time variants exist: a plain C++ loop nest (when
+// CORRELATOR_C_IMPLEMENTATION is defined) and a variant that tiles the
+// correlation triangle into blocks handled by the assembly kernels declared
+// in CorrelatorAsm.h.
+void Correlator::correlate(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  correlateTimer.start();
+
+#if defined CORRELATOR_C_IMPLEMENTATION
+  for (unsigned ch = 0; ch < itsNrChannels; ch ++) {
+    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+      for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) {
+        unsigned bl = baseline(stat1, stat2), nrValid = 0;
+
+        // channel 0 is skipped here, so nrValid stays 0 and its visibilities
+        // are cleared below
+        if (ch > 0 /* && !itsRFIflags[stat1][ch] && !itsRFIflags[stat2][ch] */) {
+          nrValid = correlatedData->nrValidSamples[bl][ch];
+          for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+            for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+              dcomplex sum = makedcomplex(0, 0);
+              for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
+                sum += filteredData->samples[ch][stat1][time][pol1] * conj(filteredData->samples[ch][stat2][time][pol2]);
+              }
+              // scale by the weight for this number of valid samples and by
+              // the band pass correction factor of this channel
+              sum *= itsCorrelationWeights[nrValid] * itsBandPass.correctionFactors()[ch];
+              correlatedData->visibilities[bl][ch][pol1][pol2] = sum;
+            }
+          }
+        }
+
+        if (nrValid == 0) {
+          for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+            for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+              correlatedData->visibilities[bl][ch][pol1][pol2] = makefcomplex(0, 0);
+            }
+          }
+        }
+
+        //nrValidSamples[bl][ch] = nrValid;
+      }
+    }
+  }
+#else
+  // Blue Gene/L assembler version.
+  // NOTE(review): the kernels below are only invoked for ch >= 1; channel 0
+  // is presumably taken care of by _weigh_visibilities() -- confirm.
+
+  for (unsigned ch = 1; ch < itsNrChannels; ch ++) {
+    // build a map of valid stations
+    // (with the RFI test commented out, every station is entered in the map)
+    unsigned nrValidStations = 0, map[itsNrStations];
+
+    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+//    if (!itsRFIflags[stat2][ch]) {
+        map[nrValidStations ++] = stat2;
+//    } else { // clear correlations that involve invalided stations
+//      for (unsigned stat1 = 0; stat1 < itsNrStations; stat1 ++) {
+//        unsigned bl = stat1 < stat2 ? baseline(stat1, stat2) :
+//          baseline(stat2, stat1);
+//        //_clear_correlation(&visibilities[bl][ch]);
+//        nrValidSamples[bl][ch] = 0;
+//      }
+//    }
+    }
+
+    if (nrValidStations == 0) {
+      break;
+    }
+
+    // Divide the correlation matrix into blocks of 3x2, 2x2, 3+2, 2+1, and 1x1.
+
+    // do the first (auto)correlation(s) (these are the "left"most 1 or 3
+    // squares in the corner of the triangle)
+    if (nrValidStations % 2 == 0) {
+      unsigned stat10 = map[0], stat11 = map[1];
+
+      _auto_correlate_2(filteredData->samples[ch][stat10].origin(),
+                        filteredData->samples[ch][stat11].origin(),
+                        correlatedData->visibilities[baseline(stat10, stat10)][ch].origin(),
+                        correlatedData->visibilities[baseline(stat10, stat11)][ch].origin(),
+                        correlatedData->visibilities[baseline(stat11, stat11)][ch].origin(),
+                        itsNrSamplesPerIntegration);
+    } else {
+      unsigned stat10 = map[0];
+
+      _auto_correlate_1(filteredData->samples[ch][stat10].origin(),
+                        correlatedData->visibilities[baseline(stat10, stat10)][ch].origin(),
+                        itsNrSamplesPerIntegration);
+    }
+
+    // stat2 starts with the same parity as nrValidStations and advances by 2,
+    // so stat2 + 1 is always a valid index into map
+    for (unsigned stat2 = nrValidStations % 2 ? 1 : 2; stat2 < nrValidStations; stat2 += 2) {
+      unsigned stat1 = 0;
+
+#if defined HAVE_BGL
+      // do as many 3x2 blocks as possible
+      for (; stat1 + 3 <= stat2; stat1 += 3) {
+        unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
+        unsigned stat20 = map[stat2], stat21 = map[stat2+1];
+
+        _correlate_3x2(filteredData->samples[ch][stat10].origin(),
+                       filteredData->samples[ch][stat11].origin(),
+                       filteredData->samples[ch][stat12].origin(),
+                       filteredData->samples[ch][stat20].origin(),
+                       filteredData->samples[ch][stat21].origin(),
+                       correlatedData->visibilities[baseline(stat10, stat20)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat10, stat21)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat11, stat20)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat11, stat21)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat12, stat20)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat12, stat21)][ch].origin(),
+                       itsNrSamplesPerIntegration);
+      }
+#endif
+
+      // see if a 2x2 block is necessary
+      for (; stat1 + 2 <= stat2; stat1 += 2) {
+        unsigned stat10 = map[stat1], stat11 = map[stat1+1];
+        unsigned stat20 = map[stat2], stat21 = map[stat2+1];
+
+        _correlate_2x2(filteredData->samples[ch][stat10].origin(),
+                       filteredData->samples[ch][stat11].origin(),
+                       filteredData->samples[ch][stat20].origin(),
+                       filteredData->samples[ch][stat21].origin(),
+                       correlatedData->visibilities[baseline(stat10, stat20)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat10, stat21)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat11, stat20)][ch].origin(),
+                       correlatedData->visibilities[baseline(stat11, stat21)][ch].origin(),
+                       itsNrSamplesPerIntegration);
+      }
+
+      // do the remaining (auto)correlations near the diagonal
+      // (at this point stat1 is either stat2 or stat2 - 1)
+      if (stat1 == stat2) {
+        unsigned stat10 = map[stat1], stat11 = map[stat1+1];
+
+        _auto_correlate_2(filteredData->samples[ch][stat10].origin(),
+                          filteredData->samples[ch][stat11].origin(),
+                          correlatedData->visibilities[baseline(stat10,stat10)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat10,stat11)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat11,stat11)][ch].origin(),
+                          itsNrSamplesPerIntegration);
+      } else {
+        unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
+
+        _auto_correlate_3(filteredData->samples[ch][stat10].origin(),
+                          filteredData->samples[ch][stat11].origin(),
+                          filteredData->samples[ch][stat12].origin(),
+                          correlatedData->visibilities[baseline(stat10,stat11)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat10,stat12)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat11,stat11)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat11,stat12)][ch].origin(),
+                          correlatedData->visibilities[baseline(stat12,stat12)][ch].origin(),
+                          itsNrSamplesPerIntegration);
+      }
+    }
+  }
+
+  weightTimer.start();
+#if 0
+  for (unsigned bl = 0; bl < itsNrBaselines; bl ++) {
+    for (unsigned ch = 0; ch < itsNrChannels; ch ++) {
+      for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+        for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+          itsCorrelatedData->visibilities[bl][ch][pol1][pol2] *= itsCorrelationWeights[(*nrValidSamples)[bl][ch]];
+        }
+      }
+    }
+  }
+#else
+  _weigh_visibilities(correlatedData->visibilities.origin(), correlatedData->nrValidSamples.origin(), itsCorrelationWeights, itsBandPass.correctionFactors(), itsNrBaselines, itsNrChannels);
+#endif
+  weightTimer.stop();
+#endif
+
+  correlateTimer.stop();
+}
+
+
+} // namespace RTCP
+} // namespace LOFAR
diff --git a/RTCP/CNProc/src/Correlator.h b/RTCP/CNProc/src/Correlator.h
new file mode 100644
index 0000000000000000000000000000000000000000..b702854467c90cb2fa0175f9ef44b7e8cd0dbf6d
--- /dev/null
+++ b/RTCP/CNProc/src/Correlator.h
@@ -0,0 +1,49 @@
+#ifndef LOFAR_CNPROC_CORRELATOR_H
+#define LOFAR_CNPROC_CORRELATOR_H
+
+// CORRELATOR_C_IMPLEMENTATION is defined when neither HAVE_BGL nor HAVE_BGP
+// is defined; change the leading 0 to 1 to force it on those platforms too.
+#if 0 || !(defined HAVE_BGL || defined HAVE_BGP)
+#define CORRELATOR_C_IMPLEMENTATION
+#endif
+
+
+#include <BandPass.h>
+#include <FilteredData.h>
+#include <Interface/CorrelatedData.h>
+
+#include <cassert>
+
+#include <boost/multi_array.hpp>
+
+namespace LOFAR {
+namespace RTCP {
+
+// Correlates the filtered samples of all station pairs (including each
+// station with itself) into visibilities per baseline and channel.
+// NOTE(review): the class owns itsCorrelationWeights through a raw pointer
+// and declares no copy constructor or assignment operator, so copying an
+// instance would free the table twice -- confirm that no caller copies it.
+class Correlator
+{
+  public:
+    Correlator(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration, bool correctBandPass);
+    ~Correlator();
+
+    void	    correlate(const FilteredData *, CorrelatedData *);
+    void	    computeFlagsAndCentroids(const FilteredData *, CorrelatedData *);
+
+    static unsigned baseline(unsigned station1, unsigned station2);
+
+  private:
+    unsigned	    itsNrStations, itsNrBaselines, itsNrChannels, itsNrSamplesPerIntegration;
+    float	    *itsCorrelationWeights; //[itsNrSamplesPerIntegration + 1]
+    BandPass	    itsBandPass;
+
+    double	    computeCentroidAndValidSamples(const SparseSet<unsigned> &flags, unsigned &nrValidSamples) const;
+};
+
+
+// Index of the baseline formed by station1 and station2; requires
+// station1 <= station2 (checked with assert).  Baselines are numbered
+// (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), ...
+inline unsigned Correlator::baseline(unsigned station1, unsigned station2)
+{
+  assert(station1 <= station2);
+  return station2 * (station2 + 1) / 2 + station1;
+}
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/CNProc/src/CorrelatorAsm.S
b/RTCP/CNProc/src/CorrelatorAsm.S new file mode 100644 index 0000000000000000000000000000000000000000..383cccc79bafb0e02729d6c787e297cb5dd09178 --- /dev/null +++ b/RTCP/CNProc/src/CorrelatorAsm.S @@ -0,0 +1,1282 @@ +# Correlator.S: correlator assembly for BG/L double Hummer +# +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# $Id$ + +#if defined HAVE_BGL || defined HAVE_BGP +#define CN_PROCESSING +#include <Interface/Config.h> + +# C[s1][s2][p1][p2][ch] = sum over t of A[ch][s1][p1][t] * ~ A[ch][s2][p2][t] +# Cr = Ar*Br+Ai*Bi, Ci = Ai*Br-Ar*Bi + +# fxcpnsma 1,16,21,1 # f1r += f21r*f16r, f1i -= f21i*f16r +# fxcxma 1,16,21,1 # f1r += f21i*f16i, f1i += f21r*f16i + + +.align 5 + +zero: .long 0,0 +one: .long 0x3f800000,0x3f800000 + +.global _correlate_2x2 +_correlate_2x2: + +# computes correlations of S0*~S2,S0*~S3,S1*~S2,S1*~S3 +# r3 : pointer to S0 data +# r4 : pointer to S1 data +# r5 : pointer to S2 data +# r6 : pointer to S3 data +# r7 : pointer to S0*~S2 output +# r8 : pointer to S0*~S3 output +# r9 : pointer to S1*~S2 output +# r10: pointer to S1*~S3 output +# 8(r1): nr_samples_to_integrate + +# local variables: +# r11: temporary +# f0 : sum of S0_X * ~S2_X +# f1 : sum of S0_X * ~S2_Y +# f2 : sum of S0_Y * ~S2_X +# f3 : sum of S0_Y * ~S2_Y +# 
f4 : sum of S0_X * ~S3_X +# f5 : sum of S0_X * ~S3_Y +# f6 : sum of S0_Y * ~S3_X +# f7 : sum of S0_Y * ~S3_Y +# f8 : sum of S1_X * ~S2_X +# f9 : sum of S1_X * ~S2_Y +# f10: sum of S1_Y * ~S2_X +# f11: sum of S1_Y * ~S2_Y +# f12: sum of S1_X * ~S3_X +# f13: sum of S1_X * ~S3_Y +# f14: sum of S1_Y * ~S3_X +# f15: sum of S1_Y * ~S3_Y +# f16,f24:S0_X (sample from station S0, X polarization) +# f17,f25:S0_Y +# f18,f26:S1_X +# f19,f27:S1_Y +# f20,f28:S2_X +# f21,f29:S2_Y +# f22,f30:S3_X +# f23,f31:S3_Y + + lis 12,zero@ha ; lwz 0,8(1) + addi 12,12,zero@l + ; lfpsx 0,0,12 + + li 12,-16 # push call-saved registers + ; stfpdux 14,1,12 + ; stfpdux 15,1,12 + ; stfpdux 16,1,12 + ; stfpdux 17,1,12 + ; stfpdux 18,1,12 + ; stfpdux 19,1,12 + ; stfpdux 20,1,12 + ; stfpdux 21,1,12 + ; stfpdux 22,1,12 + ; stfpdux 23,1,12 + ; stfpdux 24,1,12 + ; stfpdux 25,1,12 + li 11,8 ; stfpdux 26,1,12 + srwi 0,0,1 ; stfpdux 27,1,12 + mtctr 0 ; stfpdux 28,1,12 + fpmr 1,0 ; stfpdux 29,1,12 + fpmr 2,0 ; stfpdux 30,1,12 + fpmr 3,0 ; stfpdux 31,1,12 + + fpmr 4,0 ; lfpsx 16,0,3 + fpmr 5,0 ; lfpsux 17,3,11 + fpmr 6,0 ; lfpsx 18,0,4 + fpmr 7,0 ; lfpsux 19,4,11 + fpmr 8,0 ; lfpsx 20,0,5 + fpmr 9,0 ; lfpsux 21,5,11 + fpmr 10,0 ; lfpsx 22,0,6 + fpmr 11,0 ; lfpsux 23,6,11 + fpmr 12,0 ; lfpsux 24,3,11 + fpmr 13,0 ; lfpsux 25,3,11 + fpmr 14,0 ; lfpsux 28,5,11 + fpmr 15,0 + +0: # loop over time + + # S0 * ~S2, phase 1 + fxcpnsma 0,16,20,0 + fxcpnsma 1,16,21,1 ; lfpsux 29,5,11 + fxcpnsma 2,17,20,2 + fxcpnsma 3,17,21,3 + + # S0 * ~S3, phase 1 + fxcpnsma 4,16,22,4 + fxcpnsma 5,16,23,5 ; lfpsux 26,4,11 + fxcpnsma 6,17,22,6 + fxcpnsma 7,17,23,7 + + # S1 * ~S2, phase 1 + fxcpnsma 8,18,20,8 + fxcpnsma 9,18,21,9 ; lfpsux 27,4,11 + fxcpnsma 10,19,20,10 + fxcpnsma 11,19,21,11 + + # S1 * ~S3, phase 1 + fxcpnsma 12,18,22,12 + fxcpnsma 13,18,23,13 ; lfpsux 30,6,11 + fxcpnsma 14,19,22,14 + fxcpnsma 15,19,23,15 + + # S0 * ~S2, phase 2 + fxcxma 0,16,20,0 + fxcxma 1,16,21,1 ; lfpsux 31,6,11 + fxcxma 2,17,20,2 + fxcxma 3,17,21,3 + 
+ # S0 * ~S3, phase 2 + fxcxma 4,16,22,4 + fxcxma 5,16,23,5 ; lfpsux 16,3,11 + fxcxma 6,17,22,6 + fxcxma 7,17,23,7 + + # S1 * ~S2, phase 2 + fxcxma 8,18,20,8 + fxcxma 9,18,21,9 ; lfpsux 17,3,11 + fxcxma 10,19,20,10 + fxcxma 11,19,21,11 + + # S1 * ~S3, phase 2 + fxcxma 12,18,22,12 + fxcxma 13,18,23,13 ; lfpsux 20,5,11 + fxcxma 14,19,22,14 + fxcxma 15,19,23,15 + + # S0 * ~S2, phase 1 + fxcpnsma 0,24,28,0 + fxcpnsma 1,24,29,1 ; lfpsux 21,5,11 + fxcpnsma 2,25,28,2 + fxcpnsma 3,25,29,3 + + # S0 * ~S3, phase 1 + fxcpnsma 4,24,30,4 + fxcpnsma 5,24,31,5 ; lfpsux 18,4,11 + fxcpnsma 6,25,30,6 + fxcpnsma 7,25,31,7 + + # S1 * ~S2, phase 1 + fxcpnsma 8,26,28,8 + fxcpnsma 9,26,29,9 ; lfpsux 19,4,11 + fxcpnsma 10,27,28,10 + fxcpnsma 11,27,29,11 + + # S1 * ~S3, phase 1 + fxcpnsma 12,26,30,12 + fxcpnsma 13,26,31,13 ; lfpsux 22,6,11 + fxcpnsma 14,27,30,14 + fxcpnsma 15,27,31,15 + + # S0 * ~S2, phase 2 + fxcxma 0,24,28,0 + fxcxma 1,24,29,1 ; lfpsux 23,6,11 + fxcxma 2,25,28,2 + fxcxma 3,25,29,3 + + # S0 * ~S3, phase 2 + fxcxma 4,24,30,4 + fxcxma 5,24,31,5 ; lfpsux 24,3,11 + fxcxma 6,25,30,6 + fxcxma 7,25,31,7 + + # S1 * ~S2, phase 2 + fxcxma 8,26,28,8 + fxcxma 9,26,29,9 ; lfpsux 25,3,11 + fxcxma 10,27,28,10 + fxcxma 11,27,29,11 + + # S1 * ~S3, phase 2 + fxcxma 12,26,30,12 + fxcxma 13,26,31,13 ; lfpsux 28,5,11 + fxcxma 14,27,30,14 + fxcxma 15,27,31,15 + + bdnz 0b + +#if !defined HAVE_BGP + dcbz 0,7 +#endif + stfpsx 0,0,7 # store results S0 * ~S2 + stfpsux 1,7,11 + stfpsux 2,7,11 + stfpsux 3,7,11 + +#if !defined HAVE_BGP + dcbz 0,8 +#endif + stfpsx 4,0,8 # store results S0 * ~S3 + stfpsux 5,8,11 + stfpsux 6,8,11 + stfpsux 7,8,11 + +#if !defined HAVE_BGP + dcbz 0,9 +#endif + stfpsx 8,0,9 # store results S1 * ~S2 + stfpsux 9,9,11 + stfpsux 10,9,11 + stfpsux 11,9,11 + +#if !defined HAVE_BGP + dcbz 0,10 +#endif + stfpsx 12,0,10 # store results S1 * ~S3 + stfpsux 13,10,11 + stfpsux 14,10,11 + stfpsux 15,10,11 + + li 11,16 # restore call-saved registers + lfpdx 31,0,1 + lfpdux 30,1,11 + 
lfpdux 29,1,11 + lfpdux 28,1,11 + lfpdux 27,1,11 + lfpdux 26,1,11 + lfpdux 25,1,11 + lfpdux 24,1,11 + lfpdux 23,1,11 + lfpdux 22,1,11 + lfpdux 21,1,11 + lfpdux 20,1,11 + lfpdux 19,1,11 + lfpdux 18,1,11 + lfpdux 17,1,11 + lfpdux 16,1,11 + lfpdux 15,1,11 + lfpdux 14,1,11 + + addi 1,1,16 # reset stack pointer + + blr # return + + +.align 5 +.global _correlate_3x2 +_correlate_3x2: + +# computes correlations of S0*~S3,S0*~S4,S1*~S3,S1*~S4,S2*~S3,S2*~S4 +# r3 : pointer to S0 data +# r4 : pointer to S1 data +# r5 : pointer to S2 data +# r6 : pointer to S3 data +# r7 : pointer to S4 data +# r8 : pointer to S0*~S3 output +# r9 : pointer to S0*~S4 output +# r10: pointer to S1*~S3 output +# 8(r1): pointer to S1*~S4 output +# 12(r1): pointer to S2*~S3 output +# 16(r1): pointer to S2*~S4 output +# 20(r1): nr_samples_to_integrate + + li 12,-16 ; lwz 0,20(1) + lis 11,zero@ha ; stfpdux 14,1,12 + addi 11,11,zero@l ; stfpdux 15,1,12 + ; lfpsx 0,0,11 + + srwi 0,0,1 ; stfpdux 16,1,12 + mtctr 0 ; stfpdux 17,1,12 + fpmr 1,0 ; stfpdux 18,1,12 + fpmr 2,0 ; stfpdux 19,1,12 + fpmr 3,0 ; stfpdux 20,1,12 + fpmr 4,0 ; stfpdux 21,1,12 + fpmr 5,0 ; stfpdux 22,1,12 + fpmr 6,0 ; stfpdux 23,1,12 + fpmr 7,0 ; stfpdux 24,1,12 + fpmr 8,0 ; stfpdux 25,1,12 + fpmr 9,0 ; stfpdux 26,1,12 + fpmr 10,0 ; stfpdux 27,1,12 + fpmr 11,0 ; stfpdux 28,1,12 + fpmr 12,0 ; stfpdux 29,1,12 + fpmr 13,0 ; stfpdux 30,1,12 + fpmr 14,0 ; stfpdux 31,1,12 + fpmr 15,0 ; li 11,8 + + fpmr 16,0 ; lfpsx 24,0,3 + fpmr 17,0 ; lfpsux 25,3,11 + fpmr 18,0 ; lfpsx 26,0,4 + fpmr 19,0 ; lfpsx 28,0,6 + fpmr 20,0 ; lfpsux 29,6,11 + fpmr 21,0 ; lfpsx 30,0,7 + fpmr 22,0 ; sub 5,5,11 + fpmr 23,0 + + +0: + # S0 * ~S3, phase 1 + fxcpnsma 0,24,28,0 ; lfpsux 31,7,11 + fxcpnsma 1,24,29,1 ; lfpsux 27,4,11 + fxcpnsma 2,25,28,2 + fxcpnsma 3,25,29,3 + + # S0 * ~S4, phase 1 + fxcpnsma 4,24,30,4 + fxcpnsma 5,24,31,5 + fxcpnsma 6,25,30,6 + fxcpnsma 7,25,31,7 + + # S0 * ~S3, phase 2 + fxcxma 0,24,28,0 + fxcxma 1,24,29,1 + fxcxma 2,25,28,2 + fxcxma 
3,25,29,3 + + # S0 * ~S4, phase 2 ; # ld S2 + fxcxma 4,24,30,4 + fxcxma 5,24,31,5 ; lfpsux 24,5,11 + fxcxma 6,25,30,6 + fxcxma 7,25,31,7 ; lfpsux 25,5,11 + + # S1 * ~S3, phase 1 + fxcpnsma 8,26,28,8 + fxcpnsma 9,26,29,9 + fxcpnsma 10,27,28,10 + fxcpnsma 11,27,29,11 + + # S1 * ~S4, phase 1 + fxcpnsma 12,26,30,12 + fxcpnsma 13,26,31,13 + fxcpnsma 14,27,30,14 + fxcpnsma 15,27,31,15 + + # S1 * ~S3, phase 2 + fxcxma 8,26,28,8 + fxcxma 9,26,29,9 + fxcxma 10,27,28,10 + fxcxma 11,27,29,11 + + # S1 * ~S4, phase 2 ; # ld S0 + fxcxma 12,26,30,12 + fxcxma 13,26,31,13 ; lfpsux 26,3,11 + fxcxma 14,27,30,14 + fxcxma 15,27,31,15 ; lfpsux 27,3,11 + + # S2 * ~S3, phase 1 + fxcpnsma 16,24,28,16 + fxcpnsma 17,24,29,17 + fxcpnsma 18,25,28,18 + fxcpnsma 19,25,29,19 + + # S2 * ~S4, phase 1 + fxcpnsma 20,24,30,20 + fxcpnsma 21,24,31,21 + fxcpnsma 22,25,30,22 + fxcpnsma 23,25,31,23 + + # S2 * ~S3, phase 2 ; # ld S3 + fxcxma 16,24,28,16 + fxcxma 17,24,29,17 + fxcxma 18,25,28,18 ; lfpsux 28,6,11 + fxcxma 19,25,29,19 ; lfpsux 29,6,11 + + # S2 * ~S4, phase 2 ; # ld S4 + fxcxma 20,24,30,20 + fxcxma 21,24,31,21 + fxcxma 22,25,30,22 ; lfpsux 30,7,11 + fxcxma 23,25,31,23 + + # S0 * ~S3, phase 1 ; # ld S1 + fxcpnsma 0,26,28,0 + fxcpnsma 1,26,29,1 + fxcpnsma 2,27,28,2 ; lfpsux 31,7,11 + fxcpnsma 3,27,29,3 ; lfpsux 24,4,11 + + # S0 * ~S4, phase 1 + fxcpnsma 4,26,30,4 + fxcpnsma 5,26,31,5 + fxcpnsma 6,27,30,6 ; lfpsux 25,4,11 + fxcpnsma 7,27,31,7 + + # S0 * ~S4, phase 2 + fxcxma 0,26,28,0 + fxcxma 1,26,29,1 + fxcxma 2,27,28,2 + fxcxma 3,27,29,3 + + # S0 * ~S4, phase 2 ; # ld S2 + fxcxma 4,26,30,4 + fxcxma 5,26,31,5 ; lfpsux 26,5,11 + fxcxma 6,27,30,6 + fxcxma 7,27,31,7 ; lfpsux 27,5,11 + + # S1 * ~S3, phase 1 ; # fetch 2 station S3 samples from L2 to L1 + fxcpnsma 8,24,28,8 ; dcbt 6,11 + fxcpnsma 9,24,29,9 ; dcbt 7,11 # ditto for S4 + fxcpnsma 10,25,28,10 + fxcpnsma 11,25,29,11 + + # S1 * ~S4, phase 1 + fxcpnsma 12,24,30,12 + fxcpnsma 13,24,31,13 + fxcpnsma 14,25,30,14 + fxcpnsma 15,25,31,15 + + # S1 
* ~S3, phase 2 + fxcxma 8,24,28,8 + fxcxma 9,24,29,9 + fxcxma 10,25,28,10 + fxcxma 11,25,29,11 + + # S1 * ~S4, phase 2 ; # ld S0 + fxcxma 12,24,30,12 + fxcxma 13,24,31,13 ; lfpsux 24,3,11 + fxcxma 14,25,30,14 + fxcxma 15,25,31,15 ; lfpsux 25,3,11 + + # S2 * ~S3, phase 1 + fxcpnsma 16,26,28,16 + fxcpnsma 17,26,29,17 + fxcpnsma 18,27,28,18 + fxcpnsma 19,27,29,19 + + # S2 * ~S4, phase 1 + fxcpnsma 20,26,30,20 + fxcpnsma 21,26,31,21 + fxcpnsma 22,27,30,22 + fxcpnsma 23,27,31,23 + + # S2 * ~S3, phase 2 ; # ld S3 + fxcxma 16,26,28,16 + fxcxma 17,26,29,17 + fxcxma 18,27,28,18 ; lfpsux 28,6,11 + fxcxma 19,27,29,19 ; lfpsux 29,6,11 + + # S2 * ~S4, phase 2 ; # ld S1 and S4 + fxcxma 20,26,30,20 + fxcxma 21,26,31,21 ; lfpsux 26,4,11 + fxcxma 22,27,30,22 ; lfpsux 30,7,11 + fxcxma 23,27,31,23 + + bdnz 0b + + lwz 3,288+8(1) # load function arguments 9-11 + lwz 4,288+12(1) + lwz 5,288+16(1) + +#if !defined HAVE_BGP + dcbz 0,8 +#endif + stfpsx 0,0,8 # store results S0 * ~S3 + stfpsux 1,8,11 + stfpsux 2,8,11 + stfpsux 3,8,11 + +#if !defined HAVE_BGP + dcbz 0,9 +#endif + stfpsx 4,0,9 # store results S0 * ~S4 + stfpsux 5,9,11 + stfpsux 6,9,11 + stfpsux 7,9,11 + +#if !defined HAVE_BGP + dcbz 0,10 +#endif + stfpsx 8,0,10 # store results S1 * ~S3 + stfpsux 9,10,11 + stfpsux 10,10,11 + stfpsux 11,10,11 + +#if !defined HAVE_BGP + dcbz 0,3 +#endif + stfpsx 12,0,3 # store results S1 * ~S4 + stfpsux 13,3,11 + stfpsux 14,3,11 + stfpsux 15,3,11 + +#if !defined HAVE_BGP + dcbz 0,4 +#endif + stfpsx 16,0,4 # store results S2 * ~S3 + stfpsux 17,4,11 + stfpsux 18,4,11 + stfpsux 19,4,11 + +#if !defined HAVE_BGP + dcbz 0,5 +#endif + stfpsx 20,0,5 # store results S2 * ~S4 + stfpsux 21,5,11 + stfpsux 22,5,11 + stfpsux 23,5,11 + + li 11,16 # restore call-saved registers + lfpdx 31,0,1 + lfpdux 30,1,11 + lfpdux 29,1,11 + lfpdux 28,1,11 + lfpdux 27,1,11 + lfpdux 26,1,11 + lfpdux 25,1,11 + lfpdux 24,1,11 + lfpdux 23,1,11 + lfpdux 22,1,11 + lfpdux 21,1,11 + lfpdux 20,1,11 + lfpdux 19,1,11 + lfpdux 18,1,11 + 
lfpdux 17,1,11 + lfpdux 16,1,11 + lfpdux 15,1,11 + lfpdux 14,1,11 + + addi 1,1,16 # reset stack pointer + + blr # return + + +.align 5 +.global _auto_correlate_1 +_auto_correlate_1: + +# computes auto correlations of S0*~S0 +# r3 : pointer to S0 data +# r4 : pointer to 4 results +# r5 : nr_samples_to_integrate + + srwi 5,5,2 + lis 12,zero@ha + mtctr 5 + addi 12,12,zero@l + lfpsx 0,0,12 # f0r = 0, f0i = 0 + + li 8,8 + + lfpsx 4,0,3 # f8:f9 = S0 + lfpsux 5,3,8 + lfpsux 6,3,8 # f6:f7 = S0 + lfpsux 7,3,8 + fpmr 1,0 + lfpsux 8,3,8 # f8:f9 = S0 + fpmr 2,0 + lfpsux 9,3,8 + + fpmr 3,0 + lfpsx 12,12,8 # f12r = 1, f12i = 1 + + +0: # loop over time + + lfpsux 10,3,8 # f6:f7 = S0 + fpmadd 0,4,4,0 # S0 * ~S0 + lfpsux 11,3,8 + fxcpnsma 1,4,5,1 # f1r += f5r*f4r, f1i -= f5i*f4r + fxcxma 2,4,5,2 # f2r += f5i*f4i, f2i += f5r*f4i + fpmadd 3,5,5,3 + + lfpsux 4,3,8 # f4:f5 = S0 + fpmadd 0,6,6,0 # S0 * ~S0 + lfpsux 5,3,8 + fxcpnsma 1,6,7,1 # f1r += f7r*f6r, f1i -= f7i*f6r + fxcxma 2,6,7,2 # f2r += f7i*f6i, f2i += f7r*f6i + fpmadd 3,7,7,3 + + lfpsux 6,3,8 # f6:f7 = S0 + fpmadd 0,8,8,0 # S0 * ~S0 + lfpsux 7,3,8 + fxcpnsma 1,8,9,1 # f1r += f9r*f8r, f1i -= f9i*f8r + fxcxma 2,8,9,2 # f2r += f9i*f8i, f2i += f9r*f8i + fpmadd 3,9,9,3 + + lfpsux 8,3,8 # f8:f9 = S0 + fpmadd 0,10,10,0 # S0 * ~S0 + lfpsux 9,3,8 + fxcpnsma 1,10,11,1 # f1r += f11r*f10r, f1i -= f11i*f10r + fxcxma 2,10,11,2 # f2r += f11i*f10i, f2i += f11r*f10i + fpmadd 3,11,11,3 + + bdnz 0b + + fxcsnsma 0,0,12,0 # f0r += 1*f0i, f0i += -1*f0i = 0 + fpadd 1,1,2 + fxcsnsma 3,3,12,3 + +#if !defined HAVE_BGP + dcbz 0,4 # store result +#endif + stfpsx 0,0,4 + stfpsux 1,4,8 + fsneg 1,1 + stfpsux 1,4,8 + stfpsux 3,4,8 + + blr # return + + +.align 5 +.global _auto_correlate_2 +_auto_correlate_2: + +# computes correlations of S0*~S0,S0*~S1,S1*~S1 +# r3 : pointer to S0 data +# r4 : pointer to S1 data +# r5 : pointer to S0*~S0 output +# r6 : pointer to S0*~S1 output +# r7 : pointer to S1*~S1 output +# r8 : nr_samples_to_integrate + +# local 
variables: +# r11: temporary +# f0 : sum of S0_X * S0_X +# f1 : sum of S0_X * ~S0_Y +# f3 : sum of S0_Y * S0_Y +# f4 : sum of S0_X * ~S1_X +# f5 : sum of S0_X * ~S1_Y +# f6 : sum of S0_Y * ~S1_X +# f7 : sum of S0_Y * ~S1_Y +# f8 : sum of S1_X * S1_X +# f9 : sum of S1_X * ~S1_Y +# f11: sum of S1_Y * S1_Y +# f12,f16:S0_X (sample from station S0, X polarization) +# f13,f17:S0_Y +# f14,f18:S1_X +# f15,f19:S1_Y + + srwi 8,8,1 + lis 12,zero@ha + mtctr 8 + addi 12,12,zero@l + lfpsx 0,0,12 + + li 11,-16 # push call-saved registers + stfpdux 14,1,11 + fpmr 1,0 + stfpdux 15,1,11 + fpmr 2,0 + stfpdux 16,1,11 + fpmr 3,0 + stfpdux 17,1,11 + fpmr 4,0 + stfpdux 18,1,11 + fpmr 5,0 + stfpdux 19,1,11 + + fpmr 6,0 + li 11,8 # prefetch station samples + fpmr 7,0 + lfpsx 12,0,3 + fpmr 8,0 + lfpsux 13,3,11 + fpmr 9,0 + lfpsx 14,0,4 + fpmr 10,0 + lfpsux 15,4,11 + fpmr 11,0 + +0: # loop over time + + # S0 * ~S0, phase 1 + fpmadd 0,12,12,0 ; lfpsux 16,3,11 + fxcpnsma 1,12,13,1 ; lfpsux 17,3,11 + fpmadd 3,13,13,3 ; lfpsux 18,4,11 + + # S0 * ~S1, phase 1 + fxcpnsma 4,12,14,4 + fxcpnsma 5,12,15,5 + fxcpnsma 6,13,14,6 + fxcpnsma 7,13,15,7 + + # S1 * ~S1, phase 1 + fpmadd 8,14,14,8 ; lfpsux 19,4,11 + fxcpnsma 9,14,15,9 + fpmadd 11,15,15,11 + + # S0 * ~S0, phase 2 + fxcxma 1,12,13,1 + + # S0 * ~S1, phase 2 + fxcxma 4,12,14,4 + fxcxma 5,12,15,5 ; lfpsux 12,3,11 + fxcxma 6,13,14,6 + fxcxma 7,13,15,7 ; lfpsux 13,3,11 + + # S1 * ~S1, phase 2 + fxcxma 9,14,15,9 ; lfpsux 14,4,11 + + # S0 * ~S0, phase 1 + fpmadd 0,16,16,0 + fxcpnsma 1,16,17,1 + fpmadd 3,17,17,3 + + # S0 * ~S1, phase 1 + fxcpnsma 4,16,18,4 + fxcpnsma 5,16,19,5 ; lfpsux 15,4,11 + fxcpnsma 6,17,18,6 + fxcpnsma 7,17,19,7 + + # S1 * ~S1, phase 1 + fpmadd 8,18,18,8 + fxcpnsma 9,18,19,9 + fpmadd 11,19,19,11 + + # S0 * ~S0, phase 2 + fxcxma 1,16,17,1 + + # S0 * ~S1, phase 2 + fxcxma 4,16,18,4 + fxcxma 5,16,19,5 + fxcxma 6,17,18,6 + fxcxma 7,17,19,7 + + # S1 * ~S1, phase 2 + fxcxma 9,18,19,9 + + bdnz 0b + + lfpsx 12,12,11 # f12r = 1, f12i = 1 + 
+ fxcsnsma 0,0,12,0 # f0r += 1*f0i, f0i += -1*f0i = 0 + fxcsnsma 3,3,12,3 + fxcsnsma 8,8,12,8 + fxcsnsma 11,11,12,11 + +#if !defined HAVE_BGP + dcbz 0,5 +#endif + stfpsx 0,0,5 # store results S0 * ~S0 + stfpsux 1,5,11 + fsneg 1,1 + stfpsux 1,5,11 + stfpsux 3,5,11 + +#if !defined HAVE_BGP + dcbz 0,6 +#endif + stfpsx 4,0,6 # store results S0 * ~S1 + stfpsux 5,6,11 + stfpsux 6,6,11 + stfpsux 7,6,11 + +#if !defined HAVE_BGP + dcbz 0,7 +#endif + stfpsx 8,0,7 # store results S1 * ~S1 + stfpsux 9,7,11 + fsneg 9,9 + stfpsux 9,7,11 + stfpsux 11,7,11 + + li 11,16 # restore call-saved registers + lfpdx 19,0,1 + lfpdux 18,1,11 + lfpdux 17,1,11 + lfpdux 16,1,11 + lfpdux 15,1,11 + lfpdux 14,1,11 + + addi 1,1,16 # reset stack pointer + + blr # return + + +.align 5 +.global _auto_correlate_3 +_auto_correlate_3: + +# computes correlations of S0*~S1,S0*~S2,S1*~S1,S1*~S2,S2*~S2 +# r3 : pointer to S0 data +# r4 : pointer to S1 data +# r5 : pointer to S2 data +# r6 : pointer to S0*~S1 output +# r7 : pointer to S0*~S2 output +# r8 : pointer to S1*~S1 output +# r9 : pointer to S1*~S2 output +# r10 : pointer to S2*~S2 output +# 8(r1): nr_samples_to_integrate + +# local variables: +# r11: temporary +# f0: sum of S0_X * ~S1_X +# f1: sum of S0_X * ~S1_Y +# f2: sum of S0_Y * ~S1_X +# f3: sum of S0_Y * ~S1_Y +# f4: sum of S0_X * ~S2_X +# f5: sum of S0_X * ~S2_Y +# f6: sum of S0_Y * ~S2_X +# f7: sum of S0_Y * ~S2_Y +# f8 : sum of S1_X * S1_X +# f9 : sum of S1_X * ~S1_Y +# f10 : sum of S1_Y * S1_Y +# f11 : sum of S1_X * ~S2_X +# f12 : sum of S1_X * ~S2_Y +# f13 : sum of S1_Y * ~S2_X +# f14 : sum of S1_Y * ~S2_Y +# f15 : sum of S2_X * S2_X +# f16 : sum of S2_X * ~S2_Y +# f17 : sum of S2_Y * S2_Y +# f18,f24:S0_X (sample from station S0, X polarization) +# f19,f25:S0_Y +# f20,f26:S1_X +# f21,f27:S1_Y +# f22,f28:S2_X +# f23,f29:S2_Y + + + lis 12,zero@ha + lwz 0,8(1) + addi 12,12,zero@l + lfpsx 0,0,12 + + li 11,-16 # push call-saved registers + stfpdux 14,1,11 + stfpdux 15,1,11 + stfpdux 16,1,11 +
srwi 0,0,1 + stfpdux 17,1,11 + mtctr 0 + stfpdux 18,1,11 ; fpmr 1,0 + stfpdux 19,1,11 + fpmr 2,0 + stfpdux 20,1,11 + fpmr 3,0 + stfpdux 21,1,11 + fpmr 4,0 + stfpdux 22,1,11 + fpmr 5,0 + stfpdux 23,1,11 + fpmr 6,0 + stfpdux 24,1,11 + fpmr 7,0 + stfpdux 25,1,11 + fpmr 8,0 + stfpdux 26,1,11 + fpmr 9,0 + stfpdux 27,1,11 + fpmr 10,0 + stfpdux 28,1,11 + fpmr 11,0 + stfpdux 29,1,11 + + li 11,8 # prefetch station samples + ; lfpsx 18,0,3 + fpmr 12,0 + ; lfpsux 19,3,11 + fpmr 13,0 + ; lfpsx 20,0,4 + fpmr 14,0 + ; lfpsux 21,4,11 + fpmr 15,0 + ; lfpsx 22,0,5 + fpmr 16,0 + ; lfpsux 23,5,11 + fpmr 17,0 + +0: # loop over time + # S0 * ~S1, phase 1 + fxcpnsma 0,18,20,0 ; lfpsux 24,3,11 + fxcpnsma 1,18,21,1 ; lfpsux 25,3,11 + fxcpnsma 2,19,20,2 ; lfpsux 26,4,11 + fxcpnsma 3,19,21,3 + + # S0 * ~S2, phase 1 + fxcpnsma 4,18,22,4 + fxcpnsma 5,18,23,5 + fxcpnsma 6,19,22,6 + fxcpnsma 7,19,23,7 + + # S1 * ~S1, phase 1 + fpmadd 8,20,20,8 ; lfpsux 27,4,11 + fxcpnsma 9,20,21,9 ; lfpsux 28,5,11 + fpmadd 10,21,21,10 ; lfpsux 29,5,11 + + # S1 * ~S2, phase 1 + fxcpnsma 11,20,22,11 + fxcpnsma 12,20,23,12 + fxcpnsma 13,21,22,13 + fxcpnsma 14,21,23,14 + + # S2 * ~S2, phase 1 + fpmadd 15,22,22,15 + fxcpnsma 16,22,23,16 + fpmadd 17,23,23,17 + + # S0 * ~S1, phase 2 + fxcxma 0,18,20,0 + fxcxma 1,18,21,1 + fxcxma 2,19,20,2 + fxcxma 3,19,21,3 + + # S0 * ~S2, phase 2 + fxcxma 4,18,22,4 + fxcxma 5,18,23,5 + fxcxma 6,19,22,6 + fxcxma 7,19,23,7 + + # S1_X * ~S1_Y, phase 2 + fxcxma 9,20,21,9 + + # S1 * ~S2, phase 2 + fxcxma 11,20,22,11 + fxcxma 12,20,23,12 + fxcxma 13,21,22,13 + fxcxma 14,21,23,14 + + # S2 * ~S2, phase 2 + fxcxma 16,22,23,16 + + # S0 * ~S1, phase 1 + fxcpnsma 0,24,26,0 ; lfpsux 18,3,11 + fxcpnsma 1,24,27,1 ; lfpsux 19,3,11 + fxcpnsma 2,25,26,2 ; lfpsux 20,4,11 + fxcpnsma 3,25,27,3 + + # S0 * ~S2, phase 1 + fxcpnsma 4,24,28,4 + fxcpnsma 5,24,29,5 + fxcpnsma 6,25,28,6 + fxcpnsma 7,25,29,7 + + # S1 * ~S1, phase 1 + fpmadd 8,26,26,8 ; lfpsux 21,4,11 + fxcpnsma 9,26,27,9 ; lfpsux 22,5,11 + fpmadd 
10,27,27,10 ; lfpsux 23,5,11 + + # S1 * ~S2, phase 1 + fxcpnsma 11,26,28,11 + fxcpnsma 12,26,29,12 + fxcpnsma 13,27,28,13 + fxcpnsma 14,27,29,14 + + # S2 * ~S2, phase 1 + fpmadd 15,28,28,15 + fxcpnsma 16,28,29,16 + fpmadd 17,29,29,17 + + # S0 * ~S1, phase 2 + fxcxma 0,24,26,0 + fxcxma 1,24,27,1 + fxcxma 2,25,26,2 + fxcxma 3,25,27,3 + + # S0 * ~S2, phase 2 + fxcxma 4,24,28,4 + fxcxma 5,24,29,5 + fxcxma 6,25,28,6 + fxcxma 7,25,29,7 + + # S1_X * ~S1_Y, phase 2 + fxcxma 9,26,27,9 + + # S1 * ~S2, phase 2 + fxcxma 11,26,28,11 + fxcxma 12,26,29,12 + fxcxma 13,27,28,13 + fxcxma 14,27,29,14 + + # S2 * ~S2, phase 2 + fxcxma 16,28,29,16 + + bdnz 0b + + lfpsx 18,12,11 # f18r = 1, f18i = 1 + + fxcsnsma 8,8,18,8 # f8r += 1*f8i, f8i += -1*f8i = 0 + fxcsnsma 10,10,18,10 + fxcsnsma 15,15,18,15 + fxcsnsma 17,17,18,17 + +#if !defined HAVE_BGP + dcbz 0,6 +#endif + stfpsx 0,0,6 # store results S0 * ~S1 + stfpsux 1,6,11 + stfpsux 2,6,11 + stfpsux 3,6,11 + +#if !defined HAVE_BGP + dcbz 0,7 +#endif + stfpsx 4,0,7 # store results S0 * ~S2 + stfpsux 5,7,11 + stfpsux 6,7,11 + stfpsux 7,7,11 + +#if !defined HAVE_BGP + dcbz 0,8 +#endif + stfpsx 8,0,8 # store results S1 * ~S1 + stfpsux 9,8,11 + fsneg 9,9 + stfpsux 9,8,11 + stfpsux 10,8,11 + +#if !defined HAVE_BGP + dcbz 0,9 +#endif + stfpsx 11,0,9 # store results S1 * ~S2 + stfpsux 12,9,11 + stfpsux 13,9,11 + stfpsux 14,9,11 + +#if !defined HAVE_BGP + dcbz 0,10 +#endif + stfpsx 15,0,10 # store results S2 * ~S2 + stfpsux 16,10,11 + fsneg 16,16 + stfpsux 16,10,11 + stfpsux 17,10,11 + + li 11,16 # restore call-saved registers + lfpdx 29,0,1 + lfpdux 28,1,11 + lfpdux 27,1,11 + lfpdux 26,1,11 + lfpdux 25,1,11 + lfpdux 24,1,11 + lfpdux 23,1,11 + lfpdux 22,1,11 + lfpdux 21,1,11 + lfpdux 20,1,11 + lfpdux 19,1,11 + lfpdux 18,1,11 + lfpdux 17,1,11 + lfpdux 16,1,11 + lfpdux 15,1,11 + lfpdux 14,1,11 + + addi 1,1,16 # reset stack pointer + + blr # return + + +#if 0 +.align 5 +.global _add_correlations +_add_correlations: + + li 11,-16 # push call-saved 
registers + subi 1,1,32 + stmw 24,0(1) + stfpdux 14,1,11 + stfpdux 15,1,11 + + srwi 7,7,1 + mtctr 7 + + li 8,8 + + sub 3,3,8 + sub 4,4,8 + + mr 10,3 + + ; lfpsux 0,3,8 + ; lfpsux 1,3,8 + ; lfpsux 2,3,8 + ; lfpsux 3,3,8 + ; lfpsux 4,3,8 + ; lfpsux 5,3,8 + ; lfpsux 6,3,8 + ; lfpsux 7,3,8 + + ; lfpsux 8,4,8 + ; lfpsux 9,4,8 + ; lfpsux 10,4,8 + ; lfpsux 11,4,8 + ; lfpsux 12,4,8 + ; lfpsux 13,4,8 + ; lfpsux 14,4,8 + ; lfpsux 15,4,8 + +0: + fpadd 0,0,8 ; lfpsux 8,4,8 + fpadd 1,1,9 ; lfpsux 9,4,8 + fpadd 2,2,10 ; lfpsux 10,4,8 + fpadd 3,3,11 ; lfpsux 11,4,8 + fpadd 4,4,12 ; lfpsux 12,4,8 + fpadd 5,5,13 ; lfpsux 13,4,8 + fpadd 6,6,14 ; lfpsux 14,4,8 + fpadd 7,7,15 ; lfpsux 15,4,8 + + ; stfpsux 0,10,8 + ; lfpsux 0,3,8 + ; stfpsux 1,10,8 + ; lfpsux 1,3,8 + ; stfpsux 2,10,8 + ; lfpsux 2,3,8 + ; stfpsux 3,10,8 + ; lfpsux 3,3,8 + ; stfpsux 4,10,8 + ; lfpsux 4,3,8 + ; stfpsux 5,10,8 + ; lfpsux 5,3,8 + ; stfpsux 6,10,8 + ; lfpsux 6,3,8 + ; stfpsux 7,10,8 + ; lfpsux 7,3,8 + + bdnz 0b + + # now do all nrValidSamples; since overflows should not occur, we + # treat two unsigned shorts as one word + + srwi 7,7,2 ; lwzx 24,0,5 # NOTE(review): r7 was already shifted right by 1 above, so this count is the original r7 / 8 - confirm before re-enabling + mtctr 7 ; lwzx 28,0,6 + li 9,4 ; lwzux 25,5,9 + ; lwzux 29,6,9 + ; lwzux 26,5,9 + ; lwzux 30,6,9 + ; lwzux 27,5,9 + ; lwzux 31,6,9 + +1: add 24,24,28 ; lwzux 28,6,9 + ; stw 24,-28(5) + ; lwzux 24,5,9 + + add 25,25,29 ; lwzux 29,6,9 + ; stw 25,-28(5) + ; lwzux 25,5,9 + + add 26,26,30 ; lwzux 30,6,9 + ; stw 26,-28(5) + ; lwzux 26,5,9 + + add 27,27,31 ; lwzux 31,6,9 + ; stw 27,-28(5) + ; lwzux 27,5,9 + + bdnz 1b + + li 11,16 # restore call-saved registers + lfpdx 15,0,1 + lfpdux 14,1,11 + + lmw 24,16(1) + addi 1,1,48 # reset stack pointer + + blr # return +#endif + + +.align 5 +.global _clear_correlation +_clear_correlation: +#if defined HAVE_BGP + li 0,0 # HAVE_BGP build: zero the 32 bytes at r3 with eight word stores + stw 0,0(3) + stw 0,4(3) + stw 0,8(3) + stw 0,12(3) + stw 0,16(3) + stw 0,20(3) + stw 0,24(3) + stw 0,28(3) +#else + dcbz 0,3 # clear the entire cache line at r3, it contains all polarizations +#endif + blr # return + +
+#define SIZEOF_NR_VALID_SAMPLES 2 + +#if SIZEOF_NR_VALID_SAMPLES != 1 && SIZEOF_NR_VALID_SAMPLES != 2 && SIZEOF_NR_VALID_SAMPLES != 4 +#error Unsupported SIZEOF_NR_VALID_SAMPLES +#endif + + +.align 5 +.global _weigh_visibilities +_weigh_visibilities: # args in prototype order (see CorrelatorAsm.h), presumably r3..r8: visibilities, valid-sample counts, correlation weights, band pass factors, baseline count, channel count + + slwi 9,8,2 ; mullw 7,7,8 # r9 = 4 * r8, r7 = r7 * r8 + addi 0,9,-1 ; li 8,8 # r0 = r9 - 1 is the wrap mask applied to r12 (assumes r9 is a power of two - TODO confirm), r8 = 8 byte step + li 11,SIZEOF_NR_VALID_SAMPLES + srwi 7,7,1 # two visibilities are handled per loop iteration + sub 4,4,11 # bias r4 for the update-form count loads + li 12,0 # r12 = byte offset into the factor table at r6 + mtctr 7 + sub 7,3,8 # r7 = store pointer, biased by one 8 byte step + +#if SIZEOF_NR_VALID_SAMPLES == 1 + lbzux 9,4,11 + lbzux 10,4,11 +#elif SIZEOF_NR_VALID_SAMPLES == 2 + lhzux 9,4,11 + lhzux 10,4,11 +#elif SIZEOF_NR_VALID_SAMPLES == 4 + lwzux 9,4,11 + lwzux 10,4,11 +#endif + + lfpsx 9,0,6 + lfpsx 0,0,3 # first pair lives at r3 itself, the indexed form r3+r8 would read the same pair as f1 below + lfpsux 1,3,8 + lfpsux 2,3,8 + lfpsux 3,3,8 + +0: + fxpmul 0,9,0 ; slwi 9,9,2 # scale by the primary half of f9, turn the count in r9 into a float offset + fxpmul 1,9,1 ; lfsx 8,5,9 # f8 = weight looked up at r5 + 4 * count + + fxpmul 2,9,2 ; lfpsux 4,3,8 + fxpmul 3,9,3 ; lfpsux 5,3,8 + ; lfpsux 6,3,8 + ; lfpsux 7,3,8 + + fxpmul 0,8,0 + fxpmul 1,8,1 + fxpmul 2,8,2 + fxpmul 3,8,3 + +#if SIZEOF_NR_VALID_SAMPLES == 1 + lbzux 9,4,11 +#elif SIZEOF_NR_VALID_SAMPLES == 2 + lhzux 9,4,11 +#else + lwzux 9,4,11 +#endif + + addi 12,12,8 ; stfpsux 0,7,8 # advance the factor offset by one pair + and 12,12,0 ; stfpsux 1,7,8 # wrap the factor offset with the mask in r0 + ; stfpsux 2,7,8 + ; stfpsux 3,7,8 + + fxsmul 4,9,4 ; slwi 10,10,2 # second visibility uses the secondary half of f9 + fxsmul 5,9,5 ; lfsx 8,5,10 + + fxsmul 6,9,6 ; lfpsux 0,3,8 + fxsmul 7,9,7 ; lfpsux 1,3,8 + ; lfpsux 2,3,8 + ; lfpsux 3,3,8 + fxpmul 4,8,4 ; lfpsx 9,6,12 # reload f9 with the next factor pair + fxpmul 5,8,5 + fxpmul 6,8,6 + fxpmul 7,8,7 + +#if SIZEOF_NR_VALID_SAMPLES == 1 + lbzux 10,4,11 +#elif SIZEOF_NR_VALID_SAMPLES == 2 + lhzux 10,4,11 +#elif SIZEOF_NR_VALID_SAMPLES == 4 + lwzux 10,4,11 +#endif + + ; stfpsux 4,7,8 + ; stfpsux 5,7,8 + ; stfpsux 6,7,8 + ; stfpsux 7,7,8 + + bdnz 0b + blr # return + +#endif diff --git a/RTCP/CNProc/src/CorrelatorAsm.h b/RTCP/CNProc/src/CorrelatorAsm.h new file mode 100644 index 0000000000000000000000000000000000000000..48de6661c7472eaf63b9958db38d7298484da56a --- /dev/null +++ b/RTCP/CNProc/src/CorrelatorAsm.h @@ -0,0 +1,93 @@ +//# CorrelatorAsm.h: declarations of the CN assembly correlator routines +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands,
seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +#ifndef LOFAR_CNPROC_CORRELATOR_ASM_H +#define LOFAR_CNPROC_CORRELATOR_ASM_H + +#if defined HAVE_BGL || defined HAVE_BGP +#include <Interface/Config.h> + +namespace LOFAR { +namespace RTCP { + +//typedef fcomplex stationInputType[NR_SAMPLES_PER_INTEGRATION | 2][NR_POLARIZATIONS]; +//typedef fcomplex CorrelatedOutputType[NR_POLARIZATIONS][NR_POLARIZATIONS]; +typedef fcomplex stationInputType, CorrelatedOutputType; + +extern "C" { + void _correlate_2x2(const stationInputType *S0, + const stationInputType *S1, + const stationInputType *S2, + const stationInputType *S3, + CorrelatedOutputType *S0_S2, + CorrelatedOutputType *S0_S3, + CorrelatedOutputType *S1_S2, + CorrelatedOutputType *S1_S3, + unsigned nrSamplesToIntegrate); + + void _correlate_3x2(const stationInputType *S0, + const stationInputType *S1, + const stationInputType *S2, + const stationInputType *S3, + const stationInputType *S4, + CorrelatedOutputType *S0_S3, + CorrelatedOutputType *S0_S4, + CorrelatedOutputType *S1_S3, + CorrelatedOutputType *S1_S4, + CorrelatedOutputType *S2_S3, + CorrelatedOutputType *S2_S4, + unsigned nrSamplesToIntegrate); + + void _auto_correlate_1(const stationInputType *S0, + CorrelatedOutputType *S0_S0, + unsigned 
nrSamplesToIntegrate); + + void _auto_correlate_2(const stationInputType *S0, + const stationInputType *S1, + CorrelatedOutputType *S0_S0, + CorrelatedOutputType *S0_S1, + CorrelatedOutputType *S1_S1, + unsigned nrSamplesToIntegrate); + + void _auto_correlate_3(const stationInputType *S0, + const stationInputType *S1, + const stationInputType *S2, + CorrelatedOutputType *S0_S1, + CorrelatedOutputType *S0_S2, + CorrelatedOutputType *S1_S1, + CorrelatedOutputType *S1_S2, + CorrelatedOutputType *S2_S2, + unsigned nrSamplesToIntegrate); + + void _clear_correlation(CorrelatedOutputType *S0_S0); + + void _weigh_visibilities( + fcomplex *visibilities, + unsigned short *nrValidSamplesCounted, + const float correlationWeights[/*nrSamplesToIntegrate + 1*/], + const float bandPassCorrectionFactors[/*nrChannels*/], + unsigned nrBaselines, + unsigned NrChannels); +}; + +} +} +#endif +#endif diff --git a/RTCP/CNProc/src/FCNP_ClientStream.cc b/RTCP/CNProc/src/FCNP_ClientStream.cc new file mode 100644 index 0000000000000000000000000000000000000000..ddbbeb17567f10d5269c03ed7f42738d149330c2 --- /dev/null +++ b/RTCP/CNProc/src/FCNP_ClientStream.cc @@ -0,0 +1,82 @@ +//# FCNP_ClientStream.cc: Fast Collective Network Protocol Stream +//# +//# Copyright (C) 2008 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +#if defined HAVE_FCNP && defined HAVE_BGP + +#include <Common/Timer.h> // NOTE(review): nothing in this file appears to use Timer - confirm before removing +#include <Interface/Align.h> +#include <Interface/AlignedStdAllocator.h> +#include <FCNP_ClientStream.h> + +#include <fcnp_cn.h> + +#include <cstring> +#include <vector> + + +namespace LOFAR { +namespace RTCP { + + +FCNP_ClientStream::~FCNP_ClientStream() +{ +} + + +void FCNP_ClientStream::read(void *ptr, size_t size) // fills ptr with size bytes obtained through FCNP_CN::IONtoCN_ZeroCopy +{ + //std::clog << "FCNP_ClientStream::read(" << std::hex << ptr << ", " << std::dec << size << ", ...)" << std::endl; + + if (!aligned(ptr, 16) || !aligned(size, 16)) { // pointer or size not 16-aligned: go through an aligned bounce buffer + size_t alignedSize = align(size, 16); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); + + FCNP_CN::IONtoCN_ZeroCopy(&alignedBuffer[0], alignedSize); // NOTE(review): transfers alignedSize bytes, which can exceed size; presumably the peer pads the same way - confirm + memcpy(ptr, &alignedBuffer[0], size); // only the first size bytes reach the caller + } else { + FCNP_CN::IONtoCN_ZeroCopy(ptr, size); // already aligned: transfer straight into the caller buffer + } +} + + +void FCNP_ClientStream::write(const void *ptr, size_t size) // hands size bytes from ptr to FCNP_CN::CNtoION_ZeroCopy +{ + //std::clog << "FCNP_ClientStream::write(" << std::hex << ptr << ", " << std::dec << size << ", ...)" << std::endl; + + if (!aligned(ptr, 16) || !aligned(size, 16)) { // pointer or size not 16-aligned: stage the data in an aligned bounce buffer + size_t alignedSize = align(size, 16); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); + + memcpy(&alignedBuffer[0], ptr, size); // bytes past size keep whatever the vector constructor left there + FCNP_CN::CNtoION_ZeroCopy(&alignedBuffer[0], alignedSize); // NOTE(review): sends alignedSize bytes, which can exceed size - confirm the receiver expects the padding + } else { + FCNP_CN::CNtoION_ZeroCopy(ptr, size); // already aligned: send straight from the caller buffer + } +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/FCNP_ClientStream.h b/RTCP/CNProc/src/FCNP_ClientStream.h new file mode 100644 index 0000000000000000000000000000000000000000..15b143bfb90d2a728c1b97fa04bda485b36ee5e9 --- /dev/null +++ b/RTCP/CNProc/src/FCNP_ClientStream.h @@ -0,0 +1,46 @@
+//# FCNP_ClientStream.h: Stream that implements FCNP protocol +//# +//# Copyright (C) 2005 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +#ifndef LOFAR_CNPROC_FCNP_CLIENT_STREAM_H +#define LOFAR_CNPROC_FCNP_CLIENT_STREAM_H + +#if defined HAVE_FCNP && defined HAVE_BGP + +#include <Stream/Stream.h> + +namespace LOFAR { +namespace RTCP { + +class FCNP_ClientStream : public Stream +{ + public: + virtual ~FCNP_ClientStream(); + + virtual void read(void *ptr, size_t size); + virtual void write(const void *ptr, size_t size); +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif +#endif diff --git a/RTCP/CNProc/src/FFT_Asm.S b/RTCP/CNProc/src/FFT_Asm.S new file mode 100644 index 0000000000000000000000000000000000000000..734479ccc957cea4145aaff670907c1343150b4e --- /dev/null +++ b/RTCP/CNProc/src/FFT_Asm.S @@ -0,0 +1,989 @@ +#if defined HAVE_BGL || defined HAVE_BGP + +_C1: .long 0x3F800000, 0x3F800000 +_W: .long 0x3F6C835E, 0xBEC3EF15 +_W2: .long 0x3F3504F3, 0xBF3504F3 + +twiddle: + .long 0x3F7FEC43, 0xBCC90AB0 + .long 0x3F7FB10F, 0xBD48FB30 + .long 0x3F7F4E6D, 0xBD96A905 + .long 0x3F7EC46D, 0xBDC8BD36 + .long 0x3F7E1324, 0xBDFAB272 + .long 
0x3F7D3AAC, 0xBE164083 + .long 0x3F7C3B28, 0xBE2F10A2 + .long 0x3F7B14BE, 0xBE47C5C2 + .long 0x3F79C79D, 0xBE605C13 + .long 0x3F7853F8, 0xBE78CFCC + .long 0x3F76BA07, 0xBE888E93 + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F731448, 0xBEA09AE5 + .long 0x3F710908, 0xBEAC7CD3 + .long 0x3F6ED89D, 0xBEB8442A + .long 0x3F7FB10F, 0xBD48FB30 + .long 0x3F7EC46D, 0xBDC8BD36 + .long 0x3F7D3AAC, 0xBE164083 + .long 0x3F7B14BE, 0xBE47C5C2 + .long 0x3F7853F8, 0xBE78CFCC + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F710908, 0xBEAC7CD3 + .long 0x3F6C835F, 0xBEC3EF16 + .long 0x3F676BD7, 0xBEDAE880 + .long 0x3F61C598, 0xBEF15AE9 + .long 0x3F5B941A, 0xBF039C3C + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F4D9F02, 0xBF187FC0 + .long 0x3F45E403, 0xBF226799 + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3F7F4E6D, 0xBD96A905 + .long 0x3F7D3AAC, 0xBE164083 + .long 0x3F79C79D, 0xBE605C13 + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F6ED89D, 0xBEB8442A + .long 0x3F676BD7, 0xBEDAE880 + .long 0x3F5EBE05, 0xBEFC5D26 + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F49D113, 0xBF1D7FD1 + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3F3085BB, 0xBF396841 + .long 0x3F226799, 0xBF45E403 + .long 0x3F13682B, 0xBF514D3C + .long 0x3F039C3E, 0xBF5B9419 + .long 0x3EE63374, 0xBF64AA5A + .long 0x3F7EC46D, 0xBDC8BD36 + .long 0x3F7B14BE, 0xBE47C5C2 + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F6C835F, 0xBEC3EF16 + .long 0x3F61C598, 0xBEF15AE9 + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F45E403, 0xBF226799 + .long 0x3F3504F3, 0xBF3504F4 + .long 0x3F226799, 0xBF45E403 + .long 0x3F0E39DA, 0xBF54DB31 + .long 0x3EF15AEB, 0xBF61C598 + .long 0x3EC3EF15, 0xBF6C835F + .long 0x3E94A030, 0xBF74FA0B + .long 0x3E47C5C4, 0xBF7B14BE + .long 0x3DC8BD35, 0xBF7EC46D + .long 0x3F7E1324, 0xBDFAB272 + .long 0x3F7853F8, 0xBE78CFCC + .long 0x3F6ED89D, 0xBEB8442A + .long 0x3F61C598, 0xBEF15AE9 + .long 0x3F514D3D, 0xBF13682B + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3F273656, 0xBF41D870 + .long 0x3F0E39DA, 0xBF54DB31 + .long 0x3EE63374, 0xBF64AA5A + .long 0x3EAC7CD3, 
0xBF710908 + .long 0x3E605C12, 0xBF79C79D + .long 0x3DC8BD35, 0xBF7EC46D + .long 0xBCC90AAD, 0xBF7FEC43 + .long 0xBE164082, 0xBF7D3AAC + .long 0xBE888E92, 0xBF76BA07 + .long 0x3F7D3AAC, 0xBE164083 + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F676BD7, 0xBEDAE880 + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3F226799, 0xBF45E403 + .long 0x3F039C3E, 0xBF5B9419 + .long 0x3EC3EF15, 0xBF6C835F + .long 0x3E78CFD0, 0xBF7853F8 + .long 0x3DC8BD35, 0xBF7EC46D + .long 0xBD48FB21, 0xBF7FB10F + .long 0xBE47C5C2, 0xBF7B14BE + .long 0xBEAC7CD2, 0xBF710908 + .long 0xBEF15AE6, 0xBF61C598 + .long 0xBF187FC1, 0xBF4D9F01 + .long 0x3F7C3B28, 0xBE2F10A2 + .long 0x3F710908, 0xBEAC7CD3 + .long 0x3F5EBE05, 0xBEFC5D26 + .long 0x3F45E403, 0xBF226799 + .long 0x3F273656, 0xBF41D870 + .long 0x3F039C3E, 0xBF5B9419 + .long 0x3EB8442B, 0xBF6ED89D + .long 0x3E47C5C4, 0xBF7B14BE + .long 0x3CC90ABE, 0xBF7FEC43 + .long 0xBE164082, 0xBF7D3AAC + .long 0xBEA09AE4, 0xBF731448 + .long 0xBEF15AE6, 0xBF61C598 + .long 0xBF1D7FD0, 0xBF49D113 + .long 0xBF3DAEF8, 0xBF2BEB4B + .long 0xBF584852, 0xBF08F59C + .long 0x3F7B14BE, 0xBE47C5C2 + .long 0x3F6C835F, 0xBEC3EF16 + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F3504F3, 0xBF3504F4 + .long 0x3F0E39DA, 0xBF54DB31 + .long 0x3EC3EF15, 0xBF6C835F + .long 0x3E47C5C4, 0xBF7B14BE + .long 0xB33BBD2E, 0xBF800000 + .long 0xBE47C5C2, 0xBF7B14BE + .long 0xBEC3EF14, 0xBF6C835F + .long 0xBF0E39D9, 0xBF54DB32 + .long 0xBF3504F3, 0xBF3504F3 + .long 0xBF54DB32, 0xBF0E39D9 + .long 0xBF6C835E, 0xBEC3EF17 + .long 0xBF7B14BF, 0xBE47C5C1 + .long 0x3F79C79D, 0xBE605C13 + .long 0x3F676BD7, 0xBEDAE880 + .long 0x3F49D113, 0xBF1D7FD1 + .long 0x3F226799, 0xBF45E403 + .long 0x3EE63374, 0xBF64AA5A + .long 0x3E78CFD0, 0xBF7853F8 + .long 0x3CC90ABE, 0xBF7FEC43 + .long 0xBE47C5C2, 0xBF7B14BE + .long 0xBECF7BCB, 0xBF6A09A6 + .long 0xBF187FC1, 0xBF4D9F01 + .long 0xBF41D870, 0xBF273656 + .long 0xBF61C596, 0xBEF15AED + .long 0xBF76BA07, 0xBE888E92 + .long 0xBF7FB10F, 0xBD48FB3D + 
.long 0xBF7C3B28, 0x3E2F10A9 + .long 0x3F7853F8, 0xBE78CFCC + .long 0x3F61C598, 0xBEF15AE9 + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3F0E39DA, 0xBF54DB31 + .long 0x3EAC7CD3, 0xBF710908 + .long 0x3DC8BD35, 0xBF7EC46D + .long 0xBE164082, 0xBF7D3AAC + .long 0xBEC3EF14, 0xBF6C835F + .long 0xBF187FC1, 0xBF4D9F01 + .long 0xBF45E404, 0xBF226799 + .long 0xBF676BD7, 0xBEDAE87F + .long 0xBF7B14BF, 0xBE47C5C1 + .long 0xBF7FB10F, 0x3D48FB2C + .long 0xBF74FA0B, 0x3E94A030 + .long 0xBF5B941B, 0x3F039C3C + .long 0x3F76BA07, 0xBE888E93 + .long 0x3F5B941A, 0xBF039C3C + .long 0x3F3085BB, 0xBF396841 + .long 0x3EF15AEB, 0xBF61C598 + .long 0x3E605C12, 0xBF79C79D + .long 0xBD48FB21, 0xBF7FB10F + .long 0xBEA09AE4, 0xBF731448 + .long 0xBF0E39D9, 0xBF54DB32 + .long 0xBF41D870, 0xBF273656 + .long 0xBF676BD7, 0xBEDAE87F + .long 0xBF7C3B28, 0xBE2F109D + .long 0xBF7EC46D, 0x3DC8BD27 + .long 0xBF6ED89E, 0x3EB84428 + .long 0xBF4D9F03, 0x3F187FC0 + .long 0xBF1D7FD1, 0x3F49D113 + .long 0x3F74FA0B, 0xBE94A031 + .long 0x3F54DB31, 0xBF0E39DA + .long 0x3F226799, 0xBF45E403 + .long 0x3EC3EF15, 0xBF6C835F + .long 0x3DC8BD35, 0xBF7EC46D + .long 0xBE47C5C2, 0xBF7B14BE + .long 0xBEF15AE6, 0xBF61C598 + .long 0xBF3504F3, 0xBF3504F3 + .long 0xBF61C596, 0xBEF15AED + .long 0xBF7B14BF, 0xBE47C5C1 + .long 0xBF7EC46D, 0x3DC8BD27 + .long 0xBF6C835F, 0x3EC3EF16 + .long 0xBF45E405, 0x3F226798 + .long 0xBF0E39DD, 0x3F54DB2F + .long 0xBE94A02D, 0x3F74FA0B + .long 0x3F731448, 0xBEA09AE5 + .long 0x3F4D9F02, 0xBF187FC0 + .long 0x3F13682B, 0xBF514D3C + .long 0x3E94A030, 0xBF74FA0B + .long 0xBCC90AAD, 0xBF7FEC43 + .long 0xBEAC7CD2, 0xBF710908 + .long 0xBF1D7FD0, 0xBF49D113 + .long 0xBF54DB32, 0xBF0E39D9 + .long 0xBF76BA07, 0xBE888E92 + .long 0xBF7FB10F, 0x3D48FB2C + .long 0xBF6ED89E, 0x3EB84428 + .long 0xBF45E405, 0x3F226798 + .long 0xBF08F59D, 0x3F584851 + .long 0xBE78CFD9, 0x3F7853F7 + .long 0x3D96A922, 0x3F7F4E6D + .long 0x3F710908, 0xBEAC7CD3 + .long 0x3F45E403, 0xBF226799 + .long 0x3F039C3E, 0xBF5B9419 + .long 
0x3E47C5C4, 0xBF7B14BE + .long 0xBE164082, 0xBF7D3AAC + .long 0xBEF15AE6, 0xBF61C598 + .long 0xBF3DAEF8, 0xBF2BEB4B + .long 0xBF6C835E, 0xBEC3EF17 + .long 0xBF7FB10F, 0xBD48FB3D + .long 0xBF74FA0B, 0x3E94A030 + .long 0xBF4D9F03, 0x3F187FC0 + .long 0xBF0E39DD, 0x3F54DB2F + .long 0xBE78CFD9, 0x3F7853F7 + .long 0x3DC8BD1D, 0x3F7EC46D + .long 0x3EDAE87B, 0x3F676BD9 + .long 0x3F6ED89D, 0xBEB8442A + .long 0x3F3DAEFA, 0xBF2BEB4A + .long 0x3EE63374, 0xBF64AA5A + .long 0x3DC8BD35, 0xBF7EC46D + .long 0xBE888E92, 0xBF76BA07 + .long 0xBF187FC1, 0xBF4D9F01 + .long 0xBF584852, 0xBF08F59C + .long 0xBF7B14BF, 0xBE47C5C1 + .long 0xBF7C3B28, 0x3E2F10A9 + .long 0xBF5B941B, 0x3F039C3C + .long 0xBF1D7FD1, 0x3F49D113 + .long 0xBE94A02D, 0x3F74FA0B + .long 0x3D96A922, 0x3F7F4E6D + .long 0x3EDAE87B, 0x3F676BD9 + .long 0x3F396841, 0x3F3085BC + + +.align 5 +.global _fft256 + +_fft256: + +#define C1 31 +#define W 30 +#define W2 29 + +#define a0 0 +#define a1 4 +#define a2 8 +#define a3 12 +#define a4 1 +#define a5 5 +#define a6 9 +#define a7 13 +#define a8 2 +#define a9 6 +#define a10 10 +#define a11 14 +#define a12 3 +#define a13 7 +#define a14 11 +#define a15 15 + +#define b0 16 +#define b1 a0 +#define b2 a2 +#define b3 a1 +#define b4 a3 +#define b5 a4 +#define b6 a6 +#define b7 a5 +#define b8 a7 +#define b9 a8 +#define b10 a10 +#define b11 a9 +#define b12 a11 +#define b13 a12 +#define b14 a14 +#define b15 a13 + +#define c0 a15 +#define c2 b0 +#define c1 b2 +#define c3 b1 +#define c4 b3 +#define c6 b4 +#define c5 b6 +#define c7 b5 +#define c8 b7 +#define c10 b8 +#define c9 b10 +#define c11 b9 +#define c12 b11 +#define c14 b12 +#define c13 b14 +#define c15 b13 + +#define x5 b15 +#define x6 17 +#define x7 18 +#define x9 19 +#define x11 20 +#define x13 21 +#define x14 22 +#define x15 23 + +#define d5 c5 +#define d6 c6 +#define d7 c7 +#define d9 c9 +#define d11 c11 +#define d13 c13 +#define d14 c14 +#define d15 c15 + +#define e0 30 +#define e1 29 +#define e2 28 +#define e3 27 +#define e4 26 
+#define e5 25 +#define e6 24 +#define e7 23 +#define e8 22 +#define e9 21 +#define e10 20 +#define e11 19 +#define e12 18 +#define e13 17 +#define e14 16 +#define e15 15 + +#define f0 14 +#define f2 e0 +#define f1 e2 +#define f3 e1 +#define f4 e3 +#define f6 e4 +#define f5 e6 +#define f7 e5 +#define f8 e7 +#define f10 e8 +#define f9 e10 +#define f11 e9 +#define f12 e11 +#define f14 e12 +#define f13 e14 +#define f15 e13 + +#define f0 14 +#define f2 e0 +#define f1 e2 +#define f3 e1 +#define f4 e3 +#define f6 e4 +#define f5 e6 +#define f7 e5 +#define f8 e7 +#define f10 e8 +#define f9 e10 +#define f11 e9 +#define f12 e11 +#define f14 e12 +#define f13 e14 +#define f15 e13 + +#define t4 10 +#define t8 11 +#define t12 12 +#define t1 13 +#define t5 14 +#define t9 1 +#define t13 3 +#define t2 5 +#define t6 7 +#define t10 9 +#define t14 t4 +#define t3 t8 +#define t7 t12 +#define t11 t1 +#define t15 t5 + +#define y4 0 +#define y8 2 +#define y12 4 +#define y1 6 +#define y5 8 +#define y9 y4 +#define y13 y8 +#define y2 y12 +#define y6 y1 +#define y10 y5 +#define y14 g4 +#define y3 g8 +#define y7 g12 +#define y11 g1 +#define y15 g5 + +#define g8 f8 +#define g12 f12 +#define g1 f1 +#define g5 f5 +#define g4 f4 +#define g9 f9 +#define g13 f13 +#define g2 f2 +#define g6 f6 +#define g10 f10 +#define g14 f14 +#define g3 f3 +#define g7 f7 +#define g11 f11 +#define g15 f15 + + li 9,-16 + ; stfpdux 14,1,9 + ; stfpdux 15,1,9 + ; stfpdux 16,1,9 + ; stfpdux 17,1,9 + ; stfpdux 18,1,9 + ; stfpdux 19,1,9 + ; stfpdux 20,1,9 + ; stfpdux 21,1,9 + ; stfpdux 22,1,9 + ; stfpdux 23,1,9 + ; stfpdux 24,1,9 + ; stfpdux 25,1,9 + ; stfpdux 26,1,9 + ; stfpdux 27,1,9 + ; stfpdux 28,1,9 + li 10,17*16*8/32 ; stfpdux 29,1,9 + li 8,0 ; stfpdux 30,1,9 + mtctr 10 ; stfpdux 31,1,9 + +0: dcbt 3,8 + addi 8,8,32 ; bdnz 0b + + + li 9,16*8 ; lfpsx a0,0,3 + lis 7,_C1@ha ; lfpsux a4,3,9 + la 7,_C1@l(7) ; lfpsux a8,3,9 + ; lfpsx C1,0,7 + ; lfpsux a12,3,9 + addi 7,7,8 ; lfpsux a1,3,9 + li 8,8 ; lfpsux a5,3,9 + li 0,15 ; 
lfpsux a9,3,9 + mtctr 0 ; lfpsux a13,3,9 + lis 6,twiddle-8@ha ; lfpsux a2,3,9 + la 6,twiddle-8@l(6) ; lfpsux a6,3,9 + ; lfpsux a10,3,9 + ; lfpsux a14,3,9 + ; lfpsux a3,3,9 + ; lfpsux a7,3,9 + ; lfpsux a11,3,9 + ; lfpsux a15,3,9 + + fpadd b0,a0,a2 + fpsub b1,a0,a2 + fpadd b2,a1,a3 + fpsub b3,a1,a3 + + fpadd b4,a4,a6 + fpsub b5,a4,a6 + fpadd b6,a5,a7 + fpsub b7,a5,a7 + + fpadd b8,a8,a10 + fpsub b9,a8,a10 + fpadd b10,a9,a11 + fpsub b11,a9,a11 + + fpadd b12,a12,a14 + fpsub b13,a12,a14 + fpadd b14,a13,a15 + fpsub b15,a13,a15 + + fpadd c0,b0,b2 + fpsub c2,b0,b2 ; lfpsx W,0,7 + fxcxnsma c1,C1,b3,b1 ; lfpsx W2,7,8 + fxcxnpma c3,C1,b3,b1 + + fpadd c4,b4,b6 + fpsub c6,b4,b6 + fxcxnsma c5,C1,b7,b5 + fxcxnpma c7,C1,b7,b5 + + fpadd c8,b8,b10 + fpsub c10,b8,b10 + fxcxnsma c9,C1,b11,b9 + fxcxnpma c11,C1,b11,b9 + + fpadd c12,b12,b14 + fpsub c14,b12,b14 + fxcxnsma c13,C1,b15,b13 + fxcxnpma c15,C1,b15,b13 + + fxpmul x5,c5,W + fxpmul x6,c6,W2 + fxpmul x7,c7,W + + fxpmul x9,c9,W2 + fxpmul x11,W2,c11 + + fxpmul x13,c13,W + fxpmul x14,W2,c14 + fxpmul x15,c15,W + + fxcxnpma d5,c5,W,x5 + fxcxnpma d6,c6,W2,x6 + fxcxnsma d7,c7,W,x7 + + fxcxnpma d9,c9,W2,x9 + fxcxnsma d11,W2,c11,x11 + + fxcxnsma d13,c13,W,x13 + fxcxnsma d14,W2,c14,x14 + fxcxnpma d15,c15,W,x15 + + fpadd e0,c0,c8 + fpsub e1,c0,c8 + fpadd e2,c4,c12 + fpsub e3,c4,c12 + + fpadd e4,c1,d9 + fpsub e5,c1,d9 + fxnmsub e6,d13,C1,d5 + fxmadd e7,d13,C1,d5 + + fxcxnsma e8,C1,c10,c2 + fxcxnpma e9,C1,c10,c2 + fpsub e10,d6,d14 + fpadd e11,d6,d14 + + fpsub e12,c3,d11 + fpadd e13,c3,d11 + fxnmadd e14,d7,C1,d15 + fxnmsub e15,d7,C1,d15 ; addi 3,3,-239*8 + + fpadd f0,e0,e2 ; lfpsx a0,0,3 + fpsub f2,e0,e2 ; lfpsux a4,3,9 + fxcxnsma f1,C1,e3,e1 ; lfpsux a8,3,9 + fxcxnpma f3,C1,e3,e1 ; lfpsux a12,3,9 + + fpadd f4,e4,e6 ; lfpsux a1,3,9 + fpsub f6,e4,e6 ; stfpsx f0,0,4 + fxcxnsma f5,C1,e7,e5 ; lfpsux a5,3,9 + fxcxnpma f7,C1,e7,e5 ; lfpsux a9,3,9 + + fpadd f8,e8,e10 ; lfpsux a13,3,9 + fpsub f10,e8,e10 ; stfpsux f4,4,8 + fxcxnsma f9,C1,e11,e9 ; lfpsux 
a2,3,9 + fxcxnpma f11,C1,e11,e9 ; lfpsux a6,3,9 + + fpadd f12,e12,e14 ; lfpsux a10,3,9 + fpsub f14,e12,e14 ; stfpsux f8,4,8 + fxcxnsma f13,C1,e15,e13 ; lfpsux a14,3,9 + fxcxnpma f15,C1,e15,e13 ; lfpsux a3,3,9 + ; lfpsux a7,3,9 + ; stfpsux f12,4,8 + ; stfpsux f1,4,8 + ; stfpsux f5,4,8 + + +1: + ; stfpsux g9,4,8 + ; stfpsux g13,4,8 + fpadd b0,a0,a2 ; lfpsux a11,3,9 + fpsub b1,a0,a2 ; lfpsux a15,3,9 + fpadd b2,a1,a3 + fpsub b3,a1,a3 + + fpadd b4,a4,a6 ; stfpsux g2,4,8 + fpsub b5,a4,a6 + fpadd b6,a5,a7 ; stfpsux g6,4,8 + fpsub b7,a5,a7 + + fpadd b8,a8,a10 ; stfpsux g10,4,8 + fpsub b9,a8,a10 + fpadd b10,a9,a11 ; stfpsux g14,4,8 + fpsub b11,a9,a11 + + fpadd b12,a12,a14 ; stfpsux g3,4,8 + fpsub b13,a12,a14 + fpadd b14,a13,a15 ; stfpsux g7,4,8 + fpsub b15,a13,a15 + + fpadd c0,b0,b2 ; stfpsux g11,4,8 + fpsub c2,b0,b2 ; stfpsux g15,4,8 + fxcxnsma c1,C1,b3,b1 ; lfpsx W,0,7 + fxcxnpma c3,C1,b3,b1 ; lfpsx W2,7,8 + + fpadd c4,b4,b6 + fpsub c6,b4,b6 + fxcxnsma c5,C1,b7,b5 + fxcxnpma c7,C1,b7,b5 + + fpadd c8,b8,b10 + fpsub c10,b8,b10 + fxcxnsma c9,C1,b11,b9 + fxcxnpma c11,C1,b11,b9 + + fpadd c12,b12,b14 + fpsub c14,b12,b14 + fxcxnsma c13,C1,b15,b13 + fxcxnpma c15,C1,b15,b13 + + fxpmul x5,c5,W + fxpmul x6,c6,W2 + fxpmul x7,c7,W + + fxpmul x9,c9,W2 + fxpmul x11,W2,c11 + + fxpmul x13,c13,W + fxpmul x14,W2,c14 + fxpmul x15,c15,W + + fxcxnpma d5,c5,W,x5 + fxcxnpma d6,c6,W2,x6 + fxcxnsma d7,c7,W,x7 + + fxcxnpma d9,c9,W2,x9 + fxcxnsma d11,W2,c11,x11 + + fxcxnsma d13,c13,W,x13 + fxcxnsma d14,W2,c14,x14 + fxcxnpma d15,c15,W,x15 + + fpadd e0,c0,c8 + fpsub e1,c0,c8 + fpadd e2,c4,c12 + fpsub e3,c4,c12 + + fpadd e4,c1,d9 + fpsub e5,c1,d9 + fxnmsub e6,d13,C1,d5 + fxmadd e7,d13,C1,d5 + + fxcxnsma e8,C1,c10,c2 + fxcxnpma e9,C1,c10,c2 + fpsub e10,d6,d14 + fpadd e11,d6,d14 + + fpsub e12,c3,d11 + fpadd e13,c3,d11 + fxnmadd e14,d7,C1,d15 + fxnmsub e15,d7,C1,d15 + + fpadd f0,e0,e2 ; lfpsux t4,6,8 + fpsub f2,e0,e2 ; lfpsux t8,6,8 + fxcxnsma f1,C1,e3,e1 ; lfpsux t12,6,8 + fxcxnpma f3,C1,e3,e1 + + fpadd 
f4,e4,e6 + fpsub f6,e4,e6 ; stfpsux f0,4,8 + fxcxnsma f5,C1,e7,e5 + fxcxnpma f7,C1,e7,e5 + + fpadd f8,e8,e10 ; lfpsux t1,6,8 + fpsub f10,e8,e10 ; lfpsux t5,6,8 + fxcxnsma f9,C1,e11,e9 ; lfpsux t9,6,8 + fxcxnpma f11,C1,e11,e9 + + fpadd f12,e12,e14 ; lfpsux t13,6,8 + fpsub f14,e12,e14 ; lfpsux t2,6,8 + fxcxnsma f13,C1,e15,e13 ; lfpsux t6,6,8 + fxcxnpma f15,C1,e15,e13 ; lfpsux t10,6,8 + + + + fxpmul y4,f4,t4 + fxpmul y8,f8,t8 + fxpmul y12,f12,t12 + fxpmul y1,f1,t1 + fxpmul y5,f5,t5 ; addi 3,3,-239*8 + + fxcxnpma g4,f4,t4,y4 ; lfpsux t14,6,8 + fxcxnpma g8,f8,t8,y8 ; lfpsux t3,6,8 + fxcxnpma g12,f12,t12,y12 ; lfpsux t7,6,8 + fxcxnpma g1,f1,t1,y1 ; lfpsux t11,6,8 + fxcxnpma g5,f5,t5,y5 ; lfpsux t15,6,8 + + fxpmul y9,f9,t9 ; stfpsux g4,4,8 + fxpmul y13,f13,t13 ; stfpsux g8,4,8 + fxpmul y2,f2,t2 ; stfpsux g12,4,8 + fxpmul y6,f6,t6 ; stfpsux g1,4,8 + fxpmul y10,f10,t10 ; stfpsux g5,4,8 + + fxcxnpma g9,f9,t9,y9 ; lfpsx a0,0,3 + fxcxnpma g13,f13,t13,y13 ; lfpsux a4,3,9 + fxcxnpma g2,f2,t2,y2 ; lfpsux a8,3,9 + fxcxnpma g6,f6,t6,y6 ; lfpsux a12,3,9 + fxcxnpma g10,f10,t10,y10 ; lfpsux a1,3,9 + + fxpmul y14,f14,t14 ; lfpsux a5,3,9 + fxpmul y3,f3,t3 ; lfpsux a9,3,9 + fxpmul y7,f7,t7 ; lfpsux a13,3,9 + fxpmul y11,f11,t11 ; lfpsux a2,3,9 + fxpmul y15,f15,t15 ; lfpsux a6,3,9 + + fxcxnpma g14,f14,t14,y14 ; lfpsux a10,3,9 + fxcxnpma g3,f3,t3,y3 ; lfpsux a14,3,9 + fxcxnpma g7,f7,t7,y7 ; lfpsux a3,3,9 + fxcxnpma g11,f11,t11,y11 ; lfpsux a7,3,9 + fxcxnpma g15,f15,t15,y15 ; bdnz 1b + + ; stfpsux g9,4,8 + ; stfpsux g13,4,8 + ; stfpsux g2,4,8 + ; stfpsux g6,4,8 + ; stfpsux g10,4,8 + + ; stfpsux g14,4,8 + ; stfpsux g3,4,8 + ; stfpsux g7,4,8 + ; stfpsux g11,4,8 + ; stfpsux g15,4,8 + + + + + # second phase + + + + la 3,-255*8(4) + mr 4,3 ; lfpsx a0,0,3 + ; lfpsux a4,3,9 + ; lfpsux a8,3,9 + ; lfpsux a12,3,9 + ; lfpsux a1,3,9 + ; lfpsux a5,3,9 + ; lfpsux a9,3,9 + mtctr 0 ; lfpsux a13,3,9 + ; lfpsux a2,3,9 + ; lfpsux a6,3,9 + ; lfpsux a10,3,9 + ; lfpsux a14,3,9 + ; lfpsux a3,3,9 + ; lfpsux a7,3,9 
+ ; lfpsux a11,3,9 + ; lfpsux a15,3,9 + + fpadd b0,a0,a2 + fpsub b1,a0,a2 + fpadd b2,a1,a3 + fpsub b3,a1,a3 + + fpadd b4,a4,a6 + fpsub b5,a4,a6 + fpadd b6,a5,a7 + fpsub b7,a5,a7 + + fpadd b8,a8,a10 + fpsub b9,a8,a10 + fpadd b10,a9,a11 + fpsub b11,a9,a11 + + fpadd b12,a12,a14 + fpsub b13,a12,a14 + fpadd b14,a13,a15 + fpsub b15,a13,a15 + + fpadd c0,b0,b2 + fpsub c2,b0,b2 ; lfpsx W,0,7 + fxcxnsma c1,C1,b3,b1 ; lfpsx W2,7,8 + fxcxnpma c3,C1,b3,b1 + + fpadd c4,b4,b6 + fpsub c6,b4,b6 + fxcxnsma c5,C1,b7,b5 + fxcxnpma c7,C1,b7,b5 + + fpadd c8,b8,b10 + fpsub c10,b8,b10 + fxcxnsma c9,C1,b11,b9 + fxcxnpma c11,C1,b11,b9 + + fpadd c12,b12,b14 + fpsub c14,b12,b14 + fxcxnsma c13,C1,b15,b13 + fxcxnpma c15,C1,b15,b13 + + fxpmul x5,c5,W + fxpmul x6,c6,W2 + fxpmul x7,c7,W + + fxpmul x9,c9,W2 + fxpmul x11,W2,c11 + + fxpmul x13,c13,W + fxpmul x14,W2,c14 + fxpmul x15,c15,W + + fxcxnpma d5,c5,W,x5 + fxcxnpma d6,c6,W2,x6 + fxcxnsma d7,c7,W,x7 + + fxcxnpma d9,c9,W2,x9 + fxcxnsma d11,W2,c11,x11 + + fxcxnsma d13,c13,W,x13 + fxcxnsma d14,W2,c14,x14 + fxcxnpma d15,c15,W,x15 + + fpadd e0,c0,c8 + fpsub e1,c0,c8 + fpadd e2,c4,c12 + fpsub e3,c4,c12 + + fpadd e4,c1,d9 + fpsub e5,c1,d9 + fxnmsub e6,d13,C1,d5 + fxmadd e7,d13,C1,d5 + + fxcxnsma e8,C1,c10,c2 + fxcxnpma e9,C1,c10,c2 + fpsub e10,d6,d14 + fpadd e11,d6,d14 + + fpsub e12,c3,d11 + fpadd e13,c3,d11 + fxnmadd e14,d7,C1,d15 + fxnmsub e15,d7,C1,d15 ; addi 3,3,-239*8 + + fpadd f0,e0,e2 ; lfpsx a0,0,3 + fpsub f2,e0,e2 ; lfpsux a4,3,9 + fxcxnsma f1,C1,e3,e1 ; lfpsux a8,3,9 + fxcxnpma f3,C1,e3,e1 ; lfpsux a12,3,9 + + fpadd f4,e4,e6 ; lfpsux a1,3,9 + fpsub f6,e4,e6 ; stfpsx f0,0,4 + fxcxnsma f5,C1,e7,e5 ; lfpsux a5,3,9 + fxcxnpma f7,C1,e7,e5 ; lfpsux a9,3,9 + + fpadd f8,e8,e10 ; lfpsux a13,3,9 + fpsub f10,e8,e10 ; stfpsux f4,4,9 + fxcxnsma f9,C1,e11,e9 ; lfpsux a2,3,9 + fxcxnpma f11,C1,e11,e9 ; lfpsux a6,3,9 + + fpadd f12,e12,e14 ; lfpsux a10,3,9 + fpsub f14,e12,e14 ; stfpsux f8,4,9 + fxcxnsma f13,C1,e15,e13 ; lfpsux a14,3,9 + fxcxnpma 
f15,C1,e15,e13 ; lfpsux a3,3,9 + + +2: + ; stfpsux f12,4,9 + ; stfpsux f1,4,9 + ; stfpsux f5,4,9 + ; stfpsux f9,4,9 + ; stfpsux f13,4,9 + fpadd b0,a0,a2 ; lfpsux a7,3,9 + fpsub b1,a0,a2 ; lfpsux a11,3,9 + fpadd b2,a1,a3 ; lfpsux a15,3,9 + fpsub b3,a1,a3 + + fpadd b4,a4,a6 ; stfpsux f2,4,9 + fpsub b5,a4,a6 + fpadd b6,a5,a7 ; stfpsux f6,4,9 + fpsub b7,a5,a7 + + fpadd b8,a8,a10 ; stfpsux f10,4,9 + fpsub b9,a8,a10 + fpadd b10,a9,a11 ; stfpsux f14,4,9 + fpsub b11,a9,a11 + + fpadd b12,a12,a14 ; stfpsux f3,4,9 + fpsub b13,a12,a14 + fpadd b14,a13,a15 ; stfpsux f7,4,9 + fpsub b15,a13,a15 + + fpadd c0,b0,b2 ; stfpsux f11,4,9 + fpsub c2,b0,b2 ; stfpsux f15,4,9 + fxcxnsma c1,C1,b3,b1 ; lfpsx W,0,7 + fxcxnpma c3,C1,b3,b1 ; lfpsx W2,7,8 + + fpadd c4,b4,b6 + fpsub c6,b4,b6 + fxcxnsma c5,C1,b7,b5 + fxcxnpma c7,C1,b7,b5 + + fpadd c8,b8,b10 + fpsub c10,b8,b10 + fxcxnsma c9,C1,b11,b9 + fxcxnpma c11,C1,b11,b9 + + fpadd c12,b12,b14 + fpsub c14,b12,b14 + fxcxnsma c13,C1,b15,b13 + fxcxnpma c15,C1,b15,b13 + + fxpmul x5,c5,W + fxpmul x6,c6,W2 + fxpmul x7,c7,W + + fxpmul x9,c9,W2 + fxpmul x11,W2,c11 + + fxpmul x13,c13,W + fxpmul x14,W2,c14 + fxpmul x15,c15,W + + fxcxnpma d5,c5,W,x5 + fxcxnpma d6,c6,W2,x6 + fxcxnsma d7,c7,W,x7 + + fxcxnpma d9,c9,W2,x9 + fxcxnsma d11,W2,c11,x11 + + fxcxnsma d13,c13,W,x13 + fxcxnsma d14,W2,c14,x14 + fxcxnpma d15,c15,W,x15 + + fpadd e0,c0,c8 + fpsub e1,c0,c8 + fpadd e2,c4,c12 + fpsub e3,c4,c12 + + fpadd e4,c1,d9 + fpsub e5,c1,d9 + fxnmsub e6,d13,C1,d5 + fxmadd e7,d13,C1,d5 + + fxcxnsma e8,C1,c10,c2 + fxcxnpma e9,C1,c10,c2 + fpsub e10,d6,d14 + fpadd e11,d6,d14 + + fpsub e12,c3,d11 + fpadd e13,c3,d11 + fxnmadd e14,d7,C1,d15 ; addi 4,4,-239*8 + fxnmsub e15,d7,C1,d15 ; addi 3,3,-239*8 + + fpadd f0,e0,e2 ; lfpsx a0,0,3 + fpsub f2,e0,e2 ; lfpsux a4,3,9 + fxcxnsma f1,C1,e3,e1 ; lfpsux a8,3,9 + fxcxnpma f3,C1,e3,e1 ; lfpsux a12,3,9 + + fpadd f4,e4,e6 ; lfpsux a1,3,9 + fpsub f6,e4,e6 ; stfpsx f0,0,4 + fxcxnsma f5,C1,e7,e5 ; lfpsux a5,3,9 + fxcxnpma f7,C1,e7,e5 ; lfpsux 
a9,3,9 + + fpadd f8,e8,e10 ; lfpsux a13,3,9 + fpsub f10,e8,e10 ; stfpsux f4,4,9 + fxcxnsma f9,C1,e11,e9 ; lfpsux a2,3,9 + fxcxnpma f11,C1,e11,e9 ; lfpsux a6,3,9 + + fpadd f12,e12,e14 ; lfpsux a10,3,9 + fpsub f14,e12,e14 ; stfpsux f8,4,9 + fxcxnsma f13,C1,e15,e13 ; lfpsux a14,3,9 + fxcxnpma f15,C1,e15,e13 ; lfpsux a3,3,9 + ; bdnz 2b + + ; stfpsux f12,4,9 + ; stfpsux f1,4,9 + ; stfpsux f5,4,9 + ; stfpsux f9,4,9 + ; stfpsux f13,4,9 + ; stfpsux f2,4,9 + ; stfpsux f6,4,9 + ; stfpsux f10,4,9 + ; stfpsux f14,4,9 + ; stfpsux f3,4,9 + ; stfpsux f7,4,9 + ; stfpsux f11,4,9 + ; stfpsux f15,4,9 + + + li 9,16 ; lfpdx 31,0,1 + ; lfpdux 30,1,9 + ; lfpdux 29,1,9 + ; lfpdux 28,1,9 + ; lfpdux 27,1,9 + ; lfpdux 26,1,9 + ; lfpdux 25,1,9 + ; lfpdux 24,1,9 + ; lfpdux 23,1,9 + ; lfpdux 22,1,9 + ; lfpdux 21,1,9 + ; lfpdux 20,1,9 + ; lfpdux 19,1,9 + ; lfpdux 18,1,9 + ; lfpdux 17,1,9 + ; lfpdux 16,1,9 + ; lfpdux 15,1,9 + ; lfpdux 14,1,9 + + addi 1,1,16 ; blr + +#endif diff --git a/RTCP/CNProc/src/FFT_Asm.h b/RTCP/CNProc/src/FFT_Asm.h new file mode 100644 index 0000000000000000000000000000000000000000..3c854665e74c2f61e2bd3fb40db69dfad5913e0b --- /dev/null +++ b/RTCP/CNProc/src/FFT_Asm.h @@ -0,0 +1,38 @@ +//# FIR.h: header files for CN assembly +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +#ifndef LOFAR_CNPROC_FFT_ASM_H +#define LOFAR_CNPROC_FFT_ASM_H + +#if defined HAVE_BGL || defined HAVE_BGP +#include <Common/lofar_complex.h> // presumably supplies fcomplex -- TODO confirm + +namespace LOFAR { +namespace RTCP { +// _fft256: FFT entry point (256 points, judging by the name and array bounds); it is only declared here, the definition is presumably the assembly routine -- TODO confirm. +extern "C" { // C linkage, so the symbol name is not C++-mangled + void _fft256(const fcomplex in[256], fcomplex out[256]); // in is const (read-only); out is presumably the result buffer; the [256] bounds are not compiler-enforced, so callers must supply 256 elements each +} + +} // end namespace RTCP +} // end namespace LOFAR + +#endif // HAVE_BGL || HAVE_BGP +#endif // LOFAR_CNPROC_FFT_ASM_H diff --git a/RTCP/CNProc/src/FIR.cc b/RTCP/CNProc/src/FIR.cc new file mode 100644 index 0000000000000000000000000000000000000000..7374ad0e614e6535a9345520a50b5e1c8a701e03 --- /dev/null +++ b/RTCP/CNProc/src/FIR.cc @@ -0,0 +1,1069 @@ +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +//# Includes +#include <FIR.h> + +namespace LOFAR { +namespace RTCP { + + +// The first subband is from -98 kHz to 98 kHz, rather than from 0 to 195 kHz. +// To keep the FFT from emitting the channels in the wrong order (128 to +// 255 followed by channels 0 to 127), we multiply every second FFT input by -1. +// This is efficiently achieved by negating the FIR filter constants of all +// odd-numbered FIR filters.
+ +const float FIR::weights[256][NR_TAPS] __attribute__ ((aligned(32))) = { + { 0.011659500, -0.011535200, 0.005131880, 0.001219900, + -0.006891530, 0.011598600, -0.015420900, 1.000000000, + -0.011661800, 0.009847130, -0.005852040, 0.000558600, + 0.005549120, -0.011758700, 0.011716500, 0.000414833 }, + { -0.011602000, 0.011311400, -0.004715490, -0.001878750, + 0.007926170, -0.013340200, 0.019147200, -0.999948000, + 0.007870030, -0.008086100, 0.004807840, 0.000105072, + -0.005967150, 0.011982000, -0.011773000, -0.000414623 }, + { 0.011544100, -0.011087500, 0.004299990, 0.002535060, + -0.008955820, 0.015071700, -0.022840300, 0.999843000, + -0.004045800, 0.006315690, -0.003759060, -0.000771025, + 0.006385910, -0.012205000, 0.011829000, 0.000414214 }, + { -0.011485700, 0.010863400, -0.003885440, -0.003188740, + 0.009980360, -0.016793100, 0.026500200, -0.999686000, + 0.000189332, -0.004536100, 0.002705860, 0.001439170, + -0.006805360, 0.012427700, -0.011884500, -0.000413633 }, + { 0.011426900, -0.010639200, 0.003471880, 0.003839710, + -0.010999700, 0.018503900, -0.030126600, 0.999477000, + 0.003699140, 0.002747570, -0.001648360, -0.002109420, + 0.007225430, -0.012650100, 0.011939500, 0.000412915 }, + { -0.011367600, 0.010414900, -0.003059380, -0.004487890, + 0.012013600, -0.020204200, 0.033719300, -0.999215000, + -0.007619390, -0.000950314, 0.000586709, 0.002781680, + -0.007646080, 0.012872200, -0.011993900, -0.000412106 }, + { 0.011307900, -0.010190500, 0.002647990, 0.005133180, + -0.013021900, 0.021893500, -0.037278100, 0.998901000, + 0.011571200, -0.000855436, 0.000478948, -0.003455870, + 0.008067240, -0.013093800, 0.012047800, 0.000411265 }, + { -0.011247800, 0.009966110, -0.002237740, -0.005775510, + 0.014024700, -0.023571800, 0.040802900, -0.998535000, + -0.015554300, 0.002669450, -0.001548470, 0.004131890, + -0.008488880, 0.013315100, -0.012101100, -0.000410456 }, + { 0.011187300, -0.009741670, 0.001828700, 0.006414810, + -0.015021700, 0.025238900, -0.044293400, 
0.998117000, + 0.019568500, -0.004491510, 0.002621710, -0.004809650, + 0.008910910, -0.013535900, 0.012153900, 0.000409752 }, + { -0.011126400, 0.009517230, -0.001420910, -0.007050970, + 0.016012700, -0.026894500, 0.047749500, -0.997646000, + -0.023613500, 0.006321360, -0.003698520, 0.005489070, + -0.009333300, 0.013756200, -0.012206100, -0.000409230 }, + { 0.011065200, -0.009292840, 0.001014430, 0.007683930, + -0.016997800, 0.028538400, -0.051171100, 0.997124000, + 0.027689100, -0.008158790, 0.004778750, -0.006170040, + 0.009755990, -0.013976000, 0.012257600, 0.000408972 }, + { -0.011003500, 0.009068510, -0.000609294, -0.008313610, + 0.017976700, -0.030170600, 0.054557800, -0.996549000, + -0.031795100, 0.010003500, -0.005862260, 0.006852470, + -0.010178900, 0.014195300, -0.012308600, -0.000409063 }, + { 0.010941500, -0.008844280, 0.000205562, 0.008939920, + -0.018949300, 0.031790600, -0.057909700, 0.995922000, + 0.035931100, -0.011855400, 0.006948890, -0.007536270, + 0.010602000, -0.014414000, 0.012358900, 0.000409585 }, + { -0.010879200, 0.008620160, 0.000196720, -0.009562800, + 0.019915600, -0.033398500, 0.061226400, -0.995244000, + -0.040096900, 0.013714100, -0.008038500, 0.008221360, + -0.011025300, 0.014632100, -0.012408600, -0.000410621 }, + { 0.010816400, -0.008396190, -0.000597505, 0.010182200, + -0.020875300, 0.034994000, -0.064507900, 0.994513000, + 0.044292300, -0.015579400, 0.009130930, -0.008907620, + 0.011448600, -0.014849700, 0.012457700, 0.000412251 }, + { -0.010753400, 0.008172400, 0.000996745, -0.010797900, + 0.021828400, -0.036576900, 0.067754100, -0.993731000, + -0.048517000, 0.017451100, -0.010226000, 0.009594970, + -0.011871900, 0.015066500, -0.012506100, -0.000414547 }, + { 0.010690100, -0.007948800, -0.001394390, 0.011410000, + -0.022774800, 0.038147100, -0.070964600, 0.992897000, + 0.052770700, -0.019328800, 0.011323700, -0.010283300, + 0.012295200, -0.015282700, 0.012553800, 0.000417579 }, + { -0.010626400, 0.007725430, 0.001790400, 
-0.012018300, + 0.023714300, -0.039704400, 0.074139500, -0.992011000, + -0.057053200, 0.021212500, -0.012423600, 0.010972500, + -0.012718300, 0.015498200, -0.012600900, -0.000421406 }, + { 0.010562400, -0.007502300, -0.002184730, 0.012622900, + -0.024646800, 0.041248500, -0.077278600, 0.991074000, + 0.061364100, -0.023101700, 0.013525800, -0.011662600, + 0.013141300, -0.015712900, 0.012647200, 0.000426080 }, + { -0.010498200, 0.007279450, 0.002577320, -0.013223500, + 0.025572200, -0.042779400, 0.080381600, -0.990085000, + -0.065703200, 0.024996400, -0.014630100, 0.012353300, + -0.013564100, 0.015926800, -0.012692900, -0.000431643 }, + { 0.010433600, -0.007056910, -0.002968140, 0.013820200, + -0.026490500, 0.044296800, -0.083448600, 0.989045000, + 0.070070300, -0.026896100, 0.015736200, -0.013044600, + 0.013986600, -0.016140000, 0.012737800, 0.000438126 }, + { -0.010368800, 0.006834680, 0.003357130, -0.014412800, + 0.027401400, -0.045800700, 0.086479400, -0.987953000, + -0.074464900, 0.028800600, -0.016844000, 0.013736500, + -0.014408800, 0.016352300, -0.012782000, -0.000445550 }, + { 0.010303700, -0.006612810, -0.003744260, 0.015001400, + -0.028304900, 0.047290800, -0.089473800, 0.986810000, + 0.078886800, -0.030709800, 0.017953500, -0.014428700, + 0.014830600, -0.016563700, 0.012825500, 0.000453924 }, + { -0.010238400, 0.006391310, 0.004129480, -0.015585700, + 0.029200900, -0.048766900, 0.092431700, -0.985617000, + -0.083335800, 0.032623200, -0.019064300, 0.015121200, + -0.015251900, 0.016774300, -0.012868200, -0.000463250 }, + { 0.010172900, -0.006170210, -0.004512750, 0.016165800, + -0.030089200, 0.050229000, -0.095353100, 0.984372000, + 0.087811500, -0.034540800, 0.020176400, -0.015814000, + 0.015672700, -0.016983900, 0.012910200, 0.000473514 }, + { -0.010107100, 0.005949540, 0.004894020, -0.016741600, + 0.030969900, -0.051676900, 0.098237800, -0.983076000, + -0.092313600, 0.036462100, -0.021289600, 0.016506800, + -0.016092900, 0.017192600, -0.012951400, 
-0.000484696 }, + { 0.010041100, -0.005729310, -0.005273240, 0.017313100, + -0.031842600, 0.053110400, -0.101086000, 0.981730000, + 0.096841900, -0.038386900, 0.022403700, -0.017199700, + 0.016512500, -0.017400200, 0.012991800, 0.000496767 }, + { -0.009974950, 0.005509550, 0.005650390, -0.017880100, + 0.032707500, -0.054529300, 0.103897000, -0.980333000, + -0.101396000, 0.040315000, -0.023518600, 0.017892400, + -0.016931400, 0.017606900, -0.013031400, -0.000509689 }, + { 0.009908570, -0.005290280, -0.006025420, 0.018442500, + -0.033564300, 0.055933600, -0.106671000, 0.978886000, + 0.105975000, -0.042246100, 0.024634100, -0.018585000, + 0.017349500, -0.017812500, 0.013070200, 0.000523415 }, + { -0.009842020, 0.005071530, 0.006398270, -0.019000400, + 0.034413000, -0.057323100, 0.109408000, -0.977388000, + -0.110580000, 0.044180000, -0.025750100, 0.019277200, + -0.017766900, 0.018017000, -0.013108200, -0.000537896 }, + { 0.009775290, -0.004853320, -0.006768930, 0.019553700, + -0.035253500, 0.058697600, -0.112107000, 0.975840000, + 0.115210000, -0.046116200, 0.026866300, -0.019969100, + 0.018183300, -0.018220400, 0.013145400, 0.000553077 }, + { -0.009708410, 0.004635680, 0.007137330, -0.020102300, + 0.036085600, -0.060057000, 0.114770000, -0.974242000, + -0.119864000, 0.048054700, -0.027982600, 0.020660400, + -0.018598800, 0.018422700, -0.013181700, -0.000568899 }, + { 0.009641370, -0.004418620, -0.007503460, 0.020646100, + -0.036909400, 0.061401200, -0.117395000, 0.972595000, + 0.124542000, -0.049995000, 0.029098800, -0.021351200, + 0.019013400, -0.018623700, 0.013217200, 0.000585302 }, + { -0.009574200, 0.004202160, 0.007867250, -0.021185100, + 0.037724600, -0.062730000, 0.119983000, -0.970897000, + -0.129244000, 0.051936900, -0.030214800, 0.022041200, + -0.019426800, 0.018823600, -0.013251800, -0.000602227 }, + { 0.009506900, -0.003986340, -0.008228680, 0.021719200, + -0.038531200, 0.064043400, -0.122533000, 0.969150000, + 0.133970000, -0.053880200, 0.031330400, 
-0.022730400, + 0.019839200, -0.019022200, 0.013285600, 0.000619615 }, + { -0.009439470, 0.003771170, 0.008587720, -0.022248400, + 0.039329200, -0.065341100, 0.125045000, -0.967354000, + -0.138719000, 0.055824500, -0.032445500, 0.023418700, + -0.020250300, 0.019219600, -0.013318400, -0.000637409 }, + { 0.009371930, -0.003556670, -0.008944310, 0.022772500, + -0.040118300, 0.066623100, -0.127520000, 0.965508000, + 0.143491000, -0.057769600, 0.033559800, -0.024106000, + 0.020660300, -0.019415600, 0.013350400, 0.000655555 }, + { -0.009304290, 0.003342870, 0.009298430, -0.023291700, + 0.040898700, -0.067889200, 0.129958000, -0.963614000, + -0.148285000, 0.059715200, -0.034673200, 0.024792100, + -0.021068900, 0.019610300, -0.013381400, -0.000674005 }, + { 0.009236550, -0.003129790, -0.009650030, 0.023805700, + -0.041670100, 0.069139400, -0.132357000, 0.961670000, + 0.153101000, -0.061661000, 0.035785500, -0.025477100, + 0.021476100, -0.019803700, 0.013411500, 0.000692716 }, + { -0.009168730, 0.002917450, 0.009999080, -0.024314500, + 0.042432500, -0.070373400, 0.134719000, -0.959678000, + -0.157939000, 0.063606700, -0.036896600, 0.026160600, + -0.021882000, 0.019995700, -0.013440700, -0.000711649 }, + { 0.009100820, -0.002705870, -0.010345600, 0.024818200, + -0.043185700, 0.071591300, -0.137043000, 0.957637000, + 0.162799000, -0.065552000, 0.038006300, -0.026842800, + 0.022286300, -0.020186200, 0.013469000, 0.000730774 }, + { -0.009032840, 0.002495060, 0.010689400, -0.025316500, + 0.043929900, -0.072792800, 0.139329000, -0.955548000, + -0.167680000, 0.067496700, -0.039114400, 0.027523400, + -0.022689100, 0.020375300, -0.013496200, -0.000750066 }, + { 0.008964800, -0.002285070, -0.011030600, 0.025809600, + -0.044664700, 0.073977900, -0.141578000, 0.953411000, + 0.172581000, -0.069440400, 0.040220700, -0.028202300, + 0.023090300, -0.020562800, 0.013522500, 0.000769507 }, + { -0.008896700, 0.002075890, 0.011369100, -0.026297300, + 0.045390300, -0.075146500, 0.143788000, 
-0.951226000, + -0.177503000, 0.071382800, -0.041325100, 0.028879500, + -0.023489900, 0.020748900, -0.013547900, -0.000789086 }, + { 0.008828540, -0.001867560, -0.011704900, 0.026779500, + -0.046106400, 0.076298500, -0.145960000, 0.948994000, + 0.182445000, -0.073323800, 0.042427300, -0.029554900, + 0.023887600, -0.020933400, 0.013572200, 0.000808800 }, + { -0.008760350, 0.001660090, 0.012037900, -0.027256300, + 0.046813100, -0.077433700, 0.148095000, -0.946714000, + -0.187407000, 0.075262900, -0.043527300, 0.030228300, + -0.024283600, 0.021116400, -0.013595500, -0.000828649 }, + { 0.008692120, -0.001453500, -0.012368200, 0.027727600, + -0.047510200, 0.078552200, -0.150191000, 0.944386000, + 0.192388000, -0.077199800, 0.044624900, -0.030899600, + 0.024677800, -0.021297700, 0.013617800, 0.000848641 }, + { -0.008623860, 0.001247820, 0.012695600, -0.028193200, + 0.048197700, -0.079653700, 0.152249000, -0.942012000, + -0.197388000, 0.079134300, -0.045719800, 0.031568700, + -0.025070000, 0.021477300, -0.013639100, -0.000868789 }, + { 0.008555590, -0.001043050, -0.013020200, 0.028653300, + -0.048875600, 0.080738200, -0.154269000, 0.939591000, + 0.202406000, -0.081066100, 0.046811900, -0.032235500, + 0.025460200, -0.021655300, 0.013659300, 0.000889109 }, + { -0.008487300, 0.000839230, 0.013341900, -0.029107700, + 0.049543700, -0.081805600, 0.156252000, -0.937123000, + -0.207442000, 0.082994900, -0.047901000, 0.032900000, + -0.025848400, 0.021831600, -0.013678500, -0.000909621 }, + { 0.008419010, -0.000636364, -0.013660700, 0.029556500, + -0.050202100, 0.082855900, -0.158196000, 0.934609000, + 0.212496000, -0.084920300, 0.048986900, -0.033562000, + 0.026234500, -0.022006200, 0.013696600, 0.000930348 }, + { -0.008350720, 0.000434476, 0.013976600, -0.029999400, + 0.050850500, -0.083888900, 0.160102000, -0.932049000, + -0.217568000, 0.086842200, -0.050069600, 0.034221300, + -0.026618500, 0.022179000, -0.013713600, -0.000951314 }, + { 0.008282450, -0.000233582, -0.014289400, 
0.030436600, + -0.051489100, 0.084904600, -0.161970000, 0.929443000, + 0.222657000, -0.088760100, 0.051148700, -0.034878000, + 0.027000200, -0.022350000, 0.013729500, 0.000972545 }, + { -0.008214200, 0.000033699, 0.014599300, -0.030868000, + 0.052117600, -0.085902800, 0.163800000, -0.926792000, + -0.227762000, 0.090673700, -0.052224200, 0.035531800, + -0.027379600, 0.022519100, -0.013744400, -0.000994065 }, + { 0.008145980, 0.000165153, -0.014906200, 0.031293500, + -0.052736100, 0.086883600, -0.165592000, 0.924095000, + 0.232883000, -0.092582800, 0.053295800, -0.036182700, + 0.027756600, -0.022686400, 0.013758100, 0.001015900 }, + { -0.008077800, -0.000362959, 0.015210000, -0.031713100, + 0.053344600, -0.087846900, 0.167346000, -0.921353000, + -0.238020000, 0.094487100, -0.054363300, 0.036830600, + -0.028131300, 0.022851800, -0.013770700, -0.001038070 }, + { 0.008009680, 0.000559702, -0.015510700, 0.032126700, + -0.053942900, 0.088792500, -0.169062000, 0.918566000, + 0.243173000, -0.096386300, 0.055426700, -0.037475400, + 0.028503500, -0.023015300, 0.013782200, 0.001060610 }, + { -0.007941610, -0.000755366, 0.015808300, -0.032534400, + 0.054531000, -0.089720400, 0.170740000, -0.915735000, + -0.248340000, 0.098280000, -0.056485700, 0.038117000, + -0.028873100, 0.023176800, -0.013792600, -0.001083520 }, + { 0.007873600, 0.000949933, -0.016102800, 0.032936000, + -0.055108900, 0.090630600, -0.172380000, 0.912859000, + 0.253523000, -0.100168000, 0.057540100, -0.038755100, + 0.029240200, -0.023336400, 0.013801800, 0.001106820 }, + { -0.007805680, -0.001143390, 0.016394100, -0.033331600, + 0.055676400, -0.091523000, 0.173982000, -0.909939000, + -0.258719000, 0.102050000, -0.058589800, 0.039389900, + -0.029604600, 0.023493900, -0.013809800, -0.001130530 }, + { 0.007737840, 0.001335720, -0.016682300, 0.033721100, + -0.056233700, 0.092397600, -0.175546000, 0.906976000, + 0.263929000, -0.103925000, 0.059634500, -0.040021100, + 0.029966300, -0.023649400, 0.013816700, 
0.001154660 }, + { -0.007670090, -0.001526900, 0.016967200, -0.034104500, + 0.056780500, -0.093254300, 0.177073000, -0.903969000, + -0.269153000, 0.105794000, -0.060674200, 0.040648600, + -0.030325200, 0.023802800, -0.013822400, -0.001179210 }, + { 0.007602450, 0.001716930, -0.017248900, 0.034481800, + -0.057317000, 0.094092900, -0.178561000, 0.900919000, + 0.274389000, -0.107656000, 0.061708600, -0.041272400, + 0.030681200, -0.023954100, 0.013826900, 0.001204190 }, + { -0.007534920, -0.001905780, 0.017527400, -0.034852800, + 0.057842900, -0.094913600, 0.180012000, -0.897826000, + -0.279638000, 0.109510000, -0.062737600, 0.041892300, + -0.031034400, 0.024103300, -0.013830200, -0.001229590 }, + { 0.007467500, 0.002093450, -0.017802500, 0.035217700, + -0.058358400, 0.095716300, -0.181426000, 0.894690000, + 0.284899000, -0.111357000, 0.063761000, -0.042508200, + 0.031384600, -0.024250300, 0.013832300, 0.001255410 }, + { -0.007400210, -0.002279910, 0.018074400, -0.035576300, + 0.058863300, -0.096500800, 0.182801000, -0.891512000, + -0.290172000, 0.113196000, -0.064778500, 0.043120000, + -0.031731800, 0.024395100, -0.013833100, -0.001281660 }, + { 0.007333050, 0.002465150, -0.018342900, 0.035928600, + -0.059357700, 0.097267200, -0.184139000, 0.888292000, + 0.295456000, -0.115027000, 0.065790100, -0.043727700, + 0.032075900, -0.024537700, 0.013832800, 0.001308310 }, + { -0.007266020, -0.002649160, 0.018608200, -0.036274700, + 0.059841400, -0.098015500, 0.185440000, -0.885030000, + -0.300751000, 0.116849000, -0.066795600, 0.044331000, + -0.032416900, 0.024678000, -0.013831200, -0.001335360 }, + { 0.007199140, 0.002831930, -0.018870000, 0.036614400, + -0.060314500, 0.098745500, -0.186703000, 0.881727000, + 0.306057000, -0.118663000, 0.067794800, -0.044930000, + 0.032754700, -0.024816100, 0.013828300, 0.001362810 }, + { -0.007132410, -0.003013430, 0.019128500, -0.036947800, + 0.060777000, -0.099457400, 0.187929000, -0.878383000, + -0.311372000, 0.120467000, -0.068787400, 
0.045524500, + -0.033089200, 0.024951900, -0.013824200, -0.001390630 }, + { 0.007065840, 0.003193670, -0.019383500, 0.037274800, + -0.061228700, 0.100151000, -0.189118000, 0.874998000, + 0.316697000, -0.122261000, 0.069773400, -0.046114300, + 0.033420400, -0.025085300, 0.013818800, 0.001418820 }, + { -0.006999420, -0.003372610, 0.019635200, -0.037595400, + 0.061669700, -0.100826000, 0.190269000, -0.871573000, + -0.322032000, 0.124046000, -0.070752600, 0.046699500, + -0.033748200, 0.025216300, -0.013812100, -0.001447370 }, + { 0.006933170, 0.003550250, -0.019883400, 0.037909600, + -0.062100000, 0.101483000, -0.191383000, 0.868108000, + 0.327374000, -0.125821000, 0.071724700, -0.047279900, + 0.034072600, -0.025345000, 0.013804200, 0.001476260 }, + { -0.006867080, -0.003726580, 0.020128100, -0.038217400, + 0.062519400, -0.102122000, 0.192461000, -0.864602000, + -0.332726000, 0.127585000, -0.072689700, 0.047855300, + -0.034393500, 0.025471200, -0.013794900, -0.001505490 }, + { 0.006801170, 0.003901590, -0.020369400, 0.038518700, + -0.062928100, 0.102742000, -0.193501000, 0.861058000, + 0.338085000, -0.129339000, 0.073647200, -0.048425700, + 0.034710800, -0.025595000, 0.013784300, 0.001535060 }, + { -0.006735430, -0.004075260, 0.020607200, -0.038813500, + 0.063325900, -0.103344000, 0.194505000, -0.857474000, + -0.343451000, 0.131082000, -0.074597300, 0.048991000, + -0.035024600, 0.025716300, -0.013772400, -0.001564950 }, + { 0.006669880, 0.004247570, -0.020841500, 0.039101900, + -0.063712900, 0.103928000, -0.195471000, 0.853851000, + 0.348824000, -0.132813000, 0.075539700, -0.049551100, + 0.035334600, -0.025835000, 0.013759200, 0.001595150 }, + { -0.006604510, -0.004418520, 0.021072300, -0.039383700, + 0.064089100, -0.104493000, 0.196402000, -0.850190000, + -0.354204000, 0.134532000, -0.076474100, 0.050105900, + -0.035641000, 0.025951300, -0.013744600, -0.001625680 }, + { 0.006539330, 0.004588100, -0.021299500, 0.039659100, + -0.064454400, 0.105040000, -0.197295000, 
0.846491000, + 0.359590000, -0.136240000, 0.077400500, -0.050655300, + 0.035943500, -0.026065000, 0.013728700, 0.001656520 }, + { -0.006474340, -0.004756290, 0.021523200, -0.039927800, + 0.064808800, -0.105568000, 0.198153000, -0.842754000, + -0.364982000, 0.137935000, -0.078318700, 0.051199100, + -0.036242200, 0.026176100, -0.013711400, -0.001687680 }, + { 0.006409560, 0.004923090, -0.021743400, 0.040190100, + -0.065152300, 0.106078000, -0.198974000, 0.838980000, + 0.370379000, -0.139618000, 0.079228600, -0.051737300, + 0.036537000, -0.026284600, 0.013692700, 0.001719160 }, + { -0.006344970, -0.005088480, 0.021959900, -0.040445700, + 0.065484900, -0.106570000, 0.199758000, -0.835168000, + -0.375780000, 0.141287000, -0.080129800, 0.052269800, + -0.036827900, 0.026390400, -0.013672700, -0.001750970 }, + { 0.006280600, 0.005252450, -0.022172900, 0.040694800, + -0.065806500, 0.107043000, -0.200507000, 0.831320000, + 0.381186000, -0.142944000, 0.081022300, -0.052796500, + 0.037114800, -0.026493600, 0.013651200, 0.001783110 }, + { -0.006216440, -0.005415000, 0.022382300, -0.040937300, + 0.066117300, -0.107498000, 0.201220000, -0.827436000, + -0.386596000, 0.144586000, -0.081905900, 0.053317300, + -0.037397500, 0.026594100, -0.013628400, -0.001815590 }, + { 0.006152510, 0.005576110, -0.022588000, 0.041173200, + -0.066417100, 0.107935000, -0.201897000, 0.823515000, + 0.392009000, -0.146215000, 0.082780500, -0.053832000, + 0.037676200, -0.026691800, 0.013604200, 0.001848420 }, + { -0.006088800, -0.005735770, 0.022790200, -0.041402500, + 0.066706000, -0.108353000, 0.202539000, -0.819559000, + -0.397425000, 0.147830000, -0.083645800, 0.054340600, + -0.037950700, 0.026786800, -0.013578500, -0.001881610 }, + { 0.006025320, 0.005893980, -0.022988700, 0.041625200, + -0.066983900, 0.108753000, -0.203145000, 0.815568000, + 0.402843000, -0.149431000, 0.084501700, -0.054843000, + 0.038221000, -0.026879000, 0.013551500, 0.001915170 }, + { -0.005962090, -0.006050720, 0.023183600, 
-0.041841300, + 0.067250900, -0.109135000, 0.203715000, -0.811542000, + -0.408264000, 0.151016000, -0.085348100, 0.055339100, + -0.038486900, 0.026968500, -0.013523000, -0.001949110 }, + { 0.005899110, 0.006205990, -0.023374800, 0.042050800, + -0.067507000, 0.109498000, -0.204250000, 0.807481000, + 0.413686000, -0.152586000, 0.086184700, -0.055828800, + 0.038748600, -0.027055100, 0.013493100, 0.001983430 }, + { -0.005836370, -0.006359780, 0.023562400, -0.042253600, + 0.067752100, -0.109843000, 0.204751000, -0.803387000, + -0.419109000, 0.154141000, -0.087011400, 0.056312000, + -0.039005800, 0.027138800, -0.013461700, -0.002018150 }, + { 0.005773900, 0.006512080, -0.023746300, 0.042449800, + -0.067986300, 0.110170000, -0.205216000, 0.799258000, + 0.424533000, -0.155681000, 0.087828100, -0.056788600, + 0.039258600, -0.027219700, 0.013428900, 0.002053260 }, + { -0.005711700, -0.006662890, 0.023926500, -0.042639300, + 0.068209600, -0.110479000, 0.205646000, -0.795096000, + -0.429957000, 0.157204000, -0.088634500, 0.057258500, + -0.039506900, 0.027297700, -0.013394600, -0.002088790 }, + { 0.005649770, 0.006812190, -0.024103100, 0.042822200, + -0.068422000, 0.110769000, -0.206042000, 0.790902000, + 0.435380000, -0.158711000, 0.089430600, -0.057721700, + 0.039750700, -0.027372700, 0.013358900, 0.002124730 }, + { -0.005588130, -0.006959980, 0.024275900, -0.042998500, + 0.068623400, -0.111041000, 0.206403000, -0.786674000, + -0.440803000, 0.160201000, -0.090216100, 0.058177900, + -0.039989900, 0.027444800, -0.013321700, -0.002161090 }, + { 0.005526770, 0.007106250, -0.024445100, 0.043168100, + -0.068814000, 0.111295000, -0.206731000, 0.782415000, + 0.446225000, -0.161674000, 0.090990900, -0.058627200, + 0.040224400, -0.027514000, 0.013283000, 0.002197860 }, + { -0.005465700, -0.007251000, 0.024610600, -0.043331100, + 0.068993700, -0.111531000, 0.207023000, -0.778123000, + -0.451645000, 0.163130000, -0.091754800, 0.059069400, + -0.040454200, 0.027580100, -0.013242800, 
-0.002235060 }, + { 0.005404930, 0.007394220, -0.024772300, 0.043487500, + -0.069162500, 0.111750000, -0.207282000, 0.773801000, + 0.457063000, -0.164568000, 0.092507700, -0.059504500, + 0.040679200, -0.027643200, 0.013201200, 0.002272680 }, + { -0.005344460, -0.007535900, 0.024930400, -0.043637200, + 0.069320400, -0.111950000, 0.207508000, -0.769447000, + -0.462478000, 0.165988000, -0.093249500, 0.059932300, + -0.040899500, 0.027703300, -0.013158100, -0.002310720 }, + { 0.005284290, 0.007676030, -0.025084700, 0.043780300, + -0.069467500, 0.112132000, -0.207699000, 0.765063000, + 0.467891000, -0.167391000, 0.093979900, -0.060352800, + 0.041114900, -0.027760300, 0.013113400, 0.002349170 }, + { -0.005224440, -0.007814610, 0.025235300, -0.043916800, + 0.069603800, -0.112296000, 0.207857000, -0.760649000, + -0.473299000, 0.168774000, -0.094698800, 0.060765800, + -0.041325400, 0.027814200, -0.013067200, -0.002388040 }, + { 0.005164890, 0.007951640, -0.025382200, 0.044046600, + -0.069729300, 0.112442000, -0.207982000, 0.756204000, + 0.478704000, -0.170139000, 0.095406000, -0.061171400, + 0.041530900, -0.027865000, 0.013019600, 0.002427310 }, + { -0.005105670, -0.008087110, 0.025525400, -0.044169900, + 0.069844000, -0.112571000, 0.208074000, -0.751731000, + -0.484104000, 0.171485000, -0.096101500, 0.061569300, + -0.041731400, 0.027912700, -0.012970400, -0.002466990 }, + { 0.005046750, 0.008221000, -0.025664900, 0.044286500, + -0.069947900, 0.112682000, -0.208133000, 0.747228000, + 0.489500000, -0.172811000, 0.096785000, -0.061959600, + 0.041926900, -0.027957300, 0.012919600, 0.002507070 }, + { -0.004988160, -0.008353330, 0.025800600, -0.044396600, + 0.070041100, -0.112775000, 0.208159000, -0.742698000, + -0.494890000, 0.174118000, -0.097456400, 0.062342100, + -0.042117300, 0.027998600, -0.012867400, -0.002547540 }, + { 0.004929880, 0.008484080, -0.025932700, 0.044500000, + -0.070123600, 0.112850000, -0.208153000, 0.738138000, + 0.500274000, -0.175405000, 0.098115500, 
-0.062716700, + 0.042302600, -0.028036800, 0.012813600, 0.002588390 }, + { -0.004871920, -0.008613240, 0.026060900, -0.044596900, + 0.070195500, -0.112908000, 0.208114000, -0.733552000, + -0.505652000, 0.176671000, -0.098762300, 0.063083500, + -0.042482700, 0.028071700, -0.012758200, -0.002629630 }, + { 0.004814280, 0.008740820, -0.026185500, 0.044687200, + -0.070256600, 0.112949000, -0.208043000, 0.728938000, + 0.511023000, -0.177917000, 0.099396500, -0.063442100, + 0.042657500, -0.028103500, 0.012701300, 0.002671240 }, + { -0.004756970, -0.008866800, 0.026306400, -0.044771000, + 0.070307200, -0.112972000, 0.207941000, -0.724297000, + -0.516386000, 0.179142000, -0.100018000, 0.063792700, + -0.042827100, 0.028131900, -0.012642800, -0.002713230 }, + { 0.004699970, 0.008991190, -0.026423500, 0.044848200, + -0.070347100, 0.112977000, -0.207806000, 0.719630000, + 0.521742000, -0.180346000, 0.100627000, -0.064135100, + 0.042991400, -0.028157100, 0.012582800, 0.002755580 }, + { -0.004643290, -0.009113970, 0.026536900, -0.044918900, + 0.070376500, -0.112966000, 0.207641000, -0.714936000, + -0.527090000, 0.181528000, -0.101222000, 0.064469200, + -0.043150300, 0.028179000, -0.012521200, -0.002798310 }, + { 0.004586940, 0.009235160, -0.026646600, 0.044983100, + -0.070395400, 0.112937000, -0.207444000, 0.710218000, + 0.532429000, -0.182688000, 0.101805000, -0.064795000, + 0.043303700, -0.028197600, 0.012458000, 0.002841390 }, + { -0.004530910, -0.009354740, 0.026752600, -0.045040800, + 0.070403800, -0.112891000, 0.207216000, -0.705474000, + -0.537759000, 0.183827000, -0.102374000, 0.065112400, + -0.043451800, 0.028212900, -0.012393300, -0.002884840 }, + { 0.004475210, 0.009472700, -0.026854800, 0.045092000, + -0.070401800, 0.112828000, -0.206957000, 0.700705000, + 0.543079000, -0.184943000, 0.102930000, -0.065421200, + 0.043594300, -0.028224800, 0.012327000, 0.002928660 }, + { -0.004419840, -0.009589060, 0.026953400, -0.045136800, + 0.070389300, -0.112748000, 0.206667000, 
-0.695912000, + -0.548389000, 0.186036000, -0.103473000, 0.065721500, + -0.043731300, 0.028233400, -0.012259000, -0.002972840 }, + { 0.004364790, 0.009703800, -0.027048300, 0.045175100, + -0.070366500, 0.112651000, -0.206347000, 0.691096000, + 0.553689000, -0.187106000, 0.104001000, -0.066013100, + 0.043862800, -0.028238600, 0.012189500, 0.003017380 }, + { -0.004310080, -0.009816920, 0.027139400, -0.045207000, + 0.070333300, -0.112537000, 0.205997000, -0.686256000, + -0.558978000, 0.188154000, -0.104516000, 0.066296000, + -0.043988600, 0.028240400, -0.012118400, -0.003062300 }, + { 0.004255710, 0.009928420, -0.027226900, 0.045232400, + -0.070289900, 0.112406000, -0.205617000, 0.681393000, + 0.564255000, -0.189177000, 0.105017000, -0.066570100, + 0.044108700, -0.028238700, 0.012045700, 0.003107590 }, + { -0.004201670, -0.010038300, 0.027310700, -0.045251500, + 0.070236200, -0.112259000, 0.205208000, -0.676508000, + -0.569520000, 0.190177000, -0.105504000, 0.066835300, + -0.044223200, 0.028233700, -0.011971400, -0.003153250 }, + { 0.004147990, 0.010146600, -0.027390800, 0.045264300, + -0.070172300, 0.112096000, -0.204769000, 0.671600000, + 0.574773000, -0.191153000, 0.105976000, -0.067091500, + 0.044332000, -0.028225200, 0.011895500, 0.003199290 }, + { -0.004094650, -0.010253200, 0.027467200, -0.045270600, + 0.070098300, -0.111916000, 0.204300000, -0.666671000, + -0.580013000, 0.192105000, -0.106435000, 0.067338800, + -0.044435000, 0.028213200, -0.011818000, -0.003245720 }, + { 0.004041660, 0.010358200, -0.027540000, 0.045270700, + -0.070014200, 0.111719000, -0.203803000, 0.661721000, + 0.585239000, -0.193032000, 0.106879000, -0.067576900, + 0.044532100, -0.028197800, 0.011738900, 0.003292530 }, + { -0.003989040, -0.010461600, 0.027609100, -0.045264500, + 0.069920000, -0.111506000, 0.203277000, -0.656750000, + -0.590452000, 0.193934000, -0.107308000, 0.067805900, + -0.044623500, 0.028178800, -0.011658200, -0.003339730 }, + { 0.003936790, 0.010563400, -0.027674600, 
0.045252000, + -0.069815800, 0.111277000, -0.202722000, 0.651759000, + 0.595651000, -0.194811000, 0.107722000, -0.068025700, + 0.044708900, -0.028156400, 0.011575800, 0.003387320 }, + { -0.003884900, -0.010663500, 0.027736400, -0.045233300, + 0.069701700, -0.111032000, 0.202139000, -0.646748000, + -0.600834000, 0.195663000, -0.108122000, 0.068236100, + -0.044788500, 0.028130400, -0.011491900, -0.003435300 }, + { 0.003833390, 0.010762100, -0.027794600, 0.045208300, + -0.069577700, 0.110771000, -0.201528000, 0.641718000, + 0.606003000, -0.196489000, 0.108507000, -0.068437200, + 0.044862100, -0.028101000, 0.011406300, 0.003483680 }, + { -0.003782260, -0.010859000, 0.027849100, -0.045177200, + 0.069443800, -0.110495000, 0.200890000, -0.636669000, + -0.611156000, 0.197289000, -0.108877000, 0.068628900, + -0.044929800, 0.028067900, -0.011319200, -0.003532460 }, + { 0.003731520, 0.010954200, -0.027900100, 0.045139900, + -0.069300200, 0.110202000, -0.200223000, 0.631601000, + 0.616292000, -0.198062000, 0.109231000, -0.068811100, + 0.044991400, -0.028031300, 0.011230400, 0.003581630 }, + { -0.003681160, -0.011047900, 0.027947400, -0.045096500, + 0.069146800, -0.109894000, 0.199530000, -0.626516000, + -0.621413000, 0.198809000, -0.109570000, 0.068983800, + -0.045047000, 0.027991200, -0.011139900, -0.003631200 }, + { 0.003631200, 0.011139900, -0.027991200, 0.045047000, + -0.068983800, 0.109570000, -0.198809000, 0.621413000, + 0.626516000, -0.199530000, 0.109894000, -0.069146800, + 0.045096500, -0.027947400, 0.011047900, 0.003681160 }, + { -0.003581630, -0.011230400, 0.028031300, -0.044991400, + 0.068811100, -0.109231000, 0.198062000, -0.616292000, + -0.631601000, 0.200223000, -0.110202000, 0.069300200, + -0.045139900, 0.027900100, -0.010954200, -0.003731520 }, + { 0.003532460, 0.011319200, -0.028067900, 0.044929800, + -0.068628900, 0.108877000, -0.197289000, 0.611156000, + 0.636669000, -0.200890000, 0.110495000, -0.069443800, + 0.045177200, -0.027849100, 0.010859000, 
0.003782260 }, + { -0.003483680, -0.011406300, 0.028101000, -0.044862100, + 0.068437200, -0.108507000, 0.196489000, -0.606003000, + -0.641718000, 0.201528000, -0.110771000, 0.069577700, + -0.045208300, 0.027794600, -0.010762100, -0.003833390 }, + { 0.003435300, 0.011491900, -0.028130400, 0.044788500, + -0.068236100, 0.108122000, -0.195663000, 0.600834000, + 0.646748000, -0.202139000, 0.111032000, -0.069701700, + 0.045233300, -0.027736400, 0.010663500, 0.003884900 }, + { -0.003387320, -0.011575800, 0.028156400, -0.044708900, + 0.068025700, -0.107722000, 0.194811000, -0.595651000, + -0.651759000, 0.202722000, -0.111277000, 0.069815800, + -0.045252000, 0.027674600, -0.010563400, -0.003936790 }, + { 0.003339730, 0.011658200, -0.028178800, 0.044623500, + -0.067805900, 0.107308000, -0.193934000, 0.590452000, + 0.656750000, -0.203277000, 0.111506000, -0.069920000, + 0.045264500, -0.027609100, 0.010461600, 0.003989040 }, + { -0.003292530, -0.011738900, 0.028197800, -0.044532100, + 0.067576900, -0.106879000, 0.193032000, -0.585239000, + -0.661721000, 0.203803000, -0.111719000, 0.070014200, + -0.045270700, 0.027540000, -0.010358200, -0.004041660 }, + { 0.003245720, 0.011818000, -0.028213200, 0.044435000, + -0.067338800, 0.106435000, -0.192105000, 0.580013000, + 0.666671000, -0.204300000, 0.111916000, -0.070098300, + 0.045270600, -0.027467200, 0.010253200, 0.004094650 }, + { -0.003199290, -0.011895500, 0.028225200, -0.044332000, + 0.067091500, -0.105976000, 0.191153000, -0.574773000, + -0.671600000, 0.204769000, -0.112096000, 0.070172300, + -0.045264300, 0.027390800, -0.010146600, -0.004147990 }, + { 0.003153250, 0.011971400, -0.028233700, 0.044223200, + -0.066835300, 0.105504000, -0.190177000, 0.569520000, + 0.676508000, -0.205208000, 0.112259000, -0.070236200, + 0.045251500, -0.027310700, 0.010038300, 0.004201670 }, + { -0.003107590, -0.012045700, 0.028238700, -0.044108700, + 0.066570100, -0.105017000, 0.189177000, -0.564255000, + -0.681393000, 0.205617000, -0.112406000, 
0.070289900, + -0.045232400, 0.027226900, -0.009928420, -0.004255710 }, + { 0.003062300, 0.012118400, -0.028240400, 0.043988600, + -0.066296000, 0.104516000, -0.188154000, 0.558978000, + 0.686256000, -0.205997000, 0.112537000, -0.070333300, + 0.045207000, -0.027139400, 0.009816920, 0.004310080 }, + { -0.003017380, -0.012189500, 0.028238600, -0.043862800, + 0.066013100, -0.104001000, 0.187106000, -0.553689000, + -0.691096000, 0.206347000, -0.112651000, 0.070366500, + -0.045175100, 0.027048300, -0.009703800, -0.004364790 }, + { 0.002972840, 0.012259000, -0.028233400, 0.043731300, + -0.065721500, 0.103473000, -0.186036000, 0.548389000, + 0.695912000, -0.206667000, 0.112748000, -0.070389300, + 0.045136800, -0.026953400, 0.009589060, 0.004419840 }, + { -0.002928660, -0.012327000, 0.028224800, -0.043594300, + 0.065421200, -0.102930000, 0.184943000, -0.543079000, + -0.700705000, 0.206957000, -0.112828000, 0.070401800, + -0.045092000, 0.026854800, -0.009472700, -0.004475210 }, + { 0.002884840, 0.012393300, -0.028212900, 0.043451800, + -0.065112400, 0.102374000, -0.183827000, 0.537759000, + 0.705474000, -0.207216000, 0.112891000, -0.070403800, + 0.045040800, -0.026752600, 0.009354740, 0.004530910 }, + { -0.002841390, -0.012458000, 0.028197600, -0.043303700, + 0.064795000, -0.101805000, 0.182688000, -0.532429000, + -0.710218000, 0.207444000, -0.112937000, 0.070395400, + -0.044983100, 0.026646600, -0.009235160, -0.004586940 }, + { 0.002798310, 0.012521200, -0.028179000, 0.043150300, + -0.064469200, 0.101222000, -0.181528000, 0.527090000, + 0.714936000, -0.207641000, 0.112966000, -0.070376500, + 0.044918900, -0.026536900, 0.009113970, 0.004643290 }, + { -0.002755580, -0.012582800, 0.028157100, -0.042991400, + 0.064135100, -0.100627000, 0.180346000, -0.521742000, + -0.719630000, 0.207806000, -0.112977000, 0.070347100, + -0.044848200, 0.026423500, -0.008991190, -0.004699970 }, + { 0.002713230, 0.012642800, -0.028131900, 0.042827100, + -0.063792700, 0.100018000, -0.179142000, 
0.516386000, + 0.724297000, -0.207941000, 0.112972000, -0.070307200, + 0.044771000, -0.026306400, 0.008866800, 0.004756970 }, + { -0.002671240, -0.012701300, 0.028103500, -0.042657500, + 0.063442100, -0.099396500, 0.177917000, -0.511023000, + -0.728938000, 0.208043000, -0.112949000, 0.070256600, + -0.044687200, 0.026185500, -0.008740820, -0.004814280 }, + { 0.002629630, 0.012758200, -0.028071700, 0.042482700, + -0.063083500, 0.098762300, -0.176671000, 0.505652000, + 0.733552000, -0.208114000, 0.112908000, -0.070195500, + 0.044596900, -0.026060900, 0.008613240, 0.004871920 }, + { -0.002588390, -0.012813600, 0.028036800, -0.042302600, + 0.062716700, -0.098115500, 0.175405000, -0.500274000, + -0.738138000, 0.208153000, -0.112850000, 0.070123600, + -0.044500000, 0.025932700, -0.008484080, -0.004929880 }, + { 0.002547540, 0.012867400, -0.027998600, 0.042117300, + -0.062342100, 0.097456400, -0.174118000, 0.494890000, + 0.742698000, -0.208159000, 0.112775000, -0.070041100, + 0.044396600, -0.025800600, 0.008353330, 0.004988160 }, + { -0.002507070, -0.012919600, 0.027957300, -0.041926900, + 0.061959600, -0.096785000, 0.172811000, -0.489500000, + -0.747228000, 0.208133000, -0.112682000, 0.069947900, + -0.044286500, 0.025664900, -0.008221000, -0.005046750 }, + { 0.002466990, 0.012970400, -0.027912700, 0.041731400, + -0.061569300, 0.096101500, -0.171485000, 0.484104000, + 0.751731000, -0.208074000, 0.112571000, -0.069844000, + 0.044169900, -0.025525400, 0.008087110, 0.005105670 }, + { -0.002427310, -0.013019600, 0.027865000, -0.041530900, + 0.061171400, -0.095406000, 0.170139000, -0.478704000, + -0.756204000, 0.207982000, -0.112442000, 0.069729300, + -0.044046600, 0.025382200, -0.007951640, -0.005164890 }, + { 0.002388040, 0.013067200, -0.027814200, 0.041325400, + -0.060765800, 0.094698800, -0.168774000, 0.473299000, + 0.760649000, -0.207857000, 0.112296000, -0.069603800, + 0.043916800, -0.025235300, 0.007814610, 0.005224440 }, + { -0.002349170, -0.013113400, 0.027760300, 
-0.041114900, + 0.060352800, -0.093979900, 0.167391000, -0.467891000, + -0.765063000, 0.207699000, -0.112132000, 0.069467500, + -0.043780300, 0.025084700, -0.007676030, -0.005284290 }, + { 0.002310720, 0.013158100, -0.027703300, 0.040899500, + -0.059932300, 0.093249500, -0.165988000, 0.462478000, + 0.769447000, -0.207508000, 0.111950000, -0.069320400, + 0.043637200, -0.024930400, 0.007535900, 0.005344460 }, + { -0.002272680, -0.013201200, 0.027643200, -0.040679200, + 0.059504500, -0.092507700, 0.164568000, -0.457063000, + -0.773801000, 0.207282000, -0.111750000, 0.069162500, + -0.043487500, 0.024772300, -0.007394220, -0.005404930 }, + { 0.002235060, 0.013242800, -0.027580100, 0.040454200, + -0.059069400, 0.091754800, -0.163130000, 0.451645000, + 0.778123000, -0.207023000, 0.111531000, -0.068993700, + 0.043331100, -0.024610600, 0.007251000, 0.005465700 }, + { -0.002197860, -0.013283000, 0.027514000, -0.040224400, + 0.058627200, -0.090990900, 0.161674000, -0.446225000, + -0.782415000, 0.206731000, -0.111295000, 0.068814000, + -0.043168100, 0.024445100, -0.007106250, -0.005526770 }, + { 0.002161090, 0.013321700, -0.027444800, 0.039989900, + -0.058177900, 0.090216100, -0.160201000, 0.440803000, + 0.786674000, -0.206403000, 0.111041000, -0.068623400, + 0.042998500, -0.024275900, 0.006959980, 0.005588130 }, + { -0.002124730, -0.013358900, 0.027372700, -0.039750700, + 0.057721700, -0.089430600, 0.158711000, -0.435380000, + -0.790902000, 0.206042000, -0.110769000, 0.068422000, + -0.042822200, 0.024103100, -0.006812190, -0.005649770 }, + { 0.002088790, 0.013394600, -0.027297700, 0.039506900, + -0.057258500, 0.088634500, -0.157204000, 0.429957000, + 0.795096000, -0.205646000, 0.110479000, -0.068209600, + 0.042639300, -0.023926500, 0.006662890, 0.005711700 }, + { -0.002053260, -0.013428900, 0.027219700, -0.039258600, + 0.056788600, -0.087828100, 0.155681000, -0.424533000, + -0.799258000, 0.205216000, -0.110170000, 0.067986300, + -0.042449800, 0.023746300, -0.006512080, 
-0.005773900 }, + { 0.002018150, 0.013461700, -0.027138800, 0.039005800, + -0.056312000, 0.087011400, -0.154141000, 0.419109000, + 0.803387000, -0.204751000, 0.109843000, -0.067752100, + 0.042253600, -0.023562400, 0.006359780, 0.005836370 }, + { -0.001983430, -0.013493100, 0.027055100, -0.038748600, + 0.055828800, -0.086184700, 0.152586000, -0.413686000, + -0.807481000, 0.204250000, -0.109498000, 0.067507000, + -0.042050800, 0.023374800, -0.006205990, -0.005899110 }, + { 0.001949110, 0.013523000, -0.026968500, 0.038486900, + -0.055339100, 0.085348100, -0.151016000, 0.408264000, + 0.811542000, -0.203715000, 0.109135000, -0.067250900, + 0.041841300, -0.023183600, 0.006050720, 0.005962090 }, + { -0.001915170, -0.013551500, 0.026879000, -0.038221000, + 0.054843000, -0.084501700, 0.149431000, -0.402843000, + -0.815568000, 0.203145000, -0.108753000, 0.066983900, + -0.041625200, 0.022988700, -0.005893980, -0.006025320 }, + { 0.001881610, 0.013578500, -0.026786800, 0.037950700, + -0.054340600, 0.083645800, -0.147830000, 0.397425000, + 0.819559000, -0.202539000, 0.108353000, -0.066706000, + 0.041402500, -0.022790200, 0.005735770, 0.006088800 }, + { -0.001848420, -0.013604200, 0.026691800, -0.037676200, + 0.053832000, -0.082780500, 0.146215000, -0.392009000, + -0.823515000, 0.201897000, -0.107935000, 0.066417100, + -0.041173200, 0.022588000, -0.005576110, -0.006152510 }, + { 0.001815590, 0.013628400, -0.026594100, 0.037397500, + -0.053317300, 0.081905900, -0.144586000, 0.386596000, + 0.827436000, -0.201220000, 0.107498000, -0.066117300, + 0.040937300, -0.022382300, 0.005415000, 0.006216440 }, + { -0.001783110, -0.013651200, 0.026493600, -0.037114800, + 0.052796500, -0.081022300, 0.142944000, -0.381186000, + -0.831320000, 0.200507000, -0.107043000, 0.065806500, + -0.040694800, 0.022172900, -0.005252450, -0.006280600 }, + { 0.001750970, 0.013672700, -0.026390400, 0.036827900, + -0.052269800, 0.080129800, -0.141287000, 0.375780000, + 0.835168000, -0.199758000, 0.106570000, 
-0.065484900, + 0.040445700, -0.021959900, 0.005088480, 0.006344970 }, + { -0.001719160, -0.013692700, 0.026284600, -0.036537000, + 0.051737300, -0.079228600, 0.139618000, -0.370379000, + -0.838980000, 0.198974000, -0.106078000, 0.065152300, + -0.040190100, 0.021743400, -0.004923090, -0.006409560 }, + { 0.001687680, 0.013711400, -0.026176100, 0.036242200, + -0.051199100, 0.078318700, -0.137935000, 0.364982000, + 0.842754000, -0.198153000, 0.105568000, -0.064808800, + 0.039927800, -0.021523200, 0.004756290, 0.006474340 }, + { -0.001656520, -0.013728700, 0.026065000, -0.035943500, + 0.050655300, -0.077400500, 0.136240000, -0.359590000, + -0.846491000, 0.197295000, -0.105040000, 0.064454400, + -0.039659100, 0.021299500, -0.004588100, -0.006539330 }, + { 0.001625680, 0.013744600, -0.025951300, 0.035641000, + -0.050105900, 0.076474100, -0.134532000, 0.354204000, + 0.850190000, -0.196402000, 0.104493000, -0.064089100, + 0.039383700, -0.021072300, 0.004418520, 0.006604510 }, + { -0.001595150, -0.013759200, 0.025835000, -0.035334600, + 0.049551100, -0.075539700, 0.132813000, -0.348824000, + -0.853851000, 0.195471000, -0.103928000, 0.063712900, + -0.039101900, 0.020841500, -0.004247570, -0.006669880 }, + { 0.001564950, 0.013772400, -0.025716300, 0.035024600, + -0.048991000, 0.074597300, -0.131082000, 0.343451000, + 0.857474000, -0.194505000, 0.103344000, -0.063325900, + 0.038813500, -0.020607200, 0.004075260, 0.006735430 }, + { -0.001535060, -0.013784300, 0.025595000, -0.034710800, + 0.048425700, -0.073647200, 0.129339000, -0.338085000, + -0.861058000, 0.193501000, -0.102742000, 0.062928100, + -0.038518700, 0.020369400, -0.003901590, -0.006801170 }, + { 0.001505490, 0.013794900, -0.025471200, 0.034393500, + -0.047855300, 0.072689700, -0.127585000, 0.332726000, + 0.864602000, -0.192461000, 0.102122000, -0.062519400, + 0.038217400, -0.020128100, 0.003726580, 0.006867080 }, + { -0.001476260, -0.013804200, 0.025345000, -0.034072600, + 0.047279900, -0.071724700, 0.125821000, 
-0.327374000, + -0.868108000, 0.191383000, -0.101483000, 0.062100000, + -0.037909600, 0.019883400, -0.003550250, -0.006933170 }, + { 0.001447370, 0.013812100, -0.025216300, 0.033748200, + -0.046699500, 0.070752600, -0.124046000, 0.322032000, + 0.871573000, -0.190269000, 0.100826000, -0.061669700, + 0.037595400, -0.019635200, 0.003372610, 0.006999420 }, + { -0.001418820, -0.013818800, 0.025085300, -0.033420400, + 0.046114300, -0.069773400, 0.122261000, -0.316697000, + -0.874998000, 0.189118000, -0.100151000, 0.061228700, + -0.037274800, 0.019383500, -0.003193670, -0.007065840 }, + { 0.001390630, 0.013824200, -0.024951900, 0.033089200, + -0.045524500, 0.068787400, -0.120467000, 0.311372000, + 0.878383000, -0.187929000, 0.099457400, -0.060777000, + 0.036947800, -0.019128500, 0.003013430, 0.007132410 }, + { -0.001362810, -0.013828300, 0.024816100, -0.032754700, + 0.044930000, -0.067794800, 0.118663000, -0.306057000, + -0.881727000, 0.186703000, -0.098745500, 0.060314500, + -0.036614400, 0.018870000, -0.002831930, -0.007199140 }, + { 0.001335360, 0.013831200, -0.024678000, 0.032416900, + -0.044331000, 0.066795600, -0.116849000, 0.300751000, + 0.885030000, -0.185440000, 0.098015500, -0.059841400, + 0.036274700, -0.018608200, 0.002649160, 0.007266020 }, + { -0.001308310, -0.013832800, 0.024537700, -0.032075900, + 0.043727700, -0.065790100, 0.115027000, -0.295456000, + -0.888292000, 0.184139000, -0.097267200, 0.059357700, + -0.035928600, 0.018342900, -0.002465150, -0.007333050 }, + { 0.001281660, 0.013833100, -0.024395100, 0.031731800, + -0.043120000, 0.064778500, -0.113196000, 0.290172000, + 0.891512000, -0.182801000, 0.096500800, -0.058863300, + 0.035576300, -0.018074400, 0.002279910, 0.007400210 }, + { -0.001255410, -0.013832300, 0.024250300, -0.031384600, + 0.042508200, -0.063761000, 0.111357000, -0.284899000, + -0.894690000, 0.181426000, -0.095716300, 0.058358400, + -0.035217700, 0.017802500, -0.002093450, -0.007467500 }, + { 0.001229590, 0.013830200, -0.024103300, 
0.031034400, + -0.041892300, 0.062737600, -0.109510000, 0.279638000, + 0.897826000, -0.180012000, 0.094913600, -0.057842900, + 0.034852800, -0.017527400, 0.001905780, 0.007534920 }, + { -0.001204190, -0.013826900, 0.023954100, -0.030681200, + 0.041272400, -0.061708600, 0.107656000, -0.274389000, + -0.900919000, 0.178561000, -0.094092900, 0.057317000, + -0.034481800, 0.017248900, -0.001716930, -0.007602450 }, + { 0.001179210, 0.013822400, -0.023802800, 0.030325200, + -0.040648600, 0.060674200, -0.105794000, 0.269153000, + 0.903969000, -0.177073000, 0.093254300, -0.056780500, + 0.034104500, -0.016967200, 0.001526900, 0.007670090 }, + { -0.001154660, -0.013816700, 0.023649400, -0.029966300, + 0.040021100, -0.059634500, 0.103925000, -0.263929000, + -0.906976000, 0.175546000, -0.092397600, 0.056233700, + -0.033721100, 0.016682300, -0.001335720, -0.007737840 }, + { 0.001130530, 0.013809800, -0.023493900, 0.029604600, + -0.039389900, 0.058589800, -0.102050000, 0.258719000, + 0.909939000, -0.173982000, 0.091523000, -0.055676400, + 0.033331600, -0.016394100, 0.001143390, 0.007805680 }, + { -0.001106820, -0.013801800, 0.023336400, -0.029240200, + 0.038755100, -0.057540100, 0.100168000, -0.253523000, + -0.912859000, 0.172380000, -0.090630600, 0.055108900, + -0.032936000, 0.016102800, -0.000949933, -0.007873600 }, + { 0.001083520, 0.013792600, -0.023176800, 0.028873100, + -0.038117000, 0.056485700, -0.098280000, 0.248340000, + 0.915735000, -0.170740000, 0.089720400, -0.054531000, + 0.032534400, -0.015808300, 0.000755366, 0.007941610 }, + { -0.001060610, -0.013782200, 0.023015300, -0.028503500, + 0.037475400, -0.055426700, 0.096386300, -0.243173000, + -0.918566000, 0.169062000, -0.088792500, 0.053942900, + -0.032126700, 0.015510700, -0.000559702, -0.008009680 }, + { 0.001038070, 0.013770700, -0.022851800, 0.028131300, + -0.036830600, 0.054363300, -0.094487100, 0.238020000, + 0.921353000, -0.167346000, 0.087846900, -0.053344600, + 0.031713100, -0.015210000, 0.000362959, 
0.008077800 }, + { -0.001015900, -0.013758100, 0.022686400, -0.027756600, + 0.036182700, -0.053295800, 0.092582800, -0.232883000, + -0.924095000, 0.165592000, -0.086883600, 0.052736100, + -0.031293500, 0.014906200, -0.000165153, -0.008145980 }, + { 0.000994065, 0.013744400, -0.022519100, 0.027379600, + -0.035531800, 0.052224200, -0.090673700, 0.227762000, + 0.926792000, -0.163800000, 0.085902800, -0.052117600, + 0.030868000, -0.014599300, -0.000033699, 0.008214200 }, + { -0.000972545, -0.013729500, 0.022350000, -0.027000200, + 0.034878000, -0.051148700, 0.088760100, -0.222657000, + -0.929443000, 0.161970000, -0.084904600, 0.051489100, + -0.030436600, 0.014289400, 0.000233582, -0.008282450 }, + { 0.000951314, 0.013713600, -0.022179000, 0.026618500, + -0.034221300, 0.050069600, -0.086842200, 0.217568000, + 0.932049000, -0.160102000, 0.083888900, -0.050850500, + 0.029999400, -0.013976600, -0.000434476, 0.008350720 }, + { -0.000930348, -0.013696600, 0.022006200, -0.026234500, + 0.033562000, -0.048986900, 0.084920300, -0.212496000, + -0.934609000, 0.158196000, -0.082855900, 0.050202100, + -0.029556500, 0.013660700, 0.000636364, -0.008419010 }, + { 0.000909621, 0.013678500, -0.021831600, 0.025848400, + -0.032900000, 0.047901000, -0.082994900, 0.207442000, + 0.937123000, -0.156252000, 0.081805600, -0.049543700, + 0.029107700, -0.013341900, -0.000839230, 0.008487300 }, + { -0.000889109, -0.013659300, 0.021655300, -0.025460200, + 0.032235500, -0.046811900, 0.081066100, -0.202406000, + -0.939591000, 0.154269000, -0.080738200, 0.048875600, + -0.028653300, 0.013020200, 0.001043050, -0.008555590 }, + { 0.000868789, 0.013639100, -0.021477300, 0.025070000, + -0.031568700, 0.045719800, -0.079134300, 0.197388000, + 0.942012000, -0.152249000, 0.079653700, -0.048197700, + 0.028193200, -0.012695600, -0.001247820, 0.008623860 }, + { -0.000848641, -0.013617800, 0.021297700, -0.024677800, + 0.030899600, -0.044624900, 0.077199800, -0.192388000, + -0.944386000, 0.150191000, -0.078552200, 
0.047510200, + -0.027727600, 0.012368200, 0.001453500, -0.008692120 }, + { 0.000828649, 0.013595500, -0.021116400, 0.024283600, + -0.030228300, 0.043527300, -0.075262900, 0.187407000, + 0.946714000, -0.148095000, 0.077433700, -0.046813100, + 0.027256300, -0.012037900, -0.001660090, 0.008760350 }, + { -0.000808800, -0.013572200, 0.020933400, -0.023887600, + 0.029554900, -0.042427300, 0.073323800, -0.182445000, + -0.948994000, 0.145960000, -0.076298500, 0.046106400, + -0.026779500, 0.011704900, 0.001867560, -0.008828540 }, + { 0.000789086, 0.013547900, -0.020748900, 0.023489900, + -0.028879500, 0.041325100, -0.071382800, 0.177503000, + 0.951226000, -0.143788000, 0.075146500, -0.045390300, + 0.026297300, -0.011369100, -0.002075890, 0.008896700 }, + { -0.000769507, -0.013522500, 0.020562800, -0.023090300, + 0.028202300, -0.040220700, 0.069440400, -0.172581000, + -0.953411000, 0.141578000, -0.073977900, 0.044664700, + -0.025809600, 0.011030600, 0.002285070, -0.008964800 }, + { 0.000750066, 0.013496200, -0.020375300, 0.022689100, + -0.027523400, 0.039114400, -0.067496700, 0.167680000, + 0.955548000, -0.139329000, 0.072792800, -0.043929900, + 0.025316500, -0.010689400, -0.002495060, 0.009032840 }, + { -0.000730774, -0.013469000, 0.020186200, -0.022286300, + 0.026842800, -0.038006300, 0.065552000, -0.162799000, + -0.957637000, 0.137043000, -0.071591300, 0.043185700, + -0.024818200, 0.010345600, 0.002705870, -0.009100820 }, + { 0.000711649, 0.013440700, -0.019995700, 0.021882000, + -0.026160600, 0.036896600, -0.063606700, 0.157939000, + 0.959678000, -0.134719000, 0.070373400, -0.042432500, + 0.024314500, -0.009999080, -0.002917450, 0.009168730 }, + { -0.000692716, -0.013411500, 0.019803700, -0.021476100, + 0.025477100, -0.035785500, 0.061661000, -0.153101000, + -0.961670000, 0.132357000, -0.069139400, 0.041670100, + -0.023805700, 0.009650030, 0.003129790, -0.009236550 }, + { 0.000674005, 0.013381400, -0.019610300, 0.021068900, + -0.024792100, 0.034673200, -0.059715200, 
0.148285000, + 0.963614000, -0.129958000, 0.067889200, -0.040898700, + 0.023291700, -0.009298430, -0.003342870, 0.009304290 }, + { -0.000655555, -0.013350400, 0.019415600, -0.020660300, + 0.024106000, -0.033559800, 0.057769600, -0.143491000, + -0.965508000, 0.127520000, -0.066623100, 0.040118300, + -0.022772500, 0.008944310, 0.003556670, -0.009371930 }, + { 0.000637409, 0.013318400, -0.019219600, 0.020250300, + -0.023418700, 0.032445500, -0.055824500, 0.138719000, + 0.967354000, -0.125045000, 0.065341100, -0.039329200, + 0.022248400, -0.008587720, -0.003771170, 0.009439470 }, + { -0.000619615, -0.013285600, 0.019022200, -0.019839200, + 0.022730400, -0.031330400, 0.053880200, -0.133970000, + -0.969150000, 0.122533000, -0.064043400, 0.038531200, + -0.021719200, 0.008228680, 0.003986340, -0.009506900 }, + { 0.000602227, 0.013251800, -0.018823600, 0.019426800, + -0.022041200, 0.030214800, -0.051936900, 0.129244000, + 0.970897000, -0.119983000, 0.062730000, -0.037724600, + 0.021185100, -0.007867250, -0.004202160, 0.009574200 }, + { -0.000585302, -0.013217200, 0.018623700, -0.019013400, + 0.021351200, -0.029098800, 0.049995000, -0.124542000, + -0.972595000, 0.117395000, -0.061401200, 0.036909400, + -0.020646100, 0.007503460, 0.004418620, -0.009641370 }, + { 0.000568899, 0.013181700, -0.018422700, 0.018598800, + -0.020660400, 0.027982600, -0.048054700, 0.119864000, + 0.974242000, -0.114770000, 0.060057000, -0.036085600, + 0.020102300, -0.007137330, -0.004635680, 0.009708410 }, + { -0.000553077, -0.013145400, 0.018220400, -0.018183300, + 0.019969100, -0.026866300, 0.046116200, -0.115210000, + -0.975840000, 0.112107000, -0.058697600, 0.035253500, + -0.019553700, 0.006768930, 0.004853320, -0.009775290 }, + { 0.000537896, 0.013108200, -0.018017000, 0.017766900, + -0.019277200, 0.025750100, -0.044180000, 0.110580000, + 0.977388000, -0.109408000, 0.057323100, -0.034413000, + 0.019000400, -0.006398270, -0.005071530, 0.009842020 }, + { -0.000523415, -0.013070200, 0.017812500, 
-0.017349500, + 0.018585000, -0.024634100, 0.042246100, -0.105975000, + -0.978886000, 0.106671000, -0.055933600, 0.033564300, + -0.018442500, 0.006025420, 0.005290280, -0.009908570 }, + { 0.000509689, 0.013031400, -0.017606900, 0.016931400, + -0.017892400, 0.023518600, -0.040315000, 0.101396000, + 0.980333000, -0.103897000, 0.054529300, -0.032707500, + 0.017880100, -0.005650390, -0.005509550, 0.009974950 }, + { -0.000496767, -0.012991800, 0.017400200, -0.016512500, + 0.017199700, -0.022403700, 0.038386900, -0.096841900, + -0.981730000, 0.101086000, -0.053110400, 0.031842600, + -0.017313100, 0.005273240, 0.005729310, -0.010041100 }, + { 0.000484696, 0.012951400, -0.017192600, 0.016092900, + -0.016506800, 0.021289600, -0.036462100, 0.092313600, + 0.983076000, -0.098237800, 0.051676900, -0.030969900, + 0.016741600, -0.004894020, -0.005949540, 0.010107100 }, + { -0.000473514, -0.012910200, 0.016983900, -0.015672700, + 0.015814000, -0.020176400, 0.034540800, -0.087811500, + -0.984372000, 0.095353100, -0.050229000, 0.030089200, + -0.016165800, 0.004512750, 0.006170210, -0.010172900 }, + { 0.000463250, 0.012868200, -0.016774300, 0.015251900, + -0.015121200, 0.019064300, -0.032623200, 0.083335800, + 0.985617000, -0.092431700, 0.048766900, -0.029200900, + 0.015585700, -0.004129480, -0.006391310, 0.010238400 }, + { -0.000453924, -0.012825500, 0.016563700, -0.014830600, + 0.014428700, -0.017953500, 0.030709800, -0.078886800, + -0.986810000, 0.089473800, -0.047290800, 0.028304900, + -0.015001400, 0.003744260, 0.006612810, -0.010303700 }, + { 0.000445550, 0.012782000, -0.016352300, 0.014408800, + -0.013736500, 0.016844000, -0.028800600, 0.074464900, + 0.987953000, -0.086479400, 0.045800700, -0.027401400, + 0.014412800, -0.003357130, -0.006834680, 0.010368800 }, + { -0.000438126, -0.012737800, 0.016140000, -0.013986600, + 0.013044600, -0.015736200, 0.026896100, -0.070070300, + -0.989045000, 0.083448600, -0.044296800, 0.026490500, + -0.013820200, 0.002968140, 0.007056910, 
-0.010433600 }, + { 0.000431643, 0.012692900, -0.015926800, 0.013564100, + -0.012353300, 0.014630100, -0.024996400, 0.065703200, + 0.990085000, -0.080381600, 0.042779400, -0.025572200, + 0.013223500, -0.002577320, -0.007279450, 0.010498200 }, + { -0.000426080, -0.012647200, 0.015712900, -0.013141300, + 0.011662600, -0.013525800, 0.023101700, -0.061364100, + -0.991074000, 0.077278600, -0.041248500, 0.024646800, + -0.012622900, 0.002184730, 0.007502300, -0.010562400 }, + { 0.000421406, 0.012600900, -0.015498200, 0.012718300, + -0.010972500, 0.012423600, -0.021212500, 0.057053200, + 0.992011000, -0.074139500, 0.039704400, -0.023714300, + 0.012018300, -0.001790400, -0.007725430, 0.010626400 }, + { -0.000417579, -0.012553800, 0.015282700, -0.012295200, + 0.010283300, -0.011323700, 0.019328800, -0.052770700, + -0.992897000, 0.070964600, -0.038147100, 0.022774800, + -0.011410000, 0.001394390, 0.007948800, -0.010690100 }, + { 0.000414547, 0.012506100, -0.015066500, 0.011871900, + -0.009594970, 0.010226000, -0.017451100, 0.048517000, + 0.993731000, -0.067754100, 0.036576900, -0.021828400, + 0.010797900, -0.000996745, -0.008172400, 0.010753400 }, + { -0.000412251, -0.012457700, 0.014849700, -0.011448600, + 0.008907620, -0.009130930, 0.015579400, -0.044292300, + -0.994513000, 0.064507900, -0.034994000, 0.020875300, + -0.010182200, 0.000597505, 0.008396190, -0.010816400 }, + { 0.000410621, 0.012408600, -0.014632100, 0.011025300, + -0.008221360, 0.008038500, -0.013714100, 0.040096900, + 0.995244000, -0.061226400, 0.033398500, -0.019915600, + 0.009562800, -0.000196720, -0.008620160, 0.010879200 }, + { -0.000409585, -0.012358900, 0.014414000, -0.010602000, + 0.007536270, -0.006948890, 0.011855400, -0.035931100, + -0.995922000, 0.057909700, -0.031790600, 0.018949300, + -0.008939920, -0.000205562, 0.008844280, -0.010941500 }, + { 0.000409063, 0.012308600, -0.014195300, 0.010178900, + -0.006852470, 0.005862260, -0.010003500, 0.031795100, + 0.996549000, -0.054557800, 0.030170600, 
-0.017976700, + 0.008313610, 0.000609294, -0.009068510, 0.011003500 }, + { -0.000408972, -0.012257600, 0.013976000, -0.009755990, + 0.006170040, -0.004778750, 0.008158790, -0.027689100, + -0.997124000, 0.051171100, -0.028538400, 0.016997800, + -0.007683930, -0.001014430, 0.009292840, -0.011065200 }, + { 0.000409230, 0.012206100, -0.013756200, 0.009333300, + -0.005489070, 0.003698520, -0.006321360, 0.023613500, + 0.997646000, -0.047749500, 0.026894500, -0.016012700, + 0.007050970, 0.001420910, -0.009517230, 0.011126400 }, + { -0.000409752, -0.012153900, 0.013535900, -0.008910910, + 0.004809650, -0.002621710, 0.004491510, -0.019568500, + -0.998117000, 0.044293400, -0.025238900, 0.015021700, + -0.006414810, -0.001828700, 0.009741670, -0.011187300 }, + { 0.000410456, 0.012101100, -0.013315100, 0.008488880, + -0.004131890, 0.001548470, -0.002669450, 0.015554300, + 0.998535000, -0.040802900, 0.023571800, -0.014024700, + 0.005775510, 0.002237740, -0.009966110, 0.011247800 }, + { -0.000411265, -0.012047800, 0.013093800, -0.008067240, + 0.003455870, -0.000478948, 0.000855436, -0.011571200, + -0.998901000, 0.037278100, -0.021893500, 0.013021900, + -0.005133180, -0.002647990, 0.010190500, -0.011307900 }, + { 0.000412106, 0.011993900, -0.012872200, 0.007646080, + -0.002781680, -0.000586709, 0.000950314, 0.007619390, + 0.999215000, -0.033719300, 0.020204200, -0.012013600, + 0.004487890, 0.003059380, -0.010414900, 0.011367600 }, + { -0.000412915, -0.011939500, 0.012650100, -0.007225430, + 0.002109420, 0.001648360, -0.002747570, -0.003699140, + -0.999477000, 0.030126600, -0.018503900, 0.010999700, + -0.003839710, -0.003471880, 0.010639200, -0.011426900 }, + { 0.000413633, 0.011884500, -0.012427700, 0.006805360, + -0.001439170, -0.002705860, 0.004536100, -0.000189332, + 0.999686000, -0.026500200, 0.016793100, -0.009980360, + 0.003188740, 0.003885440, -0.010863400, 0.011485700 }, + { -0.000414214, -0.011829000, 0.012205000, -0.006385910, + 0.000771025, 0.003759060, -0.006315690, 
0.004045800, + -0.999843000, 0.022840300, -0.015071700, 0.008955820, + -0.002535060, -0.004299990, 0.011087500, -0.011544100 }, + { 0.000414623, 0.011773000, -0.011982000, 0.005967150, + -0.000105072, -0.004807840, 0.008086100, -0.007870030, + 0.999948000, -0.019147200, 0.013340200, -0.007926170, + 0.001878750, 0.004715490, -0.011311400, 0.011602000 }, + { -0.000414833, -0.011716500, 0.011758700, -0.005549120, + -0.000558600, 0.005852040, -0.009847130, 0.011661800, + -1.000000000, 0.015420900, -0.011598600, 0.006891530, + -0.001219900, -0.005131880, 0.011535200, -0.011659500 }, +}; + + +#if defined FIR_C_IMPLEMENTATION /* portable C++ path; FIR.h defines this when neither HAVE_BGL nor HAVE_BGP is set */ + +FIR::FIR() /* start with an all-zero delay line */ +{ + memset(itsDelayLine, 0, sizeof itsDelayLine); +} + + +fcomplex FIR::processNextSample(fcomplex sample, const float weights[NR_TAPS]) /* feeds one sample through the filter and returns sample * weights[0] plus weights[tap] times the sample seen tap calls earlier, for tap = 1 .. NR_TAPS-1; the weights parameter hides the static FIR::weights table, callers pass the coefficients to use */ +{ + fcomplex sum = sample * weights[0]; + itsDelayLine[0] = sample; /* slot 0 only stages the new sample for the shift below */ + + for (int tap = NR_TAPS; -- tap > 0;) { /* oldest slot first, so each slot is read before it is overwritten */ + sum += weights[tap] * itsDelayLine[tap]; + itsDelayLine[tap] = itsDelayLine[tap - 1]; + } + + return sum; +} + +#endif + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/FIR.h b/RTCP/CNProc/src/FIR.h new file mode 100644 index 0000000000000000000000000000000000000000..1a6e3fe2ce30a4e449dcf3dcb9a267d60a8c1ce8 --- /dev/null +++ b/RTCP/CNProc/src/FIR.h @@ -0,0 +1,32 @@ +#ifndef LOFAR_CNPROC_FIR_H +#define LOFAR_CNPROC_FIR_H + +#if 0 || !(defined HAVE_BGL || defined HAVE_BGP) /* the leading 0 is a manual switch: changing it to 1 selects the C++ filter even when HAVE_BGL or HAVE_BGP is defined */ +#define FIR_C_IMPLEMENTATION +#endif + +#include <Common/lofar_complex.h> + +#include <Interface/Config.h> + + +namespace LOFAR { +namespace RTCP { + +class FIR { /* per-instance filter state (C++ path only) plus the shared coefficient table */ + public: +#if defined FIR_C_IMPLEMENTATION + FIR(); + + fcomplex processNextSample(fcomplex sample, const float weights[NR_TAPS]); + + fcomplex itsDelayLine[NR_TAPS]; /* earlier samples, newest at index 1; index 0 is scratch for processNextSample */ +#endif + + static const float weights[256][NR_TAPS]; /* 256 rows of NR_TAPS coefficients; presumably one row per channel - TODO confirm */ +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/FIR_Asm.S b/RTCP/CNProc/src/FIR_Asm.S new file mode 100644 index 
0000000000000000000000000000000000000000..fe80bb889954dd55cb19b6cc31bafb09a95ba793 --- /dev/null +++ b/RTCP/CNProc/src/FIR_Asm.S @@ -0,0 +1,2495 @@ +# FIR.S: FIR filter assembly for BG/L double Hummer # +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# $Id$ + +#if defined HAVE_BGL || defined HAVE_BGP +#define CN_PROCESSING +#include <Interface/Config.h> + +#define I4COMPLEX_SIZE 1 +#define I8COMPLEX_SIZE 2 +#define I16COMPLEX_SIZE 4 +#define FCOMPLEX_SIZE 8 +#define DCOMPLEX_SIZE 16 +#define FLOAT_SIZE 4 + +.global _FIR_constants_used +_FIR_constants_used: /* two words recording the NR_TAPS and NR_POLARIZATIONS values this file was assembled with */ + .long NR_TAPS + .long NR_POLARIZATIONS + + +.align 5 +.global _cn_mutex_lock +_cn_mutex_lock: /* r3 = address of the lock word; returns once that word reads 0. NOTE(review): the word is only polled, never written here (no lwarx/stwcx. pair), so this alone does not keep a second waiter out - confirm how callers use it */ + sync +0: lwz 0,0(3) + cmpwi 0,0 + bne- 0b /* keep polling while the word is non-zero */ + sync + blr + +.global _cn_mutex_unlock +_cn_mutex_unlock: /* r3 = address of the lock word; stores 0 to it, bracketed by sync */ + sync + li 0,0 + stw 0,0(3) + sync + blr + +.global _rdtsc +_rdtsc: /* reads the time base into r3 (SPR 269, upper word) and r4 (SPR 268, lower word) */ +0: mfspr 3,269 + mfspr 4,268 + mfspr 5,269 + cmpw 5,3 + bne- 0b /* upper word changed while the lower word was being read: retry */ + blr + +.align 5 +.global _prefetch +_prefetch: /* r3 = start address, r4 = number of touches, r5 = byte step; issues dcbt on r3, r3+r5, ... NOTE(review): r4 == 0 makes bdnz wrap the counter instead of skipping the loop - confirm callers never pass 0 */ + mtctr 4 + +0: + dcbt 0,3 + add 3,3,5 + bdnz 0b + + blr + + +.align 5 +.global _transpose_4x8 +_transpose_4x8: /* register roles inferred from the code, TODO confirm against the C++ prototype: r3 = out, r4 = in, r5 = pass count, r6 = byte distance between the 4 inputs read per pass, r7 = byte distance between the outputs of successive passes */ + + mulli 8,6,3 + mtctr 5 + subi 7,7,3*FCOMPLEX_SIZE /* the 4 stores of a pass add r7 once and 8 three times, so pre-subtract 24 to advance r3 by the original r7 per pass */ + li 9,8 + subfic 8,8,FCOMPLEX_SIZE /* r8 = 8 - 3*r6: undoes the 3 input strides of the previous pass and moves on one fcomplex */ + + sub 3,3,7 + sub 4,4,8 /* pre-bias both pointers: every access below is an update-form load or store */ + +L4: + lfpsux 0,4,8 + lfpsux 1,4,6 + lfpsux 2,4,6 + lfpsux 3,4,6 + +#if !defined HAVE_BGP + 
dcbz 3,7 +#endif + stfpsux 0,3,7 + stfpsux 1,3,9 + stfpsux 2,3,9 + stfpsux 3,3,9 + + bdnz L4 + + blr + + +.align 5 +.global _phase_shift_and_transpose +_phase_shift_and_transpose: + +# Performs partial transpose between FFT and correlator and multiplies +# phase shifts on the fly. +# The outputs of four FFTs are transposed: two time slots for each of +# the two polarizations. + +# arguments: +# r3 : fcomplex *out +# r4 : const fcomplex *in +# r5 : const dcomplex phases[2][2]: v0, delta_v0, v1, delta_v1 +# r6 : stride +# r7 : nrChannels + +# f8 : v0; +# f9 : delta_v0 +# f10 : v1 +# f11 : delta_v1 + + li 9,-16 # save registers + stfpdux 14,1,9 + stfpdux 15,1,9 + stfpdux 16,1,9 + stfpdux 17,1,9 + + lfpdx 8,0,5 + li 9,16 + lfpdux 9,5,9 + lfpdux 10,5,9 + lfpdux 11,5,9 + + mulli 8,7,24 + srwi 0,7,1 # nrChannels / 2 + slwi 7,7,3 # nrChannels * FCOMPLEX_SIZE + li 10,FCOMPLEX_SIZE + mtctr 0 /* one loop pass handles two channels. NOTE(review): an odd nrChannels leaves the last channel unprocessed and nrChannels < 2 loads the counter with 0, which bdnz wraps - confirm callers guarantee an even, non-zero count */ + + lfpsx 0,0,4 + lfpsux 1,4,7 + lfpsux 2,4,7 + lfpsux 3,4,7 + + sub 3,3,6 /* pre-bias out for the update-form stores; each channel then advances r3 by r6 + 3*FCOMPLEX_SIZE */ + subfic 8,8,8 # FCOMPLEX_SIZE*(1-3*nrChannels) + + fxpmul 12,0,8 + fxpmul 13,1,8 + fxpmul 14,2,10 + fxpmul 15,3,10 + +L5: /* first half works on the channel held in f0-f3 while loading the next one into f4-f7; the second half swaps the two register sets */ + fxpmul 16,8,9 + fxpmul 17,10,11 + + fxcxnpma 0,0,8,12 ; lfpsux 4,4,8 + fxcxnpma 1,1,8,13 ; lfpsux 5,4,7 + fxcxnpma 2,2,10,14 ; lfpsux 6,4,7 + fxcxnpma 3,3,10,15 ; lfpsux 7,4,7 + + fxcxnpma 8,8,9,16 +#if defined HAVE_BGP + fxcxnpma 10,10,11,17 +#else + fxcxnpma 10,10,11,17 ; dcbz 3,6 +#endif + + fxpmul 12,4,8 ; stfpsux 0,3,6 + fxpmul 13,5,8 ; stfpsux 1,3,10 + fxpmul 14,6,10 ; stfpsux 2,3,10 + fxpmul 15,7,10 ; stfpsux 3,3,10 + + fxpmul 16,8,9 + fxpmul 17,10,11 + + fxcxnpma 4,4,8,12 ; lfpsux 0,4,8 /* preload for the next pass. NOTE(review): on the final pass these four loads fetch channel index nrChannels of each FFT output, which for the fourth output lies past the end of the input - confirm the buffer has that slack */ + fxcxnpma 5,5,8,13 ; lfpsux 1,4,7 + fxcxnpma 6,6,10,14 ; lfpsux 2,4,7 + fxcxnpma 7,7,10,15 ; lfpsux 3,4,7 + + fxcxnpma 8,8,9,16 +#if defined HAVE_BGP + fxcxnpma 10,10,11,17 +#else + fxcxnpma 10,10,11,17 ; dcbz 3,6 +#endif + + fxpmul 12,0,8 ; stfpsux 4,3,6 + fxpmul 13,1,8 ; stfpsux 5,3,10 + fxpmul 14,2,10 ; stfpsux 6,3,10 + fxpmul 15,3,10 ; stfpsux 7,3,10 + + bdnz L5 + + lfpdx 17,0,1 # restore 
registers + lfpdux 16,1,9 + lfpdux 15,1,9 + lfpdux 14,1,9 + addi 1,1,16 + + blr + + +# this is a union of all possible constant tables + +.global _ZN5LOFAR4RTCP13_FIR_fp_tableE +.comm _ZN5LOFAR4RTCP13_FIR_fp_tableE,256*256*FCOMPLEX_SIZE,32 /* room for 256*256 entries of FCOMPLEX_SIZE bytes, requested alignment 32; the filters index it with a raw sample value shifted left by 3 */ + + + +# filter for i4complex + +.global _ZN5LOFAR4RTCP7_filterINS_5TYPES9i4complexEEEvjPKfPKT_PSt7complexIfEi +_ZN5LOFAR4RTCP7_filterINS_5TYPES9i4complexEEEvjPKfPKT_PSt7complexIfEi: /* NOTE(review): the C++ signature encoded in this symbol passes the weights as const float * (PKf), so r4 points at 16 floats rather than the fcomplex[16] stated below */ +# filters all samples for one station, one polarization + +# arguments: +# r3 : nrChannels +# r4 : pointer to weights line (const fcomplex[16]) +# r5 : pointer to first sample (const i4complex[16*r7]) +# r6 : pointer to result (fcomplex *) +# r7 : number of samples / 16 + +# internally used: +# r9 : 8 +# r10 : 512 +# r11 : pointer to _ZN5LOFAR4RTCP13_FIR_fp_tableE +# r28-r31 : prefetched samples +# f0-f15 : delay line (real in primary, imaginary in secondary unit) +# f16-f23 : weights (these are real values alternately stored in primary +# and secondary units) +# f24-f29 : sums + +# The implementation works on 5 or 6 time samples concurrently, to avoid +# stalls in the double hummer. This unfortunately leads to totally +# incomprehensible code. The loop processes 16 samples at a time. +# The input is converted from i4complex to dcomplex by a table lookup. + + subi 1,1,272 # adjust stack pointer + + mtctr 7 # set number of iterations + + la 8,16(1) # save call-saved registers + li 9,DCOMPLEX_SIZE /* frame layout: r28-r31 at 0(r1), then f14-f29 as sixteen 16-byte pairs from 16(r1), i.e. 16 + 16*16 = 272 bytes */ + stfpdx 14,0,8 + stfpdux 15,8,9 + stfpdux 16,8,9 + stfpdux 17,8,9 + stfpdux 18,8,9 + stfpdux 19,8,9 + stfpdux 20,8,9 + stfpdux 21,8,9 + stfpdux 22,8,9 + stfpdux 23,8,9 + stfpdux 24,8,9 + stfpdux 25,8,9 + stfpdux 26,8,9 + stfpdux 27,8,9 + stfpdux 28,8,9 + stfpdux 29,8,9 + stmw 28,0(1) # save r28 ... 
r31 + + # convert 15 i4complex numbers to fcomplex + slwi 10,3,2 # I4COMPLEX_SIZE*nrChannels*NR_POLARIZATIONS + ; lbzx 29,0,5 # fetch FIR history samples + lis 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@ha ; lbzux 30,5,10 + la 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@l(11);lbzux 31,5,10 + slwi 29,29,3 ; lbzux 28,5,10 + ; lfpsx 1,11,29 + slwi 30,30,3 ; lbzux 29,5,10 + ; lfpsx 2,11,30 + slwi 31,31,3 ; lbzux 30,5,10 + ; lfpsx 3,11,31 + slwi 28,28,3 ; lbzux 31,5,10 + ; lfpsx 4,11,28 + slwi 29,29,3 ; lbzux 28,5,10 + ; lfpsx 5,11,29 + slwi 30,30,3 ; lbzux 29,5,10 + ; lfpsx 6,11,30 + slwi 31,31,3 ; lbzux 30,5,10 + ; lfpsx 7,11,31 + slwi 28,28,3 ; lbzux 31,5,10 + ; lfpsx 8,11,28 + slwi 29,29,3 ; lbzux 28,5,10 + ; lfpsx 9,11,29 + slwi 30,30,3 ; lbzux 29,5,10 + ; lfpsx 10,11,30 + slwi 31,31,3 ; lbzux 30,5,10 + ; lfpsx 11,11,31 + slwi 28,28,3 ; lbzux 31,5,10 + slwi 29,29,3 ; lfpsx 12,11,28 + slwi 30,30,3 ; lfpsx 13,11,29 + slwi 31,31,3 ; lfpsx 14,11,30 + ; lfpsx 15,11,31 + + li 9,FCOMPLEX_SIZE ; lfpsx 16,0,4 # load weights line + sub 6,6,9 ; lfpsux 17,4,9 + ; lfpsux 18,4,9 + ; lfpsux 19,4,9 + ; lfpsux 20,4,9 + ; lfpsux 21,4,9 + # essentially part of loop, but interleaved with + # the stores in the tail of the loop + fxsmul 24,16,15 ; lfpsux 22,4,9 + fxpmul 25,17,15 ; lfpsux 23,4,9 + + fxsmul 26,17,15 ; lbzux 28,5,10 # prefetch samples + fxpmul 27,18,15 ; lbzux 29,5,10 + fxsmul 28,18,15 ; lbzux 30,5,10 + fxpmul 29,19,15 ; lbzux 31,5,10 +0: + # time steps 0-5 + + fxcsmadd 24,23,1,24 ; slwi 28,28,3 + fxcpmadd 25,20,9,25 ; lfpsx 0,11,28 + fxcsmadd 26,20,9,26 + fxcpmadd 27,21,9,27 + fxcsmadd 28,21,9,28 + fxcpmadd 29,22,9,29 ; lbzux 28,5,10 + + fxcpmadd 24,23,2,24 + fxcsmadd 25,23,2,25 + fxcpmadd 26,20,10,26 + fxcsmadd 27,20,10,27 + fxcpmadd 28,21,10,28 + fxcsmadd 29,21,10,29 + + fxcsmadd 24,22,3,24 ; slwi 29,29,3 + fxcpmadd 25,23,3,25 ; lfpsx 1,11,29 + fxcsmadd 26,23,3,26 + fxcpmadd 27,20,11,27 + fxcsmadd 28,20,11,28 + fxcpmadd 29,21,11,29 ; lbzux 29,5,10 + + fxcpmadd 24,22,4,24 + fxcsmadd 
25,22,4,25 + fxcpmadd 26,23,4,26 + fxcsmadd 27,23,4,27 + fxcpmadd 28,20,12,28 + fxcsmadd 29,20,12,29 + + fxcsmadd 24,21,5,24 ; slwi 30,30,3 + fxcpmadd 25,22,5,25 ; lfpsx 2,11,30 + fxcsmadd 26,22,5,26 + fxcpmadd 27,23,5,27 + fxcsmadd 28,23,5,28 + fxcpmadd 29,20,13,29 ; lbzux 30,5,10 + + fxcpmadd 24,21,6,24 + fxcsmadd 25,21,6,25 + fxcpmadd 26,22,6,26 + fxcsmadd 27,22,6,27 + fxcpmadd 28,23,6,28 + fxcsmadd 29,23,6,29 + + fxcsmadd 24,20,7,24 ; slwi 31,31,3 + fxcpmadd 25,21,7,25 ; lfpsx 3,11,31 + fxcsmadd 26,21,7,26 + fxcpmadd 27,22,7,27 + fxcsmadd 28,22,7,28 + fxcpmadd 29,23,7,29 ; lbzux 31,5,10 + + fxcpmadd 24,20,8,24 + fxcsmadd 25,20,8,25 + fxcpmadd 26,21,8,26 + fxcsmadd 27,21,8,27 + fxcpmadd 28,22,8,28 + fxcsmadd 29,22,8,29 + + fxcpmadd 24,17,14,24 ; slwi 28,28,3 + fxcsmadd 25,17,14,25 ; lfpsx 4,11,28 + fxcpmadd 26,18,14,26 + fxcsmadd 27,18,14,27 + fxcpmadd 28,19,14,28 + fxcsmadd 29,19,14,29 ; lbzux 28,5,10 + + fxcpmadd 24,16,0,24 + fxcsmadd 25,16,0,25 + fxcpmadd 26,17,0,26 + fxcsmadd 27,17,0,27 + fxcpmadd 28,18,0,28 + fxcsmadd 29,18,0,29 + + fxcsmadd 24,19,9,24 ; slwi 29,29,3 + fxcpmadd 25,16,1,25 ; lfpsx 5,11,29 + fxcsmadd 26,16,1,26 + fxcpmadd 27,17,1,27 + fxcsmadd 28,17,1,28 + fxcpmadd 29,18,1,29 ; lbzux 29,5,10 + + fxcpmadd 24,19,10,24 + fxcsmadd 25,19,10,25 + fxcpmadd 26,16,2,26 + fxcsmadd 27,16,2,27 + fxcpmadd 28,17,2,28 + fxcsmadd 29,17,2,29 + + fxcsmadd 24,18,11,24 + fxcpmadd 25,19,11,25 + fxcsmadd 26,19,11,26 + fxcpmadd 27,16,3,27 + fxcsmadd 28,16,3,28 + fxcpmadd 29,17,3,29 + + fxcpmadd 24,18,12,24 + fxcsmadd 25,18,12,25 + fxcpmadd 26,19,12,26 + fxcsmadd 27,19,12,27 + fxcpmadd 28,16,4,28 + fxcsmadd 29,16,4,29 + + fxcsmadd 24,17,13,24 + fxcpmadd 25,18,13,25 + fxcsmadd 26,18,13,26 + fxcpmadd 27,19,13,27 + fxcsmadd 28,19,13,28 ; stfpsux 24,6,9 + fxcpmadd 29,16,5,29 ; stfpsux 25,6,9 + + + # time steps 6-10 + + fxsmul 24,21,11 ; stfpsux 26,6,9 + fxpmul 25,22,11 ; stfpsux 27,6,9 + fxsmul 26,22,11 ; stfpsux 28,6,9 + fxpmul 27,23,11 ; stfpsux 29,6,9 + fxsmul 
28,23,11 + + fxcpmadd 24,17,4,24 + fxcsmadd 25,17,4,25 + fxcpmadd 26,18,4,26 + fxcsmadd 27,18,4,27 + fxcpmadd 28,19,4,28 + + fxcsmadd 24,20,13,24 ; slwi 30,30,3 + fxcpmadd 25,21,13,25 ; lfpsx 6,11,30 + fxcsmadd 26,21,13,26 ; lbzux 30,5,10 + fxcpmadd 27,22,13,27 + fxcsmadd 28,22,13,28 + + fxcsmadd 24,16,5,24 + fxcpmadd 25,17,5,25 + fxcsmadd 26,17,5,26 + fxcpmadd 27,18,5,27 + fxcsmadd 28,18,5,28 + + fxcpmadd 24,20,14,24 + fxcsmadd 25,20,14,25 + fxcpmadd 26,21,14,26 + fxcsmadd 27,21,14,27 + fxcpmadd 28,22,14,28 + + fxcpmadd 24,16,6,24 + fxcsmadd 25,16,6,25 + fxcpmadd 26,17,6,26 + fxcsmadd 27,17,6,27 + fxcpmadd 28,18,6,28 ; slwi 31,31,3 + + fxcsmadd 24,23,7,24 ; lfpsx 7,11,31 + fxcpmadd 25,20,15,25 ; lbzux 31,5,10 + fxcsmadd 26,20,15,26 + fxcpmadd 27,21,15,27 + fxcsmadd 28,21,15,28 + + fxcsmadd 24,19,15,24 + fxcpmadd 25,16,7,25 + fxcsmadd 26,16,7,26 + fxcpmadd 27,17,7,27 + fxcsmadd 28,17,7,28 + + fxcpmadd 24,23,8,24 ; slwi 28,28,3 + fxcsmadd 25,23,8,25 ; lfpsx 8,11,28 + fxcpmadd 26,20,0,26 ; lbzux 28,5,10 + fxcsmadd 27,20,0,27 + fxcpmadd 28,21,0,28 + + fxcpmadd 24,19,0,24 + fxcsmadd 25,19,0,25 + fxcpmadd 26,16,8,26 + fxcsmadd 27,16,8,27 + fxcpmadd 28,17,8,28 + + fxcsmadd 24,17,3,24 + fxcpmadd 25,18,3,25 + fxcsmadd 26,18,3,26 + fxcpmadd 27,19,3,27 + fxcsmadd 28,19,3,28 + + fxcsmadd 24,22,9,24 + fxcpmadd 25,23,9,25 ; slwi 29,29,3 + fxcsmadd 26,23,9,26 ; lfpsx 9,11,29 + fxcpmadd 27,20,1,27 ; lbzux 29,5,10 + fxcsmadd 28,20,1,28 + + fxcsmadd 24,18,1,24 + fxcpmadd 25,19,1,25 + fxcsmadd 26,19,1,26 + fxcpmadd 27,16,9,27 + fxcsmadd 28,16,9,28 + + fxcpmadd 24,21,12,24 + fxcsmadd 25,21,12,25 + fxcpmadd 26,22,12,26 + fxcsmadd 27,22,12,27 + fxcpmadd 28,23,12,28 + + fxcpmadd 24,22,10,24 + fxcsmadd 25,22,10,25 + fxcpmadd 26,23,10,26 ; slwi 30,30,3 + fxcsmadd 27,23,10,27 ; lfpsx 10,11,30 + fxcpmadd 28,20,2,28 ; lbzux 30,5,10 + + fxcpmadd 24,18,2,24 + fxcsmadd 25,18,2,25 + fxcpmadd 26,19,2,26 + fxcsmadd 27,19,2,27 + fxcpmadd 28,16,10,28 + + + # time steps 11-15 + ; stfpsux 24,6,9 + 
fxsmul 24,21,0 ; stfpsux 25,6,9 + fxpmul 25,22,0 ; stfpsux 26,6,9 + fxsmul 26,22,0 ; stfpsux 27,6,9 + fxpmul 27,23,0 ; stfpsux 28,6,9 + fxsmul 28,23,0 + + fxcpmadd 24,17,9,24 + fxcsmadd 25,17,9,25 + fxcpmadd 26,18,9,26 + fxcsmadd 27,18,9,27 + fxcpmadd 28,19,9,28 + + fxcsmadd 24,20,2,24 ; slwi 31,31,3 + fxcpmadd 25,21,2,25 ; lfpsx 11,11,31 + fxcsmadd 26,21,2,26 ; lbzux 31,5,10 + fxcpmadd 27,22,2,27 + fxcsmadd 28,22,2,28 + + fxcsmadd 24,16,10,24 + fxcpmadd 25,17,10,25 + fxcsmadd 26,17,10,26 + fxcpmadd 27,18,10,27 + fxcsmadd 28,18,10,28 + + fxcpmadd 24,20,3,24 + fxcsmadd 25,20,3,25 + fxcpmadd 26,21,3,26 + fxcsmadd 27,21,3,27 + fxcpmadd 28,22,3,28 + + fxcpmadd 24,16,11,24 + fxcsmadd 25,16,11,25 + fxcpmadd 26,17,11,26 + fxcsmadd 27,17,11,27 + fxcpmadd 28,18,11,28 ; slwi 28,28,3 + + fxcsmadd 24,23,12,24 ; lfpsx 12,11,28 + fxcpmadd 25,20,4,25 ; lbzux 28,5,10 + fxcsmadd 26,20,4,26 + fxcpmadd 27,21,4,27 + fxcsmadd 28,21,4,28 + + fxcsmadd 24,19,4,24 + fxcpmadd 25,16,12,25 + fxcsmadd 26,16,12,26 + fxcpmadd 27,17,12,27 + fxcsmadd 28,17,12,28 + + fxcpmadd 24,23,13,24 ; slwi 29,29,3 + fxcsmadd 25,23,13,25 ; lfpsx 13,11,29 + fxcpmadd 26,20,5,26 ; lbzux 29,5,10 + fxcsmadd 27,20,5,27 + fxcpmadd 28,21,5,28 + + fxcsmadd 24,17,8,24 + fxcpmadd 25,18,8,25 + fxcsmadd 26,18,8,26 + fxcpmadd 27,19,8,27 + fxcsmadd 28,19,8,28 + + fxcpmadd 24,19,5,24 + fxcsmadd 25,19,5,25 + fxcpmadd 26,16,13,26 + fxcsmadd 27,16,13,27 + fxcpmadd 28,17,13,28 + + fxcsmadd 24,22,14,24 + fxcpmadd 25,23,14,25 ; slwi 30,30,3 + fxcsmadd 26,23,14,26 ; lfpsx 14,11,30 + fxcpmadd 27,20,6,27 ; lbzux 30,5,10 + fxcsmadd 28,20,6,28 + + fxcsmadd 24,18,6,24 + fxcpmadd 25,19,6,25 + fxcsmadd 26,19,6,26 + fxcpmadd 27,16,14,27 + fxcsmadd 28,16,14,28 + + fxcpmadd 24,21,1,24 + fxcsmadd 25,21,1,25 + fxcpmadd 26,22,1,26 + fxcsmadd 27,22,1,27 + fxcpmadd 28,23,1,28 + + fxcpmadd 24,22,15,24 + fxcsmadd 25,22,15,25 + fxcpmadd 26,23,15,26 ; slwi 31,31,3 + fxcsmadd 27,23,15,27 ; lfpsx 15,11,31 + fxcpmadd 28,20,7,28 ; lbzux 31,5,10 + + 
fxcpmadd 24,18,7,24 + fxcsmadd 25,18,7,25 + fxcpmadd 26,19,7,26 + fxcsmadd 27,19,7,27 + fxcpmadd 28,16,15,28 + + fxpmul 29,19,15 ; stfpsux 24,6,9 + fxsmul 24,16,15 ; stfpsux 25,6,9 + fxpmul 25,17,15 ; stfpsux 26,6,9 + fxsmul 26,17,15 ; stfpsux 27,6,9 + fxpmul 27,18,15 ; stfpsux 28,6,9 + fxsmul 28,18,15 + + bdnz 0b + + la 8,16(1) # restore call-saved registers + li 9,DCOMPLEX_SIZE + + lfpdx 14,0,8 + lfpdux 15,8,9 + lfpdux 16,8,9 + lfpdux 17,8,9 + lfpdux 18,8,9 + lfpdux 19,8,9 + lfpdux 20,8,9 + lfpdux 21,8,9 + lfpdux 22,8,9 + lfpdux 23,8,9 + lfpdux 24,8,9 + lfpdux 25,8,9 + lfpdux 26,8,9 + lfpdux 27,8,9 + lfpdux 28,8,9 + lfpdux 29,8,9 + lmw 28,0(1) # restore r28 ... r31 + + addi 1,1,272 # restore stack pointer + blr # return + + +# filter for i8complex + +.global _ZN5LOFAR4RTCP7_filterISt7complexIcEEEvjPKfPKT_PS2_IfEi +_ZN5LOFAR4RTCP7_filterISt7complexIcEEEvjPKfPKT_PS2_IfEi: +# filters all samples for one station, one polarization + +# arguments: +# r3 : nrChannels +# r4 : pointer to weights line (const float[16]) +# r5 : pointer to first sample (const i8complex[16*r7]) +# r6 : pointer to result (fcomplex *) +# r7 : number of samples / 16 + +# internally used: +# r9 : 8 +# r10 : sample stride in bytes, nrChannels << 2 (1024 for 256 channels) +# r11 : ptr to constant table +# r28-r31 : prefetched samples +# f0-f15 : delay line (real in primary, imaginary in secondary unit) +# f16-f23 : weights (these are real values alternately stored in primary +# and secondary units) +# f24-f31 : sums + +# The implementation works on 8 time samples concurrently, to avoid +# stalls in the double hummer. This unfortunately leads to totally +# incomprehensible code. The loop processes 16 samples at a time. +# The input is converted from i8complex to fcomplex by a table lookup on +# the raw 16-bit sample, making the code even harder to understand. 
+ + mtctr 7 # set number of iterations + + li 9,-DCOMPLEX_SIZE + stfpdux 14,1,9 # save call-saved registers + stfpdux 15,1,9 + stfpdux 16,1,9 + stfpdux 17,1,9 + stfpdux 18,1,9 + stfpdux 19,1,9 + stfpdux 20,1,9 + stfpdux 21,1,9 + stfpdux 22,1,9 + stfpdux 23,1,9 + stfpdux 24,1,9 + stfpdux 25,1,9 + stfpdux 26,1,9 + stfpdux 27,1,9 + stfpdux 28,1,9 + stfpdux 29,1,9 + stfpdux 30,1,9 + stfpdux 31,1,9 + + subi 1,1,16 + stmw 28,0(1) # save r28 ... r31 + + lis 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@ha + li 9,FCOMPLEX_SIZE + la 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@l(11) + slwi 10,3,2 #I8COMPLEX_SIZE*nrChannels*NR_POLARIZATIONS + subi 6,6,FCOMPLEX_SIZE + + +#if 0 + lfpsx 1,0,3 # load delay line + lfpsux 2,3,9 + lfpsux 3,3,9 + lfpsux 4,3,9 + lfpsux 5,3,9 + lfpsux 6,3,9 + lfpsux 7,3,9 + lfpsux 8,3,9 + lfpsux 9,3,9 + lfpsux 10,3,9 + lfpsux 11,3,9 + lfpsux 12,3,9 + lfpsux 13,3,9 + lfpsux 14,3,9 + lfpsux 15,3,9 +#endif + + # convert 15 i16complex numbers to fcomplex + lhzx 29,0,5 # fetch FIR history samples + lhzux 30,5,10 + lhzux 31,5,10 + lhzux 28,5,10 + + rlwinm 12,29,3,13,28 + lfpsx 1,11,12 + lhzux 29,5,10 + + rlwinm 12,30,3,13,28 + lfpsx 2,11,12 + lhzux 30,5,10 + + rlwinm 12,31,3,13,28 + lfpsx 3,11,12 + lhzux 31,5,10 + + rlwinm 12,28,3,13,28 + lfpsx 4,11,12 + lhzux 28,5,10 + + rlwinm 12,29,3,13,28 + lfpsx 5,11,12 + lhzux 29,5,10 + + rlwinm 12,30,3,13,28 + lfpsx 6,11,12 + lhzux 30,5,10 + + rlwinm 12,31,3,13,28 + lfpsx 7,11,12 + lhzux 31,5,10 + + rlwinm 12,28,3,13,28 + lfpsx 8,11,12 + lhzux 28,5,10 + + rlwinm 12,29,3,13,28 + lfpsx 9,11,12 + lhzux 29,5,10 + + rlwinm 12,30,3,13,28 + lfpsx 10,11,12 + lhzux 30,5,10 + + rlwinm 12,31,3,13,28 + lfpsx 11,11,12 + lhzux 31,5,10 + + rlwinm 12,28,3,13,28 + lfpsx 12,11,12 + lhzux 28,5,10 # prefetch samples before entering loop + + rlwinm 12,29,3,13,28 + lfpsx 13,11,12 + lhzux 29,5,10 + + rlwinm 12,30,3,13,28 + lfpsx 14,11,12 + lhzux 30,5,10 + + rlwinm 12,31,3,13,28 + lfpsx 15,11,12 + lhzux 31,5,10 + + ; lfpsx 16,0,4 # load weights line + ; lfpsux 
17,4,9 + ; lfpsux 18,4,9 + ; lfpsux 19,4,9 + ; lfpsux 20,4,9 + ; lfpsux 21,4,9 + ; lfpsux 22,4,9 + ; lfpsux 23,4,9 + + # essentially part of loop, but interleaved with + # the stores in the tail of the loop + fxpmul 24,20,8 + fxsmul 25,20,8 + fxpmul 26,21,8 + fxsmul 27,21,8 + fxpmul 28,22,8 + fxsmul 29,22,8 + fxpmul 30,23,8 + fxsmul 31,23,8 + +0: + # time steps 0-7 + + # load 0 + + fxcsmadd 24,23,1,24 ; rlwinm 12,28,3,13,28 + # load 1 + fxcpmadd 25,20,9,25 ; lfpsx 0,11,12 + fxcsmadd 26,20,9,26 ; lhzux 28,5,10 + fxcpmadd 27,21,9,27 + fxcsmadd 28,21,9,28 + fxcpmadd 29,22,9,29 + fxcsmadd 30,22,9,30 + fxcpmadd 31,23,9,31 + + fxcpmadd 24,23,2,24 ; rlwinm 12,29,3,13,28 + fxcsmadd 25,23,2,25 ; lfpsx 1,11,12 + # load 2 + fxcpmadd 26,20,10,26 ; lhzux 29,5,10 + fxcsmadd 27,20,10,27 + fxcpmadd 28,21,10,28 + fxcsmadd 29,21,10,29 + fxcpmadd 30,22,10,30 + fxcsmadd 31,22,10,31 + + fxcsmadd 24,22,3,24 ; rlwinm 12,30,3,13,28 + fxcpmadd 25,23,3,25 ; lfpsx 2,11,12 + fxcsmadd 26,23,3,26 ; lhzux 30,5,10 + # load 3 + fxcpmadd 27,20,11,27 + fxcsmadd 28,20,11,28 + fxcpmadd 29,21,11,29 + fxcsmadd 30,21,11,30 + fxcpmadd 31,22,11,31 + + fxcpmadd 24,22,4,24 ; rlwinm 12,31,3,13,28 + fxcsmadd 25,22,4,25 ; lfpsx 3,11,12 + fxcpmadd 26,23,4,26 ; lhzux 31,5,10 + fxcsmadd 27,23,4,27 + # load 4 + fxcpmadd 28,20,12,28 + fxcsmadd 29,20,12,29 + fxcpmadd 30,21,12,30 + fxcsmadd 31,21,12,31 + + fxcsmadd 24,21,5,24 + fxcpmadd 25,22,5,25 + fxcsmadd 26,22,5,26 + fxcpmadd 27,23,5,27 + fxcsmadd 28,23,5,28 + # load 5 + fxcpmadd 29,20,13,29 + fxcsmadd 30,20,13,30 + fxcpmadd 31,21,13,31 + + fxcpmadd 24,21,6,24 + fxcsmadd 25,21,6,25 + fxcpmadd 26,22,6,26 + fxcsmadd 27,22,6,27 + fxcpmadd 28,23,6,28 + fxcsmadd 29,23,6,29 + # load 6 + fxcpmadd 30,20,14,30 + fxcsmadd 31,20,14,31 + + fxcsmadd 24,20,7,24 + fxcpmadd 25,21,7,25 + fxcsmadd 26,21,7,26 + fxcpmadd 27,22,7,27 ; rlwinm 12,28,3,13,28 + fxcsmadd 28,22,7,28 ; lfpsx 4,11,12 + fxcpmadd 29,23,7,29 ; lhzux 28,5,10 + fxcsmadd 30,23,7,30 + # load 7 + fxcpmadd 31,20,15,31 
+ + # after loads + fxcpmadd 24,16,0,24 + fxcsmadd 25,16,0,25 + fxcpmadd 26,17,0,26 + fxcsmadd 27,17,0,27 + fxcpmadd 28,18,0,28 + fxcsmadd 29,18,0,29 + fxcpmadd 30,19,0,30 + fxcsmadd 31,19,0,31 ; rlwinm 12,29,3,13,28 + + fxcsmadd 24,19,9,24 ; lfpsx 5,11,12 + fxcpmadd 25,16,1,25 ; lhzux 29,5,10 + fxcsmadd 26,16,1,26 + fxcpmadd 27,17,1,27 + fxcsmadd 28,17,1,28 + fxcpmadd 29,18,1,29 + fxcsmadd 30,18,1,30 + fxcpmadd 31,19,1,31 + + fxcpmadd 24,19,10,24 + fxcsmadd 25,19,10,25 + fxcpmadd 26,16,2,26 + fxcsmadd 27,16,2,27 ; rlwinm 12,30,3,13,28 + fxcpmadd 28,17,2,28 ; lfpsx 6,11,12 + fxcsmadd 29,17,2,29 ; lhzux 30,5,10 + fxcpmadd 30,18,2,30 + fxcsmadd 31,18,2,31 + + fxcsmadd 24,18,11,24 + fxcpmadd 25,19,11,25 + fxcsmadd 26,19,11,26 + fxcpmadd 27,16,3,27 + fxcsmadd 28,16,3,28 + fxcpmadd 29,17,3,29 + fxcsmadd 30,17,3,30 + fxcpmadd 31,18,3,31 ; rlwinm 12,31,3,13,28 + + fxcpmadd 24,18,12,24 ; lfpsx 7,11,12 + fxcsmadd 25,18,12,25 ; lhzux 31,5,10 + fxcpmadd 26,19,12,26 + fxcsmadd 27,19,12,27 + fxcpmadd 28,16,4,28 + fxcsmadd 29,16,4,29 + fxcpmadd 30,17,4,30 + fxcsmadd 31,17,4,31 + + fxcsmadd 24,17,13,24 + fxcpmadd 25,18,13,25 + fxcsmadd 26,18,13,26 + fxcpmadd 27,19,13,27 + fxcsmadd 28,19,13,28 + fxcpmadd 29,16,5,29 + fxcsmadd 30,16,5,30 + fxcpmadd 31,17,5,31 ; rlwinm 12,28,3,13,28 + + fxcpmadd 24,17,14,24 ; lfpsx 8,11,12 + fxcsmadd 25,17,14,25 ; lhzux 28,5,10 + fxcpmadd 26,18,14,26 + fxcsmadd 27,18,14,27 + fxcpmadd 28,19,14,28 + fxcsmadd 29,19,14,29 + fxcpmadd 30,16,6,30 + fxcsmadd 31,16,6,31 + + fxcsmadd 24,16,15,24 + fxcpmadd 25,17,15,25 + fxcsmadd 26,17,15,26 + fxcpmadd 27,18,15,27 + fxcsmadd 28,18,15,28 + fxcpmadd 29,19,15,29 ; stfpsux 24,6,9 + fxcsmadd 30,19,15,30 ; stfpsux 25,6,9 + fxcpmadd 31,16,7,31 ; stfpsux 26,6,9 + + + # time steps 8-15 + fxpmul 24,20,0 + fxsmul 25,20,0 + fxpmul 26,21,0 ; stfpsux 27,6,9 + fxsmul 27,21,0 ; stfpsux 28,6,9 + fxpmul 28,22,0 ; stfpsux 29,6,9 + fxsmul 29,22,0 ; stfpsux 30,6,9 + fxpmul 30,23,0 ; stfpsux 31,6,9 + fxsmul 31,23,0 + + # already 
loaded 8 + + fxcsmadd 24,23,9,24 + # load 9 + fxcpmadd 25,20,1,25 + fxcsmadd 26,20,1,26 + fxcpmadd 27,21,1,27 + fxcsmadd 28,21,1,28 + fxcpmadd 29,22,1,29 + fxcsmadd 30,22,1,30 + fxcpmadd 31,23,1,31 + + fxcpmadd 24,23,10,24 + fxcsmadd 25,23,10,25 + # load 10 + fxcpmadd 26,20,2,26 + fxcsmadd 27,20,2,27 + fxcpmadd 28,21,2,28 + fxcsmadd 29,21,2,29 + fxcpmadd 30,22,2,30 + fxcsmadd 31,22,2,31 ; rlwinm 12,29,3,13,28 + + fxcsmadd 24,22,11,24 ; lfpsx 9,11,12 + fxcpmadd 25,23,11,25 ; lhzux 29,5,10 + fxcsmadd 26,23,11,26 + # load 11 + fxcpmadd 27,20,3,27 + fxcsmadd 28,20,3,28 + fxcpmadd 29,21,3,29 + fxcsmadd 30,21,3,30 + fxcpmadd 31,22,3,31 + + fxcpmadd 24,22,12,24 + fxcsmadd 25,22,12,25 + fxcpmadd 26,23,12,26 + fxcsmadd 27,23,12,27 ; rlwinm 12,30,3,13,28 + # load 12 + fxcpmadd 28,20,4,28 ; lfpsx 10,11,12 + fxcsmadd 29,20,4,29 ; lhzux 30,5,10 + fxcpmadd 30,21,4,30 + fxcsmadd 31,21,4,31 + + fxcsmadd 24,21,13,24 + fxcpmadd 25,22,13,25 + fxcsmadd 26,22,13,26 + fxcpmadd 27,23,13,27 + fxcsmadd 28,23,13,28 + # load 13 + fxcpmadd 29,20,5,29 + fxcsmadd 30,20,5,30 + fxcpmadd 31,21,5,31 ; rlwinm 12,31,3,13,28 + + fxcpmadd 24,21,14,24 ; lfpsx 11,11,12 + fxcsmadd 25,21,14,25 ; lhzux 31,5,10 + fxcpmadd 26,22,14,26 + fxcsmadd 27,22,14,27 + fxcpmadd 28,23,14,28 + fxcsmadd 29,23,14,29 + # load 14 + fxcpmadd 30,20,6,30 + fxcsmadd 31,20,6,31 + + fxcsmadd 24,20,15,24 + fxcpmadd 25,21,15,25 + fxcsmadd 26,21,15,26 + fxcpmadd 27,22,15,27 ; rlwinm 12,28,3,13,28 + fxcsmadd 28,22,15,28 ; lfpsx 12,11,12 + fxcpmadd 29,23,15,29 ; lhzux 28,5,10 + fxcsmadd 30,23,15,30 + # load 15 + fxcpmadd 31,20,7,31 + + # after loads + + fxcpmadd 24,16,8,24 + fxcsmadd 25,16,8,25 + fxcpmadd 26,17,8,26 + fxcsmadd 27,17,8,27 + fxcpmadd 28,18,8,28 + fxcsmadd 29,18,8,29 + fxcpmadd 30,19,8,30 + fxcsmadd 31,19,8,31 ; rlwinm 12,29,3,13,28 + + fxcsmadd 24,19,1,24 ; lfpsx 13,11,12 + fxcpmadd 25,16,9,25 ; lhzux 29,5,10 + fxcsmadd 26,16,9,26 + fxcpmadd 27,17,9,27 + fxcsmadd 28,17,9,28 + fxcpmadd 29,18,9,29 + fxcsmadd 30,18,9,30 + 
fxcpmadd 31,19,9,31 + + fxcpmadd 24,19,2,24 + fxcsmadd 25,19,2,25 + fxcpmadd 26,16,10,26 + fxcsmadd 27,16,10,27 ; rlwinm 12,30,3,13,28 + fxcpmadd 28,17,10,28 ; lfpsx 14,11,12 + fxcsmadd 29,17,10,29 ; lhzux 30,5,10 + fxcpmadd 30,18,10,30 + fxcsmadd 31,18,10,31 + + fxcsmadd 24,18,3,24 + fxcpmadd 25,19,3,25 + fxcsmadd 26,19,3,26 + fxcpmadd 27,16,11,27 + fxcsmadd 28,16,11,28 + fxcpmadd 29,17,11,29 + fxcsmadd 30,17,11,30 + fxcpmadd 31,18,11,31 ; rlwinm 12,31,3,13,28 + + fxcpmadd 24,18,4,24 ; lfpsx 15,11,12 + fxcsmadd 25,18,4,25 ; lhzux 31,5,10 + fxcpmadd 26,19,4,26 + fxcsmadd 27,19,4,27 + fxcpmadd 28,16,12,28 + fxcsmadd 29,16,12,29 + fxcpmadd 30,17,12,30 + fxcsmadd 31,17,12,31 + + fxcsmadd 24,17,5,24 + fxcpmadd 25,18,5,25 + fxcsmadd 26,18,5,26 + fxcpmadd 27,19,5,27 + fxcsmadd 28,19,5,28 + fxcpmadd 29,16,13,29 + fxcsmadd 30,16,13,30 + fxcpmadd 31,17,13,31 + + fxcpmadd 24,17,6,24 + fxcsmadd 25,17,6,25 + fxcpmadd 26,18,6,26 + fxcsmadd 27,18,6,27 + fxcpmadd 28,19,6,28 + fxcsmadd 29,19,6,29 + fxcpmadd 30,16,14,30 + fxcsmadd 31,16,14,31 + + fxcsmadd 24,16,7,24 + fxcpmadd 25,17,7,25 + fxcsmadd 26,17,7,26 + fxcpmadd 27,18,7,27 + fxcsmadd 28,18,7,28 + fxcpmadd 29,19,7,29 ; stfpsux 24,6,9 + fxcsmadd 30,19,7,30 ; stfpsux 25,6,9 + fxcpmadd 31,16,15,31 ; stfpsux 26,6,9 + + fxpmul 24,20,8 # part of next loop + fxsmul 25,20,8 + fxpmul 26,21,8 ; stfpsux 27,6,9 + fxsmul 27,21,8 ; stfpsux 28,6,9 + fxpmul 28,22,8 ; stfpsux 29,6,9 + fxsmul 29,22,8 ; stfpsux 30,6,9 + fxpmul 30,23,8 ; stfpsux 31,6,9 + fxsmul 31,23,8 + + bdnz 0b + +#if 0 + addi 3,3,-120 # store delay line + stfpsux 1,3,9 + stfpsux 2,3,9 + stfpsux 3,3,9 + stfpsux 4,3,9 + stfpsux 5,3,9 + stfpsux 6,3,9 + stfpsux 7,3,9 + stfpsux 8,3,9 + stfpsux 9,3,9 + stfpsux 10,3,9 + stfpsux 11,3,9 + stfpsux 12,3,9 + stfpsux 13,3,9 + stfpsux 14,3,9 + stfpsux 15,3,9 +#endif + + li 9,DCOMPLEX_SIZE # restore call-saved registers + lmw 28,0(1) # restore r28 ... 
r31 + + #addi 1,1,16 + lfpdux 31,1,9 + lfpdux 30,1,9 + lfpdux 29,1,9 + lfpdux 28,1,9 + lfpdux 27,1,9 + lfpdux 26,1,9 + lfpdux 25,1,9 + lfpdux 24,1,9 + lfpdux 23,1,9 + lfpdux 22,1,9 + lfpdux 21,1,9 + lfpdux 20,1,9 + lfpdux 19,1,9 + lfpdux 18,1,9 + lfpdux 17,1,9 + lfpdux 16,1,9 + lfpdux 15,1,9 + lfpdux 14,1,9 + + addi 1,1,16 # restore stack pointer + blr # return + +#if 0 + +# filter for i16complex + +.global _ZN5LOFAR4RTCP7_filterISt7complexIsEEEvjPKfPKT_PS2_IfEi +_ZN5LOFAR4RTCP7_filterISt7complexIsEEEvjPKfPKT_PS2_IfEi: +# filters all samples for one station, one polarization + +# arguments: +# r3 : nrChannels +# r4 : pointer to weights line (const fcomplex[16]) +# r5 : pointer to first sample (const i16complex[16*r7]) +# r6 : pointer to result (fcomplex *) +# r7 : number of samples / 16 + +# internally used: +# r9 : 8 +# r10 : 2048 +# r11 : ptr to constant table +# r28-r31 : prefetched samples +# f0-f15 : delay line (real in primary, imaginary in secondary unit) +# f16-f23 : weights (these are real values alternately stored in primary +# and secondary units) +# f24-f29 : sums + +# The implementation works on 5 or 6 time samples concurrently, to avoid +# stalls in the double hummer. This unfortunately leads to totally +# incomprehensible code. The loop processes 16 samples at a time. +# The input is converted from int16complex to dcomplex by black magic, +# making the code even harder to understand. + + mtctr 7 # set number of iterations + + li 9,-DCOMPLEX_SIZE + stfpdux 14,1,9 # save call-saved registers + stfpdux 15,1,9 + stfpdux 16,1,9 + stfpdux 17,1,9 + stfpdux 18,1,9 + stfpdux 19,1,9 + stfpdux 20,1,9 + stfpdux 21,1,9 + stfpdux 22,1,9 + stfpdux 23,1,9 + stfpdux 24,1,9 + stfpdux 25,1,9 + stfpdux 26,1,9 + stfpdux 27,1,9 + stfpdux 28,1,9 + stfpdux 29,1,9 + stfpdux 30,1,9 + stfpdux 31,1,9 + + subi 1,1,16 + stmw 28,0(1) # save r28 ... 
r31 + + lis 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@ha + li 9,FCOMPLEX_SIZE + la 11,_ZN5LOFAR4RTCP13_FIR_fp_tableE@l(11) + slwi 10,3,3 # I16COMPLEX_SIZE*nrChannels*NR_POLARIZATIONS + subi 6,6,FCOMPLEX_SIZE + + + # convert 15 i16complex numbers to fcomplex + lwzx 29,0,5 # fetch FIR history samples + lwzux 30,5,10 + lwzux 31,5,10 + lwzux 28,5,10 + + rlwinm 12,29,2,14,29 + lfssx 1,11,12 + rlwinm 12,29,18,14,29 + lfsx 1,11,12 + lwzux 29,5,10 + + rlwinm 12,30,2,14,29 + lfssx 2,11,12 + rlwinm 12,30,18,14,29 + lfsx 2,11,12 + lwzux 30,5,10 + + rlwinm 12,31,2,14,29 + lfssx 3,11,12 + rlwinm 12,31,18,14,29 + lfsx 3,11,12 + lwzux 31,5,10 + + rlwinm 12,28,2,14,29 + lfssx 4,11,12 + rlwinm 12,28,18,14,29 + lfsx 4,11,12 + lwzux 28,5,10 + + rlwinm 12,29,2,14,29 + lfssx 5,11,12 + rlwinm 12,29,18,14,29 + lfsx 5,11,12 + lwzux 29,5,10 + + rlwinm 12,30,2,14,29 + lfssx 6,11,12 + rlwinm 12,30,18,14,29 + lfsx 6,11,12 + lwzux 30,5,10 + + rlwinm 12,31,2,14,29 + lfssx 7,11,12 + rlwinm 12,31,18,14,29 + lfsx 7,11,12 + lwzux 31,5,10 + + rlwinm 12,28,2,14,29 + lfssx 8,11,12 + rlwinm 12,28,18,14,29 + lfsx 8,11,12 + lwzux 28,5,10 + + rlwinm 12,29,2,14,29 + lfssx 9,11,12 + rlwinm 12,29,18,14,29 + lfsx 9,11,12 + lwzux 29,5,10 + + rlwinm 12,30,2,14,29 + lfssx 10,11,12 + rlwinm 12,30,18,14,29 + lfsx 10,11,12 + lwzux 30,5,10 + + rlwinm 12,31,2,14,29 + lfssx 11,11,12 + rlwinm 12,31,18,14,29 + lfsx 11,11,12 + lwzux 31,5,10 + + rlwinm 12,28,2,14,29 + lfssx 12,11,12 + rlwinm 12,28,18,14,29 + lfsx 12,11,12 + lwzux 28,5,10 # prefetch samples before entering loop + + rlwinm 12,29,2,14,29 + lfssx 13,11,12 + rlwinm 12,29,18,14,29 + lfsx 13,11,12 + lwzux 29,5,10 + + rlwinm 12,30,2,14,29 + lfssx 14,11,12 + rlwinm 12,30,18,14,29 + lfsx 14,11,12 + lwzux 30,5,10 + + rlwinm 12,31,2,14,29 + lfssx 15,11,12 + rlwinm 12,31,18,14,29 + lfsx 15,11,12 + lwzux 31,5,10 + + ; lfpsx 16,0,4 # load weights line + ; lfpsux 17,4,9 + ; lfpsux 18,4,9 + ; lfpsux 19,4,9 + ; lfpsux 20,4,9 + ; lfpsux 21,4,9 + ; lfpsux 22,4,9 + ; lfpsux 23,4,9 + 
+ # essentially part of loop, but interleaved with + # the stores in the tail of the loop + fxpmul 24,20,8 + fxsmul 25,20,8 + fxpmul 26,21,8 + fxsmul 27,21,8 + fxpmul 28,22,8 + fxsmul 29,22,8 + fxpmul 30,23,8 + fxsmul 31,23,8 + +0: + # time steps 0-7 + + # load 0 + + fxcsmadd 24,23,1,24 ; rlwinm 12,28,2,14,29 + # load 1 + fxcpmadd 25,20,9,25 ; lfssx 0,11,12 + fxcsmadd 26,20,9,26 ; rlwinm 12,28,18,14,29 + fxcpmadd 27,21,9,27 ; lfsx 0,11,12 + fxcsmadd 28,21,9,28 ; lwzux 28,5,10 + fxcpmadd 29,22,9,29 + fxcsmadd 30,22,9,30 + fxcpmadd 31,23,9,31 + + fxcpmadd 24,23,2,24 ; rlwinm 12,29,2,14,29 + fxcsmadd 25,23,2,25 ; lfssx 1,11,12 + # load 2 + fxcpmadd 26,20,10,26 ; rlwinm 12,29,18,14,29 + fxcsmadd 27,20,10,27 ; lfsx 1,11,12 + fxcpmadd 28,21,10,28 ; lwzux 29,5,10 + fxcsmadd 29,21,10,29 + fxcpmadd 30,22,10,30 + fxcsmadd 31,22,10,31 + + fxcsmadd 24,22,3,24 ; rlwinm 12,30,2,14,29 + fxcpmadd 25,23,3,25 ; lfssx 2,11,12 + fxcsmadd 26,23,3,26 ; rlwinm 12,30,18,14,29 + # load 3 + fxcpmadd 27,20,11,27 ; lfsx 2,11,12 + fxcsmadd 28,20,11,28 ; lwzux 30,5,10 + fxcpmadd 29,21,11,29 + fxcsmadd 30,21,11,30 + fxcpmadd 31,22,11,31 + + fxcpmadd 24,22,4,24 ; rlwinm 12,31,2,14,29 + fxcsmadd 25,22,4,25 ; lfssx 3,11,12 + fxcpmadd 26,23,4,26 ; rlwinm 12,31,18,14,29 + fxcsmadd 27,23,4,27 ; lfsx 3,11,12 + # load 4 + fxcpmadd 28,20,12,28 ; lwzux 31,5,10 + fxcsmadd 29,20,12,29 + fxcpmadd 30,21,12,30 + fxcsmadd 31,21,12,31 + + fxcsmadd 24,21,5,24 + fxcpmadd 25,22,5,25 + fxcsmadd 26,22,5,26 + fxcpmadd 27,23,5,27 + fxcsmadd 28,23,5,28 + # load 5 + fxcpmadd 29,20,13,29 + fxcsmadd 30,20,13,30 + fxcpmadd 31,21,13,31 + + fxcpmadd 24,21,6,24 + fxcsmadd 25,21,6,25 + fxcpmadd 26,22,6,26 + fxcsmadd 27,22,6,27 + fxcpmadd 28,23,6,28 + fxcsmadd 29,23,6,29 + # load 6 + fxcpmadd 30,20,14,30 + fxcsmadd 31,20,14,31 + + fxcsmadd 24,20,7,24 + fxcpmadd 25,21,7,25 + fxcsmadd 26,21,7,26 + fxcpmadd 27,22,7,27 ; rlwinm 12,28,2,14,29 + fxcsmadd 28,22,7,28 ; lfssx 4,11,12 + fxcpmadd 29,23,7,29 + fxcsmadd 30,23,7,30 + # load 7 
+ fxcpmadd 31,20,15,31 + + # after loads + fxcpmadd 24,16,0,24 ; rlwinm 12,28,18,14,29 + fxcsmadd 25,16,0,25 ; lfsx 4,11,12 + fxcpmadd 26,17,0,26 + fxcsmadd 27,17,0,27 ; lwzux 28,5,10 + fxcpmadd 28,18,0,28 + fxcsmadd 29,18,0,29 + fxcpmadd 30,19,0,30 + fxcsmadd 31,19,0,31 ; rlwinm 12,29,2,14,29 + + fxcsmadd 24,19,9,24 ; lfssx 5,11,12 + fxcpmadd 25,16,1,25 + fxcsmadd 26,16,1,26 + fxcpmadd 27,17,1,27 + fxcsmadd 28,17,1,28 ; rlwinm 12,29,18,14,29 + fxcpmadd 29,18,1,29 ; lfsx 5,11,12 + fxcsmadd 30,18,1,30 + fxcpmadd 31,19,1,31 ; lwzux 29,5,10 + + fxcpmadd 24,19,10,24 + fxcsmadd 25,19,10,25 + fxcpmadd 26,16,2,26 + fxcsmadd 27,16,2,27 ; rlwinm 12,30,2,14,29 + fxcpmadd 28,17,2,28 ; lfssx 6,11,12 + fxcsmadd 29,17,2,29 + fxcpmadd 30,18,2,30 + fxcsmadd 31,18,2,31 + + fxcsmadd 24,18,11,24 ; rlwinm 12,30,18,14,29 + fxcpmadd 25,19,11,25 ; lfsx 6,11,12 + fxcsmadd 26,19,11,26 + fxcpmadd 27,16,3,27 ; lwzux 30,5,10 + fxcsmadd 28,16,3,28 + fxcpmadd 29,17,3,29 + fxcsmadd 30,17,3,30 + fxcpmadd 31,18,3,31 ; rlwinm 12,31,2,14,29 + + fxcpmadd 24,18,12,24 ; lfssx 7,11,12 + fxcsmadd 25,18,12,25 + fxcpmadd 26,19,12,26 + fxcsmadd 27,19,12,27 + fxcpmadd 28,16,4,28 ; rlwinm 12,31,18,14,29 + fxcsmadd 29,16,4,29 ; lfsx 7,11,12 + fxcpmadd 30,17,4,30 + fxcsmadd 31,17,4,31 ; lwzux 31,5,10 + + fxcsmadd 24,17,13,24 + fxcpmadd 25,18,13,25 + fxcsmadd 26,18,13,26 + fxcpmadd 27,19,13,27 + fxcsmadd 28,19,13,28 + fxcpmadd 29,16,5,29 + fxcsmadd 30,16,5,30 + fxcpmadd 31,17,5,31 ; rlwinm 12,28,2,14,29 + + fxcpmadd 24,17,14,24 ; lfssx 8,11,12 + fxcsmadd 25,17,14,25 + fxcpmadd 26,18,14,26 + fxcsmadd 27,18,14,27 + fxcpmadd 28,19,14,28 ; rlwinm 12,28,18,14,29 + fxcsmadd 29,19,14,29 ; lfsx 8,11,12 + fxcpmadd 30,16,6,30 + fxcsmadd 31,16,6,31 ; lwzux 28,5,10 + + fxcsmadd 24,16,15,24 + fxcpmadd 25,17,15,25 + fxcsmadd 26,17,15,26 + fxcpmadd 27,18,15,27 + fxcsmadd 28,18,15,28 + fxcpmadd 29,19,15,29 ; stfpsux 24,6,9 + fxcsmadd 30,19,15,30 ; stfpsux 25,6,9 + fxcpmadd 31,16,7,31 ; stfpsux 26,6,9 + + + # time steps 8-15 + 
fxpmul 24,20,0 + fxsmul 25,20,0 + fxpmul 26,21,0 ; stfpsux 27,6,9 + fxsmul 27,21,0 ; stfpsux 28,6,9 + fxpmul 28,22,0 ; stfpsux 29,6,9 + fxsmul 29,22,0 ; stfpsux 30,6,9 + fxpmul 30,23,0 ; stfpsux 31,6,9 + fxsmul 31,23,0 + + # already loaded 8 + + fxcsmadd 24,23,9,24 + # load 9 + fxcpmadd 25,20,1,25 + fxcsmadd 26,20,1,26 + fxcpmadd 27,21,1,27 + fxcsmadd 28,21,1,28 + fxcpmadd 29,22,1,29 + fxcsmadd 30,22,1,30 + fxcpmadd 31,23,1,31 + + fxcpmadd 24,23,10,24 + fxcsmadd 25,23,10,25 + # load 10 + fxcpmadd 26,20,2,26 + fxcsmadd 27,20,2,27 + fxcpmadd 28,21,2,28 + fxcsmadd 29,21,2,29 + fxcpmadd 30,22,2,30 + fxcsmadd 31,22,2,31 ; rlwinm 12,29,2,14,29 + + fxcsmadd 24,22,11,24 ; lfssx 9,11,12 + fxcpmadd 25,23,11,25 + fxcsmadd 26,23,11,26 + # load 11 + fxcpmadd 27,20,3,27 + fxcsmadd 28,20,3,28 ; rlwinm 12,29,18,14,29 + fxcpmadd 29,21,3,29 ; lfsx 9,11,12 + fxcsmadd 30,21,3,30 + fxcpmadd 31,22,3,31 ; lwzux 29,5,10 + + fxcpmadd 24,22,12,24 + fxcsmadd 25,22,12,25 + fxcpmadd 26,23,12,26 + fxcsmadd 27,23,12,27 ; rlwinm 12,30,2,14,29 + # load 12 + fxcpmadd 28,20,4,28 ; lfssx 10,11,12 + fxcsmadd 29,20,4,29 + fxcpmadd 30,21,4,30 + fxcsmadd 31,21,4,31 + + fxcsmadd 24,21,13,24 ; rlwinm 12,30,18,14,29 + fxcpmadd 25,22,13,25 ; lfsx 10,11,12 + fxcsmadd 26,22,13,26 + fxcpmadd 27,23,13,27 ; lwzux 30,5,10 + fxcsmadd 28,23,13,28 + # load 13 + fxcpmadd 29,20,5,29 + fxcsmadd 30,20,5,30 + fxcpmadd 31,21,5,31 ; rlwinm 12,31,2,14,29 + + fxcpmadd 24,21,14,24 ; lfssx 11,11,12 + fxcsmadd 25,21,14,25 + fxcpmadd 26,22,14,26 + fxcsmadd 27,22,14,27 + fxcpmadd 28,23,14,28 ; rlwinm 12,31,18,14,29 + fxcsmadd 29,23,14,29 ; lfsx 11,11,12 + # load 14 + fxcpmadd 30,20,6,30 + fxcsmadd 31,20,6,31 ; lwzux 31,5,10 + + fxcsmadd 24,20,15,24 + fxcpmadd 25,21,15,25 + fxcsmadd 26,21,15,26 + fxcpmadd 27,22,15,27 ; rlwinm 12,28,2,14,29 + fxcsmadd 28,22,15,28 ; lfssx 12,11,12 + fxcpmadd 29,23,15,29 + fxcsmadd 30,23,15,30 + # load 15 + fxcpmadd 31,20,7,31 + + # after loads + + fxcpmadd 24,16,8,24 ; rlwinm 12,28,18,14,29 + 
fxcsmadd 25,16,8,25 ; lfsx 12,11,12 + fxcpmadd 26,17,8,26 + fxcsmadd 27,17,8,27 ; lwzux 28,5,10 + fxcpmadd 28,18,8,28 + fxcsmadd 29,18,8,29 + fxcpmadd 30,19,8,30 + fxcsmadd 31,19,8,31 ; rlwinm 12,29,2,14,29 + + fxcsmadd 24,19,1,24 ; lfssx 13,11,12 + fxcpmadd 25,16,9,25 + fxcsmadd 26,16,9,26 + fxcpmadd 27,17,9,27 + fxcsmadd 28,17,9,28 ; rlwinm 12,29,18,14,29 + fxcpmadd 29,18,9,29 ; lfsx 13,11,12 + fxcsmadd 30,18,9,30 + fxcpmadd 31,19,9,31 ; lwzux 29,5,10 + + fxcpmadd 24,19,2,24 + fxcsmadd 25,19,2,25 + fxcpmadd 26,16,10,26 + fxcsmadd 27,16,10,27 ; rlwinm 12,30,2,14,29 + fxcpmadd 28,17,10,28 ; lfssx 14,11,12 + fxcsmadd 29,17,10,29 + fxcpmadd 30,18,10,30 + fxcsmadd 31,18,10,31 + + fxcsmadd 24,18,3,24 ; rlwinm 12,30,18,14,29 + fxcpmadd 25,19,3,25 ; lfsx 14,11,12 + fxcsmadd 26,19,3,26 + fxcpmadd 27,16,11,27 ; lwzux 30,5,10 + fxcsmadd 28,16,11,28 + fxcpmadd 29,17,11,29 + fxcsmadd 30,17,11,30 + fxcpmadd 31,18,11,31 ; rlwinm 12,31,2,14,29 + + fxcpmadd 24,18,4,24 ; lfssx 15,11,12 + fxcsmadd 25,18,4,25 + fxcpmadd 26,19,4,26 + fxcsmadd 27,19,4,27 + fxcpmadd 28,16,12,28 ; rlwinm 12,31,18,14,29 + fxcsmadd 29,16,12,29 ; lfsx 15,11,12 + fxcpmadd 30,17,12,30 + fxcsmadd 31,17,12,31 ; lwzux 31,5,10 + + fxcsmadd 24,17,5,24 + fxcpmadd 25,18,5,25 + fxcsmadd 26,18,5,26 + fxcpmadd 27,19,5,27 + fxcsmadd 28,19,5,28 + fxcpmadd 29,16,13,29 + fxcsmadd 30,16,13,30 + fxcpmadd 31,17,13,31 + + fxcpmadd 24,17,6,24 + fxcsmadd 25,17,6,25 + fxcpmadd 26,18,6,26 + fxcsmadd 27,18,6,27 + fxcpmadd 28,19,6,28 + fxcsmadd 29,19,6,29 + fxcpmadd 30,16,14,30 + fxcsmadd 31,16,14,31 + + fxcsmadd 24,16,7,24 + fxcpmadd 25,17,7,25 + fxcsmadd 26,17,7,26 + fxcpmadd 27,18,7,27 + fxcsmadd 28,18,7,28 + fxcpmadd 29,19,7,29 ; stfpsux 24,6,9 + fxcsmadd 30,19,7,30 ; stfpsux 25,6,9 + fxcpmadd 31,16,15,31 ; stfpsux 26,6,9 + + fxpmul 24,20,8 # part of next loop + fxsmul 25,20,8 + fxpmul 26,21,8 ; stfpsux 27,6,9 + fxsmul 27,21,8 ; stfpsux 28,6,9 + fxpmul 28,22,8 ; stfpsux 29,6,9 + fxsmul 29,22,8 ; stfpsux 30,6,9 + fxpmul 30,23,8 
; stfpsux 31,6,9 + fxsmul 31,23,8 + + bdnz 0b + + li 9,DCOMPLEX_SIZE # restore call-saved registers + lmw 28,0(1) # restore r28 ... r31 + + #addi 1,1,16 + lfpdux 31,1,9 + lfpdux 30,1,9 + lfpdux 29,1,9 + lfpdux 28,1,9 + lfpdux 27,1,9 + lfpdux 26,1,9 + lfpdux 25,1,9 + lfpdux 24,1,9 + lfpdux 23,1,9 + lfpdux 22,1,9 + lfpdux 21,1,9 + lfpdux 20,1,9 + lfpdux 19,1,9 + lfpdux 18,1,9 + lfpdux 17,1,9 + lfpdux 16,1,9 + lfpdux 15,1,9 + lfpdux 14,1,9 + + addi 1,1,16 # restore stack pointer + blr # return + +#else + +# filter for i16complex + +.align 5 +sub_value: + .long 0x43300000,0x00008000,0x43300000,0x00008000 # two doubles, each 2^52 + 2^15 + + +.global _ZN5LOFAR4RTCP7_filterISt7complexIsEEEvjPKfPKT_PS2_IfEi +_ZN5LOFAR4RTCP7_filterISt7complexIsEEEvjPKfPKT_PS2_IfEi: +# filters all samples for one station, one polarization + +# arguments: +# r3 : nrChannels +# r4 : pointer to weights line (const float[16], loaded pairwise into f16-f23) +# r5 : pointer to first sample (const i16complex[16*r7]) +# r6 : pointer to result (fcomplex *) +# r7 : number of samples / 16 (moved to CTR on entry; r7 is reused afterwards) + +# internally used: +# r9 : FCOMPLEX_SIZE (-DCOMPLEX_SIZE / DCOMPLEX_SIZE while saving / restoring registers) +# r10 : 8 * nrChannels, the distance in bytes between consecutive samples (2048 for 256 channels) +# r7, r8, r11 : offsets (w.r.t. sp) into the int->fp conversion area; r7 = r8 + 14, r11 = r8 + 6 +# r12 : 0x00800080 +# r28-r31 : prefetched samples +# f0-f15 : delay line (real in primary, imaginary in secondary unit) +# f16-f23 : weights (these are real values alternately stored in primary +# and secondary units) +# f24-f29 : sums +# f30, f31 : sample being converted, sub_value + +# The implementation works on 5 or 6 time samples concurrently, to avoid +# stalls in the double hummer. This unfortunately leads to totally +# incomprehensible code. The loop processes 16 samples at a time. +# The input is converted from int16complex to dcomplex as follows: xor with r12 flips the sign bit of both 16-bit halves of a sample (x becomes x + 2^15, unsigned), and sthbrx writes each half, byte-reversed, over the low 16 bits of one double of the conversion area, which initially holds sub_value. +# Each double then equals 2^52 + 2^15 + x, so lfpdx followed by fpsub with f31 (sub_value) yields x. NOTE(review): the byte reversal implies little endian samples in memory; confirm with the sender. 
+ + mtctr 7 # set number of iterations + + li 9,-DCOMPLEX_SIZE + stfpdux 14,1,9 # save call-saved registers + stfpdux 15,1,9 + stfpdux 16,1,9 + stfpdux 17,1,9 + stfpdux 18,1,9 + stfpdux 19,1,9 + stfpdux 20,1,9 + stfpdux 21,1,9 + stfpdux 22,1,9 + stfpdux 23,1,9 + stfpdux 24,1,9 + stfpdux 25,1,9 + stfpdux 26,1,9 + stfpdux 27,1,9 + stfpdux 28,1,9 + stfpdux 29,1,9 + stfpdux 30,1,9 + stfpdux 31,1,9 + + subi 1,1,48 # room for r28 ... r31 at 16(sp) and for a conversion area at 0(sp) and at 32(sp) + stmw 28,16(1) # save r28 ... r31 + + lis 8,sub_value@ha # load sub_values + lis 12,0x0080 # upper half of r12 = 0x00800080 + la 8,sub_value@l(8) + lfpdx 31,0,8 # f31 = sub_value in both units + + + # Avoid that int->fp conversion area is in the same cache way as where + # the samples come from. There are two places that can be used where + # the conversion can take place: at 0(sp) or 32(sp). r8, r7, and r11 + # contain offsets (w.r.t. sp) to this area. Note that 16(sp) stores + # r28 ... r31 + + xor 8,1,5 # r8 = sp ^ sample pointer + andi. 0,8,0x01E0 # CR0[EQ] is set when address bits 5-8 of sp and sample pointer are equal + li 9,FCOMPLEX_SIZE + mfcr 8 # r8 = condition register + slwi 10,3,3 # I16COMPLEX_SIZE*nrChannels*NR_POLARIZATIONS + rlwinm 8,8,8,26,26 # isolate CR0[EQ]: r8 = 32 when it is set, 0 otherwise + ori 12,12,0x0080 # r12 = 0x00800080 + sub 6,6,9 # results are stored with pre-increment (stfpsux) + + addi 7,8,14 # r7 : low halfword of the second double in the conversion area + addi 11,8,6 # r11 : low halfword of the first double in the conversion area + stfpdx 31,8,1 # initialize int->fp conversion area + +#if 0 + lfpsx 1,0,3 # load delay line + lfpsux 2,3,9 + lfpsux 3,3,9 + lfpsux 4,3,9 + lfpsux 5,3,9 + lfpsux 6,3,9 + lfpsux 7,3,9 + lfpsux 8,3,9 + lfpsux 9,3,9 + lfpsux 10,3,9 + lfpsux 11,3,9 + lfpsux 12,3,9 + lfpsux 13,3,9 + lfpsux 14,3,9 + lfpsux 15,3,9 +#endif + + # convert 15 i16complex numbers to fcomplex + lwzx 29,0,5 # fetch FIR history samples + lwzux 30,5,10 + lwzux 31,5,10 + lwzux 28,5,10 + + xor 29,29,12 # flip bit 7 and bit 23 (the int16 sign bits once sthbrx has reversed the bytes) + sthbrx 29,7,1 # low halfword of the register -> low 16 bits of the second double + srawi 29,29,16 # bring the high halfword down + sthbrx 29,11,1 # -> low 16 bits of the first double + lfpdx 1,8,1 # f1 = sample + sub_value; the fpsub after the conversions removes sub_value + + lwzux 29,5,10 + + xor 30,30,12 + sthbrx 30,7,1 + srawi 30,30,16 + sthbrx 30,11,1 + lfpdx 2,8,1 + + lwzux 30,5,10 + + xor 31,31,12 + sthbrx 31,7,1 + srawi 31,31,16 + sthbrx 31,11,1 + lfpdx 3,8,1 + + lwzux 31,5,10 + + xor 28,28,12 + sthbrx 28,7,1 + srawi 28,28,16 + sthbrx 28,11,1 + lfpdx 4,8,1 + + lwzux 28,5,10 + + xor 29,29,12 + sthbrx 29,7,1 + srawi 29,29,16 + sthbrx 29,11,1 + lfpdx 5,8,1 + + lwzux 
29,5,10 + + xor 30,30,12 + sthbrx 30,7,1 + srawi 30,30,16 + sthbrx 30,11,1 + lfpdx 6,8,1 + + lwzux 30,5,10 + + xor 31,31,12 + sthbrx 31,7,1 + srawi 31,31,16 + sthbrx 31,11,1 + lfpdx 7,8,1 + + lwzux 31,5,10 + + xor 28,28,12 + sthbrx 28,7,1 + srawi 28,28,16 + sthbrx 28,11,1 + lfpdx 8,8,1 + + lwzux 28,5,10 + + xor 29,29,12 + sthbrx 29,7,1 + srawi 29,29,16 + sthbrx 29,11,1 + lfpdx 9,8,1 + + lwzux 29,5,10 + + xor 30,30,12 + sthbrx 30,7,1 + srawi 30,30,16 + sthbrx 30,11,1 + lfpdx 10,8,1 + + lwzux 30,5,10 + + xor 31,31,12 + sthbrx 31,7,1 + srawi 31,31,16 + sthbrx 31,11,1 + lfpdx 11,8,1 + + lwzux 31,5,10 + + xor 28,28,12 + sthbrx 28,7,1 + srawi 28,28,16 + sthbrx 28,11,1 + lfpdx 12,8,1 + + xor 29,29,12 + sthbrx 29,7,1 + srawi 29,29,16 + sthbrx 29,11,1 + lfpdx 13,8,1 + + xor 30,30,12 + sthbrx 30,7,1 + srawi 30,30,16 + sthbrx 30,11,1 + lfpdx 14,8,1 + + xor 31,31,12 + sthbrx 31,7,1 + srawi 31,31,16 + sthbrx 31,11,1 + lfpdx 15,8,1 + + fpsub 1,1,31 ; lfpsx 16,0,4 # load weights line + fpsub 2,2,31 ; lfpsux 17,4,9 + fpsub 3,3,31 ; lfpsux 18,4,9 + fpsub 4,4,31 ; lfpsux 19,4,9 + fpsub 5,5,31 ; lfpsux 20,4,9 + fpsub 6,6,31 ; lfpsux 21,4,9 + fpsub 7,7,31 ; lfpsux 22,4,9 + fpsub 8,8,31 ; lfpsux 23,4,9 + fpsub 9,9,31 ; lwzux 28,5,10 # prefetch samples before entering loop + fpsub 10,10,31 ; lwzux 29,5,10 + fpsub 11,11,31 ; lwzux 30,5,10 + fpsub 12,12,31 ; lwzux 31,5,10 + fpsub 13,13,31 + fpsub 14,14,31 + fpsub 15,15,31 + + # essentially part of loop, but interleaved with + # the stores in the tail of the loop + fxsmul 24,16,15 + fxpmul 25,17,15 + fxsmul 26,17,15 + fxpmul 27,18,15 + fxsmul 28,18,15 + fxpmul 29,19,15 + +0: + # time steps 0-5 + + fxcsmadd 24,23,1,24 ; xor 28,28,12 + fxcpmadd 25,20,9,25 ; sthbrx 28,7,1 + fxcsmadd 26,20,9,26 ; srawi 28,28,16 + fxcpmadd 27,21,9,27 ; sthbrx 28,11,1 + fxcsmadd 28,21,9,28 + fxcpmadd 29,22,9,29 + + fxcpmadd 24,23,2,24 + fxcsmadd 25,23,2,25 ; lfpdx 30,8,1 + fxcpmadd 26,20,10,26 + fxcsmadd 27,20,10,27 + fxcpmadd 28,21,10,28 + fxcsmadd 29,21,10,29 
; lwzux 28,5,10 + + fxcsmadd 24,22,3,24 + fxcpmadd 25,23,3,25 + fxcsmadd 26,23,3,26 + fxcpmadd 27,20,11,27 ; xor 29,29,12 + fxcsmadd 28,20,11,28 ; sthbrx 29,7,1 + fxcpmadd 29,21,11,29 ; srawi 29,29,16 + + fxcpmadd 24,22,4,24 ; sthbrx 29,11,1 + fxcsmadd 25,22,4,25 + fxcpmadd 26,23,4,26 + fxcsmadd 27,23,4,27 + fpsub 0,30,31 + fxcpmadd 28,20,12,28 ; lfpdx 30,8,1 + fxcsmadd 29,20,12,29 + + fxcsmadd 24,21,5,24 + fxcpmadd 25,22,5,25 + fxcsmadd 26,22,5,26 ; lwzux 29,5,10 + fxcpmadd 27,23,5,27 + fxcsmadd 28,23,5,28 + fxcpmadd 29,20,13,29 + + fxcpmadd 24,21,6,24 ; xor 30,30,12 + fxcsmadd 25,21,6,25 ; sthbrx 30,7,1 + fxcpmadd 26,22,6,26 ; srawi 30,30,16 + fxcsmadd 27,22,6,27 ; sthbrx 30,11,1 + fxcpmadd 28,23,6,28 + fxcsmadd 29,23,6,29 + + fxcsmadd 24,20,7,24 + fxcpmadd 25,21,7,25 + fpsub 1,30,31 + fxcsmadd 26,21,7,26 ; lfpdx 30,8,1 + fxcpmadd 27,22,7,27 + fxcsmadd 28,22,7,28 + fxcpmadd 29,23,7,29 ; lwzux 30,5,10 + + fxcpmadd 24,20,8,24 + fxcsmadd 25,20,8,25 + fxcpmadd 26,21,8,26 + fxcsmadd 27,21,8,27 + fxcpmadd 28,22,8,28 + fxcsmadd 29,22,8,29 ; xor 31,31,12 + + fxcpmadd 24,17,14,24 ; sthbrx 31,7,1 + fxcsmadd 25,17,14,25 + fxcpmadd 26,18,14,26 + fxcsmadd 27,18,14,27 ; srawi 31,31,16 + fxcpmadd 28,19,14,28 ; sthbrx 31,11,1 + fxcsmadd 29,19,14,29 + + fxcpmadd 24,16,0,24 + fxcsmadd 25,16,0,25 + fpsub 2,30,31 + fxcpmadd 26,17,0,26 ; lfpdx 30,8,1 + fxcsmadd 27,17,0,27 + fxcpmadd 28,18,0,28 + fxcsmadd 29,18,0,29 ; lwzux 31,5,10 + + fpsub 3,30,31 ; xor 28,28,12 + fxcsmadd 24,19,9,24 ; sthbrx 28,7,1 + fxcpmadd 25,16,1,25 + fxcsmadd 26,16,1,26 + fxcpmadd 27,17,1,27 ; srawi 28,28,16 + fxcsmadd 28,17,1,28 ; sthbrx 28,11,1 + fxcpmadd 29,18,1,29 + + fxcpmadd 24,19,10,24 + fxcsmadd 25,19,10,25 + fxcpmadd 26,16,2,26 ; lfpdx 30,8,1 + fxcsmadd 27,16,2,27 + fxcpmadd 28,17,2,28 + fxcsmadd 29,17,2,29 ; lwzux 28,5,10 + + fpsub 4,30,31 ; xor 29,29,12 + fxcsmadd 24,18,11,24 ; sthbrx 29,7,1 + fxcpmadd 25,19,11,25 + fxcsmadd 26,19,11,26 + fxcpmadd 27,16,3,27 ; srawi 29,29,16 + fxcsmadd 28,16,3,28 ; 
sthbrx 29,11,1 + fxcpmadd 29,17,3,29 + + fxcpmadd 24,18,12,24 + fxcsmadd 25,18,12,25 + fxcpmadd 26,19,12,26 ; lfpdx 30,8,1 + fxcsmadd 27,19,12,27 + fxcpmadd 28,16,4,28 + fxcsmadd 29,16,4,29 ; lwzux 29,5,10 + + fpsub 5,30,31 + fxcsmadd 24,17,13,24 + fxcpmadd 25,18,13,25 + fxcsmadd 26,18,13,26 + fxcpmadd 27,19,13,27 + fxcsmadd 28,19,13,28 + fxcpmadd 29,16,5,29 + ; stfpsux 24,6,9 + ; stfpsux 25,6,9 + + + # time steps 6-10 + + fxsmul 24,21,11 ; stfpsux 26,6,9 + fxpmul 25,22,11 ; stfpsux 27,6,9 + fxsmul 26,22,11 ; stfpsux 28,6,9 + fxpmul 27,23,11 ; stfpsux 29,6,9 + fxsmul 28,23,11 + + fxcpmadd 24,17,4,24 + fxcsmadd 25,17,4,25 + fxcpmadd 26,18,4,26 + fxcsmadd 27,18,4,27 ; xor 30,30,12 + fxcpmadd 28,19,4,28 ; sthbrx 30,7,1 + + fxcsmadd 24,20,13,24 + fxcpmadd 25,21,13,25 + fxcsmadd 26,21,13,26 ; srawi 30,30,16 + fxcpmadd 27,22,13,27 ; sthbrx 30,11,1 + fxcsmadd 28,22,13,28 + + fxcsmadd 24,16,5,24 + fxcpmadd 25,17,5,25 ; lfpdx 30,8,1 + fxcsmadd 26,17,5,26 + fxcpmadd 27,18,5,27 ; lwzux 30,5,10 + fxcsmadd 28,18,5,28 + + fxcpmadd 24,20,14,24 ; xor 31,31,12 + fpsub 6,30,31 ; sthbrx 31,7,1 + fxcsmadd 25,20,14,25 + fxcpmadd 26,21,14,26 + fxcsmadd 27,21,14,27 ; srawi 31,31,16 + fxcpmadd 28,22,14,28 ; sthbrx 31,11,1 + + fxcpmadd 24,16,6,24 + fxcsmadd 25,16,6,25 + fxcpmadd 26,17,6,26 ; lfpdx 30,8,1 + fxcsmadd 27,17,6,27 + fxcpmadd 28,18,6,28 ; lwzux 31,5,10 + + fxcsmadd 24,23,7,24 + fxcpmadd 25,20,15,25 ; xor 28,28,12 + fpsub 7,30,31 ; sthbrx 28,7,1 + fxcsmadd 26,20,15,26 + fxcpmadd 27,21,15,27 + fxcsmadd 28,21,15,28 ; srawi 28,28,16 + + fxcsmadd 24,19,15,24 ; sthbrx 28,11,1 + fxcpmadd 25,16,7,25 + fxcsmadd 26,16,7,26 + fxcpmadd 27,17,7,27 ; lfpdx 30,8,1 + fxcsmadd 28,17,7,28 + + fxcpmadd 24,23,8,24 ; lwzux 28,5,10 + fxcsmadd 25,23,8,25 + fxcpmadd 26,20,0,26 ; xor 29,29,12 + fpsub 8,30,31 ; sthbrx 29,7,1 + fxcsmadd 27,20,0,27 + fxcpmadd 28,21,0,28 + + fxcpmadd 24,19,0,24 ; srawi 29,29,16 + fxcsmadd 25,19,0,25 ; sthbrx 29,11,1 + fxcpmadd 26,16,8,26 + fxcsmadd 27,16,8,27 + fxcpmadd 
28,17,8,28 ; lfpdx 30,8,1 + + fxcsmadd 24,17,3,24 + fxcpmadd 25,18,3,25 + fxcsmadd 26,18,3,26 + fxcpmadd 27,19,3,27 + fxcsmadd 28,19,3,28 + + fxcsmadd 24,22,9,24 + fxcpmadd 25,23,9,25 ; lwzux 29,5,10 + fxcsmadd 26,23,9,26 + fxcpmadd 27,20,1,27 ; xor 30,30,12 + fpsub 9,30,31 ; sthbrx 30,7,1 + fxcsmadd 28,20,1,28 + + fxcsmadd 24,18,1,24 + fxcpmadd 25,19,1,25 ; srawi 30,30,16 + fxcsmadd 26,19,1,26 ; sthbrx 30,11,1 + fxcpmadd 27,16,9,27 + fxcsmadd 28,16,9,28 + + fxcpmadd 24,21,12,24 + fxcsmadd 25,21,12,25 + fxcpmadd 26,22,12,26 + fxcsmadd 27,22,12,27 + fxcpmadd 28,23,12,28 + + fxcpmadd 24,22,10,24 ; lfpdx 30,8,1 + fxcsmadd 25,22,10,25 + fxcpmadd 26,23,10,26 ; lwzux 30,5,10 + fxcsmadd 27,23,10,27 + fxcpmadd 28,20,2,28 + fpsub 10,30,31 + + fxcpmadd 24,18,2,24 + fxcsmadd 25,18,2,25 + fxcpmadd 26,19,2,26 + fxcsmadd 27,19,2,27 + fxcpmadd 28,16,10,28 + + + # time steps 11-15 + ; stfpsux 24,6,9 + fxsmul 24,21,0 ; stfpsux 25,6,9 + fxpmul 25,22,0 ; stfpsux 26,6,9 + fxsmul 26,22,0 ; stfpsux 27,6,9 + fxpmul 27,23,0 ; stfpsux 28,6,9 + fxsmul 28,23,0 + + fxcpmadd 24,17,9,24 + fxcsmadd 25,17,9,25 + fxcpmadd 26,18,9,26 + fxcsmadd 27,18,9,27 ; xor 31,31,12 + fxcpmadd 28,19,9,28 ; sthbrx 31,7,1 + + fxcsmadd 24,20,2,24 + fxcpmadd 25,21,2,25 + fxcsmadd 26,21,2,26 ; srawi 31,31,16 + fxcpmadd 27,22,2,27 ; sthbrx 31,11,1 + fxcsmadd 28,22,2,28 + + fxcsmadd 24,16,10,24 + fxcpmadd 25,17,10,25 ; lfpdx 30,8,1 + fxcsmadd 26,17,10,26 + fxcpmadd 27,18,10,27 ; lwzux 31,5,10 + fxcsmadd 28,18,10,28 + + fxcpmadd 24,20,3,24 ; xor 28,28,12 + fpsub 11,30,31 ; sthbrx 28,7,1 + fxcsmadd 25,20,3,25 + fxcpmadd 26,21,3,26 + fxcsmadd 27,21,3,27 ; srawi 28,28,16 + fxcpmadd 28,22,3,28 ; sthbrx 28,11,1 + + fxcpmadd 24,16,11,24 + fxcsmadd 25,16,11,25 + fxcpmadd 26,17,11,26 ; lfpdx 30,8,1 + fxcsmadd 27,17,11,27 + fxcpmadd 28,18,11,28 ; lwzux 28,5,10 + + fxcsmadd 24,23,12,24 + fxcpmadd 25,20,4,25 ; xor 29,29,12 + fpsub 12,30,31 ; sthbrx 29,7,1 + fxcsmadd 26,20,4,26 + fxcpmadd 27,21,4,27 + fxcsmadd 28,21,4,28 ; srawi 
29,29,16 + + fxcsmadd 24,19,4,24 ; sthbrx 29,11,1 + fxcpmadd 25,16,12,25 + fxcsmadd 26,16,12,26 + fxcpmadd 27,17,12,27 ; lfpdx 30,8,1 + fxcsmadd 28,17,12,28 + + fxcpmadd 24,23,13,24 ; lwzux 29,5,10 + fxcsmadd 25,23,13,25 + fxcpmadd 26,20,5,26 ; xor 30,30,12 + fpsub 13,30,31 ; sthbrx 30,7,1 + fxcsmadd 27,20,5,27 + fxcpmadd 28,21,5,28 + + fxcsmadd 24,17,8,24 + fxcpmadd 25,18,8,25 + fxcsmadd 26,18,8,26 + fxcpmadd 27,19,8,27 + fxcsmadd 28,19,8,28 + + fxcpmadd 24,19,5,24 ; srawi 30,30,16 + fxcsmadd 25,19,5,25 ; sthbrx 30,11,1 + fxcpmadd 26,16,13,26 + fxcsmadd 27,16,13,27 + fxcpmadd 28,17,13,28 ; lfpdx 30,8,1 + + fxcsmadd 24,22,14,24 + fxcpmadd 25,23,14,25 ; lwzux 30,5,10 + fxcsmadd 26,23,14,26 + fxcpmadd 27,20,6,27 ; xor 31,31,12 + fpsub 14,30,31 ; sthbrx 31,7,1 + fxcsmadd 28,20,6,28 + + fxcsmadd 24,18,6,24 + fxcpmadd 25,19,6,25 ; srawi 31,31,16 + fxcsmadd 26,19,6,26 ; sthbrx 31,11,1 + fxcpmadd 27,16,14,27 + fxcsmadd 28,16,14,28 + + fxcpmadd 24,21,1,24 + fxcsmadd 25,21,1,25 + fxcpmadd 26,22,1,26 + fxcsmadd 27,22,1,27 + fxcpmadd 28,23,1,28 + + fxcpmadd 24,22,15,24 ; lfpdx 30,8,1 + fxcsmadd 25,22,15,25 + fxcpmadd 26,23,15,26 ; lwzux 31,5,10 + fxcsmadd 27,23,15,27 + fxcpmadd 28,20,7,28 + fpsub 15,30,31 + + fxcpmadd 24,18,7,24 + fxcsmadd 25,18,7,25 + fxcpmadd 26,19,7,26 + fxcsmadd 27,19,7,27 + fxcpmadd 28,16,15,28 + + fxpmul 29,19,15 ; stfpsux 24,6,9 + fxsmul 24,16,15 ; stfpsux 25,6,9 + fxpmul 25,17,15 ; stfpsux 26,6,9 + fxsmul 26,17,15 ; stfpsux 27,6,9 + fxpmul 27,18,15 ; stfpsux 28,6,9 + fxsmul 28,18,15 + + bdnz 0b + +#if 0 + addi 3,3,-120 # store delay line + stfpsux 1,3,9 + stfpsux 2,3,9 + stfpsux 3,3,9 + stfpsux 4,3,9 + stfpsux 5,3,9 + stfpsux 6,3,9 + stfpsux 7,3,9 + stfpsux 8,3,9 + stfpsux 9,3,9 + stfpsux 10,3,9 + stfpsux 11,3,9 + stfpsux 12,3,9 + stfpsux 13,3,9 + stfpsux 14,3,9 + stfpsux 15,3,9 +#endif + + li 9,DCOMPLEX_SIZE # restore call-saved registers + lmw 28,16(1) # restore r28 ... 
r31 + + addi 1,1,48 + lfpdx 31,0,1 + lfpdux 30,1,9 + lfpdux 29,1,9 + lfpdux 28,1,9 + lfpdux 27,1,9 + lfpdux 26,1,9 + lfpdux 25,1,9 + lfpdux 24,1,9 + lfpdux 23,1,9 + lfpdux 22,1,9 + lfpdux 21,1,9 + lfpdux 20,1,9 + lfpdux 19,1,9 + lfpdux 18,1,9 + lfpdux 17,1,9 + lfpdux 16,1,9 + lfpdux 15,1,9 + lfpdux 14,1,9 + + addi 1,1,16 # restore stack pointer + blr # return +#endif + + +.align 5 + +#if defined HAVE_BGP +zero: .long 0,0 # eight zero bytes, loaded into both halves of f0 +#endif + +.global _memzero # void _memzero(void *dst, size_t bytes); r3 = dst, r4 = bytes +_memzero: # clears bytes / 128 blocks of 128 bytes; a remainder is left untouched +#if defined HAVE_BGP + lis 5,zero@ha ; srwi 4,4,7 # r4 = bytes / 128 = number of iterations + addi 5,5,zero@l ; mtctr 4 + lfpsx 0,0,5 ; li 8,16 # f0 = 0; r8 = distance between stores + subi 3,3,16 # the stores pre-increment r3 + +0: stfpdux 0,3,8 # 8 stores of 16 bytes per iteration + stfpdux 0,3,8 + stfpdux 0,3,8 + stfpdux 0,3,8 + stfpdux 0,3,8 + stfpdux 0,3,8 + stfpdux 0,3,8 + stfpdux 0,3,8 + bdnz 0b # NOTE: with bytes < 128 CTR starts at 0 and bdnz, which decrements first, wraps around +#else + srwi 4,4,7 # r4 = bytes / 128 = number of iterations + mtctr 4 + li 4,32 + li 5,64 + li 6,96 + li 7,128 + +0: dcbz 0,3 # zero the blocks at r3, r3 + 32, r3 + 64 and r3 + 96 (assumes 32-byte cache blocks - TODO confirm) + dcbz 3,4 + dcbz 3,5 + dcbz 3,6 + add 3,3,7 # advance 128 bytes + bdnz 0b +#endif + blr + + +.align 5 +.global _fast_memcpy # void _fast_memcpy(void *dst, const void *src, size_t bytes); r3 = dst, r4 = src, r5 = bytes +_fast_memcpy: # copies bytes / 512 blocks of 512 bytes through f0-f31; a remainder is not copied + + li 8,-16 + stfpdux 14,1,8 # save f14-f31, which are overwritten by the copy loop + stfpdux 15,1,8 + stfpdux 16,1,8 + stfpdux 17,1,8 + stfpdux 18,1,8 + stfpdux 19,1,8 + stfpdux 20,1,8 + stfpdux 21,1,8 + stfpdux 22,1,8 + stfpdux 23,1,8 + stfpdux 24,1,8 + stfpdux 25,1,8 + stfpdux 26,1,8 + stfpdux 27,1,8 + stfpdux 28,1,8 + stfpdux 29,1,8 + stfpdux 30,1,8 + stfpdux 31,1,8 + + srwi 5,5,9 # r5 = bytes / 512 = number of iterations + mtctr 5 + li 8,16 + + sub 3,3,8 # loads and stores pre-increment their pointer by 16 + sub 4,4,8 + +0: + lfpdux 0,4,8 # load 32 x 16 bytes ... + lfpdux 1,4,8 + lfpdux 2,4,8 + lfpdux 3,4,8 + lfpdux 4,4,8 + lfpdux 5,4,8 + lfpdux 6,4,8 + lfpdux 7,4,8 + lfpdux 8,4,8 + lfpdux 9,4,8 + lfpdux 10,4,8 + lfpdux 11,4,8 + lfpdux 12,4,8 + lfpdux 13,4,8 + lfpdux 14,4,8 + lfpdux 15,4,8 + lfpdux 16,4,8 + lfpdux 17,4,8 + lfpdux 18,4,8 + lfpdux 19,4,8 + lfpdux 20,4,8 + lfpdux 21,4,8 + lfpdux 22,4,8 + lfpdux 23,4,8 + lfpdux 24,4,8 + lfpdux 25,4,8 + lfpdux 26,4,8 + lfpdux 27,4,8 + lfpdux 28,4,8 + lfpdux 29,4,8 + lfpdux 30,4,8 + lfpdux 31,4,8 + stfpdux 0,3,8 # ... and store them + stfpdux 1,3,8 + stfpdux 2,3,8 + stfpdux 3,3,8 + stfpdux 4,3,8 + stfpdux 5,3,8 + stfpdux 6,3,8 + stfpdux 7,3,8 + stfpdux 8,3,8 + stfpdux 9,3,8 + stfpdux 10,3,8 + 
stfpdux 11,3,8 + stfpdux 12,3,8 + stfpdux 13,3,8 + stfpdux 14,3,8 + stfpdux 15,3,8 + stfpdux 16,3,8 + stfpdux 17,3,8 + stfpdux 18,3,8 + stfpdux 19,3,8 + stfpdux 20,3,8 + stfpdux 21,3,8 + stfpdux 22,3,8 + stfpdux 23,3,8 + stfpdux 24,3,8 + stfpdux 25,3,8 + stfpdux 26,3,8 + stfpdux 27,3,8 + stfpdux 28,3,8 + stfpdux 29,3,8 + stfpdux 30,3,8 + stfpdux 31,3,8 + bdnz 0b # NOTE: with bytes < 512 CTR starts at 0 and bdnz, which decrements first, wraps around + + lfpdx 31,0,1 # restore f31 ... f14 + lfpdux 30,1,8 + lfpdux 29,1,8 + lfpdux 28,1,8 + lfpdux 27,1,8 + lfpdux 26,1,8 + lfpdux 25,1,8 + lfpdux 24,1,8 + lfpdux 23,1,8 + lfpdux 22,1,8 + lfpdux 21,1,8 + lfpdux 20,1,8 + lfpdux 19,1,8 + lfpdux 18,1,8 + lfpdux 17,1,8 + lfpdux 16,1,8 + lfpdux 15,1,8 + lfpdux 14,1,8 + addi 1,1,16 # stack pointer is back at its entry value + blr +#endif diff --git a/RTCP/CNProc/src/FIR_Asm.h b/RTCP/CNProc/src/FIR_Asm.h new file mode 100644 index 0000000000000000000000000000000000000000..75b0c94eb48e10bf530ca5d64f686513c55eddbd --- /dev/null +++ b/RTCP/CNProc/src/FIR_Asm.h @@ -0,0 +1,78 @@ +//# FIR_Asm.h: declarations of the CN assembly routines +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +#ifndef LOFAR_CNPROC_FIR_ASM_H +#define LOFAR_CNPROC_FIR_ASM_H + +#if defined HAVE_BGL || defined HAVE_BGP +#include <Interface/Config.h> + +#if defined HAVE_BGL +#include <rts.h> +#endif + +namespace LOFAR { +namespace RTCP { + +struct phase_shift { + dcomplex v0, dv; +}; + + +template <typename SAMPLE_TYPE> extern void _filter(unsigned nrChannels, // an assembly implementation exists for std::complex<short> samples + const float weights[NR_TAPS], + const SAMPLE_TYPE samples[], + fcomplex out[], + int nr_samples_div_16); // number of samples / 16; the assembly uses it as loop count without a zero check, so it must be > 0 + +extern "C" { + void _transpose_4x8(fcomplex *out, + const fcomplex *in, + int length, + int input_stride, + int output_stride); + + void _phase_shift_and_transpose(fcomplex *out, + const fcomplex *in, + const struct phase_shift *, + int stride, + unsigned nrChannels); + + void _fast_memcpy(void *dst, const void *src, size_t bytes); // copies bytes / 512 blocks of 512 bytes: bytes must be a nonzero multiple of 512 + void _memzero(void *dst, size_t bytes); // bytes must be a nonzero multiple of 128 + void _prefetch(const void *src, size_t count, size_t stride); + + extern struct { + unsigned nr_taps; + unsigned nr_polarizations; + } _FIR_constants_used; + +#if defined HAVE_BGL + void _cn_mutex_lock(CN_Mutex *), _cn_mutex_unlock(CN_Mutex *); +#endif + + unsigned long long _rdtsc(); +} + +} // namespace RTCP +} // namespace LOFAR + +#endif // HAVE_BGL || HAVE_BGP +#endif // LOFAR_CNPROC_FIR_ASM_H diff --git a/RTCP/CNProc/src/FilteredData.h b/RTCP/CNProc/src/FilteredData.h new file mode 100644 index 0000000000000000000000000000000000000000..f383e000e87bc33b696d6f2db6050de2a722ef9a --- /dev/null +++ b/RTCP/CNProc/src/FilteredData.h @@ -0,0 +1,59 @@ +#ifndef LOFAR_CNPROC_FILTERED_DATA_H +#define LOFAR_CNPROC_FILTERED_DATA_H + +#include <Common/lofar_complex.h> +#include <Interface/Allocator.h> +#include <Interface/Config.h> +#include <Interface/SparseSet.h> + +#include <boost/multi_array.hpp> + + +namespace LOFAR { +namespace 
RTCP { + +class FilteredData // a 4-D block of fcomplex samples, allocated from an Arena, plus one SparseSet of flags per station +{ + public: + FilteredData(const Arena &, unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration); + ~FilteredData(); // NOTE(review): copying is not disabled; a copy would release samples and flags a second time + + static size_t requiredSize(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration); // number of bytes that the constructor allocates for samples + + private: + SparseSetAllocator allocator; + + public: + // The "| 2" significantly improves transpose speeds for particular + // numbers of stations due to cache conflict effects. The extra memory + // is not used. + boost::multi_array_ref<fcomplex, 4> samples; //[nrChannels][nrStations][nrSamplesPerIntegration | 2][NR_POLARIZATIONS] CACHE_ALIGNED + SparseSet<unsigned> *flags; //[nrStations], allocated with new [] in the constructor +}; + + +inline size_t FilteredData::requiredSize(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration) +{ + return sizeof(fcomplex) * nrChannels * nrStations * (nrSamplesPerIntegration | 2) * NR_POLARIZATIONS; // must match the extents used in the constructor +} + + +inline FilteredData::FilteredData(const Arena &arena, unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration) +: + allocator(arena), + samples(static_cast<fcomplex *>(allocator.allocate(requiredSize(nrStations, nrChannels, nrSamplesPerIntegration), 32)), boost::extents[nrChannels][nrStations][nrSamplesPerIntegration | 2][NR_POLARIZATIONS]), + flags(new SparseSet<unsigned>[nrStations]) +{ +} + + +inline FilteredData::~FilteredData() +{ + allocator.deallocate(samples.origin()); // origin() is the pointer that allocate() returned in the constructor + delete [] flags; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/InputData.h b/RTCP/CNProc/src/InputData.h new file mode 100644 index 0000000000000000000000000000000000000000..7df246199a92a36ea3792c59dde8c2237ed9c756 --- /dev/null +++ b/RTCP/CNProc/src/InputData.h @@ -0,0 +1,109 @@ +#ifndef LOFAR_CNPROC_INPUT_DATA_H +#define LOFAR_CNPROC_INPUT_DATA_H + +#include <Common/lofar_complex.h> +#include <Common/DataConvert.h> +#include <Interface/AlignedStdAllocator.h> +#include <Interface/Config.h> +#include 
<Interface/SubbandMetaData.h> +#include <Stream/Stream.h> + +#include <Interface/Allocator.h> + +#include <boost/multi_array.hpp> + +#include <vector> + + +namespace LOFAR { +namespace RTCP { + +template <typename SAMPLE_TYPE> class InputData // samples and metadata of nrSubbands subbands, with the sample storage allocated from an Arena +{ + public: + InputData(const Arena &, unsigned nrSubbands, unsigned nrSamplesToCNProc); + ~InputData(); // NOTE(review): copying is not disabled; a copy would release samples a second time + + void read(Stream *); // reads the metadata and then the samples of all subbands + + // used for asynchronous transpose + void readMetaData(Stream *str); // reads the metadata of all subbands + void readOne(Stream *str); // reads the samples of subband itsSubbandIndex and advances that index + + static size_t requiredSize(unsigned nrSubbands, unsigned nrSamplesToCNProc); // number of bytes that the constructor allocates for samples + + private: + SparseSetAllocator allocator; + unsigned itsNrSubbands; + unsigned itsSubbandIndex; // subband that the next readOne() fills; wraps around to 0 + + public: + boost::multi_array_ref<SAMPLE_TYPE, 3> samples; //[nrSubbands][nrSamplesToCNProc][NR_POLARIZATIONS] (originally documented as [outputPsets.size()][itsPS->nrSamplesToCNProc()][NR_POLARIZATIONS]) + + std::vector<SubbandMetaData, AlignedStdAllocator<SubbandMetaData, 16> > metaData; //[nrSubbands] (originally documented as [outputPsets.size()]) +}; + + +template <typename SAMPLE_TYPE> inline size_t InputData<SAMPLE_TYPE>::requiredSize(unsigned nrSubbands, unsigned nrSamplesToCNProc) +{ + return sizeof(SAMPLE_TYPE) * nrSubbands * nrSamplesToCNProc * NR_POLARIZATIONS; // must match the extents used in the constructor +} + + +template <typename SAMPLE_TYPE> inline InputData<SAMPLE_TYPE>::InputData(const Arena &arena, unsigned nrSubbands, unsigned nrSamplesToCNProc) +: + allocator(arena), + itsNrSubbands(nrSubbands), + itsSubbandIndex(0), + samples(static_cast<SAMPLE_TYPE *>(allocator.allocate(requiredSize(nrSubbands, nrSamplesToCNProc), 32)), boost::extents[nrSubbands][nrSamplesToCNProc][NR_POLARIZATIONS]), + metaData(nrSubbands) +{ +} + + +template <typename SAMPLE_TYPE> inline InputData<SAMPLE_TYPE>::~InputData() +{ + allocator.deallocate(samples.origin()); // origin() is the pointer that allocate() returned in the constructor +} + + +// used for asynchronous transpose +template <typename SAMPLE_TYPE> inline void InputData<SAMPLE_TYPE>::readMetaData(Stream *str) +{ + // read all metadata (NOTE(review): &metaData[0] requires nrSubbands > 0) + str->read(&metaData[0], metaData.size() * sizeof(SubbandMetaData)); +} + +// used for asynchronous transpose +template <typename SAMPLE_TYPE> inline void 
InputData<SAMPLE_TYPE>::readOne(Stream *str) +{ + str->read(samples[itsSubbandIndex].origin(), samples[itsSubbandIndex].num_elements() * sizeof(SAMPLE_TYPE)); // receive the samples of one subband + +#if defined C_IMPLEMENTATION && defined WORDS_BIGENDIAN + dataConvert(LittleEndian, samples[itsSubbandIndex].origin(), samples[itsSubbandIndex].num_elements()); // byte swap just that subband +#endif + + itsSubbandIndex++; + if(itsSubbandIndex == itsNrSubbands) { // we have read all data + itsSubbandIndex = 0; + } +} + +template <typename SAMPLE_TYPE> inline void InputData<SAMPLE_TYPE>::read(Stream *str) +{ + // read all metadata + str->read(&metaData[0], metaData.size() * sizeof(SubbandMetaData)); + + // now read all subbands using one recvBlocking call, even though the ION + // sends all subbands one at a time + str->read(samples.origin(), samples.num_elements() * sizeof(SAMPLE_TYPE)); + +#if defined C_IMPLEMENTATION && defined WORDS_BIGENDIAN + dataConvert(LittleEndian, samples.origin(), samples.num_elements()); // byte swap everything that was just received, not only subband itsSubbandIndex +#endif +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/LocationInfo.cc b/RTCP/CNProc/src/LocationInfo.cc new file mode 100644 index 0000000000000000000000000000000000000000..035a3131b7fc9294a8759406f3f01e8fdd0897eb --- /dev/null +++ b/RTCP/CNProc/src/LocationInfo.cc @@ -0,0 +1,144 @@ +#include <lofar_config.h> + +#include <LocationInfo.h> + +#include <Interface/CN_Mapping.h> +#include <Interface/PrintVector.h> + +#if defined HAVE_BGP +#include <common/bgp_personality_inlines.h> +#include <spi/kernel_interface.h> +#endif + + +#include <iostream> + + +namespace LOFAR { +namespace RTCP { + +LocationInfo::LocationInfo() +{ +#if defined HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, reinterpret_cast<int *>(&itsRank)); + MPI_Comm_size(MPI_COMM_WORLD, reinterpret_cast<int *>(&itsNrNodes)); +#else + itsRank = 0; + itsNrNodes = 1; +#endif + +#if defined HAVE_BGP || defined HAVE_BGL + getPersonality(); +#endif +} + + +#if defined HAVE_BGP + +void 
LocationInfo::getPersonality() +{ + if (Kernel_GetPersonality(&itsPersonality, sizeof itsPersonality) != 0) { + std::cerr << "could not get personality" << std::endl; + exit(1); + } + + if (itsRank == 0) + std::clog << "topology = (" + << BGP_Personality_xSize(&itsPersonality) << ',' + << BGP_Personality_ySize(&itsPersonality) << ',' + << BGP_Personality_zSize(&itsPersonality) << "), torus wraparound = (" + << (BGP_Personality_isTorusX(&itsPersonality) ? 'T' : 'F') << ',' + << (BGP_Personality_isTorusY(&itsPersonality) ? 'T' : 'F') << ',' + << (BGP_Personality_isTorusZ(&itsPersonality) ? 'T' : 'F') << ')' + << std::endl; + + itsPsetNumbers.resize(itsNrNodes); + itsPsetNumber = BGP_Personality_psetNum(&itsPersonality); + itsPsetNumbers[itsRank] = itsPsetNumber; + + for (unsigned core = 0; core < itsNrNodes; core ++) + MPI_Bcast(&itsPsetNumbers[core], 1, MPI_INT, core, MPI_COMM_WORLD); + + itsRankInPset = 0; + + for (unsigned rank = 0; rank < itsRank; rank ++) + if (itsPsetNumbers[rank] == itsPsetNumber) + ++ itsRankInPset; + + //usleep(100000 * itsRank); + + if (itsRank == 0) { + std::vector<std::vector<unsigned> > cores(BGP_Personality_numIONodes(&itsPersonality)); + + for (unsigned rank = 0; rank < itsPsetNumbers.size(); rank ++) + cores[itsPsetNumbers[rank]].push_back(rank); + + for (unsigned pset = 0; pset < BGP_Personality_numIONodes(&itsPersonality); pset ++) + std::clog << "pset " << pset << " contains cores " << cores[pset] << std::endl; + } +} + +#endif + +#if defined HAVE_BGL + +void LocationInfo::getPersonality() +{ + if (rts_get_personality(&itsPersonality, sizeof(itsPersonality)) != 0) { + std::cerr << "could not get personality" << std::endl; + exit(1); + } + + if (itsRank= 0) + std::clog << "topology = (" + << itsPersonality.getXsize() << ',' + << itsPersonality.getYsize() << ',' + << itsPersonality.getZsize() << "), torus wraparound = (" + << (itsPersonality.isTorusX() ? 'T' : 'F') << ',' + << (itsPersonality.isTorusY() ? 
'T' : 'F') << ',' + << (itsPersonality.isTorusZ() ? 'T' : 'F') << ')' + << std::endl; + + itsPsetNumbers.resize(itsNrNodes); + itsPsetNumber = itsPersonality.getPsetNum(); + itsPsetNumbers[itsRank] = itsPsetNumber; + + for (unsigned core = 0; core < itsNrNodes; core ++) + MPI_Bcast(&itsPsetNumbers[core], 1, MPI_UNSIGNED, core, MPI_COMM_WORLD); // each rank in turn publishes its own pset number; elements are unsigned + + itsRankInPset = 0; + + for (unsigned rank = 0; rank < itsRank; rank ++) + if (itsPsetNumbers[rank] == itsPsetNumber) + ++ itsRankInPset; // count the lower ranks that share this pset + + //usleep(100000 * itsRank); + + if (itsRank == 0) { + std::vector<std::vector<unsigned> > cores(itsPersonality.numIONodes()); + + for (unsigned rank = 0; rank < itsPsetNumbers.size(); rank ++) + cores[itsPsetNumbers[rank]].push_back(rank); + + for (unsigned pset = 0; pset < cores.size(); pset ++) // bounded by the vector sized above, so cores[pset] is always valid + std::clog << "LocationInfo :: pset " << pset << " contains cores " << cores[pset] << std::endl; + } +} + +#endif + +#if defined HAVE_BGL || defined HAVE_BGP + +unsigned LocationInfo::remapOnTree(unsigned pset, unsigned core) const +{ + core = CN_Mapping::mapCoreOnPset(core, pset); + + for (unsigned rank = 0;; rank ++) // NOTE(review): unbounded scan; assumes the requested (pset, core) exists in itsPsetNumbers + if (itsPsetNumbers[rank] == pset && core -- == 0) // skip "core" matching ranks, return the next one + return rank; +} + +#endif + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/LocationInfo.h b/RTCP/CNProc/src/LocationInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..c1722aa77f2bba7ad4df8426eb14edfffb42d6bc --- /dev/null +++ b/RTCP/CNProc/src/LocationInfo.h @@ -0,0 +1,84 @@ +#ifndef LOFAR_CNPROC_LOCATION_INFO_H +#define LOFAR_CNPROC_LOCATION_INFO_H + +#include <vector> + +#if defined HAVE_BGP +// we do not need mpi.h here, but including it after bgp_personality.h leads +// to compilation errors +#define MPICH_IGNORE_CXX_SEEK +#include <mpi.h> + +#include <common/bgp_personality.h> +#endif + +#if defined HAVE_BGL +#include <bglpersonality.h> +#include <rts.h> +#endif + + +namespace LOFAR { +namespace RTCP { + +class LocationInfo +{ + public: + LocationInfo(); + +#if defined
HAVE_BGP || defined HAVE_BGL + unsigned remapOnTree(unsigned pset, unsigned core) const; +#endif + + unsigned rank() const; + unsigned nrNodes() const; + unsigned psetNumber() const; + unsigned rankInPset() const; + + private: +#if defined HAVE_BGP || defined HAVE_BGL + void getPersonality(); +#endif + +#if defined HAVE_BGP + _BGP_Personality_t itsPersonality; + std::vector<unsigned> itsPsetNumbers; +#endif + +#if defined HAVE_BGL + CNPersonality itsPersonality; + std::vector<unsigned> itsPsetNumbers; +#endif + + unsigned itsPsetNumber, itsRankInPset; + unsigned itsRank, itsNrNodes; +}; + + +inline unsigned LocationInfo::rank() const +{ + return itsRank; +} + + +inline unsigned LocationInfo::nrNodes() const +{ + return itsNrNodes; +} + + +inline unsigned LocationInfo::psetNumber() const +{ + return itsPsetNumber; +} + + +inline unsigned LocationInfo::rankInPset() const +{ + return itsRankInPset; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/Makefile.am b/RTCP/CNProc/src/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..0f124e0c6ff4274b05ff5a0148e9d36673469765 --- /dev/null +++ b/RTCP/CNProc/src/Makefile.am @@ -0,0 +1,58 @@ +INSTHDRS =\ +Package__Version.h \ +BandPass.h \ +LocationInfo.h \ +CorrelatorAsm.h \ +FFT_Asm.h \ +FIR_Asm.h \ +InputData.h \ +FilteredData.h \ +TransposedData.h \ +FIR.h \ +PPF.h \ +Transpose.h \ +AsyncTranspose.h \ +Correlator.h \ +CN_Processing.h \ +FCNP_ClientStream.h \ +AsyncCommunication.h + +NOINSTHDRS = + +TCCHDRS = + +AM_CPPFLAGS = $(EXTRA_CPPFLAGS) + +pkginclude_HEADERS = $(INSTHDRS) $(TCCHDRS) + +noinst_HEADERS = $(NOINSTHDRS) + +DOCHDRS = $(INSTHDRS) $(NOINSTHDRS) + +bin_PROGRAMS = CN_Processing + +CCASFLAGS = $(patsubst -q%,,$(CPPFLAGS)) $(EXTRA_CPPFLAGS) + +CN_Processing_SOURCES = $(DOCHDRS) \ +Package__Version.cc \ +BandPass.cc \ +LocationInfo.cc \ +CorrelatorAsm.S \ +FIR_Asm.S \ +FFT_Asm.S \ +FIR.cc \ +Transpose.cc \ +AsyncTranspose.cc \ +PPF.cc \ 
+Correlator.cc \ +CN_Processing_main.cc \ +CN_Processing.cc \ +FCNP_ClientStream.cc \ +AsyncCommunication.cc + +configfilesdir=$(bindir) +configfiles_DATA = \ +CN_Processing.machinefile \ +CNProc.log_prop + +include $(top_srcdir)/Makefile.common diff --git a/RTCP/CNProc/src/PPF.cc b/RTCP/CNProc/src/PPF.cc new file mode 100644 index 0000000000000000000000000000000000000000..1b0887a64601c838d87e114520e8ecfc424339bf --- /dev/null +++ b/RTCP/CNProc/src/PPF.cc @@ -0,0 +1,389 @@ +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +//# Includes +#include <PPF.h> +#include <FFT_Asm.h> +#include <FIR_Asm.h> + +#include <Interface/Align.h> +#include <Interface/AlignedStdAllocator.h> + +#include <Common/DataConvert.h> +#include <Common/Timer.h> + +#include <complex> +#include <cmath> +#include <stdexcept> + + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MASS + +extern "C" +{ + // the return conventions for std::complex<double> and double _Complex differ! + double _Complex cosisin(double); +} + +#else + +inline static dcomplex cosisin(double x) +{ + return makedcomplex(cos(x), sin(x)); +} + +#endif + + +static NSTimer computeFlagsTimer("PPF::computeFlags()", true); +static NSTimer FIRtimer("PPF::FIRtimer", true); +static NSTimer FFTtimer("PPF::FFT", true); +static NSTimer PPFtimer("PPF::filter()", true); + + +template <typename SAMPLE_TYPE> PPF<SAMPLE_TYPE>::PPF(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration, double channelBandwidth, bool delayCompensation) +: + itsNrStations(nrStations), + itsNrSamplesPerIntegration(nrSamplesPerIntegration), + itsNrChannels(nrChannels), + itsChannelBandwidth(channelBandwidth), + itsDelayCompensation(delayCompensation), + +#if defined PPF_C_IMPLEMENTATION + itsFIRs(boost::extents[nrStations][NR_POLARIZATIONS][nrChannels]), + itsFFTinData(boost::extents[NR_TAPS - 1 + nrSamplesPerIntegration][NR_POLARIZATIONS][nrChannels]) +#else + itsTmp(boost::extents[4][nrSamplesPerIntegration]), + 
itsFFTinData(boost::extents[nrSamplesPerIntegration][NR_POLARIZATIONS][nrChannels + 4]), + itsFFToutData(boost::extents[2][NR_POLARIZATIONS][nrChannels]) +#endif + +#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION +, mutex(rts_allocate_mutex()) +#endif +{ + if (!powerOfTwo(nrChannels)) + throw std::runtime_error("nrChannels must be a power of 2"); + + if (nrChannels != 256) + throw std::runtime_error("nrChannels != 256 not yet implemented"); + + for (itsLogNrChannels = 0; 1U << itsLogNrChannels != itsNrChannels; itsLogNrChannels ++) + ; + + init_fft(); + initConstantTable(); +} + + +template <> void PPF<i4complex>::initConstantTable() +{ + extern fcomplex _FIR_fp_table[16][16]; + + static const float map[] = { + 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, + -7.5, -6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, + }; + + for (unsigned i = 0; i < 16; i ++) + for (unsigned j = 0; j < 16; j ++) + _FIR_fp_table[i][j] = makefcomplex(map[j], map[i]); +} + + +template <> void PPF<i8complex>::initConstantTable() +{ + // This takes up pretty much space (.5 MB) + extern fcomplex _FIR_fp_table[256][256]; + + for (unsigned i = 0; i < 256; i ++) + for (unsigned j = 0; j < 256; j ++) + _FIR_fp_table[i][j] = makefcomplex((float) (signed char) i, (float) (signed char) j); +} + + +template <> void PPF<i16complex>::initConstantTable() +{ +#if 0 + extern float _FIR_fp_table[65536]; + + for (unsigned i = 0; i < 65536; i ++) + _FIR_fp_table[i] = (float) byteSwap((signed short) i); +#endif +} + + +template <typename SAMPLE_TYPE> PPF<SAMPLE_TYPE>::~PPF() +{ + destroy_fft(); +} + + +#if 0 && defined HAVE_BGL + +static void FFTtest() +{ + fftw_plan plan = fftw_create_plan(256, FFTW_FORWARD, FFTW_ESTIMATE); + + fcomplex in[256], fout[256], sout[256]; + + for (unsigned i = 0; i < 256; i ++) + in[i] = makefcomplex(2 * i, 2 * i + 1); + + fftw_one(plan, (fftw_complex *) in, (fftw_complex *) fout); + + _fft256(in, sout); + + for (unsigned i = 0; i < 256; i ++) { + fcomplex diff = fout[i] / sout[i]; + 
std::cout << i << " (" << real(fout[i]) << ',' << imag(fout[i]) << ") / (" << real(sout[i]) << ',' << imag(sout[i]) << ") = (" << real(diff) << ',' << imag(diff) << ")\n"; + } + + //std::exit(0); +} + +#endif + + +template <typename SAMPLE_TYPE> void PPF<SAMPLE_TYPE>::init_fft() +{ +#if defined HAVE_FFTW3 + std::vector<fftwf_complex, AlignedStdAllocator<fftwf_complex, 32> > cbuf1(itsNrChannels), cbuf2(itsNrChannels); + itsFFTWPlan = fftwf_plan_dft_1d(itsNrChannels, &cbuf1[0], &cbuf2[0], FFTW_FORWARD, FFTW_ESTIMATE); +#elif defined HAVE_FFTW2 + itsFFTWPlan = fftw_create_plan(itsNrChannels, FFTW_FORWARD, FFTW_ESTIMATE); +#endif + + //FFTtest(); +} + + +template <typename SAMPLE_TYPE> void PPF<SAMPLE_TYPE>::destroy_fft() +{ +#if defined HAVE_FFTW3 + fftwf_destroy_plan(itsFFTWPlan); +#elif defined HAVE_FFTW2 + fftw_destroy_plan(itsFFTWPlan); +#endif +} + + +template <typename SAMPLE_TYPE> void PPF<SAMPLE_TYPE>::computeFlags(unsigned stat, const TransposedData<SAMPLE_TYPE> *transposedData, FilteredData *filteredData) +{ + computeFlagsTimer.start(); + +// for (unsigned stat = 0; stat < itsNrStations; stat ++) { + filteredData->flags[stat].reset(); + SparseSet<unsigned> flags = transposedData->metaData[stat].getFlags(); + const SparseSet<unsigned>::Ranges &ranges = flags.getRanges(); + + for (SparseSet<unsigned>::const_iterator it = ranges.begin(); it != ranges.end(); it ++) { + unsigned begin = std::max(0, (signed) (it->begin >> itsLogNrChannels) - NR_TAPS + 1); + unsigned end = std::min(itsNrSamplesPerIntegration, ((it->end - 1) >> itsLogNrChannels) + 1); + + filteredData->flags[stat].include(begin, end); + } +// } + + computeFlagsTimer.stop(); +} + + +#if defined PPF_C_IMPLEMENTATION + +template <typename SAMPLE_TYPE> fcomplex PPF<SAMPLE_TYPE>::phaseShift(unsigned time, unsigned chan, double baseFrequency, double delayAtBegin, double delayAfterEnd) const +{ + double timeInterpolatedDelay = delayAtBegin + ((double) time / itsNrSamplesPerIntegration) * (delayAfterEnd - 
delayAtBegin); + double frequency = baseFrequency + chan * itsChannelBandwidth; + double phaseShift = timeInterpolatedDelay * frequency; + double phi = -2 * M_PI * phaseShift; + + return makefcomplex(std::cos(phi), std::sin(phi)); +} + +#else + +template <typename SAMPLE_TYPE> void PPF<SAMPLE_TYPE>::computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], double delayAtBegin, double delayAfterEnd, double baseFrequency) const +{ + double phiBegin = -2 * M_PI * delayAtBegin; + double phiEnd = -2 * M_PI * delayAfterEnd; + double deltaPhi = (phiEnd - phiBegin) / itsNrSamplesPerIntegration; + dcomplex v = cosisin(phiBegin * baseFrequency); + dcomplex dv = cosisin(phiBegin * itsChannelBandwidth); + dcomplex vf = cosisin(deltaPhi * baseFrequency); + dcomplex dvf = cosisin(deltaPhi * itsChannelBandwidth); + + for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) { + phaseShifts[time].v0 = v; v *= vf; + phaseShifts[time].dv = dv; dv *= dvf; + } +} + +#endif + + +template <typename SAMPLE_TYPE> void PPF<SAMPLE_TYPE>::filter(unsigned stat, double centerFrequency, const TransposedData<SAMPLE_TYPE> *transposedData, FilteredData *filteredData) +{ + PPFtimer.start(); + + double baseFrequency = centerFrequency - (itsNrChannels / 2) * itsChannelBandwidth; + +#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION + // PPF puts a lot of pressure on the memory bus. Avoid that both cores + // run simultaneously, since it slows them both. 
+ _cn_mutex_lock(mutex); +#endif + +// for (unsigned stat = 0; stat < itsNrStations; stat ++) { + unsigned alignmentShift = transposedData->metaData[stat].alignmentShift; + +#if 0 + std::clog << setprecision(15) << "stat " << stat << ", basefreq " << baseFrequency << ": delay from " << delays[stat].delayAtBegin << " to " << delays[stat].delayAfterEnd << " sec" << std::endl; +#endif + +#if defined PPF_C_IMPLEMENTATION + std::vector<fcomplex, AlignedStdAllocator<fcomplex, 32> > fftOutData(itsNrChannels); + + FIRtimer.start(); + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) { + for (unsigned chan = 0; chan < itsNrChannels; chan ++) { + for (unsigned time = 0; time < NR_TAPS - 1 + itsNrSamplesPerIntegration; time ++) { + SAMPLE_TYPE tmp = transposedData->samples[stat][itsNrChannels * time + chan + alignmentShift][pol]; + +#if defined WORDS_BIGENDIAN + dataConvert(LittleEndian, &tmp, 1); +#endif + fcomplex sample = makefcomplex(real(tmp), imag(tmp)); + itsFFTinData[time][pol][chan] = itsFIRs[stat][pol][chan].processNextSample(sample, FIR::weights[chan]); + } + } + } + FIRtimer.stop(); + + FFTtimer.start(); + for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) { + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) { + if (filteredData->flags[stat].test(time)) { + for (unsigned chan = 0; chan < itsNrChannels; chan ++) { + filteredData->samples[chan][stat][time][pol] = makefcomplex(0, 0); + } + } else { +#if defined HAVE_FFTW3 + fftwf_execute_dft(itsFFTWPlan, + (fftwf_complex *) itsFFTinData[NR_TAPS - 1 + time][pol].origin(), + (fftwf_complex *) (void *) &fftOutData[0]); +#else + fftw_one(itsFFTWPlan, + (fftw_complex *) itsFFTinData[NR_TAPS - 1 + time][pol].origin(), + (fftw_complex *) (void *) &fftOutData[0]); +#endif + + for (unsigned chan = 0; chan < itsNrChannels; chan ++) { + if (itsDelayCompensation) { + fftOutData[chan] *= phaseShift(time, chan, baseFrequency, transposedData->metaData[stat].delayAtBegin, 
transposedData->metaData[stat].delayAfterEnd); + } + + filteredData->samples[chan][stat][time][pol] = fftOutData[chan]; + } + } + } + } + FFTtimer.stop(); +#else // assembly implementation + int transpose_stride = sizeof(fcomplex) * (NR_POLARIZATIONS * (itsNrSamplesPerIntegration | 2) * itsNrStations - (itsDelayCompensation ? 3 : 0)); + + for (unsigned chan = 0; chan < itsNrChannels; chan += 4) { + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) { +#if defined __GNUC__ // work around bug ??? + for (register unsigned ch asm ("r28") = 0; ch < 4; ch ++) { +#else + for (unsigned ch = 0; ch < 4; ch ++) { +#endif + FIRtimer.start(); + _filter(itsNrChannels, + FIR::weights[chan + ch], + &transposedData->samples[stat][chan + ch + alignmentShift][pol], + itsTmp[ch].origin(), + itsNrSamplesPerIntegration / NR_TAPS); + FIRtimer.stop(); + } + + _transpose_4x8(&itsFFTinData[0][pol][chan], + itsTmp.origin(), + itsNrSamplesPerIntegration, + sizeof(fcomplex) * itsNrSamplesPerIntegration, + sizeof(fcomplex) * NR_POLARIZATIONS * (itsNrChannels + 4)); + } + } + + struct phase_shift phaseShifts[itsNrSamplesPerIntegration]; + + if (itsDelayCompensation) { + computePhaseShifts(phaseShifts, transposedData->metaData[stat].delayAtBegin, transposedData->metaData[stat].delayAfterEnd, baseFrequency); + } + + const SparseSet<unsigned>::Ranges &ranges = filteredData->flags[stat].getRanges(); + SparseSet<unsigned>::const_iterator it = ranges.begin(); + + for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) { + bool good = it == ranges.end() || time < it->begin || (time == it->end && (++ it, true)); + + if (good) { + FFTtimer.start(); +#if 0 + _prefetch(itsFFTinData[time].origin(), + sizeof(fcomplex[NR_POLARIZATIONS][itsNrChannels]) / CACHE_LINE_SIZE, + CACHE_LINE_SIZE); +#endif + + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) { +#if 0 + fftw_one(itsFFTWPlan, + (fftw_complex *) itsFFTinData[time][pol].origin(), + (fftw_complex *) itsFFToutData[time & 
1][pol].origin()); +#else + _fft256(itsFFTinData[time][pol].origin(), + itsFFToutData[time & 1][pol].origin()); +#endif + } + FFTtimer.stop(); + } else { + _memzero(itsFFToutData[time & 1].origin(), + itsFFToutData[time & 1].num_elements() * sizeof(fcomplex)); + } + + if (time & 1) { + if (itsDelayCompensation) { + _phase_shift_and_transpose(&filteredData->samples[0][stat][time - 1][0], + itsFFToutData.origin(), + &phaseShifts[time - 1], + transpose_stride, + itsNrChannels); + } else { + _transpose_4x8(&filteredData->samples[0][stat][time - 1][0], + itsFFToutData.origin(), + itsNrChannels, + sizeof(fcomplex) * itsNrChannels, + transpose_stride); + } + } + } +#endif // PPF_C_IMPLEMENTATION +// } + +#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION + _cn_mutex_unlock(mutex); +#endif + + PPFtimer.stop(); +} + +template class PPF<i4complex>; +template class PPF<i8complex>; +template class PPF<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/PPF.h b/RTCP/CNProc/src/PPF.h new file mode 100644 index 0000000000000000000000000000000000000000..d998f0a2ac32e47962946b0be87935ad3f3a94da --- /dev/null +++ b/RTCP/CNProc/src/PPF.h @@ -0,0 +1,79 @@ +#ifndef LOFAR_CNPROC_PPF_H +#define LOFAR_CNPROC_PPF_H + +#if 0 || !(defined HAVE_BGL || defined HAVE_BGP) +#define PPF_C_IMPLEMENTATION +#endif + + +#include <FIR.h> +#include <TransposedData.h> +#include <FilteredData.h> +#include <Interface/AlignedStdAllocator.h> + +#include <boost/multi_array.hpp> + +#if defined HAVE_BGL +#include <rts.h> +#endif + +#if defined HAVE_FFTW3 +#include <fftw3.h> +#elif defined HAVE_FFTW2 +#include <fftw.h> +#else +#error Should have FFTW3 or FFTW2 installed +#endif + + +namespace LOFAR { +namespace RTCP { + +template <typename SAMPLE_TYPE> class PPF +{ + public: + PPF(unsigned nrStations, unsigned nrChannels, unsigned nrSamplesPerIntegration, double channelBandwidth, bool delayCompensation); + ~PPF(); + + void computeFlags(unsigned stat, const 
TransposedData<SAMPLE_TYPE> *, FilteredData *); + void filter(unsigned stat, double centerFrequency, const TransposedData<SAMPLE_TYPE> *, FilteredData *); + + private: + void init_fft(), destroy_fft(); + void initConstantTable(); + +#if defined PPF_C_IMPLEMENTATION + fcomplex phaseShift(unsigned time, unsigned chan, double baseFrequency, double delayAtBegin, double delayAfterEnd) const; +#else + void computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], double delayAtBegin, double delayAfterEnd, double baseFrequency) const; +#endif + + unsigned itsNrStations, itsNrSamplesPerIntegration; + unsigned itsNrChannels, itsLogNrChannels; + double itsChannelBandwidth; + bool itsDelayCompensation; + +#if defined PPF_C_IMPLEMENTATION + boost::multi_array<FIR, 3> itsFIRs; //[itsNrStations][NR_POLARIZATIONS][itsNrChannels] + boost::multi_array<fcomplex, 3> itsFFTinData; //[NR_TAPS - 1 + itsNrSamplesPerIntegration][NR_POLARIZATIONS][itsNrChannels] +#else + boost::multi_array<fcomplex, 2, AlignedStdAllocator<fcomplex, 32> > itsTmp; //[4][itsNrSamplesPerIntegration] + boost::multi_array<fcomplex, 3, AlignedStdAllocator<fcomplex, 32> > itsFFTinData; //[itsNrSamplesPerIntegration][NR_POLARIZATIONS][itsNrChannels + 4] + boost::multi_array<fcomplex, 3, AlignedStdAllocator<fcomplex, 32> > itsFFToutData; //[2][NR_POLARIZATIONS][itsNrChannels] +#endif + +#if defined HAVE_FFTW3 + fftwf_plan itsFFTWPlan; +#elif defined HAVE_FFTW2 + fftw_plan itsFFTWPlan; +#endif + +#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION + CN_Mutex *mutex; +#endif +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/Transpose.cc b/RTCP/CNProc/src/Transpose.cc new file mode 100644 index 0000000000000000000000000000000000000000..fe6584aaf17e14927a7e956d35de10d32dedce4a --- /dev/null +++ b/RTCP/CNProc/src/Transpose.cc @@ -0,0 +1,254 @@ +//# Always #include <lofar_config.h> first! 
+#include <lofar_config.h> + +#include <Transpose.h> + +#include <Common/Timer.h> +#include <Interface/CN_Mapping.h> +#include <Interface/PrintVector.h> + +#include <cassert> +#include <map> +#include <set> + + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MPI + +static NSTimer transposeTimer("transpose()", true); + + +template <typename SAMPLE_TYPE> std::vector<MPI_Comm> Transpose<SAMPLE_TYPE>::allTransposeGroups; + + +template <typename SAMPLE_TYPE> Transpose<SAMPLE_TYPE>::Transpose(bool isTransposeInput, bool isTransposeOutput, unsigned myCore) +: + itsIsTransposeInput(isTransposeInput), + itsIsTransposeOutput(isTransposeOutput), + itsTransposeGroup(allTransposeGroups[myCore]) +{ +} + + +template <typename SAMPLE_TYPE> Transpose<SAMPLE_TYPE>::~Transpose() +{ +} + + +#if defined HAVE_BGL || defined HAVE_BGP + +template <typename SAMPLE_TYPE> unsigned Transpose<SAMPLE_TYPE>::remapOnTree(unsigned pset, unsigned core, const std::vector<unsigned> &psetNumbers) +{ + core = CN_Mapping::mapCoreOnPset(core, pset); + + for (unsigned rank = 0;; rank ++) + if (psetNumbers[rank] == pset && core -- == 0) + return rank; +} + + +template <typename SAMPLE_TYPE> void Transpose<SAMPLE_TYPE>::getMPIgroups(unsigned nrCoresPerPset, const LocationInfo &locationInfo, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets) +{ + allTransposeGroups.resize(nrCoresPerPset); + + MPI_Group all, group; + + if (MPI_Comm_group(MPI_COMM_WORLD, &all) != MPI_SUCCESS) { + std::cerr << "MPI_Comm_group() failed" << std::endl; + exit(1); + } + + std::set<unsigned> psets; // ordered list of all psets + std::set_union(inputPsets.begin(), inputPsets.end(), + outputPsets.begin(), outputPsets.end(), + std::insert_iterator<std::set<unsigned> >(psets, psets.begin())); + + for (unsigned core = 0; core < nrCoresPerPset; core ++) { + std::vector<int> ranks; + + for (std::set<unsigned>::const_iterator pset = psets.begin(); pset != psets.end(); pset ++) { + 
ranks.push_back(locationInfo.remapOnTree(*pset, core)); + } + + if (locationInfo.rank() == 0) { + std::clog << "Transpose :: group " << core << " contains cores " << ranks << std::endl; + } + + if (MPI_Group_incl(all, ranks.size(), &ranks[0], &group) != MPI_SUCCESS) { + std::cerr << "MPI_Group_incl() failed" << std::endl; + exit(1); + } + + if (MPI_Comm_create(MPI_COMM_WORLD, group, &allTransposeGroups[core]) != MPI_SUCCESS) { + std::cerr << "MPI_Comm_create() failed" << std::endl; + exit(1); + } + + if (MPI_Group_free(&group) != MPI_SUCCESS) { + std::cerr << "MPI_Group_free() failed" << std::endl; + exit(1); + } + } +} + +#endif + + +template <typename SAMPLE_TYPE> void Transpose<SAMPLE_TYPE>::setupTransposeParams(const LocationInfo &locationInfo, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets, InputData<SAMPLE_TYPE> *inputData, TransposedData<SAMPLE_TYPE> *transposedData) +{ + std::set<unsigned> psets; // ordered list of all psets + std::set_union(inputPsets.begin(), inputPsets.end(), + outputPsets.begin(), outputPsets.end(), + std::insert_iterator<std::set<unsigned> >(psets, psets.begin())); + + unsigned nrPsetsUsed = psets.size(); + std::map<unsigned, unsigned> psetToGroupIndex; + unsigned groupIndex = 0; + for (std::set<unsigned>::const_iterator pset = psets.begin(); pset != psets.end(); pset ++, groupIndex ++) + psetToGroupIndex[*pset] = groupIndex; + + if (locationInfo.rank() == 0) + for (std::map<unsigned, unsigned>::const_iterator it = psetToGroupIndex.begin(); it != psetToGroupIndex.end(); it ++) + std::clog << "pset " << it->first << " maps to group index " << it->second << std::endl; + + itsTransposeParams.send.counts.resize(nrPsetsUsed, 0); + itsTransposeParams.send.displacements.resize(nrPsetsUsed); + itsTransposeParams.receive.counts.resize(nrPsetsUsed, 0); + itsTransposeParams.receive.displacements.resize(nrPsetsUsed); + itsTransposeParams.receive.psetIndex.resize(nrPsetsUsed); + + 
itsTransposeMetaParams.send.counts.resize(nrPsetsUsed, 0); + itsTransposeMetaParams.send.displacements.resize(nrPsetsUsed); + itsTransposeMetaParams.receive.counts.resize(nrPsetsUsed, 0); + itsTransposeMetaParams.receive.displacements.resize(nrPsetsUsed); + + if (itsIsTransposeInput) + for (unsigned psetIndex = 0; psetIndex < outputPsets.size(); psetIndex ++) { + unsigned pset = outputPsets[psetIndex]; + unsigned index = psetToGroupIndex[pset]; + + const boost::detail::multi_array::sub_array<SAMPLE_TYPE, 2> &slice = inputData->samples[psetIndex]; + + itsTransposeParams.send.counts[index] = slice.num_elements() * sizeof(SAMPLE_TYPE); + itsTransposeParams.send.displacements[index] = reinterpret_cast<const char *>(slice.origin()) - reinterpret_cast<const char *>(inputData->samples.origin()); + + itsTransposeMetaParams.send.counts[index] = sizeof(SubbandMetaData); + itsTransposeMetaParams.send.displacements[index] = reinterpret_cast<const char *>(&inputData->metaData[psetIndex]) - reinterpret_cast<const char *>(&inputData->metaData[0]); + } + + if (itsIsTransposeOutput) + for (unsigned psetIndex = 0; psetIndex < inputPsets.size(); psetIndex ++) { + unsigned pset = inputPsets[psetIndex]; + unsigned index = psetToGroupIndex[pset]; + const boost::detail::multi_array::sub_array<SAMPLE_TYPE, 2> &slice = transposedData->samples[psetIndex]; + + itsTransposeParams.receive.counts[index] = slice.num_elements() * sizeof(SAMPLE_TYPE); + itsTransposeParams.receive.displacements[index] = reinterpret_cast<const char *>(slice.origin()) - reinterpret_cast<const char *>(transposedData->samples.origin()); + itsTransposeParams.receive.psetIndex[index] = psetIndex; + + itsTransposeMetaParams.receive.counts[index] = sizeof(SubbandMetaData); + itsTransposeMetaParams.receive.displacements[index] = reinterpret_cast<const char *>(&transposedData->metaData[psetIndex]) - reinterpret_cast<const char *>(&transposedData->metaData[0]); + } + +#if 0 +if (itsIsTransposeInput) std::clog << "send_base: " 
<< inputData->samples.origin() << std::endl; +std::clog << "send_counts:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeParams.send.counts[pset]; +std::clog << std::endl; +std::clog << "send_displacements:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeParams.send.displacements[pset]; +std::clog << std::endl; +if (itsIsTransposeOutput) std::clog << "receive_base: " << transposedData->samples.origin() << std::endl; +std::clog << "receive_counts:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeParams.receive.counts[pset]; +std::clog << std::endl; +std::clog << "receive_displacements:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeParams.receive.displacements[pset]; +std::clog << std::endl; +#endif + +#if 0 +std::clog << "meta send_counts:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeMetaParams.send.counts[pset]; +std::clog << std::endl; +std::clog << "meta send_displacements:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeMetaParams.send.displacements[pset]; +std::clog << std::endl; +std::clog << "meta receive_counts:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeMetaParams.receive.counts[pset]; +std::clog << std::endl; +std::clog << "meta receive_displacements:"; +for (unsigned pset = 0; pset < nrPsetsUsed; pset ++) +std::clog << ' ' << itsTransposeMetaParams.receive.displacements[pset]; +std::clog << std::endl; +#endif +} + + +template <typename SAMPLE_TYPE> void Transpose<SAMPLE_TYPE>::transpose(const InputData<SAMPLE_TYPE> *inputData, TransposedData<SAMPLE_TYPE> *transposedData) +{ + if (MPI_Alltoallv( + itsIsTransposeInput ? 
(void *) inputData->samples.origin() : 0, + &itsTransposeParams.send.counts[0], + &itsTransposeParams.send.displacements[0], + MPI_BYTE, + itsIsTransposeOutput ? transposedData->samples.origin() : 0, + &itsTransposeParams.receive.counts[0], + &itsTransposeParams.receive.displacements[0], + MPI_BYTE, + itsTransposeGroup) != MPI_SUCCESS) + { + std::cerr << "MPI_Alltoallv() failed" << std::endl; + exit(1); + } +} + + +template <typename SAMPLE_TYPE> void Transpose<SAMPLE_TYPE>::transposeMetaData(const InputData<SAMPLE_TYPE> *inputData, TransposedData<SAMPLE_TYPE> *transposedData) +{ +#if 0 + // no need to marshall itsInputMetaData; it has not been unmarshalled + // after reading from ION +#endif + + if (MPI_Alltoallv( + itsIsTransposeInput ? (void *) &inputData->metaData[0] : 0, + &itsTransposeMetaParams.send.counts[0], + &itsTransposeMetaParams.send.displacements[0], + MPI_BYTE, + itsIsTransposeOutput ? &transposedData->metaData[0] : 0, + &itsTransposeMetaParams.receive.counts[0], + &itsTransposeMetaParams.receive.displacements[0], + MPI_BYTE, + itsTransposeGroup) != MPI_SUCCESS) + { + std::cerr << "MPI_Alltoallv() failed" << std::endl; + exit(1); + } + +#if 0 + if (itsIsTransposeOutput) + for (unsigned station = 0; station < transposedData->metaData.size(); station ++) + transposedData->metaData[station].unmarshall(); +#endif +} + +template class Transpose<i4complex>; +template class Transpose<i8complex>; +template class Transpose<i16complex>; + +#endif // HAVE_MPI + + + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/CNProc/src/Transpose.h b/RTCP/CNProc/src/Transpose.h new file mode 100644 index 0000000000000000000000000000000000000000..b38cf78715eed47f2738b05c7fa4384ce07cfc4b --- /dev/null +++ b/RTCP/CNProc/src/Transpose.h @@ -0,0 +1,64 @@ +#ifndef LOFAR_CNPROC_TRANSPOSE_H +#define LOFAR_CNPROC_TRANSPOSE_H + +#include <AsyncCommunication.h> +#include <InputData.h> +#include <LocationInfo.h> +#include <TransposedData.h> +#include 
<Interface/SubbandMetaData.h> + +#if defined HAVE_MPI +#define MPICH_IGNORE_CXX_SEEK +#include <mpi.h> +#endif + +#if defined HAVE_BGL +#include <bglpersonality.h> +#endif + +#include <vector> + + +namespace LOFAR { +namespace RTCP { + +#if defined HAVE_MPI + +template <typename SAMPLE_TYPE> class Transpose +{ + public: + Transpose(bool isTransposeInput, bool isTransposeOutput, unsigned myCore); // myCore indexes allTransposeGroups + ~Transpose(); + + void setupTransposeParams(const LocationInfo &, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets, InputData<SAMPLE_TYPE> *, TransposedData<SAMPLE_TYPE> *); + +#if defined HAVE_BGL || defined HAVE_BGP // same condition as the definitions in Transpose.cc + static void getMPIgroups(unsigned nrCoresPerPset, const LocationInfo &, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets); + static unsigned remapOnTree(unsigned pset, unsigned core, const std::vector<unsigned> &psetNumbers); +#endif + + void transpose(const InputData<SAMPLE_TYPE> *, TransposedData<SAMPLE_TYPE> *); + void transposeMetaData(const InputData<SAMPLE_TYPE> *, TransposedData<SAMPLE_TYPE> *); + + private: + bool itsIsTransposeInput, itsIsTransposeOutput; + + // All cores at the same position within a pset form a group. The + // transpose is done between members of this group.
+ struct { + struct { + std::vector<int> counts, displacements, psetIndex; + } send, receive; + } itsTransposeParams, itsTransposeMetaParams; + + MPI_Comm itsTransposeGroup; + + static std::vector<MPI_Comm> allTransposeGroups; +}; + +#endif // defined HAVE_MPI + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/src/TransposedData.h b/RTCP/CNProc/src/TransposedData.h new file mode 100644 index 0000000000000000000000000000000000000000..611b2cce54078eeff9727895bd32aec8450a8e70 --- /dev/null +++ b/RTCP/CNProc/src/TransposedData.h @@ -0,0 +1,77 @@ +#ifndef LOFAR_CNPROC_TRANSPOSED_DATA_H +#define LOFAR_CNPROC_TRANSPOSED_DATA_H + +#include <Common/lofar_complex.h> +#include <Interface/Allocator.h> +#include <Interface/Config.h> +#include <Interface/SubbandMetaData.h> + +#include <boost/multi_array.hpp> +#include <vector> + + +namespace LOFAR { +namespace RTCP { + +template <typename SAMPLE_TYPE> class TransposedData +{ + public: + TransposedData(const Arena &, unsigned nrStations, unsigned nrSamplesToCNProc); + ~TransposedData(); + + static size_t requiredSize(unsigned nrStations, unsigned nrSamplesToCNProc); + + private: + SparseSetAllocator allocator; + + public: + boost::multi_array_ref<SAMPLE_TYPE, 3> samples; //[itsNrStations][itsPS->nrSamplesToCNProc()][NR_POLARIZATIONS] + std::vector<SubbandMetaData> metaData; //[itsNrStations] + +#if 0 + SparseSet<unsigned> *flags; //[itsNrStations] + + typedef struct { + float delayAtBegin, delayAfterEnd; + } DelayIntervalType; + + DelayIntervalType *delays; // [itsNrStations] + unsigned *alignmentShifts; // [itsNrStations] +#endif +}; + + +template <typename SAMPLE_TYPE> inline TransposedData<SAMPLE_TYPE>::TransposedData(const Arena &arena, unsigned nrStations, unsigned nrSamplesToCNProc) +: + allocator(arena), + samples(static_cast<SAMPLE_TYPE *>(allocator.allocate(requiredSize(nrStations, nrSamplesToCNProc), 32)), boost::extents[nrStations][nrSamplesToCNProc][NR_POLARIZATIONS]), + 
metaData(nrStations) +#if 0 + flags(new SparseSet<unsigned>[nrStations]), + delays(new DelayIntervalType[nrStations]), + alignmentShifts(new unsigned[nrStations]) +#endif +{ +} + + +template <typename SAMPLE_TYPE> inline TransposedData<SAMPLE_TYPE>::~TransposedData() +{ + allocator.deallocate(samples.origin()); +#if 0 + delete [] flags; + delete [] alignmentShifts; + delete [] delays; +#endif +} + + +template <typename SAMPLE_TYPE> inline size_t TransposedData<SAMPLE_TYPE>::requiredSize(unsigned nrStations, unsigned nrSamplesToCNProc) +{ + return sizeof(SAMPLE_TYPE) * nrStations * nrSamplesToCNProc * NR_POLARIZATIONS; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/CNProc/test/Makefile.am b/RTCP/CNProc/test/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..67426f5ee4927008af2686d92925547db9c6076c --- /dev/null +++ b/RTCP/CNProc/test/Makefile.am @@ -0,0 +1,30 @@ + +check_PROGRAMS = tCN_Processing + + +AM_CPPFLAGS = $(EXTRA_CPPFLAGS) + +AM_CXXFLAGS = \ + -I../../../src + +tCN_Processing_SOURCES = \ +tCN_Processing.cc \ +../src/BandPass.cc \ +../src/CN_Processing.cc \ +../src/Correlator.cc \ +../src/CorrelatorAsm.S \ +../src/FFT_Asm.S \ +../src/FIR.cc \ +../src/FIR_Asm.S \ +../src/LocationInfo.cc \ +../src/PPF.cc \ +../src/Transpose.cc + +CCASFLAGS = $(patsubst -q%,,$(CPPFLAGS)) $(EXTRA_CPPFLAGS) + + +TESTS = \ +tCN_Processing.sh + + +include $(top_srcdir)/Makefile.common diff --git a/RTCP/CNProc/test/tCN_Processing.cc b/RTCP/CNProc/test/tCN_Processing.cc new file mode 100644 index 0000000000000000000000000000000000000000..4773e4f3878ec1d67031f7ab4f721bd4bf935fa7 --- /dev/null +++ b/RTCP/CNProc/test/tCN_Processing.cc @@ -0,0 +1,273 @@ +//# tWH_CN_Processing.cc: stand-alone test program for WH_CN_Processing +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can 
redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id$ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +#include <Interface/CN_Configuration.h> +#include <Common/DataConvert.h> +#include <Common/Exception.h> +#include <Common/Timer.h> +#include <PPF.h> +#include <Correlator.h> + +#if defined HAVE_MPI +#define MPICH_IGNORE_CXX_SEEK +#include <mpi.h> +#endif + +#if defined HAVE_BGL +#include <bglpersonality.h> +#include <rts.h> +#endif + +#include <cmath> +#include <cstring> +#include <exception> + + +using namespace LOFAR; +using namespace LOFAR::RTCP; + + +template <typename T> void toComplex(double phi, T &z); + +template <> inline void toComplex<i4complex>(double phi, i4complex &z) +{ + double s, c; + + sincos(phi, &s, &c); + z = makei4complex(8 * c, 8 * s); +} + +template <> inline void toComplex<i8complex>(double phi, i8complex &z) +{ + double s, c; + + sincos(phi, &s, &c); + z = makei8complex((int) rint(127 * c), (int) rint(127 * s)); +} + +template <> inline void toComplex<i16complex>(double phi, i16complex &z) +{ + double s, c; + + sincos(phi, &s, &c); + z = makei16complex((int) rint(32767 * c), (int) rint(32767 * s)); +} + + +template <typename SAMPLE_TYPE> void setSubbandTestPattern(TransposedData<SAMPLE_TYPE> *transposedData, unsigned nrStations, double signalFrequency, double sampleRate) 
+{ + // Simulate a monochrome complex signal into the PPF, with station 1 at a + // distance of .25 labda to introduce a delay. Also, a few samples can be + // flagged. + + std::clog << "setSubbandTestPattern() ... "; + + static NSTimer timer("setTestPattern", true); + timer.start(); + + const double distance = .25; // labda + const double phaseShift = 2 * M_PI * distance; + + for (unsigned stat = 0; stat < nrStations; stat ++) { + transposedData->metaData[stat].delayAtBegin = 0; + transposedData->metaData[stat].delayAfterEnd = 0; + transposedData->metaData[stat].alignmentShift = 0; + transposedData->metaData[stat].setFlags(SparseSet<unsigned>()); + } + + for (unsigned time = 0; time < transposedData->samples[0].size(); time ++) { + double phi = 2 * M_PI * signalFrequency * time / sampleRate; + SAMPLE_TYPE sample; + toComplex(phi, sample); + + for (unsigned stat = 0; stat < nrStations; stat ++) { + transposedData->samples[stat][time][0] = sample; + transposedData->samples[stat][time][1] = sample; + } + + if (NR_POLARIZATIONS >= 2 && nrStations > 2) { + toComplex(phi + phaseShift, transposedData->samples[1][time][1]); + transposedData->metaData[1].delayAtBegin = distance / signalFrequency; + transposedData->metaData[1].delayAfterEnd = distance / signalFrequency; + } + } + +#if 1 + if (transposedData->samples[0].size() > 17000 && nrStations >= 6) { + transposedData->metaData[4].setFlags(SparseSet<unsigned>().include(14000)); + transposedData->metaData[5].setFlags(SparseSet<unsigned>().include(17000)); + } +#endif + + std::clog << "done." 
<< std::endl;; + +#if 1 && defined WORDS_BIGENDIAN + std::clog << "swap bytes" << std::endl; + dataConvert(LittleEndian, transposedData->samples.data(), transposedData->samples.num_elements()); +#endif + + timer.stop(); +} + + +void checkCorrelatorTestPattern(const CorrelatedData *correlatedData, unsigned nrStations, unsigned nrChannels) +{ + const boost::multi_array_ref<fcomplex, 4> &visibilities = correlatedData->visibilities; + + static const unsigned channels[] = { 1, 201, 255 }; + + for (unsigned stat1 = 0; stat1 < std::min(nrStations, 8U); stat1 ++) { + for (unsigned stat2 = stat1; stat2 < std::min(nrStations, 8U); stat2 ++) { + int bl = Correlator::baseline(stat1, stat2); + + std::cout << "S(" << stat1 << ") * ~S(" << stat2 << ") :\n"; + + for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) { + for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) { + std::cout << " " << (char) ('x' + pol1) << (char) ('x' + pol2) << ':'; + + for (size_t chidx = 0; chidx < sizeof(channels) / sizeof(int); chidx ++) { + unsigned ch = channels[chidx]; + + if (ch < nrChannels) { + std::cout << ' ' << visibilities[bl][ch][pol1][pol2] << '/' << correlatedData->nrValidSamples[bl][ch]; + } + } + + std::cout << '\n'; + } + } + } + } + + std::cout << "newgraph newcurve linetype solid marktype none pts\n"; + float max = 0.0; + + for (unsigned ch = 1; ch < nrChannels; ch ++) + if (abs(visibilities[0][ch][1][1]) > max) + max = abs(visibilities[0][ch][1][1]); + + std::clog << "max = " << max << std::endl; + + for (unsigned ch = 1; ch < nrChannels; ch ++) + std::cout << ch << ' ' << (10 * std::log10(abs(visibilities[0][ch][1][1]) / max)) << '\n'; +} + + +template <typename SAMPLE_TYPE> void doWork() +{ +#if defined HAVE_BGL + // only test on the one or two cores of the first compute node + + struct CNPersonality personality; + + if (rts_get_personality(&personality, sizeof personality) != 0) { + std::cerr << "Could not get personality" << std::endl; + exit(1); + } + + if 
(personality.getXcoord() == 0 && personality.getYcoord() == 0 && personality.getZcoord() == 0) +#endif + { + unsigned nrStations = 77; + unsigned nrChannels = 256; + unsigned nrSamplesPerIntegration = 768; + double sampleRate = 195312.5; + double refFreq = 384 * sampleRate; + double signalFrequency = refFreq + 73 * sampleRate / nrChannels; // channel 73 + unsigned nrSamplesToCNProc = nrChannels * (nrSamplesPerIntegration + NR_TAPS - 1) + 32 / sizeof(SAMPLE_TYPE[NR_POLARIZATIONS]); + unsigned nrBaselines = nrStations * (nrStations + 1) / 2; + + const char *env; + + if ((env = getenv("SIGNAL_FREQUENCY")) != 0) { + signalFrequency = atof(env); + } + + std::clog << "base frequency = " << refFreq << std::endl; + std::clog << "signal frequency = " << signalFrequency << std::endl; + + size_t transposedDataSize = TransposedData<SAMPLE_TYPE>::requiredSize(nrStations, nrSamplesToCNProc); + size_t filteredDataSize = FilteredData::requiredSize(nrStations, nrChannels, nrSamplesPerIntegration); + size_t correlatedDataSize = CorrelatedData::requiredSize(nrBaselines, nrChannels); + + std::clog << transposedDataSize << " " << filteredDataSize << " " << correlatedDataSize << std::endl; + MallocedArena arena0(filteredDataSize, 32); + MallocedArena arena1(std::max(transposedDataSize, correlatedDataSize), 32); + + TransposedData<SAMPLE_TYPE> transposedData(arena1, nrStations, nrSamplesToCNProc); + FilteredData filteredData(arena0, nrStations, nrChannels, nrSamplesPerIntegration); + CorrelatedData correlatedData(arena1, nrBaselines, nrChannels); + + PPF<SAMPLE_TYPE> ppf(nrStations, nrChannels, nrSamplesPerIntegration, sampleRate / nrChannels, true); + Correlator correlator(nrStations, nrChannels, nrSamplesPerIntegration, true); + + setSubbandTestPattern(&transposedData, nrStations, signalFrequency, sampleRate); + ppf.computeFlags(&transposedData, &filteredData); + ppf.filter(refFreq, &transposedData, &filteredData); + + correlator.computeFlagsAndCentroids(&filteredData, 
&correlatedData); + correlator.correlate(&filteredData, &correlatedData); + + checkCorrelatorTestPattern(&correlatedData, nrStations, nrChannels); + } +} + + +int main (int argc, char **argv) +{ + int retval = 0; + +#if defined HAVE_BGL + // make std::clog line buffered + static char buffer[4096]; + setvbuf(stderr, buffer, _IOLBF, sizeof buffer); +#endif + +#if defined HAVE_MPI + MPI_Init(&argc, &argv); +#else + argc = argc; argv = argv; // Keep compiler happy ;-) +#endif + + try { + doWork<i16complex>(); + } catch (Exception& e) { + std::cerr << "Caught exception: " << e.what() << std::endl; + retval = 1; + } catch (std::exception& e) { + std::cerr << "Caught exception: " << e.what() << std::endl; + retval = 1; + } catch (...) { + std::cerr << "Caught exception " << std::endl; + retval = 1; + } + +#if defined HAVE_MPI + MPI_Finalize(); +#endif + + return retval; +} diff --git a/RTCP/CNProc/test/tCN_Processing.sh b/RTCP/CNProc/test/tCN_Processing.sh new file mode 100755 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/RTCP/CNProc/test/test.parset b/RTCP/CNProc/test/test.parset new file mode 100644 index 0000000000000000000000000000000000000000..37055a03139d8a8bc3ecb17768a3b825ac176ecb --- /dev/null +++ b/RTCP/CNProc/test/test.parset @@ -0,0 +1,13 @@ +Observation.subbandList = [384] +Observation.nyquistZone = 1 +Observation.sampleClock = 160 +Observation.channelsPerSubband = 256 +OLAP.CNProc.nrPPFTaps = 16 +Observation.nrPolarisations = 2 +OLAP.CNProc.inputPsets = [] +OLAP.CNProc.outputPsets = [0] +OLAP.storageStationNames = ["S0", "S1", "S2", "S3", "S4", "S5"] +OLAP.IONProc.useGather = T +OLAP.subbandsPerPset = 1 +OLAP.CNProc.integrationSteps = 608 +OLAP.delayCompensation = T diff --git a/RTCP/CNProc/test/transpose.cc b/RTCP/CNProc/test/transpose.cc new file mode 100644 index 0000000000000000000000000000000000000000..d43e78340ed6dbe8a6b73caa0527749619d2b211 --- /dev/null +++ b/RTCP/CNProc/test/transpose.cc @@ 
-0,0 +1,556 @@
+//# transpose.cc: test transpose on BG/L torus
+//#
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#if defined HAVE_MPI
+#include <APS/ParameterSet.h>
+#include <Common/lofar_complex.h>
+#include <Common/Timer.h>
+#include <PLC/ACCmain.h>
+#include <tinyCEP/ApplicationHolderController.h>
+#include <tinyCEP/TinyApplicationHolder.h>
+#include <tinyCEP/WorkHolder.h>
+#include <Transport/DataHolder.h>
+#include <Transport/TH_MPI.h>
+
+#if defined HAVE_BGL
+#include <bglpersonality.h>
+#include <rts.h>
+#endif
+
+#include <algorithm>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <boost/multi_array.hpp>
+
+
+// The pset simulation needs the BG/L run-time system (rts_*ForRank calls).
+#if defined HAVE_BGL
+#define SIMULATE_PSETS
+#endif
+
+#define NR_POLARIZATIONS 2
+
+
+namespace LOFAR {
+namespace RTCP {
+
+
+class DH_RSP : public DataHolder
+{
+  public:
+    typedef i4complex SampleType;
+
+    DH_RSP(const string &name, const ACC::APS::ParameterSet &ps);
+
+    DataHolder *clone() const;
+    virtual void init();
+    virtual void fillDataPointers();
+
+  private:
+    SampleType *itsSamples;
+    unsigned itsNrSamples;
+};
+
+
+DH_RSP::DH_RSP(const string &name, const ACC::APS::ParameterSet &ps)
+:
+  DataHolder(name, "DH_RSP"),
+  itsSamples(0), // set by fillDataPointers()
+  itsNrSamples(ps.getUint32("Observation.NSubbandSamples") * ps.getUint32("Observation.NPolarisations"))
+{
+}
+
+
+DataHolder *DH_RSP::clone() const
+{
+  return new DH_RSP(*this);
+}
+
+
+void DH_RSP::init()
+{
+  addField("Samples", BlobField<uint8>(1, itsNrSamples * sizeof(SampleType)), 32);
+  createDataBlock(); // calls fillDataPointers
+}
+
+
+void DH_RSP::fillDataPointers()
+{
+  itsSamples = (SampleType *) getData<uint8>("Samples");
+}
+
+
+// Position of a core: torus coordinates (x, y, z) plus t (presumably the core
+// within a node).  When SIMULATE_PSETS is defined, each 2x2x2 block of nodes
+// with t in {0,1} forms one pset of 16 cores; otherwise each rank is a pset.
+class Position
+{
+  public:
+    Position(unsigned x, unsigned y, unsigned z, unsigned t);
+    Position(unsigned rank);
+
+#if defined SIMULATE_PSETS
+    static const unsigned psetSize = 16;
+#else
+    static const unsigned psetSize = 1;
+#endif
+
+    unsigned rank() const;
+    unsigned psetNumber() const;
+    Position psetBase() const;
+    static Position psetBase(unsigned psetNumber);
+    Position positionInPset(unsigned index) const;
+    unsigned indexInPset() const;
+
+    unsigned x, y, z, t;
+    static unsigned xSize, ySize, zSize;
+};
+
+
+unsigned Position::xSize, Position::ySize, Position::zSize;
+
+
+Position::Position(unsigned x, unsigned y, unsigned z, unsigned t)
+:
+  x(x), y(y), z(z), t(t)
+{
+}
+
+
+Position::Position(unsigned rank)
+{
+#if defined SIMULATE_PSETS
+  if (rts_coordinatesForRank(rank, &x, &y, &z, &t) != 0) {
+    cerr << "error calling rts_coordinatesForRank" << endl;
+    exit(1);
+  }
+#else
+  x = rank;
+  y = z = t = 0;
+#endif
+}
+
+
+unsigned Position::rank() const
+{
+#if defined SIMULATE_PSETS
+  unsigned rank, numProcs;
+
+  if (rts_rankForCoordinates(x, y, z, t, &rank, &numProcs) != 0) {
+    cerr << "error calling rts_rankForCoordinates" << endl;
+    exit(1);
+  }
+
+  return rank;
+#else
+  return x;
+#endif
+}
+
+
+unsigned Position::psetNumber() const
+{
+#if defined SIMULATE_PSETS
+  return (x / 2) + (xSize / 2) * ((y / 2) + (ySize / 2) * (z / 2));
+#else
+  return x;
+#endif
+}
+
+
+Position Position::psetBase() const
+{
+#if defined SIMULATE_PSETS
+  return Position(x & ~1, y & ~1, z & ~1, 0);
+#else
+  return *this;
+#endif
+}
+
+
+Position Position::psetBase(unsigned psetNumber)
+{
+#if defined SIMULATE_PSETS
+  return Position(2 * (psetNumber % (xSize / 2)),
+                  2 * (psetNumber / (xSize / 2) % (ySize / 2)),
+                  2 * (psetNumber / (xSize / 2) / (ySize / 2)),
+                  0);
+#else
+  return Position(psetNumber);
+#endif
+}
+
+
+Position Position::positionInPset(unsigned index) const
+{
+#if defined SIMULATE_PSETS
+  Position base = psetBase();
+  return Position(base.x + index % 2, base.y + index / 2 % 2, base.z + index / 4 % 2, base.t + index / 8 % 2);
+#else
+  return *this;
+#endif
+}
+
+
+unsigned Position::indexInPset() const
+{
+#if defined SIMULATE_PSETS
+  return (x % 2) + 2 * (y % 2) + 4 * (z % 2) + 8 * (t % 2);
+#else
+  return 0;
+#endif
+}
+
+
+
+
+class WH_Transpose : public WorkHolder
+{
+  public:
+    typedef i4complex SampleType;
+
+    WH_Transpose(const string &name, const ACC::APS::ParameterSet &ps, unsigned rank, MPI_Comm comm);
+
+    virtual void preprocess();
+    virtual void process();
+    virtual void postprocess();
+
+  private:
+    virtual WorkHolder *make(const string &name);
+    bool isInput() const, isOutput() const;
+
+    void allToAll();
+
+    const ACC::APS::ParameterSet &itsParamSet;
+    unsigned itsCoreNumber, itsPsetNumber, itsPsetIndex;
+    unsigned itsNrStations, itsNrCorrelatorPsets, itsNrPsets;
+    unsigned itsPhase;
+    unsigned itsNrSamplesPerIntegration;
+
+    boost::multi_array<SampleType, 3> *itsInData, *itsOutData;
+
+    MPI_Comm itsMPIcomm;
+};
+
+
+WH_Transpose::WH_Transpose(const string &name, const ACC::APS::ParameterSet &ps, unsigned rank, MPI_Comm communicator)
+:
+  WorkHolder(0, 0, name, string("WH_Transpose")),
+  itsParamSet(ps),
+  itsCoreNumber(rank),
+  itsPsetNumber(Position(rank).psetNumber()),
+  itsPsetIndex(Position(rank).indexInPset()),
+  itsNrStations(ps.getUint32("Observation.NStations")),
+  itsNrCorrelatorPsets(ps.getUint32("Observation.NSubbands") / ps.getUint32("General.SubbandsPerPset")),
+  itsNrPsets(std::max(itsNrStations, itsNrCorrelatorPsets)),
+  itsPhase(itsPsetIndex),
+  itsNrSamplesPerIntegration(ps.getUint32("Observation.NSubbandSamples")),
+  itsInData(0), // allocated in preprocess()
+  itsOutData(0), // allocated in preprocess()
+  itsMPIcomm(communicator)
+{
+}
+
+
+inline bool WH_Transpose::isInput() const
+{
+  return itsPsetNumber < itsNrStations;
+}
+
+
+inline bool WH_Transpose::isOutput() const
+{
+  return itsPsetNumber < itsNrCorrelatorPsets;
+}
+
+
+void WH_Transpose::preprocess()
+{
+  if (isInput())
+    itsInData = new boost::multi_array<SampleType, 3>(boost::extents[itsNrCorrelatorPsets][itsNrSamplesPerIntegration][NR_POLARIZATIONS]);
+
+  if (isOutput())
+    itsOutData = new boost::multi_array<SampleType, 3>(boost::extents[itsNrStations][itsNrSamplesPerIntegration][NR_POLARIZATIONS]);
+}
+
+
+void WH_Transpose::process()
+{
+  NSTimer transposeTimer("transpose", itsCoreNumber == 0);
+  static NSTimer totalTimer("total", itsCoreNumber == 0);
+
+  TH_MPI::synchroniseAllProcesses();
+  totalTimer.start();
+  transposeTimer.start();
+
+  if (itsPhase == 0)
+    allToAll();
+
+  TH_MPI::synchroniseAllProcesses();
+  transposeTimer.stop();
+  totalTimer.stop();
+
+  ++ itsPhase, itsPhase %= Position::psetSize;
+}
+
+
+// Exchange the samples between all psets in this core's communicator: every
+// input pset sends one [samples][polarizations] block to each correlator
+// pset, and every output pset receives one block from each station pset.
+void WH_Transpose::allToAll()
+{
+  // counts and displacements are expressed in bytes (MPI_BYTE)
+  std::vector<int> sendCounts(itsNrPsets), sendDisplacements(itsNrPsets);
+  std::vector<int> receiveCounts(itsNrPsets), receiveDisplacements(itsNrPsets);
+
+  for (unsigned pset = 0; pset < itsNrPsets; pset ++) {
+    if (isInput() && pset < itsNrCorrelatorPsets) {
+      sendCounts[pset] = (*itsInData)[pset].num_elements() * sizeof(SampleType);
+      sendDisplacements[pset] = ((*itsInData)[pset].origin() - itsInData->origin()) * sizeof(SampleType);
+    } else {
+      sendCounts[pset] = 0;
+      sendDisplacements[pset] = 0;
+    }
+
+    if (isOutput() && pset < itsNrStations) {
+      receiveCounts[pset] = (*itsOutData)[pset].num_elements() * sizeof(SampleType);
+      receiveDisplacements[pset] = ((*itsOutData)[pset].origin() - itsOutData->origin()) * sizeof(SampleType);
+    } else {
+      receiveCounts[pset] = 0;
+      receiveDisplacements[pset] = 0;
+    }
+  }
+
+  if (MPI_Alltoallv(isInput() ? itsInData->origin() : 0,
+                    &sendCounts[0], &sendDisplacements[0], MPI_BYTE,
+                    isOutput() ? itsOutData->origin() : 0,
+                    &receiveCounts[0], &receiveDisplacements[0], MPI_BYTE,
+                    itsMPIcomm) != MPI_SUCCESS)
+  {
+    std::cerr << "MPI_Alltoallv() failed" << std::endl;
+    exit(1);
+  }
+}
+
+
+void WH_Transpose::postprocess()
+{
+  // The pointers are 0 unless preprocess() allocated them; reset them so that
+  // a second call is harmless.
+  delete itsInData;
+  itsInData = 0;
+
+  delete itsOutData;
+  itsOutData = 0;
+}
+
+WorkHolder *WH_Transpose::make(const string &name)
+{
+  return new WH_Transpose(name, itsParamSet, itsCoreNumber, itsMPIcomm);
+}
+
+
+
+
+class WH_Idle : public WorkHolder
+{
+  public:
+    WH_Idle(const string &name);
+
+    virtual void process();
+
+  private:
+    virtual WorkHolder *make(const string &name);
+};
+
+
+WH_Idle::WH_Idle(const string &name)
+:
+  WorkHolder(0, 0, name, string("idle"))
+{
+}
+
+
+WorkHolder *WH_Idle::make(const string &name)
+{
+  return new WH_Idle(name);
+}
+
+
+void WH_Idle::process()
+{
+  TH_MPI::synchroniseAllProcesses();
+  TH_MPI::synchroniseAllProcesses();
+}
+
+
+
+class AH_Transpose : public TinyApplicationHolder
+{
+  public:
+    virtual void define(const KeyValueMap &);
+    virtual void init();
+    virtual void run(int nsteps);
+
+  private:
+    vector<WorkHolder *> itsWHs;
+};
+
+
+// Create one communicator per core position within a pset (each containing
+// the core at that position of every pset that is needed) and one WorkHolder
+// per rank: a WH_Transpose on the needed psets, a WH_Idle elsewhere.
+void AH_Transpose::define(const KeyValueMap &)
+{
+#if defined HAVE_BGL
+  struct CNPersonality personality;
+
+  if (rts_get_personality(&personality, sizeof personality) != 0) {
+    cout << "could not get personality" << endl;
+    exit(1);
+  }
+
+  Position::xSize = personality.getXsize();
+  Position::ySize = personality.getYsize();
+  Position::zSize = personality.getZsize();
+
+  //clog << itsCoreNumber << " at (" << personality.getXcoord() << ',' << personality.getYcoord() << ',' << personality.getZcoord() << "), phase = " << itsPhase << endl;
+#else
+  Position::xSize = TH_MPI::getNumberOfNodes();
+#endif
+
+  unsigned nrStations = itsParamSet.getUint32("Observation.NStations");
+  unsigned nrNodes = TH_MPI::getNumberOfNodes();
+  unsigned nrCorrelatorPsets = itsParamSet.getUint32("Observation.NSubbands") / itsParamSet.getUint32("General.SubbandsPerPset");
+
+  if (Position::psetSize * nrStations > nrNodes) {
+    if (TH_MPI::getCurrentRank() == 0)
+      cerr << "Too many stations for number of nodes" << endl;
+
+    exit(1);
+  }
+
+  if (Position::psetSize * nrCorrelatorPsets > nrNodes) {
+    if (TH_MPI::getCurrentRank() == 0)
+      cerr << "Too many subbands divided over too few psets" << endl;
+
+    exit(1);
+  }
+
+  unsigned nrPsetsNeeded = std::max(nrStations, nrCorrelatorPsets);
+
+  MPI_Group all, group;
+  MPI_Comm comms[Position::psetSize];
+
+  if (MPI_Comm_group(MPI_COMM_WORLD, &all) != MPI_SUCCESS) {
+    std::cerr << "MPI_Comm_group() failed" << std::endl;
+    exit(1);
+  }
+
+  for (unsigned psetIndex = 0; psetIndex < Position::psetSize; psetIndex ++) {
+    std::vector<int> ranks(nrPsetsNeeded);
+
+    for (unsigned pset = 0; pset < nrPsetsNeeded; pset ++)
+      ranks[pset] = Position::psetBase(pset).positionInPset(psetIndex).rank();
+
+    if (MPI_Group_incl(all, nrPsetsNeeded, nrPsetsNeeded != 0 ? &ranks[0] : 0, &group) != MPI_SUCCESS) {
+      std::cerr << "MPI_Group_incl() failed" << std::endl;
+      exit(1);
+    }
+
+    if (MPI_Comm_create(MPI_COMM_WORLD, group, &comms[psetIndex]) != MPI_SUCCESS) {
+      std::cerr << "MPI_Comm_create() failed" << std::endl;
+      exit(1);
+    }
+
+    // The communicator does not need the group object any longer.
+    if (group != MPI_GROUP_EMPTY)
+      MPI_Group_free(&group);
+  }
+
+  MPI_Group_free(&all);
+
+  for (unsigned rank = 0; rank < nrNodes; rank ++) {
+    WorkHolder *wh = Position(rank).psetNumber() < nrPsetsNeeded ?
+      (WorkHolder *) new WH_Transpose("WH_Transpose", itsParamSet, rank, comms[Position(rank).indexInPset()]) :
+      (WorkHolder *) new WH_Idle("WH_Idle");
+    wh->runOnNode(rank);
+    itsWHs.push_back(wh);
+  }
+}
+
+
+void AH_Transpose::init()
+{
+  for (unsigned i = 0; i < itsWHs.size(); i ++)
+    itsWHs[i]->basePreprocess();
+}
+
+
+void AH_Transpose::run(int nsteps)
+{
+  for (int i = 0; i < nsteps; i ++)
+    for (unsigned j = 0; j < itsWHs.size(); j ++)
+      itsWHs[j]->baseProcess();
+}
+
+
+} // namespace RTCP
+} // namespace LOFAR
+
+using namespace LOFAR;
+using namespace LOFAR::RTCP;
+
+int main(int argc, char **argv)
+{
+  int retval;
+
+  try {
+    AH_Transpose myAH;
+    ApplicationHolderController myAHController(myAH, 1); //listen to ACC every 1 runs
+    retval = ACC::PLC::ACCmain(argc, argv, &myAHController);
+  } catch (Exception &e) {
+    std::cerr << "Caught exception: " << e.what() << endl;
+    retval = 1;
+  } catch (std::exception &e) {
+    std::cerr << "Caught exception: " << e.what() << endl;
+    retval = 1;
+  } catch (...) {
+    std::cerr << "Caught exception " << endl;
+    retval = 1;
+  }
+
+  return retval;
+}
+
+#else // !defined HAVE_MPI
+
+int main()
+{
+  return 0;
+}
+
+#endif