diff --git a/.gitattributes b/.gitattributes index 965ccb94cf9b527775d8cea2496dc582cfb489ae..8d951f6891f1279cb9fa4d296d857f9c48067fe9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,20 +1,5 @@ * text=auto !eol .subversion/config -text -3rdparty/zoid/daemon.2cores/buffers.c -text -3rdparty/zoid/daemon.2cores/buffers.c.416MB -text -3rdparty/zoid/daemon.2cores/buffers.c.416MB.modfor1core -text -3rdparty/zoid/daemon.2cores/buffers.c.448MB -text -3rdparty/zoid/daemon.2cores/handle_traffic.c -text -3rdparty/zoid/daemon.2cores/handle_traffic.c.fastsema -text -3rdparty/zoid/daemon.2cores/handle_traffic.c.polling -text -3rdparty/zoid/daemon.2cores/handle_traffic.c.sema -text -3rdparty/zoid/daemon.2cores/handle_traffic.c.signal -text -3rdparty/zoid/daemon.2cores/init.c -text -3rdparty/zoid/daemon.2cores/init.c.fastsema -text -3rdparty/zoid/daemon.2cores/init.c.polling -text -3rdparty/zoid/daemon.2cores/init.c.sema -text -3rdparty/zoid/daemon.2cores/init.c.signal -text -3rdparty/zoid/daemon.2cores/zoid.map -text CEP/Calibration/BBSControl/include/BBSControl/CommandHandlerEstimator.h -text CEP/Calibration/BBSControl/include/BBSControl/CommandHandlerReducer.h -text CEP/Calibration/BBSControl/include/BBSControl/OptionParser.h -text diff --git a/3rdparty/zoid/daemon.2cores/Makefile b/3rdparty/zoid/daemon.2cores/Makefile deleted file mode 100644 index e775e7170a07012608087d32fc390bd94d9a13b3..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -#*************************************************************************** -# ZEPTOOS:zepto-info -# This file is part of ZeptoOS: The Small Linux for Big Computers. -# See www.mcs.anl.gov/zeptoos for more information. -# ZEPTOOS:zepto-info -# -# ZEPTOOS:zepto-fillin -# $Id$ -# ZeptoOS_Version: 1.2 -# ZeptoOS_Heredity: FOSS_ORIG -# ZeptoOS_License: GPL -# ZEPTOOS:zepto-fillin -# -# ZEPTOOS:zepto-gpl -# Copyright: Argonne National Laboratory, Department of Energy, -# and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 -# ZeptoOS License: GPL -# -# This software is free. See the file ZeptoOS/misc/license.GPL -# for complete details on your rights to copy, modify, and use this -# software. -# ZEPTOOS:zepto-gpl -#*************************************************************************** - -CC = /usr/bin/gcc -CFLAGS = -pthread -Wall -g -O2 -I/bgl/BlueLight/ppcfloor/bglsys/include -I../include - -ifdef ZEPTO_VERSION -CFLAGS += -DZEPTO_VERSION=$(ZEPTO_VERSION) -endif -LDFLAGS = -L/bgl/BlueLight/ppcfloor/bglsys/lib -Wl,--export-dynamic,--version-script=zoid.map -LOADLIBES = -ldevices.rts -ldl - -all: zoid ciod_preload.so - -zoid: init.o handle_traffic.o buffers.o zoid_mapping.o cache.o - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LOADLIBES) - -ciod_preload.so: ciod_preload.c - $(CC) $(CFLAGS) -fPIC -shared -o $@ $^ - -install: - cp zoid $(ZEPTO_INSTALLDIR)/sbin - cp ciod_preload.so $(ZEPTO_INSTALLDIR)/lib - -clean: - rm -f zoid ciod_preload.so *.o *~ diff --git a/3rdparty/zoid/daemon.2cores/buffers.c b/3rdparty/zoid/daemon.2cores/buffers.c deleted file mode 120000 index d92683867a141fc603d953ad80f2c0be77cf8b2f..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/buffers.c +++ /dev/null @@ -1 +0,0 @@ -buffers.c.416MB \ No newline at end of file diff --git a/3rdparty/zoid/daemon.2cores/buffers.c.416MB b/3rdparty/zoid/daemon.2cores/buffers.c.416MB deleted file mode 100644 index 6c766961d618339ec5e31fe7a42d18f7ccbbf37b..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/buffers.c.416MB +++ /dev/null @@ -1,462 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: buffers.c,v 1.9 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include <errno.h> -#include <pthread.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <assert.h> -#include <string.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_api.h" - -#if !defined L1_CONSISTENCY_IN_SOFTWARE - -#define NR_FREE_SEGMENTS (* (int *) ((char *) sram + 0x3800)) -#define FREE_SEGMENTS ((struct segment *) ((char *) sram + 0x3808)) - -struct segment -{ - char *address; - size_t size; -}; - -static lock_pair allocator_locks; - -void *__zoid_alloc(size_t size) -{ - char *address = 0; - struct segment *free_segment; - - size += 32 + 31, size &= ~31; - - enter_critical_section(&allocator_locks); - - for (free_segment = FREE_SEGMENTS + NR_FREE_SEGMENTS; -- free_segment >= FREE_SEGMENTS;) - if (size == free_segment->size) { - address = free_segment->address; - -- NR_FREE_SEGMENTS; - memmove(free_segment, free_segment + 1, (char *) (FREE_SEGMENTS + NR_FREE_SEGMENTS) - (char *) free_segment); - break; - } else if (size < free_segment->size) { - address = free_segment->address; - free_segment->address = free_segment->address + size; - free_segment->size -= size; - break; - } - - leave_critical_section(&allocator_locks); - - * (size_t *) address = size; - /* fprintf(stderr, "__zoid_alloc(%d) = %p\n", size, address); */ - return address + 32; -} - - -void __zoid_free(void *addr) -{ - char *address = (char *) addr - 32; - size_t size = * (size_t *) address; - struct segment *free_segment; - int merge_left, merge_right; - - enter_critical_section(&allocator_locks); - - /*flush_L1_region(address, size);*/ - - free_segment = FREE_SEGMENTS + NR_FREE_SEGMENTS; - - while (free_segment > FREE_SEGMENTS && address < free_segment[-1].address) - -- free_segment; - - merge_left = free_segment > FREE_SEGMENTS && free_segment[-1].address + free_segment[-1].size == address; - merge_right = free_segment < FREE_SEGMENTS + NR_FREE_SEGMENTS && address + size == free_segment->address; - - /*fprintf(stderr, "__zoid_free(%p): merge_left = %d, merge_right = %d\n", address, merge_left, merge_right);*/ - - if (merge_left) { - if (merge_right) { - free_segment[-1].size += size + free_segment->size; - -- NR_FREE_SEGMENTS; - memmove(free_segment, free_segment + 1, (char *) (FREE_SEGMENTS + NR_FREE_SEGMENTS) - (char *) free_segment); - } else { - free_segment[-1].size += size; - } - } else { - if (merge_right) { - free_segment->address = address; - free_segment->size += size; - } else { - memmove(free_segment + 1, free_segment, (char *) (FREE_SEGMENTS + NR_FREE_SEGMENTS) - (char *) free_segment); - ++ NR_FREE_SEGMENTS; - free_segment->address = address; - free_segment->size = size; - } - } - - leave_critical_section(&allocator_locks); -} - - -void allocater_init(void) -{ - NR_FREE_SEGMENTS = 1; - FREE_SEGMENTS[0].address = (char *) 0xA6000000; - FREE_SEGMENTS[0].size = 416 * 1024 * 1024; - - init_lock_pair(&allocator_locks); -} - -#else - -#define NR_OF_SEG_DESCS 256 -#define MEM_SIZE 416*1024*1024 -#define MEM_START 0xA6000000 - - -typedef struct seg_desc_s { - void *addr; - size_t size; - struct seg_desc_s *next; -} seg_desc; - -static struct alloc_data { - seg_desc *unused_seg_desc_list; - seg_desc *free_seg_list; - seg_desc *allocated_seg_list; -} *alloc_data; - -static void *zoid_mem; -static lock_pair allocater_locks; - - - -#if defined L1_CONSISTENCY_IN_SOFTWARE -static inline void flush_seg_descriptors(void) -{ - flush_L1_region(zoid_mem, sizeof(struct alloc_data) + NR_OF_SEG_DESCS * sizeof(seg_desc)); -} -#endif - - -#if 0 -static void print_lists() { - seg_desc *p; - - fprintf(stderr, "Free:"); - p = alloc_data->free_seg_list; - while(p) { - fprintf(stderr, " %p(%u)", p->addr, p->size); - p = p->next; - } - fprintf(stderr, "\n"); - - fprintf(stdout, "Allocated:"); - p = alloc_data->allocated_seg_list; - while(p) { - fprintf(stdout, " %p-%p(%u)", p->addr, p->addr + p->size, p->size); - p = p->next; - } - fprintf(stdout, "\n"); -} -#endif - - -static void return_unused_seg_desc(seg_desc* seg_desc) { - assert(seg_desc); - - seg_desc->next = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = seg_desc; -} - - -static seg_desc *get_seg_desc(void) { - seg_desc *tmp; - - assert(alloc_data->unused_seg_desc_list); - - tmp = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = alloc_data->unused_seg_desc_list->next; - return tmp; -} - - -static void add_to_free_seg_list(seg_desc *seg) { - seg_desc *p = alloc_data->free_seg_list; - seg_desc *prev = NULL; - seg_desc *tmp; - - while(p && seg->addr > p->addr) { - prev = p; - p = p->next; - } - - if(prev) { - if(prev->addr + prev->size == seg->addr) { - prev->size += seg->size; - return_unused_seg_desc(seg); - seg = prev; - } else { - prev->next = seg; - seg->next = p; - } - } else { - alloc_data->free_seg_list = seg; - seg->next = p; - } - - if(seg->next && seg->addr + seg->size == seg->next->addr) { - tmp = seg->next; - seg->size += tmp->size; - seg->next = tmp->next; - return_unused_seg_desc(tmp); - } -} - - -void __zoid_free(void *addr) { - seg_desc *p, *prev; - - /*fprintf(stderr, "%d: __zoid_free(%p)\n", getpid(), addr);*/ - enter_critical_section(&allocater_locks); - /*flush_seg_descriptors();*/ - - p = alloc_data->allocated_seg_list; - prev = NULL; - - while(p && p->addr != addr) { - prev = p; - p = p->next; - } - - assert(p); - - if(prev) - prev->next = p->next; - else - alloc_data->allocated_seg_list = p->next; - - flush_L1_region(p->addr, p->size); - - add_to_free_seg_list(p); - - flush_seg_descriptors(); - leave_critical_section(&allocater_locks); -} - - -void *__zoid_alloc(size_t size) { - seg_desc *p, *prev, *new; - void *new_addr; - - assert(size > 0); - - size += 31; - size &= ~31; - - enter_critical_section(&allocater_locks); - /*flush_seg_descriptors();*/ - - p = alloc_data->free_seg_list; - prev = NULL; - - while(p && p->size < size) { - prev = p; - p = p->next; - } - - if(!p) { - leave_critical_section(&allocater_locks); - return NULL; - } - - if(p->size == size) { - if(prev) - prev->next = p->next; - else - alloc_data->free_seg_list = p->next; - - new = p; - } else { - new = get_seg_desc(); - new->size = size; - new->addr = p->addr; - p->addr += size; - p->size -= size; - } - - new->next = alloc_data->allocated_seg_list; - alloc_data->allocated_seg_list = new; - new_addr = new->addr; - - flush_seg_descriptors(); - leave_critical_section(&allocater_locks); - - /*fprintf(stderr, "%d: __zoid_alloc(%u) = %p\n", getpid(), size, new_addr);*/ - return new_addr; -} - - -void allocater_init(void) { - int i; - seg_desc *seg_descs; - - zoid_mem = (void*)MEM_START; - - alloc_data = (struct alloc_data*)zoid_mem; - seg_descs = (seg_desc*)(zoid_mem + sizeof(struct alloc_data)); - - alloc_data->free_seg_list = &seg_descs[0]; - alloc_data->free_seg_list->addr = (void*) - (((unsigned int)(&seg_descs[NR_OF_SEG_DESCS]) + 31) & ~31); - alloc_data->free_seg_list->size = zoid_mem + MEM_SIZE - - alloc_data->free_seg_list->addr; - alloc_data->free_seg_list->next = NULL; - - alloc_data->unused_seg_desc_list = NULL; - alloc_data->allocated_seg_list = NULL; - - for(i = 1; i < NR_OF_SEG_DESCS; i++) - return_unused_seg_desc(&seg_descs[i]); - - flush_seg_descriptors(); - init_lock_pair(&allocater_locks); -} - -#endif - - -void *__zoid_allocate_buffer(size_t size) { - struct zoid_buffer* buffer; - int remainder; - - /*assert(size <= max_buffer_size_2);*/ - - if (size > max_buffer_size_2) { - fprintf(stderr, "assertion failed: size = %u, max_buffer_size_2 = %u\n", size, max_buffer_size_2); - while (1); - } - - remainder = size % TREE_DATA_SIZE; - if(remainder) - size += TREE_DATA_SIZE - remainder; - - size += 31; - size &= ~31; - - buffer = (struct zoid_buffer*)__zoid_alloc(sizeof(struct zoid_buffer) + size); - assert(buffer); - - buffer->next = NULL; - buffer->size = size; - buffer->errnum = buffer->excessive_size = 0; - buffer->userbuf_out = NULL; - buffer->userbuf_out_cb = NULL; - buffer->userbuf_out_priv = NULL; - buffer->userbuf_in = NULL; - buffer->ack_sent = 0; - - return buffer->data; -} - - -void __zoid_release_buffer(void* buffer) -{ - - if (buffer != 0) { - struct zoid_buffer *entry = get_zoid_buffer(buffer); - assert (!entry->userbuf_out); - __zoid_free(entry); - } -} - - -struct zoid_buffer *get_zoid_buffer(void* buffer) -{ - if (buffer) - buffer -= offsetof(struct zoid_buffer, data); - return buffer; -} - - -void init_zoid_buf_pipe(zoid_buf_pipe *pipe, lock_pair *locks) -{ - pipe->first = NULL; - pipe->last = NULL; - pipe->locks = locks; -} - - -void enqueue_zoid_buf(zoid_buf_pipe *pipe, struct zoid_buffer *buffer) -{ - /*fprintf(stderr, "%d: enqueue_zoid_buf(%p, %p)\n", getpid(), pipe, buffer);*/ - buffer->next = NULL; - - enter_critical_section(pipe->locks); - - if (pipe->first) { - pipe->last->next = buffer; -#if defined L1_CONSISTENCY_IN_SOFTWARE - flush_L1_region(&pipe->last->next, sizeof(pipe->last->next)); -#endif - } else { - pipe->first = buffer; - } - - pipe->last = buffer; -#if defined L1_CONSISTENCY_IN_SOFTWARE - flush_zoid_buf(buffer); -#endif - leave_critical_section(pipe->locks); -} - - -struct zoid_buffer *dequeue_zoid_buf(zoid_buf_pipe *pipe) -{ - struct zoid_buffer *buffer; - - enter_critical_section(pipe->locks); - - buffer = pipe->first; - /*fprintf(stderr, "%d: dequeue_zoid_buf(%p) = %p\n", getpid(), pipe, buffer);*/ - - if (buffer != 0 && buffer != (struct zoid_buffer *) 1) { - /*flush_L1_region(buffer, sizeof(struct zoid_buffer));*/ - pipe->first = buffer->next; - } - - leave_critical_section(pipe->locks); - return buffer; -} diff --git a/3rdparty/zoid/daemon.2cores/buffers.c.416MB.modfor1core b/3rdparty/zoid/daemon.2cores/buffers.c.416MB.modfor1core deleted file mode 100644 index 2bd9239899046906ef2edd86511e089568a2b86f..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/buffers.c.416MB.modfor1core +++ /dev/null @@ -1,384 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: buffers.c,v 1.9 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> - -#include <errno.h> -#include <pthread.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <assert.h> -#include <string.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_api.h" - -#define NR_OF_SEG_DESCS 256 -#define MEM_SIZE 416*1024*1024 -#define MEM_START 0xA6000000 - - -typedef struct seg_desc_s { - void *addr; - size_t size; - struct seg_desc_s *next; -} seg_desc; - -static struct alloc_data { - seg_desc *unused_seg_desc_list; - seg_desc *free_seg_list; - seg_desc *allocated_seg_list; -} *alloc_data; - -static void *zoid_mem; -static lock_pair allocater_locks; - - -void init_lock_pair(lock_pair *locks) { - //static int next_lock = 0; - - if (pthread_mutex_init(&(locks->pt_mutex), NULL)) { - perror("lock_pair_init"); - exit(-1); - } - //locks->hw_mutex = lockbox + MUTEX_OFFSET(next_lock++); - //BGL_Mutex_Release(locks->hw_mutex); -} - - - -inline void enter_critical_section(lock_pair *locks) { - if(pthread_mutex_lock(&(locks->pt_mutex))) - assert(0); - //BGL_Mutex_Acquire(locks->hw_mutex); -} - - - -inline void leave_critical_section(lock_pair *locks) { - //BGL_Mutex_Release(locks->hw_mutex); - if(pthread_mutex_unlock(&(locks->pt_mutex))) - assert(0); -} - - - -static inline void flush_L1(void) { - void *p = zoid_mem; - - while(p < zoid_mem + sizeof(struct alloc_data) + NR_OF_SEG_DESCS * sizeof(seg_desc)) { - asm volatile("dcbf 0,%0\n" :: "r" (p)); - p += L1_CACHE_LINE_SIZE; - } -} - - -#if 0 -static void print_lists() { - seg_desc *p; - - fprintf(stderr, "Free:"); - p = alloc_data->free_seg_list; - while(p) { - fprintf(stderr, " %p(%u)", p->addr, p->size); - p = p->next; - } - fprintf(stderr, "\n"); - - fprintf(stdout, "Allocated:"); - p = alloc_data->allocated_seg_list; - while(p) { - fprintf(stdout, " %p-%p(%u)", p->addr, p->addr + p->size, p->size); - p = p->next; - } - fprintf(stdout, "\n"); -} -#endif - - -static void return_unused_seg_desc(seg_desc* seg_desc) { - assert(seg_desc); - - seg_desc->next = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = seg_desc; -} - - -static seg_desc *get_seg_desc(void) { - seg_desc *tmp; - - assert(alloc_data->unused_seg_desc_list); - - tmp = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = alloc_data->unused_seg_desc_list->next; - return tmp; -} - - -static void add_to_free_seg_list(seg_desc *seg) { - seg_desc *p = alloc_data->free_seg_list; - seg_desc *prev = NULL; - seg_desc *tmp; - - while(p && seg->addr > p->addr) { - prev = p; - p = p->next; - } - - if(prev) { - if(prev->addr + prev->size == seg->addr) { - prev->size += seg->size; - return_unused_seg_desc(seg); - seg = prev; - } else { - prev->next = seg; - seg->next = p; - } - } else { - alloc_data->free_seg_list = seg; - seg->next = p; - } - - if(seg->next && seg->addr + seg->size == seg->next->addr) { - tmp = seg->next; - seg->size += tmp->size; - seg->next = tmp->next; - return_unused_seg_desc(tmp); - } -} - - -void __zoid_free(void *addr) { - seg_desc *p, *prev; - - enter_critical_section(&allocater_locks); - flush_L1(); - - p = alloc_data->allocated_seg_list; - prev = NULL; - - while(p && p->addr != addr) { - prev = p; - p = p->next; - } - - assert(p); - - if(prev) - prev->next = p->next; - else - alloc_data->allocated_seg_list = p->next; - - flush_L1_region(p->addr, p->size); - - add_to_free_seg_list(p); - - flush_L1(); - leave_critical_section(&allocater_locks); -} - - -void *__zoid_alloc(size_t size) { - seg_desc *p, *prev, *new; - - assert(size > 0); - - size += 31; - size &= ~31; - - enter_critical_section(&allocater_locks); - flush_L1(); - - p = alloc_data->free_seg_list; - prev = NULL; - - while(p && p->size < size) { - prev = p; - p = p->next; - } - - if(!p) { - leave_critical_section(&allocater_locks); - return NULL; - } - - if(p->size == size) { - if(prev) - prev->next = p->next; - else - alloc_data->free_seg_list = p->next; - - new = p; - } else { - new = get_seg_desc(); - new->size = size; - new->addr = p->addr; - p->addr += size; - p->size -= size; - } - - new->next = alloc_data->allocated_seg_list; - alloc_data->allocated_seg_list = new; - - flush_L1(); - leave_critical_section(&allocater_locks); - - return new->addr; -} - - -void allocater_init(void) { - int i; - seg_desc *seg_descs; - - zoid_mem = (void*)MEM_START; - - alloc_data = (struct alloc_data*)zoid_mem; - seg_descs = (seg_desc*)(zoid_mem + sizeof(struct alloc_data)); - - alloc_data->free_seg_list = &seg_descs[0]; - alloc_data->free_seg_list->addr = (void*) - (((unsigned int)(&seg_descs[NR_OF_SEG_DESCS] + 31)) & ~31); - alloc_data->free_seg_list->size = zoid_mem + MEM_SIZE - - alloc_data->free_seg_list->addr; - alloc_data->free_seg_list->next = NULL; - - alloc_data->unused_seg_desc_list = NULL; - alloc_data->allocated_seg_list = NULL; - - for(i = 1; i < NR_OF_SEG_DESCS; i++) - return_unused_seg_desc(&seg_descs[i]); - - init_lock_pair(&allocater_locks); -} - - -void *__zoid_allocate_buffer(size_t size) { - struct zoid_buffer* buffer; - int remainder; - - assert(size <= max_buffer_size_2); - - remainder = size % TREE_DATA_SIZE; - if(remainder) - size += TREE_DATA_SIZE - remainder; - - size += 31; - size &= ~31; - - buffer = (struct zoid_buffer*)__zoid_alloc(sizeof(struct zoid_buffer) + size); - assert(buffer); - - buffer->next = NULL; - buffer->size = size; - buffer->errnum = buffer->excessive_size = 0; - buffer->userbuf_out = NULL; - buffer->userbuf_out_cb = NULL; - buffer->userbuf_out_priv = NULL; - buffer->userbuf_in = NULL; - buffer->ack_sent = 0; - - return buffer->data; -} - - -void __zoid_release_buffer(void* buffer) { - struct zoid_buffer* entry; - - if (!buffer) - return; - - entry = get_zoid_buffer(buffer); - - if (entry->userbuf_out) { - assert(0); - } - - __zoid_free(entry); -} - - -struct zoid_buffer* -get_zoid_buffer(void* buffer) -{ - if (buffer) - buffer -= offsetof(struct zoid_buffer, data); - return buffer; -} - - -void init_zoid_buf_pipe(zoid_buf_pipe *pipe, lock_pair *locks) { - pipe->first = NULL; - pipe->last = NULL; - pipe->locks = locks; -} - - -void enqueue_zoid_buf(zoid_buf_pipe *pipe, struct zoid_buffer *buffer) { - buffer->next = NULL; - - enter_critical_section(pipe->locks); - - if (pipe->first) { - pipe->last->next = buffer; - flush_L1_region(&(pipe->last->next), - sizeof(pipe->last->next)); - } else { - pipe->first = buffer; - } - pipe->last = buffer; - - flush_zoid_buf(buffer); - - leave_critical_section(pipe->locks); -} - - -struct zoid_buffer *dequeue_zoid_buf(zoid_buf_pipe *pipe) { - struct zoid_buffer *buffer; - - enter_critical_section(pipe->locks); - - buffer = pipe->first; - if(buffer == (struct zoid_buffer*)1) { - leave_critical_section(pipe->locks); - return (struct zoid_buffer*)1; - } - - if(buffer) { - flush_L1_region(buffer, sizeof(struct zoid_buffer)); - pipe->first = buffer->next; - } - - leave_critical_section(pipe->locks); - - return buffer; -} diff --git a/3rdparty/zoid/daemon.2cores/buffers.c.448MB b/3rdparty/zoid/daemon.2cores/buffers.c.448MB deleted file mode 100644 index 22a52204c291bb1fdab7e75b6f86e9938687e363..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/buffers.c.448MB +++ /dev/null @@ -1,358 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: buffers.c,v 1.9 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> - -#include <errno.h> -#include <pthread.h> -#include <sys/ipc.h> -#include <sys/shm.h> -#include <assert.h> -#include <string.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_api.h" - -#define NR_OF_SEG_DESCS 256 -#define MEM_SIZE 448*1024*1024 -#define MEM_START 0xA4000000 - - -static struct zoid_buffer* empty_queue_1 = NULL; -static struct zoid_buffer* empty_queue_2 = NULL; -static pthread_mutex_t empty_queue_mutex = PTHREAD_MUTEX_INITIALIZER; - - -typedef struct seg_desc_s { - void *addr; - size_t size; - struct seg_desc_s *next; -} seg_desc; - -static struct alloc_data { - seg_desc *unused_seg_desc_list; - seg_desc *free_seg_list; - seg_desc *allocated_seg_list; -} *alloc_data; - -static void *zoid_mem; -static lock_pair allocater_locks; - - - -static inline void flush_L1(void) { - void *p = zoid_mem; - - while(p < zoid_mem + sizeof(struct alloc_data) + NR_OF_SEG_DESCS * sizeof(seg_desc)) { - asm volatile("dcbf 0,%0\n" :: "r" (p)); - p += L1_CACHE_LINE_SIZE; - } -} - - -#if 0 -static void print_lists() { - seg_desc *p; - - fprintf(stderr, "Free:"); - p = alloc_data->free_seg_list; - while(p) { - fprintf(stderr, " %p(%u)", p->addr, p->size); - p = p->next; - } - fprintf(stderr, "\n"); - - fprintf(stderr, "Allocated:"); - p = alloc_data->allocated_seg_list; - while(p) { - fprintf(stderr, " %p(%u)", p->addr, p->size); - p = p->next; - } - fprintf(stderr, "\n\n"); -} -#endif - - -static void return_unused_seg_desc(seg_desc* seg_desc) { - assert(seg_desc); - - seg_desc->next = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = seg_desc; -} - - -static seg_desc *get_seg_desc(void) { - seg_desc *tmp; - - assert(alloc_data->unused_seg_desc_list); - - tmp = alloc_data->unused_seg_desc_list; - alloc_data->unused_seg_desc_list = alloc_data->unused_seg_desc_list->next; - return tmp; -} - - -static void add_to_free_seg_list(seg_desc *seg) { - seg_desc *p = alloc_data->free_seg_list; - seg_desc *prev = NULL; - seg_desc *tmp; - - while(p && seg->addr > p->addr) { - prev = p; - p = p->next; - } - - if(prev) { - if(prev->addr + prev->size == seg->addr) { - prev->size += seg->size; - return_unused_seg_desc(seg); - seg = prev; - } else { - prev->next = seg; - seg->next = p; - } - } else { - alloc_data->free_seg_list = seg; - seg->next = p; - } - - if(seg->next && seg->addr + seg->size == seg->next->addr) { - tmp = seg->next; - seg->size += tmp->size; - seg->next = tmp->next; - return_unused_seg_desc(tmp); - } -} - - -void __zoid_free(void *addr) { - seg_desc *p, *prev; - - enter_critical_section(&allocater_locks); - flush_L1(); - - p = alloc_data->allocated_seg_list; - prev = NULL; - - while(p && p->addr != addr) { - prev = p; - p = p->next; - } - - assert(p); - - if(prev) - prev->next = p->next; - else - alloc_data->allocated_seg_list = p->next; - - flush_L1_region(p->addr, p->size); - - add_to_free_seg_list(p); - - flush_L1(); - leave_critical_section(&allocater_locks); -} - - -void *__zoid_alloc(size_t size) { - seg_desc *p, *prev, *new; - - assert(size > 0); - - size += 31; - size &= ~31; - - enter_critical_section(&allocater_locks); - flush_L1(); - - p = alloc_data->free_seg_list; - prev = NULL; - - while(p && p->size < size) { - prev = p; - p = p->next; - } - - if(!p) { - leave_critical_section(&allocater_locks); - return NULL; - } - - if(p->size == size) { - if(prev) - prev->next = p->next; - else - alloc_data->free_seg_list = p->next; - - new = p; - } else { - new = get_seg_desc(); - new->size = size; - new->addr = p->addr; - p->addr += size; - p->size -= size; - } - - new->next = alloc_data->allocated_seg_list; - alloc_data->allocated_seg_list = new; - - flush_L1(); - leave_critical_section(&allocater_locks); - - return new->addr; -} - - -void allocater_init(void) { - int i; - seg_desc *seg_descs; - - zoid_mem = (void*)MEM_START; - - alloc_data = (struct alloc_data*)zoid_mem; - seg_descs = (seg_desc*)(zoid_mem + sizeof(struct alloc_data)); - - alloc_data->free_seg_list = &seg_descs[0]; - alloc_data->free_seg_list->addr = (void*) - (((unsigned int)(&seg_descs[NR_OF_SEG_DESCS] + 31)) & ~31); - alloc_data->free_seg_list->size = zoid_mem + MEM_SIZE - - alloc_data->free_seg_list->addr; - alloc_data->free_seg_list->next = NULL; - - alloc_data->unused_seg_desc_list = NULL; - alloc_data->allocated_seg_list = NULL; - - for(i = 1; i < NR_OF_SEG_DESCS; i++) - return_unused_seg_desc(&seg_descs[i]); - - init_lock_pair(&allocater_locks); -} - - -void *__zoid_allocate_buffer(size_t size) { - struct zoid_buffer* buffer; - int remainder; - - assert(size <= max_buffer_size_2); - - remainder = size % TREE_DATA_SIZE; - if(remainder) - size += TREE_DATA_SIZE - remainder; - - buffer = (struct zoid_buffer*)__zoid_alloc(sizeof(struct zoid_buffer) + size); - assert(buffer); - - buffer->next = NULL; - buffer->size = size; - buffer->errnum = buffer->excessive_size = 0; - buffer->userbuf_out = NULL; - buffer->userbuf_out_cb = NULL; - buffer->userbuf_out_priv = NULL; - buffer->userbuf_in = NULL; - buffer->ack_sent = 0; - - return buffer->data; -} - - -void __zoid_release_buffer(void* buffer) { - struct zoid_buffer* entry; - - if (!buffer) - return; - - entry = get_zoid_buffer(buffer); - - if (entry->userbuf_out) { - assert(0); - } - - __zoid_free(entry); -} - - -struct zoid_buffer* -get_zoid_buffer(void* buffer) -{ - if (buffer) - buffer -= offsetof(struct zoid_buffer, data); - return buffer; -} - - -void init_zoid_buf_pipe(zoid_buf_pipe *pipe, lock_pair *locks) { - pipe->first = NULL; - pipe->last = NULL; - pipe->locks = locks; -} - - -void enqueue_zoid_buf(zoid_buf_pipe *pipe, struct zoid_buffer *buffer) { - buffer->next = NULL; - - enter_critical_section(pipe->locks); - - if (pipe->first) { - pipe->last->next = buffer; - flush_L1_region(&(pipe->last->next), - sizeof(pipe->last->next)); - } else { - pipe->first = buffer; - } - pipe->last = buffer; - - flush_zoid_buf(buffer); - - leave_critical_section(pipe->locks); -} - - -struct zoid_buffer *dequeue_zoid_buf(zoid_buf_pipe *pipe) { - struct zoid_buffer *buffer; - - enter_critical_section(pipe->locks); - - buffer = pipe->first; - if(buffer == (struct zoid_buffer*)1) { - leave_critical_section(pipe->locks); - return (struct zoid_buffer*)1; - } - - if(buffer) { - flush_L1_region(buffer, sizeof(struct zoid_buffer)); - pipe->first = buffer->next; - } - - leave_critical_section(pipe->locks); - - return buffer; -} diff --git a/3rdparty/zoid/daemon.2cores/cache.c b/3rdparty/zoid/daemon.2cores/cache.c deleted file mode 100644 index 1e54698aea5a6180375dc7d52a5d1209e05cea07..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/cache.c +++ /dev/null @@ -1,57 +0,0 @@ -#include "zoid.h" - -#define L1_SIZE (32 * 1024) - - -char l1flusher[L1_SIZE] __attribute__ ((aligned(32))); - - -void flush_L1_all() -{ - char *p; - - for (p = l1flusher; p < l1flusher + L1_SIZE; p += L1_CACHE_LINE_SIZE) - asm volatile("lwz 0,0(%0)\n" :: "r" (p) : "0"); - - asm volatile("sync\n" ::: "memory"); -} - - -void flush_L1_region(void *addr, unsigned int size) -{ - if (size < L1_SIZE) { - char *p; - - for (p = addr; p < (char *) addr + size; p += L1_CACHE_LINE_SIZE) - asm volatile("dcbf 0,%0\n" :: "r" (p)); - - asm volatile("sync\n" ::: "memory"); - } else { - flush_L1_all(); - } -} - - -void flush_zoid_buf(struct zoid_buffer *buffer) -{ - int total_size = sizeof(struct zoid_buffer) + buffer->size; - - if(buffer->userbuf_in) - total_size += buffer->userbuf_in_len; - // XXX: why is there no such line for userbuf_out? - - if(total_size >= L1_SIZE) { - flush_L1_all(); - } else { - if(buffer->userbuf_in) - flush_L1_region(buffer->userbuf_in, buffer->userbuf_in_len); - - if(buffer->userbuf_out) - flush_L1_region(buffer->userbuf_out, buffer->total_len - - buffer->result_len); - - /*flush_L1_region(buffer->data, buffer->size); - flush_L1_region(buffer, sizeof(struct zoid_buffer));*/ - flush_L1_region(buffer, sizeof(struct zoid_buffer) + buffer->size); - } -} diff --git a/3rdparty/zoid/daemon.2cores/ciod_preload.c b/3rdparty/zoid/daemon.2cores/ciod_preload.c deleted file mode 100644 index dd7cce86bc4dd60af77eb229f1b954887c292da6..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/ciod_preload.c +++ /dev/null @@ -1,230 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <dirent.h> -#include <fcntl.h> -#include <signal.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <unistd.h> - -#include <bglpersonality.h> - -static void __zoid_signal_handler(int signo); - -static int server_sock; - -/* - * Function run at startup. Just installs the signal handler that zoid - * will later trigger, and a server-side unix domain socket. - */ -void __zoid_ciod_startup(void) __attribute__((constructor)); -void __zoid_ciod_startup(void) -{ - struct sigaction sa; - struct sockaddr_un addr; - - sa.sa_handler = __zoid_signal_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - - if (sigaction(SIGUSR1, &sa, NULL)) - perror("installing signal handler"); - - if (!(server_sock = socket(PF_UNIX, SOCK_STREAM, 0))) - { - perror("create unix domain socket"); - return; - } - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, "/var/tmp/zoid.socket"); - if (bind(server_sock, (struct sockaddr*)&addr, sizeof(addr))) - { - perror("bind /var/tmp/zoid.socket"); - return; - } - if (listen(server_sock, 5) < 0) - { - perror("listen"); - return; - } -} - -static void __zoid_signal_handler(int signo) -{ - BGLPersonality personality; - int fd; - FILE* file; - char buffer[1024]; - int inode_7000 = -1, fd_7000 = -1, inode_8000 = -1, fd_8000 = -1; - int fds[2]; - DIR* dir; - struct dirent *de; - struct msghdr msg = {0}; - struct cmsghdr *cmsg; - char msgbuf[CMSG_SPACE(sizeof(fds))]; - uid_t uid; - int transfer_sock; - struct iovec iov; - char tmp; - int service_node; - - /* Get service node's IP address from personality. */ - if ((fd = open("/proc/personality", O_RDONLY)) < 0) - { - perror("open /proc/personality"); - return; - } - if (read(fd, &personality, sizeof(personality)) != sizeof(personality)) - { - perror("read personality"); - return; - } - close(fd); - service_node = (personality.serviceNode? personality.serviceNode : - personality.NFSServer); - - /* Find the inodes of port 7000 and 8000 connections to the service node, - used by CIOD to receive commands and forward stdout/stderr. */ - if (!(file = fopen("/proc/net/tcp", "r"))) - { - perror("open /proc/net/tcp"); - return; - } - while (fgets(buffer, sizeof(buffer), file) == buffer) - { - int local_port, remote_address, inode; - - if (sscanf(buffer, - "%*d: %*x:%x %x:%*x %*x %*x:%*x %*x:%*x %*x %*d %*d %d", - &local_port, &remote_address, &inode) == 3 && - remote_address == service_node) - { - if (local_port == 7000 && inode != 0) - inode_7000 = inode; - else if (local_port == 8000 && inode != 0) - inode_8000 = inode; - } - } - fclose(file); - - if (inode_7000 < 0 || inode_8000 < 0) - { - fprintf(stderr, "Socket connection to the service node not found!\n"); - return; - } - - /* Now locate the file descriptors that own these sockets. - We need to switch back to root for a second to do that... */ - - uid = geteuid(); - if (seteuid(0) < 0) - { - perror("seteuid"); - return; - } - - if (!(dir = opendir("/proc/self/fd"))) - { - perror("open /proc/self/fd"); - return; - } - while ((de = readdir(dir))) - { - char buf2[1024]; - int len; - - sprintf(buffer, "/proc/self/fd/%s", de->d_name); - - if ((len = readlink(buffer, buf2, sizeof(buf2))) > 0 && - len < sizeof(buf2)) - { - buf2[len] = '\0'; - if (strncmp(buf2, "socket:", strlen("socket:")) == 0) - { - int inode; - char* endbuf; - - inode = strtol(buf2 + strlen("socket:["), &endbuf, 10); - if (inode == inode_7000) - fd_7000 = strtol(de->d_name, NULL, 10); - else if (inode == inode_8000) - fd_8000 = strtol(de->d_name, NULL, 10); - } - } - } - closedir(dir); - - /* Switch again to user... */ - if (seteuid(uid) < 0) - { - perror("seteuid"); - return; - } - - if (fd_7000 < 0 || fd_8000 < 0) - { - fprintf(stderr, "Socket connection to the service node not found!\n"); - return; - } - - if ((transfer_sock = accept(server_sock, NULL, 0)) < 0) - { - perror("accept"); - return; - } - - /* This magic is taken from cmsg(3). */ - iov.iov_base = &tmp; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = msgbuf; - msg.msg_controllen = sizeof(msgbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); - fds[0] = fd_7000; - fds[1] = fd_8000; - memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); - msg.msg_controllen = cmsg->cmsg_len; - - if (sendmsg(transfer_sock, &msg, 0) < 0) - { - perror("sendmsg"); - return; - } - - close(transfer_sock); - - kill(getpid(), SIGSTOP); -} diff --git a/3rdparty/zoid/daemon.2cores/handle_traffic.c b/3rdparty/zoid/daemon.2cores/handle_traffic.c deleted file mode 120000 index 00ecce7321ecd21cacd8ab2cb6d6e4633eb84b0a..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/handle_traffic.c +++ /dev/null @@ -1 +0,0 @@ -handle_traffic.c.fastsema \ No newline at end of file diff --git a/3rdparty/zoid/daemon.2cores/handle_traffic.c.fastsema b/3rdparty/zoid/daemon.2cores/handle_traffic.c.fastsema deleted file mode 100644 index 8ff99bbf7f06eeba597c84e769102b1aa0b7e5da..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/handle_traffic.c.fastsema +++ /dev/null @@ -1,1397 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: handle_traffic.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For pthread_yield. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <signal.h> -#include <sys/time.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/ipc.h> -#include <sys/sem.h> - -#include <bglmemmap.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" - -static BGLQuad* recv_data_cb(void* priv, BGLQuad* softheader); -static void queue_message(struct zoid_buffer* buffer); -static char* handle_ciod_message(struct CioHeader* cioheader, char* data, - int* result_len, int pset_cpu_rank); -static void high_priority_send(struct zoid_buffer *buffer); -static void send_packet(void); - -zoid_buf_pipe *recv_queue; -lock_pair recv_queue_locks; - -zoid_buf_pipe *send_queue; -lock_pair send_queue_locks; - -zoid_buf_pipe *high_priority_send_queue; -lock_pair high_priority_send_queue_locks; - -zoid_buf_pipe *ack_queue; -lock_pair ack_queue_locks; - -/* Indicates whether any more messages that need an ACK should wait. */ -static int currently_receiving_large_message = 0; - -#define NYI() assert(0) - -/* - * If there is no large message currently being received, schedules the - * given message to be received (by sending an ACK to the source). - */ -static void schedule_ack(struct zoid_buffer* cmd_buffer, - struct ZoidSoftHeader* softheader, - int flush_cmd_buffer) { - char* ack_buff; - struct zoid_buffer* ack_buffer; - - ack_buff = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!ack_buff) - /* Out of memory. MT-unsafe! */ - ack_buff = packet_buffer->data; - - ack_buffer = get_zoid_buffer(ack_buff); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_buffer->softheader = *softheader; - ack_buffer->softheader.flags = ZOID_SOFTHEADER_ACK_PACKET; - - cmd_buffer->ack_sent = 1; - - if(flush_cmd_buffer) { - flush_zoid_buf(cmd_buffer); - cmd_buffer = NULL; - } - - enqueue_zoid_buf(ack_queue, ack_buffer); -} - - -#if 0 -/* - * CIOD-packet-specific receiving code. Handles message acknowledgements. - */ -static void -receive_ciod_packet(struct CNProc* cnproc, struct CioHeader* cioheader) -{ -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received CIOD packet: _cpu %d, " - "_rankInCnodes %d, _reserved %d, _dataSize %d, " - "_treeAddress %d, _messageCode %d, _packetTotal %d, " - "_packetIndex %d\n", cioheader->_cpu, - cioheader->_rankInCnodes, cioheader->_reserved, - cioheader->_dataSize, cioheader->_treeAddress, - cioheader->_messageCode, cioheader->_packetTotal, - cioheader->_packetIndex); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); -#endif - if (cioheader->_packetIndex == 0) - { - /* First packet. */ - cnproc->msg_length = cioheader->_packetTotal * TREE_DATA_SIZE; - - if (cioheader->_packetTotal > 1) - { - /* CIOD protocol is rendezvous-based. For multi-packet - messages, first packet must be acknowledged before the - rest are sent. Create this acknowledgement here. */ - char* buffer; - struct zoid_buffer* ack_buffer; - struct CioHeader ack_cioheader; - - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - ack_buffer = get_zoid_buffer(buffer); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_cioheader = *cioheader; - ack_cioheader._messageCode = MTC_ACK; - ack_cioheader._packetTotal = 1; - memcpy(&ack_buffer->softheader, &ack_cioheader, - sizeof(ack_cioheader)); - - /* Let the regular reply sending code do the rest. */ - queue_message(ack_buffer); - } - } -} -#endif - - -static void receive_zoid_packet(struct CNProc *cnproc, - struct ZoidSoftHeader *softheader) { -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received ZOID packet from %d, flags %d, " - "len %d @ %p\n", softheader->pset_cpu_rank, - softheader->flags, softheader->msg_length, cnproc->buffer); - for (i = 0; i < 240 / 4; i++) { - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if(softheader->flags & ZOID_SOFTHEADER_FIRST_PACKET) { - /* Check if this is really the first message. */ - if(cnproc->current_buf != cnproc->buffer->data) { - fprintf(stderr, "First packet received from %d while " - "reading another message\n", - softheader->pset_cpu_rank); - /* Nothing sensible to do, so let's just try to - continue. */ - } - - cnproc->msg_length = softheader->msg_length; - } else if(cnproc->current_buf == cnproc->buffer->data && - !cnproc->buffer->errnum) { - /* Received an unexpected packet without FIRST set. */ - fprintf(stderr, "Received unexpected packet from %d\n", - softheader->pset_cpu_rank); - } - - if(softheader->msg_length != cnproc->msg_length) - fprintf(stderr, "Message length mismatch from %d, found %d, " - "expecting %d\n", softheader->pset_cpu_rank, - softheader->msg_length, cnproc->msg_length); - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* See recv_data_cb. We used an auxiliary buffer. Copy the data - where it belongs. */ - int len; - len = cnproc->buffer->userbuf_in_len - (cnproc->current_buf - - cnproc->buffer->userbuf_in); - if (len > TREE_DATA_SIZE) - len = TREE_DATA_SIZE; - memcpy(cnproc->current_buf, packet_buffer->data, len); - } - - if(softheader->flags & ZOID_SOFTHEADER_NEED_ACK_PACKET) - schedule_ack(cnproc->buffer, softheader, 0); -} - - - -static int -handle_received_packet(struct CNProc* cnproc, - struct ZoidSoftHeader* softheader) -{ - struct CioHeader* cioheader; - - cioheader = (softheader->zoid_id == ZOID_ID ? - NULL : (struct CioHeader*)softheader); - - /* If an error has been marked, we are going to drop the data anyway, - and we probably don't have enough buffer space to hold it, so don't - even try to place it neatly in the buffer. */ - if (!cnproc->buffer->errnum) - cnproc->current_buf += TREE_DATA_SIZE; - - /* Is this the last packet of a message? */ - if (cioheader ? - cioheader->_packetIndex == cioheader->_packetTotal - 1 : - softheader->flags & ZOID_SOFTHEADER_LAST_PACKET) { - if (!cnproc->buffer->errnum) { - - /* Check if we have read all that we were supposed to. */ - if (cnproc->buffer->userbuf_in) { - if (cnproc->current_buf - cnproc->buffer->userbuf_in < - cnproc->buffer->userbuf_in_len){ - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes of userbuf, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->userbuf_in, - cnproc->buffer->userbuf_in_len); - } - } else { - if (cnproc->current_buf - cnproc->buffer->data < - cnproc->msg_length) { - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->data, - cnproc->msg_length); - } - } - } - - if (!cioheader && (softheader->flags & ZOID_SOFTHEADER_ASSERT_PACKET)) { - fprintf(stderr, "Process %d detected an internal error in " - "communication code!\n", - cn_procs[softheader->pset_cpu_rank].pid); - fprintf(stderr, "%s\n", cnproc->buffer->data); - - /* Core dump message will follow; clean up for it. */ - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 0; - } - - if(!cioheader && *(int*)cnproc->buffer->data == ZOID_TERMINATING_ID) { -#if 0 - fprintf(stderr, "Exit request from process %d\n", - cn_procs[softheader->pset_cpu_rank].pid); -#endif - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - enter_critical_section(&pending_exit_locks); - - cnproc->status = PROC_STATUS_EXIT; - pending_exit_requests--; - - leave_critical_section(&pending_exit_locks); - - return 0; - } - - /* A buffer is complete. Queue it. */ - - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - if (cnproc->buffer->ack_sent) - currently_receiving_large_message = 0; - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if(softheader->flags & ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) { - /* We are done reading the first (non-userbuf) part of the input - userbuf message. We will have to invoke the user allocation - callback now. Behave as if the message is complete: we will be - able to recognize that it's not by the above flag in the - softheader. */ - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if (cnproc->buffer->userbuf_in ? - cnproc->current_buf - cnproc->buffer->userbuf_in >= - cnproc->buffer->userbuf_in_len : - cnproc->current_buf - cnproc->buffer->data >= cnproc->msg_length) { - /* We've read more than we were supposed to without receiving - the end-of-message packet! */ - - fprintf(stderr, "Message from %d longer than declared %d\n", - softheader->pset_cpu_rank, cnproc->msg_length + - cnproc->buffer->userbuf_in_len); - - /* Nothing better to do than to prevent a buffer overrun... */ - cnproc->current_buf -= TREE_DATA_SIZE; - } - - return 0; -} - - - -static BGLQuad* recv_data_cb(void *priv, BGLQuad *softheader) { - struct CNProc* cnproc = NULL; - struct CNProc** cnproc_ptr = priv; - struct ZoidSoftHeader* zsh=(struct ZoidSoftHeader*)softheader; - int msglen; - BGLQuad *ret, *dummy; - - if(zsh->zoid_id == ZOID_ID) { - if (zsh->pset_cpu_rank < pset_size * (vn_mode ? 2 : 1) && - pset_rank_mapping[zsh->pset_cpu_rank] != -1) { - zsh->pset_cpu_rank = pset_rank_mapping[zsh->pset_cpu_rank]; - cnproc = &cn_procs[zsh->pset_cpu_rank]; - *cnproc_ptr = cnproc; - } else { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - - msglen = zsh->msg_length; - - if(zsh->flags & ZOID_SOFTHEADER_ASSERT_PACKET) { - /* This is a special one-packet message. If anything was being - read, forget about it and reset the buffer. */ - if(cnproc->buffer) { - if(cnproc->buffer->ack_sent) { - currently_receiving_large_message = 0; - cnproc->buffer->ack_sent = 0; - } - cnproc->current_buf = cnproc->buffer->data; - } - } - } else { - /* Assume it's a message from CNK (CIOD protocol). */ - struct CioHeader* cioheader = (struct CioHeader*)softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) { - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - - if(i == pset_proc_count) { - if(cioheader->_messageCode == MFC_REQUESTRESET) { - /* We can receive a REQUESTRESET message for an unknown - process, which is the second CPU of a node running in CO - node. We assign the message to the primary CPU. */ - for(i = 0; i < pset_proc_count; i++) { - if(cn_procs[i].p2p_addr == cioheader->_treeAddress) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - if(i == pset_proc_count) { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } else { - NYI(); - /* Invalid message. */ - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } - - msglen = cioheader->_packetTotal * TREE_DATA_SIZE; - } - - if (!cnproc->buffer) { - /* First packet of a new message. Initialize things. */ - char* buffer = __zoid_allocate_buffer(msglen); - if(buffer) { - cnproc->buffer = get_zoid_buffer(buffer); - } else { - NYI(); - /* Either the requested size is too large, or we are out of RAM. - In either case, we won't be able to handle the request. - We will read the command "whereever" (preferably into a - process-specific buffer, otherwise a global one (*not* - MT-safe), and flag an error so that the receive_thread knows - not to pass this to worker threads. */ - struct thread_specific_data* thread_data; - thread_data = (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - if (thread_data->errnum == E2BIG) - /* Command was too large. */ - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - cnproc->buffer = get_zoid_buffer(buffer); - - cnproc->buffer->errnum = thread_data->errnum; - if (thread_data->errnum == E2BIG) - cnproc->buffer->excessive_size = - thread_data->last_excessive_size; - } - - cnproc->current_buf = buffer; - } - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* Either condition can only take place for input userbuf, because - zoid-allocated buffers are always properly aligned and rounded - up. Use an auxiliary buffer; we will copy back from it in - receive_zoid_packet(). */ - ret = (BGLQuad*)packet_buffer->data; - } else { - ret = (BGLQuad*)cnproc->current_buf; - } - -#if 0 - /* Allocate L1 cache lines that we are going to write entirely, - so that they are not needlessly read from main memory */ - dummy = ret; - asm volatile( - "\taddi %0,%0,16\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0\n" - : "+b" (dummy) - ); -#endif - - return ret; -} - - -static int receive_packet(void) { - struct CNProc* cnproc; - BGLTreePacketHardHeader hardheader; - char softheader_buf[sizeof(struct ZoidSoftHeader) + 0xf]; - struct ZoidSoftHeader *softheader = (struct ZoidSoftHeader*) - (((unsigned int)softheader_buf + 0xf) & ~0xf); - - BGLTreeFIFO_recvF(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)softheader, - &recv_data_cb, &cnproc); - - if(!cnproc) { - /* Invalid message. */ - int hhdr; - memcpy(&hhdr, &hardheader, sizeof(hardheader)); - fprintf(stderr, "Invalid packet read, hardheader %08x, " - "softheader %08x %08x %08x %08x\n", - hhdr, ((BGLQuad*)softheader)->w0, - ((BGLQuad*)softheader)->w1, ((BGLQuad*)softheader)->w2, - ((BGLQuad*)softheader)->w3); - return 0; - } - - if (softheader->zoid_id == ZOID_ID) - receive_zoid_packet(cnproc, softheader); - else - NYI(); - - return handle_received_packet(cnproc, softheader); -} - - -static void sigusr1(int sig) -{ - syscall(268, 0); - fprintf(stderr, "signal USR1 caught ... moved to 1st core and go asleep\n"); - pause(); - fprintf(stderr, "resuming from signal handler, going back to 2nd core\n"); - syscall(268, 1); -} - - -void bglco_loop() { - struct zoid_buffer *buffer; - BGLTreeStatusRegister status; - - if (signal(SIGUSR1, sigusr1) == SIG_ERR) - perror("signal"); - -#if defined L1_CONSISTENCY_IN_SOFTWARE - flush_L1_all(); -#endif - - syscall(268, 1); - -#if defined L1_CONSISTENCY_IN_SOFTWARE - flush_L1_all(); -#endif - - enter_critical_section(&tree_locks); - - while((volatile int)pending_exit_requests > 0) { - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.recpktcnt > 0) { - if(receive_packet()) { - int r = syscall(269, 1); /* bglco_fast_sem */ - if(r) { - perror("bglco_fast_sem failed"); - exit(-1); - } - } - } - - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.injpktcnt < 8) { - if(!currently_receiving_large_message && - ack_queue->first) { - buffer = dequeue_zoid_buf(ack_queue); - if(buffer) { - high_priority_send(buffer); - currently_receiving_large_message = 1; - } - } else if(high_priority_send_queue->first) { - buffer = dequeue_zoid_buf(high_priority_send_queue); - if(buffer) - high_priority_send(buffer); - } else /*if(((volatile zoid_buf_pipe*)send_queue)->first)*/ { - send_packet(); - } - } - - if (sent_kill_packet) { - printf("WE NEED TO STOP!!!!!!\n"); - break; - } - } - - leave_critical_section(&tree_locks); - - syscall(268, 0); -} - - - -/* - * Sends a single packet to a compute node. - * WARNING: this function is stateful. It internally keeps track of - * the progress of sending the first message from send_queue. - * This function assumes that it won't be called simultaneously by multiple - * threads. Currently, the receive_queue_mutex in handle_messages() ensures - * of that. - */ -static void send_packet(void) { - static struct zoid_buffer* buffer = NULL; - /* The remaining static variables are only valid if buffer != NULL. */ - static BGLTreePacketHardHeader hardheader; - static char* result_current; - static int result_remaining; - static struct CioHeader* cioheader; - static int copying_userbuf; - char* data_buf; - - if (!buffer) { - if (send_queue->first) - buffer = dequeue_zoid_buf(send_queue); - - if (!buffer) - return; - - if (buffer->softheader.zoid_id == ZOID_ID) { - cioheader = NULL; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET; - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - } else { - /* CIOD message. */ - cioheader = (struct CioHeader*)&buffer->softheader; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cioheader->_treeAddress); - cioheader->_packetIndex = 0; - } - - result_current = buffer->data; - result_remaining = (buffer->userbuf_out ? buffer->result_len : - buffer->total_len); - copying_userbuf = 0; - } - - /* Since we round up the buffers to full packet boundary when allocating, - we never have to copy to intermediate ones... */ - - /* Is this the last packet? */ - if (!cioheader && result_remaining <= TREE_DATA_SIZE && - (!buffer->userbuf_out || copying_userbuf)) { - buffer->softheader.flags |= ZOID_SOFTHEADER_LAST_PACKET; - } - -#if 0 - { - int i, *pkt = (int*)result_current; - fprintf(stderr, "Sending back ZOID packet to %d, flags %d, len %d, @ %p\n", - buffer->softheader.pset_cpu_rank, buffer->softheader.flags, - buffer->softheader.msg_length, buffer); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && - i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if (((unsigned int)result_current & 0xf) || - (copying_userbuf && result_remaining < TREE_DATA_SIZE)) { - data_buf = packet_buffer->data; - memcpy(data_buf, result_current, result_remaining < TREE_DATA_SIZE ? - result_remaining : TREE_DATA_SIZE); - } else { - data_buf = result_current; - } - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*)data_buf); - - result_remaining -= TREE_DATA_SIZE; - - if (!cioheader) - buffer->softheader.flags = 0; - else - cioheader->_packetIndex++; - - - if (result_remaining <= 0) { - if (buffer->userbuf_out && !copying_userbuf) { - /* If there is a userbuf output part, we end up here prematurely, - as soon as the zoid part of the buffer is sent. Adjust the - pointer and length and send the user buffer next. */ - - result_current = buffer->userbuf_out; - result_remaining += buffer->total_len - buffer->result_len; - copying_userbuf = 1; - } else { - /*dequeue_zoid_buf(send_queue);*/ - - /* We are done with the current message. Clean up. */ - if (buffer->userbuf_out) { - enqueue_zoid_buf(recv_queue, buffer); - int r = syscall(269, 1); /* bglco_fast_sem */ - if(r) { - perror("bglco_fast_sem failed"); - exit(-1); - } - } else if (buffer != packet_buffer) { - __zoid_release_buffer(buffer->data); - } - buffer = NULL; - } - } else { - result_current += TREE_DATA_SIZE; - } -} - - -/* - * Send a single-packet message that bypasses the normal send queue. - */ -static void high_priority_send(struct zoid_buffer *buffer) { - BGLTreePacketHardHeader hardheader; - - assert(buffer->softheader.zoid_id == ZOID_ID); - assert(!buffer->userbuf_out); - assert(buffer->total_len <= TREE_DATA_SIZE); - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET | - ZOID_SOFTHEADER_LAST_PACKET | - (buffer->softheader.flags & ZOID_SOFTHEADER_ACK_PACKET); - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*) buffer->data); - - /* We are done with the current message. Clean up. */ - - if (buffer != packet_buffer) - __zoid_release_buffer(buffer->data); - else - NYI(); -} - - - -/* - * Enqueues a message in the send queue, to be sent back to a compute - * node later (or in another thread), in the handle_messages() function. - */ -static void queue_message(struct zoid_buffer* buffer) { - if(buffer->softheader.zoid_id == ZOID_ID && !buffer->userbuf_out && - buffer->total_len <= TREE_DATA_SIZE) - enqueue_zoid_buf(high_priority_send_queue, buffer); - else - enqueue_zoid_buf(send_queue, buffer); -} - - - -/* - * Body of (multiple) worker threads. Takes a command from the head of the - * receive queue, processes it, and puts the reply at the tail of the send - * queue. - */ -void* -worker_thread_body(void* arg) -{ - struct thread_specific_data thread_data; - - /* Various callbacks from generated and user code can access this data. */ - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return NULL; - } - - for (;;) - { - struct zoid_buffer *cmd_buffer, *res_buffer; - char *result = NULL; - int result_len, total_len; - - int r = syscall(269, -1); /* bglco_fast_sem */ - if(r) { - perror("bglco_fast_sem failed"); - exit(-1); - } - - cmd_buffer = dequeue_zoid_buf(recv_queue); - - if(cmd_buffer == (struct zoid_buffer*)1) { - printf("Stopping worker thread\n"); - return NULL; - } - - if(!cmd_buffer) - continue; - - thread_data.userbuf = NULL; - - if(cmd_buffer->userbuf_out) { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - cmd_buffer->userbuf_out_cb(cmd_buffer->userbuf_out, - cmd_buffer->userbuf_out_priv); - cmd_buffer->userbuf_out = NULL; - - __zoid_release_buffer(cmd_buffer->data); - cmd_buffer = NULL; - continue; - } - - if (!cmd_buffer->errnum) { - int command_id, header_id, function_id; - struct zoid_dispatch_entry* entry; - - if (cmd_buffer->softheader.zoid_id == ZOID_ID) { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - /* The buffer always begins with a command id. */ - memcpy(&command_id, cmd_buffer->data, sizeof(command_id)); - - header_id = command_id >> 16; - function_id = command_id & 0xffff; - - for (entry = dispatch_entries; entry; entry = entry->next) { - if (entry->header_id == header_id) - break; - } - - if(!entry) { - fprintf(stderr, "Server stub backend not loaded!\n"); - cmd_buffer->errnum = ENOSYS; - } else if(function_id < 0 || function_id >= entry->array_size) { - fprintf(stderr, "Invalid function ID!\n"); - cmd_buffer->errnum = ENOSYS; - } else if (cmd_buffer->softheader.flags & - ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) { - /* We need to invoke a special allocate - callback, not (yet) the argument unpacking - stub. */ - int buffer_len; - struct CNProc* cnproc = &cn_procs[cmd_buffer-> - softheader.pset_cpu_rank]; - - memcpy(&buffer_len, cmd_buffer->data + - sizeof(command_id), sizeof(buffer_len)); - - cmd_buffer->userbuf_in = - entry->dispatch_array[function_id]. - userbuf_allocate_cb(buffer_len); - - if (!cmd_buffer->userbuf_in) { - /* User failed to provide us with a buffer. - Instead of an ACK, we will return an - error message. */ - cmd_buffer->errnum = ENOMEM; - } else { - cmd_buffer->userbuf_in_len = buffer_len; - cnproc->buffer = cmd_buffer; - cnproc->current_buf = cmd_buffer->userbuf_in; - - schedule_ack(cmd_buffer, &cmd_buffer->softheader, 1); - cmd_buffer = NULL; - - /* We are not done with this command yet, - so don't release the buffer! */ - continue; - } - - } else { - if(cmd_buffer->userbuf_in) { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len, - char* user_buffer) = - entry->dispatch_array[function_id].function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len, - cmd_buffer->userbuf_in); - } else { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len) = - entry->dispatch_array[function_id].function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len); - } - - if (!result) { - /* An error here is most likely an - out-of-memory situation or a too large - reply buffer. */ - cmd_buffer->errnum = thread_data.errnum; - if (cmd_buffer->errnum == E2BIG) - cmd_buffer->excessive_size = - thread_data.last_excessive_size; - } - } - } else { /* cmd_buffer->softheader.zoid_id != ZOID_ID */ - NYI(); - /* CIOD message. */ - struct CioHeader* cioheader = - (struct CioHeader*)&cmd_buffer->softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) - { - thread_data.calling_process_id = i; - break; - } - if (i == pset_proc_count && - cioheader->_messageCode == MFC_REQUESTRESET) - { - /* We can receive a REQUESTRESET message for an - unknown process, which is the second CPU of a - node running in CO node. We assign the message - to the primary CPU. */ - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == - cioheader->_treeAddress) - { - thread_data.calling_process_id = i; - break; - } - } - assert(i != pset_proc_count); - - result = handle_ciod_message(cioheader, cmd_buffer->data, - &result_len, i); - total_len = result_len; - } - } /* if (!cmd_buffer->errnum) */ - - /* An error here can be either inherited from the receiving thread - (most likely an out-of-memory condition) or caused by call - function (most likely too large result buffer needed). */ - if (cmd_buffer->errnum) - { - /* Currently, three different error values are possible: - ENOMEM -- out of memory when allocating either the - command or result buffer. - E2BIG -- the command sent was too large (shouldn't happen, - since client checks that as well) or the result - would have been too large. cnproc->excessive_size - contains the excessive size value. - ENOSYS -- the stub backend for the requested function is - not loaded. - We manufacture an error reply here and it gets sent by - the standard reply sending code below. */ - result = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - if (cmd_buffer->errnum == E2BIG) - { - result_len = sizeof(int); - memcpy(result, &cmd_buffer->excessive_size, sizeof(int)); - } - else - result_len = 0; - total_len = result_len; - } - - /* There are currently two cases where result == NULL without there - being any error: - - CIOD command that requires no response, - - partially received input userbuf, so we invoked the allocation - callback, not the actual argument-unpacking one. */ - if (result) - { - res_buffer = get_zoid_buffer(result); - - res_buffer->result_len = result_len; - res_buffer->total_len = total_len; - - res_buffer->errnum = cmd_buffer->errnum; - res_buffer->excessive_size = cmd_buffer->excessive_size; - - if ((res_buffer->userbuf_out = thread_data.userbuf)) - { - res_buffer->userbuf_out_cb = thread_data.userbuf_cb; - res_buffer->userbuf_out_priv = thread_data.userbuf_priv; - } - - memcpy(&res_buffer->softheader, &cmd_buffer->softheader, - sizeof(res_buffer->softheader)); - - queue_message(res_buffer); - res_buffer = NULL; - } - - if (cmd_buffer != packet_buffer) - __zoid_release_buffer(cmd_buffer->data); - - } /* for (;;) */ -} - -int unix_open(const char *pathname /* in:str */, - int flags /* in:obj */, - mode_t mode /* in:obj */) __attribute__((weak)); -int unix_close(int fd /* in:obj */) __attribute__((weak)); -ssize_t unix_write(int fd /* in:obj */, - const void *buf /* in:arr:size=+1:zerocopy */, - size_t count /* in:obj */) __attribute__((weak)); - -/* - * Processes the CIOD-specific messages. These are generated by the kernel, - * on application exit or crash. - */ -static char* -handle_ciod_message(struct CioHeader* cioheader, char* data, int* result_len, - int pset_cpu_rank) -{ - struct MTC_ReplyWriteCore reply; - char* result; - - switch (cioheader->_messageCode) - { - case MFC_REQUESTEXIT: - { - struct MFC_RequestExit* request = (struct MFC_RequestExit*)data; - fprintf(stderr, "ABNORMAL exit request from process %d, " - "code %d (%s)\n", cn_procs[pset_cpu_rank].pid, - request->s.status, - (request->s.reason == EXITED ? "exited" : "killed")); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYEXIT; - *result_len = 0; - break; -#endif - return NULL; - } - - case MFC_REQUESTRESET: - fprintf(stderr, "ABNORMAL reset request from node %d cpu %d\n", - cioheader->_treeAddress, cioheader->_cpu); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYRESET; - *result_len = 0; - break; -#endif - return NULL; - - case MFC_REQUESTWRITECORE: - { - struct MFC_RequestWriteCore* request = - (struct MFC_RequestWriteCore*)data; - char buffer[20]; - int fd; - - fprintf(stderr, "ABNORMAL core dump request from process %d\n", - cn_procs[pset_cpu_rank].pid); - - sprintf(buffer, "core.%d", cn_procs[pset_cpu_rank].pid); - - if (&unix_open) - fd = unix_open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600); - else - { - if ((fd = open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600)) < 0) - fd = -errno; - } - - if (fd < 0) - fprintf(stderr, "Error opening file %s: %s\n", buffer, - strerror(-fd)); - else - { - if (&unix_write) - unix_write(fd, data + request->s.offset, request->s.size); - else - write(fd, data + request->s.offset, request->s.size); - - if (&unix_close) - unix_close(fd); - else - close(fd); - } - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); - - if (fd >= 0) - { - reply.s.rc = 0; - reply.s.errnum = 0; - } - else - { - reply.s.rc = -1; - reply.s.errnum = EACCES; /* Who cares what it was anyway. */ - } - cioheader->_messageCode = MFC_REPLYWRITECORE; - *result_len = sizeof(reply); - break; - } - } - - cioheader->_packetIndex = 0; - cioheader->_packetTotal = 1; - - result = __zoid_allocate_buffer(*result_len); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - memcpy(result, &reply, *result_len); - - return result; -} - -/* - * Part of user API. Returns a (thread-local) pset-local process id. - */ -int -__zoid_calling_process_id(void) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*)pthread_getspecific(thread_specific_key); - return thread_data->calling_process_id; -} - -/* - * Part of user API. Registers an output userbuf and a callback to - * be called when the buffer is no longer needed. - */ -void -__zoid_register_userbuf(void* userbuf, - void (*callback)(void* userbuf, void* priv), - void* priv) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - thread_data->userbuf = userbuf; - thread_data->userbuf_cb = callback; - thread_data->userbuf_priv = priv; -} - -/* - * Part of user API. Sends standard output/error message to the right place. - */ -int -__zoid_send_output(int pid, int fd, const char* buffer, int len) -{ - struct CiodOutputHeader header; - int ret; - - if (pthread_mutex_lock(&output_mutex)) - return -1; - - if (ciod_streams_socket == -1) - ret = write(fd, buffer, len); - else - { - header.fd = fd + 1; - header.cpu = cn_procs[pid].cpu; - header.node = cn_procs[pid].p2p_addr; - header.rank = cn_procs[pid].pid; - header.len = len; - - if (write(ciod_streams_socket, &header, sizeof(header)) != - sizeof(header)) - { - ret = -1; - } - else - ret = write(ciod_streams_socket, buffer, len); - } - - if (pthread_mutex_unlock(&output_mutex)) - return -1; - - return ret; -} - -void *ciod_thread_body(void* arg) { - int signum; - struct timeval tv; - double last_packet_received; - struct CioHeader cio; - int i; - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - /* We only support one command at the moment: kill. */ - for (;;) - { - int cmd; - - if (read(ciod_control_socket, &cmd, sizeof(cmd)) != sizeof(cmd)) - perror("read"); - else if (cmd != 4) - fprintf(stderr, "Unknown command read: 0x%x\n", cmd); - else - { - if (read(ciod_control_socket, &signum, sizeof(signum)) != - sizeof(signum)) - { - perror("read"); - } - else - break; - } - } - - /* Disable thread cancellation before we try to acquire any mutexes. */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - - /* We set this flag even before we send anything, for one simple reason: - so that pthread_mutex_lock below can succeed. worker threads check - for this flag and release the receive_queue_mutex if it is set, so that - we can get it. */ - sent_kill_packet = 1; - - enter_critical_section(&tree_locks); - - /* We are going to kill the job. We do that by sending a special kill - packet that wakes up the CNK, which takes over and kills the processes. - There is one but: there might be some ZOID messages in the network, and - if CNK or CIOD see them, they might act weird. - There is no fool-proof solution to this problem. What we do is to - read all the messages that we can on this side. We throw these away; - we are killing the job anyway, so who cares. Hopefully this action - will quiesce the network (every sender waits for a reply before it can - continue), so that we can then wake up the CNK relatively safely. */ - - /* Suck all the packets from the network first. */ - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - for (;;) - { - BGLTreeStatusRegister status; - - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if (status.recpktcnt == 0) - { - gettimeofday(&tv, NULL); - /* If we haven't received anything in five seconds, assume we - are done. */ - if (tv.tv_sec + tv.tv_usec * 1e-6 - last_packet_received > 5) - break; - } - else - { - while (status.recpktcnt--) - { - BGLTreePacketHardHeader hardheader; - BGLTreeFIFO_recv(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - } - } - - /* Now, kill the processes. */ - - cio._dataSize = 0; - cio._messageCode = MTC_KILL; - cio._packetTotal = 1; - cio._packetIndex = 0; - - ((struct MTC_Kill*)packet_buffer->data)->s.signum = signum; - - enter_critical_section(&pending_exit_locks); - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].status == PROC_STATUS_RUNNING) - { - BGLTreeStatusRegister status; - BGLTreePacketHardHeader hardheader; - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 1, cn_procs[i].p2p_addr); - cio._cpu = cn_procs[i].cpu; - cio._rankInCnodes = cn_procs[i].pset_rank; - cio._treeAddress = cn_procs[i].p2p_addr; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&cio, - (BGLQuad*)packet_buffer->data); - - pending_exit_requests--; - cn_procs[i].status = PROC_STATUS_EXIT; - } - - leave_critical_section(&pending_exit_locks); - - leave_critical_section(&tree_locks); - - return NULL; -} - -/* - * Called after a job has finished to clean up any allocated buffers, - * reset state variables, etc. - */ -void cleanup_traffic(void) -{ -#if 1 - NYI(); -#else - struct zoid_buffer* buf; - - while (send_queue_first) - { - buf = send_queue_first; - send_queue_first = send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (high_priority_send_queue_first) - { - buf = high_priority_send_queue_first; - high_priority_send_queue_first = high_priority_send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (receive_queue_first) - { - buf = *receive_queue_first; - *receive_queue_first = (*receive_queue_first)->next; - __zoid_release_buffer(buf->data); - } - - while (ack_queue_first) - { - buf = ack_queue_first; - ack_queue_first = ack_queue_first->next; - __zoid_release_buffer(buf->data); - } - - currently_receiving_large_message = 0; -#endif -} diff --git a/3rdparty/zoid/daemon.2cores/handle_traffic.c.polling b/3rdparty/zoid/daemon.2cores/handle_traffic.c.polling deleted file mode 100644 index 14f22b62adc8ca2ffb76ac31677e518574dd2b48..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/handle_traffic.c.polling +++ /dev/null @@ -1,1415 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: handle_traffic.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For pthread_yield. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <signal.h> -#include <sys/time.h> -#include <unistd.h> - -#include <bglmemmap.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" - -static BGLQuad* recv_data_cb(void* priv, BGLQuad* softheader); -static void queue_message(struct zoid_buffer* buffer); -static char* handle_ciod_message(struct CioHeader* cioheader, char* data, - int* result_len, int pset_cpu_rank); -static void high_priority_send(struct zoid_buffer *buffer); -static void send_packet(void); - -zoid_buf_pipe *recv_queue; -lock_pair recv_queue_locks; - -zoid_buf_pipe *send_queue; -lock_pair send_queue_locks; - -zoid_buf_pipe *high_priority_send_queue; -lock_pair high_priority_send_queue_locks; - -zoid_buf_pipe *ack_queue; -lock_pair ack_queue_locks; - -extern sigset_t sigusr1_set; - -/* Indicates whether any more messages that need an ACK should wait. */ -static int currently_receiving_large_message = 0; - -#define NYI() assert(0) - -/* - * If there is no large message currently being received, schedules the - * given message to be received (by sending an ACK to the source). - */ -static void schedule_ack(struct zoid_buffer* buffer, - struct ZoidSoftHeader* softheader) { - char* ack_buff; - struct zoid_buffer* ack_buffer; - - ack_buff = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!ack_buff) - /* Out of memory. MT-unsafe! */ - ack_buff = packet_buffer->data; - - ack_buffer = get_zoid_buffer(ack_buff); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_buffer->softheader = *softheader; - ack_buffer->softheader.flags = ZOID_SOFTHEADER_ACK_PACKET; - - enqueue_zoid_buf(ack_queue, ack_buffer); -} - - -#if 0 -/* - * CIOD-packet-specific receiving code. Handles message acknowledgements. - */ -static void -receive_ciod_packet(struct CNProc* cnproc, struct CioHeader* cioheader) -{ -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received CIOD packet: _cpu %d, " - "_rankInCnodes %d, _reserved %d, _dataSize %d, " - "_treeAddress %d, _messageCode %d, _packetTotal %d, " - "_packetIndex %d\n", cioheader->_cpu, - cioheader->_rankInCnodes, cioheader->_reserved, - cioheader->_dataSize, cioheader->_treeAddress, - cioheader->_messageCode, cioheader->_packetTotal, - cioheader->_packetIndex); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); -#endif - if (cioheader->_packetIndex == 0) - { - /* First packet. */ - cnproc->msg_length = cioheader->_packetTotal * TREE_DATA_SIZE; - - if (cioheader->_packetTotal > 1) - { - /* CIOD protocol is rendezvous-based. For multi-packet - messages, first packet must be acknowledged before the - rest are sent. Create this acknowledgement here. */ - char* buffer; - struct zoid_buffer* ack_buffer; - struct CioHeader ack_cioheader; - - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - ack_buffer = get_zoid_buffer(buffer); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_cioheader = *cioheader; - ack_cioheader._messageCode = MTC_ACK; - ack_cioheader._packetTotal = 1; - memcpy(&ack_buffer->softheader, &ack_cioheader, - sizeof(ack_cioheader)); - - /* Let the regular reply sending code do the rest. */ - queue_message(ack_buffer); - } - } -} -#endif - - -static void receive_zoid_packet(struct CNProc *cnproc, - struct ZoidSoftHeader *softheader) { -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received ZOID packet from %d, flags %d, " - "len %d @ %p\n", softheader->pset_cpu_rank, - softheader->flags, softheader->msg_length, cnproc->buffer); - for (i = 0; i < 240 / 4; i++) { - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if(softheader->flags & ZOID_SOFTHEADER_FIRST_PACKET) { - /* Check if this is really the first message. */ - if(cnproc->current_buf != cnproc->buffer->data) { - fprintf(stderr, "First packet received from %d while " - "reading another message\n", - softheader->pset_cpu_rank); - /* Nothing sensible to do, so let's just try to - continue. */ - } - - cnproc->msg_length = softheader->msg_length; - } else if(cnproc->current_buf == cnproc->buffer->data && - !cnproc->buffer->errnum) { - /* Received an unexpected packet without FIRST set. */ - fprintf(stderr, "Received unexpected packet from %d\n", - softheader->pset_cpu_rank); - } - - if(softheader->msg_length != cnproc->msg_length) - fprintf(stderr, "Message length mismatch from %d, found %d, " - "expecting %d\n", softheader->pset_cpu_rank, - softheader->msg_length, cnproc->msg_length); - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* See recv_data_cb. We used an auxiliary buffer. Copy the data - where it belongs. */ - int len; - len = cnproc->buffer->userbuf_in_len - (cnproc->current_buf - - cnproc->buffer->userbuf_in); - if (len > TREE_DATA_SIZE) - len = TREE_DATA_SIZE; - memcpy(cnproc->current_buf, packet_buffer->data, len); - } - - if ((softheader->flags & ZOID_SOFTHEADER_NEED_ACK_PACKET)) { - cnproc->buffer->ack_sent = 1; - schedule_ack(cnproc->buffer, softheader); - } -} - - - -static int -handle_received_packet(struct CNProc* cnproc, - struct ZoidSoftHeader* softheader) -{ - struct CioHeader* cioheader; - - cioheader = (softheader->zoid_id == ZOID_ID ? - NULL : (struct CioHeader*)softheader); - - /* If an error has been marked, we are going to drop the data anyway, - and we probably don't have enough buffer space to hold it, so don't - even try to place it neatly in the buffer. */ - if (!cnproc->buffer->errnum) - cnproc->current_buf += TREE_DATA_SIZE; - - /* Is this the last packet of a message? */ - if (cioheader ? - cioheader->_packetIndex == cioheader->_packetTotal - 1 : - softheader->flags & ZOID_SOFTHEADER_LAST_PACKET) { - if (!cnproc->buffer->errnum) { - - /* Check if we have read all that we were supposed to. */ - if (cnproc->buffer->userbuf_in) { - if (cnproc->current_buf - cnproc->buffer->userbuf_in < - cnproc->buffer->userbuf_in_len){ - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes of userbuf, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->userbuf_in, - cnproc->buffer->userbuf_in_len); - } - } else { - if (cnproc->current_buf - cnproc->buffer->data < - cnproc->msg_length) { - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->data, - cnproc->msg_length); - } - } - } - - if (!cioheader && (softheader->flags & ZOID_SOFTHEADER_ASSERT_PACKET)) { - fprintf(stderr, "Process %d detected an internal error in " - "communication code!\n", - cn_procs[softheader->pset_cpu_rank].pid); - fprintf(stderr, "%s\n", cnproc->buffer->data); - - /* Core dump message will follow; clean up for it. */ - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 0; - } - - if(!cioheader && *(int*)cnproc->buffer->data == ZOID_TERMINATING_ID) { -#if 0 - fprintf(stderr, "Exit request from process %d\n", - cn_procs[softheader->pset_cpu_rank].pid); -#endif - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - enter_critical_section(&pending_exit_locks); - - cnproc->status = PROC_STATUS_EXIT; - pending_exit_requests--; - - leave_critical_section(&pending_exit_locks); - - return 0; - } - - /* A buffer is complete. Queue it. */ - - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - if (cnproc->buffer->ack_sent) - currently_receiving_large_message = 0; - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if(softheader->flags & ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) { - /* We are done reading the first (non-userbuf) part of the input - userbuf message. We will have to invoke the user allocation - callback now. Behave as if the message is complete: we will be - able to recognize that it's not by the above flag in the - softheader. */ - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if (cnproc->buffer->userbuf_in ? - cnproc->current_buf - cnproc->buffer->userbuf_in >= - cnproc->buffer->userbuf_in_len : - cnproc->current_buf - cnproc->buffer->data >= cnproc->msg_length) { - /* We've read more than we were supposed to without receiving - the end-of-message packet! */ - - fprintf(stderr, "Message from %d longer than declared %d\n", - softheader->pset_cpu_rank, cnproc->msg_length + - cnproc->buffer->userbuf_in_len); - - /* Nothing better to do than to prevent a buffer overrun... */ - cnproc->current_buf -= TREE_DATA_SIZE; - } - - return 0; -} - - - -static BGLQuad* recv_data_cb(void *priv, BGLQuad *softheader) { - struct CNProc* cnproc = NULL; - struct CNProc** cnproc_ptr = priv; - struct ZoidSoftHeader* zsh=(struct ZoidSoftHeader*)softheader; - int msglen; - BGLQuad *ret, *dummy; - - if(zsh->zoid_id == ZOID_ID) { - if (zsh->pset_cpu_rank < pset_size * (vn_mode ? 2 : 1) && - pset_rank_mapping[zsh->pset_cpu_rank] != -1) { - zsh->pset_cpu_rank = pset_rank_mapping[zsh->pset_cpu_rank]; - cnproc = &cn_procs[zsh->pset_cpu_rank]; - *cnproc_ptr = cnproc; - } else { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - - msglen = zsh->msg_length; - - if(zsh->flags & ZOID_SOFTHEADER_ASSERT_PACKET) { - /* This is a special one-packet message. If anything was being - read, forget about it and reset the buffer. */ - if(cnproc->buffer) { - if(cnproc->buffer->ack_sent) { - currently_receiving_large_message = 0; - cnproc->buffer->ack_sent = 0; - } - cnproc->current_buf = cnproc->buffer->data; - } - } - } else { - /* Assume it's a message from CNK (CIOD protocol). */ - struct CioHeader* cioheader = (struct CioHeader*)softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) { - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - - if(i == pset_proc_count) { - if(cioheader->_messageCode == MFC_REQUESTRESET) { - /* We can receive a REQUESTRESET message for an unknown - process, which is the second CPU of a node running in CO - node. We assign the message to the primary CPU. */ - for(i = 0; i < pset_proc_count; i++) { - if(cn_procs[i].p2p_addr == cioheader->_treeAddress) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - if(i == pset_proc_count) { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } else { - NYI(); - /* Invalid message. */ - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } - - msglen = cioheader->_packetTotal * TREE_DATA_SIZE; - } - - if (!cnproc->buffer) { - /* First packet of a new message. Initialize things. */ - char* buffer = __zoid_allocate_buffer(msglen); - if(buffer) { - cnproc->buffer = get_zoid_buffer(buffer); - } else { - NYI(); - /* Either the requested size is too large, or we are out of RAM. - In either case, we won't be able to handle the request. - We will read the command "whereever" (preferably into a - process-specific buffer, otherwise a global one (*not* - MT-safe), and flag an error so that the receive_thread knows - not to pass this to worker threads. */ - struct thread_specific_data* thread_data; - thread_data = (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - if (thread_data->errnum == E2BIG) - /* Command was too large. */ - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - cnproc->buffer = get_zoid_buffer(buffer); - - cnproc->buffer->errnum = thread_data->errnum; - if (thread_data->errnum == E2BIG) - cnproc->buffer->excessive_size = - thread_data->last_excessive_size; - } - - cnproc->current_buf = buffer; - } - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* Either condition can only take place for input userbuf, because - zoid-allocated buffers are always properly aligned and rounded - up. Use an auxiliary buffer; we will copy back from it in - receive_zoid_packet(). */ - ret = (BGLQuad*)packet_buffer->data; - } else { - ret = (BGLQuad*)cnproc->current_buf; - } - - /* Allocate L1 cache lines that we are going to write entirely, - so that they are not needlessly read from main memory */ - dummy = ret; - asm volatile( - "\taddi %0,%0,16\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0\n" - : "+b" (dummy) - ); - - return ret; -} - - -static int receive_packet(void) { - struct CNProc* cnproc; - BGLTreePacketHardHeader hardheader; - char softheader_buf[sizeof(struct ZoidSoftHeader) + 0xf]; - struct ZoidSoftHeader *softheader = (struct ZoidSoftHeader*) - (((unsigned int)softheader_buf + 0xf) & ~0xf); - - BGLTreeFIFO_recvF(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)softheader, - &recv_data_cb, &cnproc); - - if(!cnproc) { - /* Invalid message. */ - int hhdr; - memcpy(&hhdr, &hardheader, sizeof(hardheader)); - fprintf(stderr, "Invalid packet read, hardheader %08x, " - "softheader %08x %08x %08x %08x\n", - hhdr, ((BGLQuad*)softheader)->w0, - ((BGLQuad*)softheader)->w1, ((BGLQuad*)softheader)->w2, - ((BGLQuad*)softheader)->w3); - return 0; - } - - if (softheader->zoid_id == ZOID_ID) - receive_zoid_packet(cnproc, softheader); - else - NYI(); - - return handle_received_packet(cnproc, softheader); -} - - - - -pthread_mutex_t poll_mutex = PTHREAD_MUTEX_INITIALIZER; -extern int *sent_signals, *recv_signals; -pid_t ppid; - - -void bglco_loop() { - struct zoid_buffer *buffer; - BGLTreeStatusRegister status; - - syscall(268); - - ppid = getppid(); - - enter_critical_section(&tree_locks); - - while((volatile int)pending_exit_requests > 0) { - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.recpktcnt > 0) { - if(receive_packet()) { - //printf("Got a complete msg, wake a worker thread...\n"); - //(*sent_signals)++; -/* int r = sigqueue(ppid, SIGRTMIN+1, (union sigval)0); */ -/* if(r) { */ -/* perror("sigqueue failed"); */ -/* exit(-1); */ -/* } */ - } - } - - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.injpktcnt < 8) { - if(!currently_receiving_large_message && - ((volatile zoid_buf_pipe*)ack_queue)->first) { - buffer = dequeue_zoid_buf(ack_queue); - if(buffer) { - high_priority_send(buffer); - currently_receiving_large_message = 1; - } - } else if(((volatile zoid_buf_pipe*)high_priority_send_queue)->first) { - buffer = dequeue_zoid_buf(high_priority_send_queue); - if(buffer) - high_priority_send(buffer); - } else if(((volatile zoid_buf_pipe*)send_queue)->first) { - send_packet(); - } - } - - if (sent_kill_packet) { - printf("WE NEED TO STOP!!!!!!\n"); - break; - } - } - - leave_critical_section(&tree_locks); - - syscall(269); -} - - - -/* - * Sends a single packet to a compute node. - * WARNING: this function is stateful. It internally keeps track of - * the progress of sending the first message from send_queue. - * This function assumes that it won't be called simultaneously by multiple - * threads. Currently, the receive_queue_mutex in handle_messages() ensures - * of that. - */ -static void send_packet(void) { - static struct zoid_buffer* buffer = NULL; - /* The remaining static variables are only valid if buffer != NULL. */ - static BGLTreePacketHardHeader hardheader; - static char* result_current; - static int result_remaining; - static struct CioHeader* cioheader; - static int copying_userbuf; - char* data_buf; - - if (!buffer) { - - enter_critical_section(send_queue->locks); - buffer = ((volatile zoid_buf_pipe*)send_queue)->first; - leave_critical_section(send_queue->locks); - - if (!buffer) - return; - - if (buffer->softheader.zoid_id == ZOID_ID) { - cioheader = NULL; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET; - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - } else { - /* CIOD message. */ - cioheader = (struct CioHeader*)&buffer->softheader; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cioheader->_treeAddress); - cioheader->_packetIndex = 0; - } - - result_current = buffer->data; - result_remaining = (buffer->userbuf_out ? buffer->result_len : - buffer->total_len); - copying_userbuf = 0; - } - - /* Since we round up the buffers to full packet boundary when allocating, - we never have to copy to intermediate ones... */ - - /* Is this the last packet? */ - if (!cioheader && result_remaining <= TREE_DATA_SIZE && - (!buffer->userbuf_out || copying_userbuf)) { - buffer->softheader.flags |= ZOID_SOFTHEADER_LAST_PACKET; - } - -#if 0 - { - int i, *pkt = (int*)result_current; - fprintf(stderr, "Sending back ZOID packet to %d, flags %d, len %d, @ %p\n", - buffer->softheader.pset_cpu_rank, buffer->softheader.flags, - buffer->softheader.msg_length, buffer); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && - i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if (((unsigned int)result_current & 0xf) || - (copying_userbuf && result_remaining < TREE_DATA_SIZE)) { - data_buf = packet_buffer->data; - memcpy(data_buf, result_current, result_remaining < TREE_DATA_SIZE ? - result_remaining : TREE_DATA_SIZE); - } else { - data_buf = result_current; - } - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*)data_buf); - - result_remaining -= TREE_DATA_SIZE; - - if (!cioheader) - buffer->softheader.flags = 0; - else - cioheader->_packetIndex++; - - - if (result_remaining <= 0) { - if (buffer->userbuf_out && !copying_userbuf) { - /* If there is a userbuf output part, we end up here prematurely, - as soon as the zoid part of the buffer is sent. Adjust the - pointer and length and send the user buffer next. */ - - result_current = buffer->userbuf_out; - result_remaining += buffer->total_len - buffer->result_len; - copying_userbuf = 1; - } else { - dequeue_zoid_buf(send_queue); - - /* We are done with the current message. Clean up. */ - if (buffer->userbuf_out) { - enqueue_zoid_buf(recv_queue, buffer); -/* int r = sigqueue(ppid, SIGRTMIN+1, (union sigval)0); */ -/* if(r) { */ -/* perror("sigqueue failed"); */ -/* exit(-1); */ -/* } */ - } else if (buffer != packet_buffer) { - __zoid_release_buffer(buffer->data); - } - buffer = NULL; - } - } else { - result_current += TREE_DATA_SIZE; - } -} - - -/* - * Send a single-packet message that bypasses the normal send queue. - */ -static void high_priority_send(struct zoid_buffer *buffer) { - BGLTreePacketHardHeader hardheader; - - assert(buffer->softheader.zoid_id == ZOID_ID); - assert(!buffer->userbuf_out); - assert(buffer->total_len <= TREE_DATA_SIZE); - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET | - ZOID_SOFTHEADER_LAST_PACKET | - (buffer->softheader.flags & ZOID_SOFTHEADER_ACK_PACKET); - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*) buffer->data); - - /* We are done with the current message. Clean up. */ - - if (buffer != packet_buffer) - __zoid_release_buffer(buffer->data); - else - NYI(); -} - - - -/* - * Enqueues a message in the send queue, to be sent back to a compute - * node later (or in another thread), in the handle_messages() function. - */ -static void -queue_message(struct zoid_buffer* buffer) -{ -#if 1 - if(buffer->softheader.zoid_id == ZOID_ID && !buffer->userbuf_out && - buffer->total_len <= TREE_DATA_SIZE) - enqueue_zoid_buf(high_priority_send_queue, buffer); - else -#endif - enqueue_zoid_buf(send_queue, buffer); -} - -/* - * Body of (multiple) worker threads. Takes a command from the head of the - * receive queue, processes it, and puts the reply at the tail of the send - * queue. - */ -void* -worker_thread_body(void* arg) -{ - struct thread_specific_data thread_data; - - /* Various callbacks from generated and user code can access this data. */ - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return NULL; - } - - for (;;) - { - struct zoid_buffer *cmd_buffer, *res_buffer; - char *result = NULL; - int result_len, total_len; - int release_cmd_buffer = 1; - - int sig; - pthread_mutex_lock(&poll_mutex); -/* sigwait(&sigusr1_set, &sig); */ -/* pthread_mutex_unlock(&poll_mutex); */ - - - int poll_cnt = 0; - while(!((volatile zoid_buf_pipe*)recv_queue)->first) { - if((poll_cnt += 1 << 24) == 0) - pthread_yield(); - } - - cmd_buffer = dequeue_zoid_buf(recv_queue); - - //printf("Launched worker thread! %d %p\n", sig, cmd_buffer); - - pthread_mutex_unlock(&poll_mutex); - - if(cmd_buffer == (struct zoid_buffer*)1) { - printf("Stopping worker thread\n"); - return NULL; - } - - //assert(cmd_buffer); - if(!cmd_buffer) - continue; - - thread_data.userbuf = NULL; - - if(cmd_buffer->userbuf_out) { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - cmd_buffer->userbuf_out_cb(cmd_buffer->userbuf_out, - cmd_buffer->userbuf_out_priv); - cmd_buffer->userbuf_out = NULL; - - __zoid_release_buffer(cmd_buffer->data); - continue; - } - - if (!cmd_buffer->errnum) { - int command_id, header_id, function_id; - struct zoid_dispatch_entry* entry; - - if (cmd_buffer->softheader.zoid_id == ZOID_ID) - { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - /* The buffer always begins with a command id. */ - memcpy(&command_id, cmd_buffer->data, sizeof(command_id)); - - header_id = command_id >> 16; - function_id = command_id & 0xffff; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->header_id == header_id) - { - if (function_id >= 0 && - function_id < entry->array_size) - { - if (cmd_buffer->softheader.flags & - ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) - { - /* We need to invoke a special allocate - callback, not (yet) the argument unpacking - stub. */ - int buffer_len; - struct CNProc* cnproc = &cn_procs[cmd_buffer-> - softheader.pset_cpu_rank]; - - memcpy(&buffer_len, cmd_buffer->data + - sizeof(command_id), sizeof(buffer_len)); - - cmd_buffer->userbuf_in = - entry->dispatch_array[function_id]. - userbuf_allocate_cb(buffer_len); - - if (!cmd_buffer->userbuf_in) - { - /* User failed to provide us with a buffer. - Instead of an ACK, we will return an - error message. */ - cmd_buffer->errnum = ENOMEM; - break; - } - - cmd_buffer->userbuf_in_len = buffer_len; - cnproc->buffer = cmd_buffer; - cnproc->current_buf = cmd_buffer->userbuf_in; - - cmd_buffer->ack_sent = 1; - - flush_zoid_buf(cmd_buffer); - - schedule_ack(cmd_buffer, - &cmd_buffer->softheader); - - /* We are not done with this command yet, - so don't release the buffer! */ - release_cmd_buffer = 0; - - break; - } - - if (cmd_buffer->userbuf_in) - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len, - char* user_buffer) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len, - cmd_buffer->userbuf_in); - } - else - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len); - } - - if (!result) - { - /* An error here is most likely an - out-of-memory situation or a too large - reply buffer. */ - cmd_buffer->errnum = thread_data.errnum; - if (cmd_buffer->errnum == E2BIG) - cmd_buffer->excessive_size = - thread_data.last_excessive_size; - } - break; - } - fprintf(stderr, "Invalid function ID!\n"); - cmd_buffer->errnum = ENOSYS; - break; - } - if (!entry) - { - fprintf(stderr, "Server stub backend not loaded!\n"); - cmd_buffer->errnum = ENOSYS; - } - } - else - { - NYI(); - /* CIOD message. */ - struct CioHeader* cioheader = - (struct CioHeader*)&cmd_buffer->softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) - { - thread_data.calling_process_id = i; - break; - } - if (i == pset_proc_count && - cioheader->_messageCode == MFC_REQUESTRESET) - { - /* We can receive a REQUESTRESET message for an - unknown process, which is the second CPU of a - node running in CO node. We assign the message - to the primary CPU. */ - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == - cioheader->_treeAddress) - { - thread_data.calling_process_id = i; - break; - } - } - assert(i != pset_proc_count); - - result = handle_ciod_message(cioheader, cmd_buffer->data, - &result_len, i); - total_len = result_len; - } - } /* if (!cmd_buffer->errnum) */ - - /* An error here can be either inherited from the receiving thread - (most likely an out-of-memory condition) or caused by call - function (most likely too large result buffer needed). */ - if (cmd_buffer->errnum) - { - /* Currently, three different error values are possible: - ENOMEM -- out of memory when allocating either the - command or result buffer. - E2BIG -- the command sent was too large (shouldn't happen, - since client checks that as well) or the result - would have been too large. cnproc->excessive_size - contains the excessive size value. - ENOSYS -- the stub backend for the requested function is - not loaded. - We manufacture an error reply here and it gets sent by - the standard reply sending code below. */ - result = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - if (cmd_buffer->errnum == E2BIG) - { - result_len = sizeof(int); - memcpy(result, &cmd_buffer->excessive_size, sizeof(int)); - } - else - result_len = 0; - total_len = result_len; - } - - /* There are currently two cases where result == NULL without there - being any error: - - CIOD command that requires no response, - - partially received input userbuf, so we invoked the allocation - callback, not the actual argument-unpacking one. */ - if (result) - { - res_buffer = get_zoid_buffer(result); - - res_buffer->result_len = result_len; - res_buffer->total_len = total_len; - - res_buffer->errnum = cmd_buffer->errnum; - res_buffer->excessive_size = cmd_buffer->excessive_size; - - if ((res_buffer->userbuf_out = thread_data.userbuf)) - { - res_buffer->userbuf_out_cb = thread_data.userbuf_cb; - res_buffer->userbuf_out_priv = thread_data.userbuf_priv; - } - - memcpy(&res_buffer->softheader, &cmd_buffer->softheader, - sizeof(res_buffer->softheader)); - - queue_message(res_buffer); - } - - if (cmd_buffer != packet_buffer && release_cmd_buffer) - __zoid_release_buffer(cmd_buffer->data); - - } /* for (;;) */ -} - -int unix_open(const char *pathname /* in:str */, - int flags /* in:obj */, - mode_t mode /* in:obj */) __attribute__((weak)); -int unix_close(int fd /* in:obj */) __attribute__((weak)); -ssize_t unix_write(int fd /* in:obj */, - const void *buf /* in:arr:size=+1:zerocopy */, - size_t count /* in:obj */) __attribute__((weak)); - -/* - * Processes the CIOD-specific messages. These are generated by the kernel, - * on application exit or crash. - */ -static char* -handle_ciod_message(struct CioHeader* cioheader, char* data, int* result_len, - int pset_cpu_rank) -{ - struct MTC_ReplyWriteCore reply; - char* result; - - switch (cioheader->_messageCode) - { - case MFC_REQUESTEXIT: - { - struct MFC_RequestExit* request = (struct MFC_RequestExit*)data; - fprintf(stderr, "ABNORMAL exit request from process %d, " - "code %d (%s)\n", cn_procs[pset_cpu_rank].pid, - request->s.status, - (request->s.reason == EXITED ? "exited" : "killed")); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYEXIT; - *result_len = 0; - break; -#endif - return NULL; - } - - case MFC_REQUESTRESET: - fprintf(stderr, "ABNORMAL reset request from node %d cpu %d\n", - cioheader->_treeAddress, cioheader->_cpu); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYRESET; - *result_len = 0; - break; -#endif - return NULL; - - case MFC_REQUESTWRITECORE: - { - struct MFC_RequestWriteCore* request = - (struct MFC_RequestWriteCore*)data; - char buffer[20]; - int fd; - - fprintf(stderr, "ABNORMAL core dump request from process %d\n", - cn_procs[pset_cpu_rank].pid); - - sprintf(buffer, "core.%d", cn_procs[pset_cpu_rank].pid); - - if (&unix_open) - fd = unix_open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600); - else - { - if ((fd = open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600)) < 0) - fd = -errno; - } - - if (fd < 0) - fprintf(stderr, "Error opening file %s: %s\n", buffer, - strerror(-fd)); - else - { - if (&unix_write) - unix_write(fd, data + request->s.offset, request->s.size); - else - write(fd, data + request->s.offset, request->s.size); - - if (&unix_close) - unix_close(fd); - else - close(fd); - } - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); - - if (fd >= 0) - { - reply.s.rc = 0; - reply.s.errnum = 0; - } - else - { - reply.s.rc = -1; - reply.s.errnum = EACCES; /* Who cares what it was anyway. */ - } - cioheader->_messageCode = MFC_REPLYWRITECORE; - *result_len = sizeof(reply); - break; - } - } - - cioheader->_packetIndex = 0; - cioheader->_packetTotal = 1; - - result = __zoid_allocate_buffer(*result_len); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - memcpy(result, &reply, *result_len); - - return result; -} - -/* - * Part of user API. Returns a (thread-local) pset-local process id. - */ -int -__zoid_calling_process_id(void) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*)pthread_getspecific(thread_specific_key); - return thread_data->calling_process_id; -} - -/* - * Part of user API. Registers an output userbuf and a callback to - * be called when the buffer is no longer needed. - */ -void -__zoid_register_userbuf(void* userbuf, - void (*callback)(void* userbuf, void* priv), - void* priv) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - thread_data->userbuf = userbuf; - thread_data->userbuf_cb = callback; - thread_data->userbuf_priv = priv; -} - -/* - * Part of user API. Sends standard output/error message to the right place. - */ -int -__zoid_send_output(int pid, int fd, const char* buffer, int len) -{ - struct CiodOutputHeader header; - int ret; - - if (pthread_mutex_lock(&output_mutex)) - return -1; - - if (ciod_streams_socket == -1) - ret = write(fd, buffer, len); - else - { - header.fd = fd + 1; - header.cpu = cn_procs[pid].cpu; - header.node = cn_procs[pid].p2p_addr; - header.rank = cn_procs[pid].pid; - header.len = len; - - if (write(ciod_streams_socket, &header, sizeof(header)) != - sizeof(header)) - { - ret = -1; - } - else - ret = write(ciod_streams_socket, buffer, len); - } - - if (pthread_mutex_unlock(&output_mutex)) - return -1; - - return ret; -} - -void *ciod_thread_body(void* arg) { - int signum; - struct timeval tv; - double last_packet_received; - struct CioHeader cio; - int i; - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - /* We only support one command at the moment: kill. */ - for (;;) - { - int cmd; - - if (read(ciod_control_socket, &cmd, sizeof(cmd)) != sizeof(cmd)) - perror("read"); - else if (cmd != 4) - fprintf(stderr, "Unknown command read: 0x%x\n", cmd); - else - { - if (read(ciod_control_socket, &signum, sizeof(signum)) != - sizeof(signum)) - { - perror("read"); - } - else - break; - } - } - - /* Disable thread cancellation before we try to acquire any mutexes. */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - - /* We set this flag even before we send anything, for one simple reason: - so that pthread_mutex_lock below can succeed. worker threads check - for this flag and release the receive_queue_mutex if it is set, so that - we can get it. */ - sent_kill_packet = 1; - - enter_critical_section(&tree_locks); - - /* We are going to kill the job. We do that by sending a special kill - packet that wakes up the CNK, which takes over and kills the processes. - There is one but: there might be some ZOID messages in the network, and - if CNK or CIOD see them, they might act weird. - There is no fool-proof solution to this problem. What we do is to - read all the messages that we can on this side. We throw these away; - we are killing the job anyway, so who cares. Hopefully this action - will quiesce the network (every sender waits for a reply before it can - continue), so that we can then wake up the CNK relatively safely. */ - - /* Suck all the packets from the network first. */ - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - for (;;) - { - BGLTreeStatusRegister status; - - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if (status.recpktcnt == 0) - { - gettimeofday(&tv, NULL); - /* If we haven't received anything in five seconds, assume we - are done. */ - if (tv.tv_sec + tv.tv_usec * 1e-6 - last_packet_received > 5) - break; - } - else - { - while (status.recpktcnt--) - { - BGLTreePacketHardHeader hardheader; - BGLTreeFIFO_recv(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - } - } - - /* Now, kill the processes. */ - - cio._dataSize = 0; - cio._messageCode = MTC_KILL; - cio._packetTotal = 1; - cio._packetIndex = 0; - - ((struct MTC_Kill*)packet_buffer->data)->s.signum = signum; - - enter_critical_section(&pending_exit_locks); - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].status == PROC_STATUS_RUNNING) - { - BGLTreeStatusRegister status; - BGLTreePacketHardHeader hardheader; - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 1, cn_procs[i].p2p_addr); - cio._cpu = cn_procs[i].cpu; - cio._rankInCnodes = cn_procs[i].pset_rank; - cio._treeAddress = cn_procs[i].p2p_addr; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&cio, - (BGLQuad*)packet_buffer->data); - - pending_exit_requests--; - cn_procs[i].status = PROC_STATUS_EXIT; - } - - leave_critical_section(&pending_exit_locks); - - leave_critical_section(&tree_locks); - - return NULL; -} - -/* - * Called after a job has finished to clean up any allocated buffers, - * reset state variables, etc. - */ -void cleanup_traffic(void) -{ -#if 1 - NYI(); -#else - struct zoid_buffer* buf; - - while (send_queue_first) - { - buf = send_queue_first; - send_queue_first = send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (high_priority_send_queue_first) - { - buf = high_priority_send_queue_first; - high_priority_send_queue_first = high_priority_send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (receive_queue_first) - { - buf = *receive_queue_first; - *receive_queue_first = (*receive_queue_first)->next; - __zoid_release_buffer(buf->data); - } - - while (ack_queue_first) - { - buf = ack_queue_first; - ack_queue_first = ack_queue_first->next; - __zoid_release_buffer(buf->data); - } - - currently_receiving_large_message = 0; -#endif -} diff --git a/3rdparty/zoid/daemon.2cores/handle_traffic.c.sema b/3rdparty/zoid/daemon.2cores/handle_traffic.c.sema deleted file mode 100644 index 80f5546d694ade4ccd72c72f903f600396720968..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/handle_traffic.c.sema +++ /dev/null @@ -1,1416 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: handle_traffic.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For pthread_yield. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <signal.h> -#include <sys/time.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/ipc.h> -#include <sys/sem.h> - -#include <bglmemmap.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" - -static BGLQuad* recv_data_cb(void* priv, BGLQuad* softheader); -static void queue_message(struct zoid_buffer* buffer); -static char* handle_ciod_message(struct CioHeader* cioheader, char* data, - int* result_len, int pset_cpu_rank); -static void high_priority_send(struct zoid_buffer *buffer); -static void send_packet(void); - -zoid_buf_pipe *recv_queue; -lock_pair recv_queue_locks; - -zoid_buf_pipe *send_queue; -lock_pair send_queue_locks; - -zoid_buf_pipe *high_priority_send_queue; -lock_pair high_priority_send_queue_locks; - -zoid_buf_pipe *ack_queue; -lock_pair ack_queue_locks; - -/* Indicates whether any more messages that need an ACK should wait. */ -static int currently_receiving_large_message = 0; - -extern int recv_q_sem; -extern struct sembuf up; -extern struct sembuf down; - -#define NYI() assert(0) - -/* - * If there is no large message currently being received, schedules the - * given message to be received (by sending an ACK to the source). - */ -static void schedule_ack(struct zoid_buffer* buffer, - struct ZoidSoftHeader* softheader) { - char* ack_buff; - struct zoid_buffer* ack_buffer; - - ack_buff = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!ack_buff) - /* Out of memory. MT-unsafe! */ - ack_buff = packet_buffer->data; - - ack_buffer = get_zoid_buffer(ack_buff); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_buffer->softheader = *softheader; - ack_buffer->softheader.flags = ZOID_SOFTHEADER_ACK_PACKET; - - enqueue_zoid_buf(ack_queue, ack_buffer); -} - - -#if 0 -/* - * CIOD-packet-specific receiving code. Handles message acknowledgements. - */ -static void -receive_ciod_packet(struct CNProc* cnproc, struct CioHeader* cioheader) -{ -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received CIOD packet: _cpu %d, " - "_rankInCnodes %d, _reserved %d, _dataSize %d, " - "_treeAddress %d, _messageCode %d, _packetTotal %d, " - "_packetIndex %d\n", cioheader->_cpu, - cioheader->_rankInCnodes, cioheader->_reserved, - cioheader->_dataSize, cioheader->_treeAddress, - cioheader->_messageCode, cioheader->_packetTotal, - cioheader->_packetIndex); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); -#endif - if (cioheader->_packetIndex == 0) - { - /* First packet. */ - cnproc->msg_length = cioheader->_packetTotal * TREE_DATA_SIZE; - - if (cioheader->_packetTotal > 1) - { - /* CIOD protocol is rendezvous-based. For multi-packet - messages, first packet must be acknowledged before the - rest are sent. Create this acknowledgement here. */ - char* buffer; - struct zoid_buffer* ack_buffer; - struct CioHeader ack_cioheader; - - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - ack_buffer = get_zoid_buffer(buffer); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_cioheader = *cioheader; - ack_cioheader._messageCode = MTC_ACK; - ack_cioheader._packetTotal = 1; - memcpy(&ack_buffer->softheader, &ack_cioheader, - sizeof(ack_cioheader)); - - /* Let the regular reply sending code do the rest. */ - queue_message(ack_buffer); - } - } -} -#endif - - -static void receive_zoid_packet(struct CNProc *cnproc, - struct ZoidSoftHeader *softheader) { -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received ZOID packet from %d, flags %d, " - "len %d @ %p\n", softheader->pset_cpu_rank, - softheader->flags, softheader->msg_length, cnproc->buffer); - for (i = 0; i < 240 / 4; i++) { - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if(softheader->flags & ZOID_SOFTHEADER_FIRST_PACKET) { - /* Check if this is really the first message. */ - if(cnproc->current_buf != cnproc->buffer->data) { - fprintf(stderr, "First packet received from %d while " - "reading another message\n", - softheader->pset_cpu_rank); - /* Nothing sensible to do, so let's just try to - continue. */ - } - - cnproc->msg_length = softheader->msg_length; - } else if(cnproc->current_buf == cnproc->buffer->data && - !cnproc->buffer->errnum) { - /* Received an unexpected packet without FIRST set. */ - fprintf(stderr, "Received unexpected packet from %d\n", - softheader->pset_cpu_rank); - } - - if(softheader->msg_length != cnproc->msg_length) - fprintf(stderr, "Message length mismatch from %d, found %d, " - "expecting %d\n", softheader->pset_cpu_rank, - softheader->msg_length, cnproc->msg_length); - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* See recv_data_cb. We used an auxiliary buffer. Copy the data - where it belongs. */ - int len; - len = cnproc->buffer->userbuf_in_len - (cnproc->current_buf - - cnproc->buffer->userbuf_in); - if (len > TREE_DATA_SIZE) - len = TREE_DATA_SIZE; - memcpy(cnproc->current_buf, packet_buffer->data, len); - } - - if ((softheader->flags & ZOID_SOFTHEADER_NEED_ACK_PACKET)) { - cnproc->buffer->ack_sent = 1; - schedule_ack(cnproc->buffer, softheader); - } -} - - - -static int -handle_received_packet(struct CNProc* cnproc, - struct ZoidSoftHeader* softheader) -{ - struct CioHeader* cioheader; - - cioheader = (softheader->zoid_id == ZOID_ID ? - NULL : (struct CioHeader*)softheader); - - /* If an error has been marked, we are going to drop the data anyway, - and we probably don't have enough buffer space to hold it, so don't - even try to place it neatly in the buffer. */ - if (!cnproc->buffer->errnum) - cnproc->current_buf += TREE_DATA_SIZE; - - /* Is this the last packet of a message? */ - if (cioheader ? - cioheader->_packetIndex == cioheader->_packetTotal - 1 : - softheader->flags & ZOID_SOFTHEADER_LAST_PACKET) { - if (!cnproc->buffer->errnum) { - - /* Check if we have read all that we were supposed to. */ - if (cnproc->buffer->userbuf_in) { - if (cnproc->current_buf - cnproc->buffer->userbuf_in < - cnproc->buffer->userbuf_in_len){ - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes of userbuf, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->userbuf_in, - cnproc->buffer->userbuf_in_len); - } - } else { - if (cnproc->current_buf - cnproc->buffer->data < - cnproc->msg_length) { - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->data, - cnproc->msg_length); - } - } - } - - if (!cioheader && (softheader->flags & ZOID_SOFTHEADER_ASSERT_PACKET)) { - fprintf(stderr, "Process %d detected an internal error in " - "communication code!\n", - cn_procs[softheader->pset_cpu_rank].pid); - fprintf(stderr, "%s\n", cnproc->buffer->data); - - /* Core dump message will follow; clean up for it. */ - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 0; - } - - if(!cioheader && *(int*)cnproc->buffer->data == ZOID_TERMINATING_ID) { -#if 0 - fprintf(stderr, "Exit request from process %d\n", - cn_procs[softheader->pset_cpu_rank].pid); -#endif - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - enter_critical_section(&pending_exit_locks); - - cnproc->status = PROC_STATUS_EXIT; - pending_exit_requests--; - - leave_critical_section(&pending_exit_locks); - - return 0; - } - - /* A buffer is complete. Queue it. */ - - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - if (cnproc->buffer->ack_sent) - currently_receiving_large_message = 0; - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if(softheader->flags & ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) { - /* We are done reading the first (non-userbuf) part of the input - userbuf message. We will have to invoke the user allocation - callback now. Behave as if the message is complete: we will be - able to recognize that it's not by the above flag in the - softheader. */ - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if (cnproc->buffer->userbuf_in ? - cnproc->current_buf - cnproc->buffer->userbuf_in >= - cnproc->buffer->userbuf_in_len : - cnproc->current_buf - cnproc->buffer->data >= cnproc->msg_length) { - /* We've read more than we were supposed to without receiving - the end-of-message packet! */ - - fprintf(stderr, "Message from %d longer than declared %d\n", - softheader->pset_cpu_rank, cnproc->msg_length + - cnproc->buffer->userbuf_in_len); - - /* Nothing better to do than to prevent a buffer overrun... */ - cnproc->current_buf -= TREE_DATA_SIZE; - } - - return 0; -} - - - -static BGLQuad* recv_data_cb(void *priv, BGLQuad *softheader) { - struct CNProc* cnproc = NULL; - struct CNProc** cnproc_ptr = priv; - struct ZoidSoftHeader* zsh=(struct ZoidSoftHeader*)softheader; - int msglen; - BGLQuad *ret, *dummy; - - if(zsh->zoid_id == ZOID_ID) { - if (zsh->pset_cpu_rank < pset_size * (vn_mode ? 2 : 1) && - pset_rank_mapping[zsh->pset_cpu_rank] != -1) { - zsh->pset_cpu_rank = pset_rank_mapping[zsh->pset_cpu_rank]; - cnproc = &cn_procs[zsh->pset_cpu_rank]; - *cnproc_ptr = cnproc; - } else { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - - msglen = zsh->msg_length; - - if(zsh->flags & ZOID_SOFTHEADER_ASSERT_PACKET) { - /* This is a special one-packet message. If anything was being - read, forget about it and reset the buffer. */ - if(cnproc->buffer) { - if(cnproc->buffer->ack_sent) { - currently_receiving_large_message = 0; - cnproc->buffer->ack_sent = 0; - } - cnproc->current_buf = cnproc->buffer->data; - } - } - } else { - /* Assume it's a message from CNK (CIOD protocol). */ - struct CioHeader* cioheader = (struct CioHeader*)softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) { - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - - if(i == pset_proc_count) { - if(cioheader->_messageCode == MFC_REQUESTRESET) { - /* We can receive a REQUESTRESET message for an unknown - process, which is the second CPU of a node running in CO - node. We assign the message to the primary CPU. */ - for(i = 0; i < pset_proc_count; i++) { - if(cn_procs[i].p2p_addr == cioheader->_treeAddress) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - if(i == pset_proc_count) { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } else { - NYI(); - /* Invalid message. */ - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } - - msglen = cioheader->_packetTotal * TREE_DATA_SIZE; - } - - if (!cnproc->buffer) { - /* First packet of a new message. Initialize things. */ - char* buffer = __zoid_allocate_buffer(msglen); - if(buffer) { - cnproc->buffer = get_zoid_buffer(buffer); - } else { - NYI(); - /* Either the requested size is too large, or we are out of RAM. - In either case, we won't be able to handle the request. - We will read the command "whereever" (preferably into a - process-specific buffer, otherwise a global one (*not* - MT-safe), and flag an error so that the receive_thread knows - not to pass this to worker threads. */ - struct thread_specific_data* thread_data; - thread_data = (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - if (thread_data->errnum == E2BIG) - /* Command was too large. */ - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - cnproc->buffer = get_zoid_buffer(buffer); - - cnproc->buffer->errnum = thread_data->errnum; - if (thread_data->errnum == E2BIG) - cnproc->buffer->excessive_size = - thread_data->last_excessive_size; - } - - cnproc->current_buf = buffer; - } - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* Either condition can only take place for input userbuf, because - zoid-allocated buffers are always properly aligned and rounded - up. Use an auxiliary buffer; we will copy back from it in - receive_zoid_packet(). */ - ret = (BGLQuad*)packet_buffer->data; - } else { - ret = (BGLQuad*)cnproc->current_buf; - } - - /* Allocate L1 cache lines that we are going to write entirely, - so that they are not needlessly read from main memory */ - dummy = ret; - asm volatile( - "\taddi %0,%0,16\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0\n" - : "+b" (dummy) - ); - - return ret; -} - - -static int receive_packet(void) { - struct CNProc* cnproc; - BGLTreePacketHardHeader hardheader; - char softheader_buf[sizeof(struct ZoidSoftHeader) + 0xf]; - struct ZoidSoftHeader *softheader = (struct ZoidSoftHeader*) - (((unsigned int)softheader_buf + 0xf) & ~0xf); - - BGLTreeFIFO_recvF(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)softheader, - &recv_data_cb, &cnproc); - - if(!cnproc) { - /* Invalid message. */ - int hhdr; - memcpy(&hhdr, &hardheader, sizeof(hardheader)); - fprintf(stderr, "Invalid packet read, hardheader %08x, " - "softheader %08x %08x %08x %08x\n", - hhdr, ((BGLQuad*)softheader)->w0, - ((BGLQuad*)softheader)->w1, ((BGLQuad*)softheader)->w2, - ((BGLQuad*)softheader)->w3); - return 0; - } - - if (softheader->zoid_id == ZOID_ID) - receive_zoid_packet(cnproc, softheader); - else - NYI(); - - return handle_received_packet(cnproc, softheader); -} - - - -void bglco_loop() { - struct zoid_buffer *buffer; - BGLTreeStatusRegister status; - - syscall(268); - - enter_critical_section(&tree_locks); - - while((volatile int)pending_exit_requests > 0) { - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.recpktcnt > 0) { - if(receive_packet()) { - int r = semop(recv_q_sem, &up, 1); - if(r) { - perror("semop failed"); - exit(-1); - } - } - } - - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.injpktcnt < 8) { - if(!currently_receiving_large_message && - ((volatile zoid_buf_pipe*)ack_queue)->first) { - buffer = dequeue_zoid_buf(ack_queue); - if(buffer) { - high_priority_send(buffer); - currently_receiving_large_message = 1; - } - } else if(((volatile zoid_buf_pipe*)high_priority_send_queue)->first) { - buffer = dequeue_zoid_buf(high_priority_send_queue); - if(buffer) - high_priority_send(buffer); - } else if(((volatile zoid_buf_pipe*)send_queue)->first) { - send_packet(); - } - } - - if (sent_kill_packet) { - printf("WE NEED TO STOP!!!!!!\n"); - break; - } - } - - leave_critical_section(&tree_locks); - - syscall(269); -} - - - -/* - * Sends a single packet to a compute node. - * WARNING: this function is stateful. It internally keeps track of - * the progress of sending the first message from send_queue. - * This function assumes that it won't be called simultaneously by multiple - * threads. Currently, the receive_queue_mutex in handle_messages() ensures - * of that. - */ -static void send_packet(void) { - static struct zoid_buffer* buffer = NULL; - /* The remaining static variables are only valid if buffer != NULL. */ - static BGLTreePacketHardHeader hardheader; - static char* result_current; - static int result_remaining; - static struct CioHeader* cioheader; - static int copying_userbuf; - char* data_buf; - - if (!buffer) { - - enter_critical_section(send_queue->locks); - buffer = ((volatile zoid_buf_pipe*)send_queue)->first; - leave_critical_section(send_queue->locks); - - if (!buffer) - return; - - if (buffer->softheader.zoid_id == ZOID_ID) { - cioheader = NULL; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET; - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - } else { - /* CIOD message. */ - cioheader = (struct CioHeader*)&buffer->softheader; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cioheader->_treeAddress); - cioheader->_packetIndex = 0; - } - - result_current = buffer->data; - result_remaining = (buffer->userbuf_out ? buffer->result_len : - buffer->total_len); - copying_userbuf = 0; - } - - /* Since we round up the buffers to full packet boundary when allocating, - we never have to copy to intermediate ones... */ - - /* Is this the last packet? */ - if (!cioheader && result_remaining <= TREE_DATA_SIZE && - (!buffer->userbuf_out || copying_userbuf)) { - buffer->softheader.flags |= ZOID_SOFTHEADER_LAST_PACKET; - } - -#if 0 - { - int i, *pkt = (int*)result_current; - fprintf(stderr, "Sending back ZOID packet to %d, flags %d, len %d, @ %p\n", - buffer->softheader.pset_cpu_rank, buffer->softheader.flags, - buffer->softheader.msg_length, buffer); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && - i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if (((unsigned int)result_current & 0xf) || - (copying_userbuf && result_remaining < TREE_DATA_SIZE)) { - data_buf = packet_buffer->data; - memcpy(data_buf, result_current, result_remaining < TREE_DATA_SIZE ? - result_remaining : TREE_DATA_SIZE); - } else { - data_buf = result_current; - } - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*)data_buf); - - result_remaining -= TREE_DATA_SIZE; - - if (!cioheader) - buffer->softheader.flags = 0; - else - cioheader->_packetIndex++; - - - if (result_remaining <= 0) { - if (buffer->userbuf_out && !copying_userbuf) { - /* If there is a userbuf output part, we end up here prematurely, - as soon as the zoid part of the buffer is sent. Adjust the - pointer and length and send the user buffer next. */ - - result_current = buffer->userbuf_out; - result_remaining += buffer->total_len - buffer->result_len; - copying_userbuf = 1; - } else { - dequeue_zoid_buf(send_queue); - - /* We are done with the current message. Clean up. */ - if (buffer->userbuf_out) { - enqueue_zoid_buf(recv_queue, buffer); - int r = semop(recv_q_sem, &up, 1); - if(r) { - perror("semop failed"); - exit(-1); - } - } else if (buffer != packet_buffer) { - __zoid_release_buffer(buffer->data); - } - buffer = NULL; - } - } else { - result_current += TREE_DATA_SIZE; - } -} - - -/* - * Send a single-packet message that bypasses the normal send queue. - */ -static void high_priority_send(struct zoid_buffer *buffer) { - BGLTreePacketHardHeader hardheader; - - assert(buffer->softheader.zoid_id == ZOID_ID); - assert(!buffer->userbuf_out); - assert(buffer->total_len <= TREE_DATA_SIZE); - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET | - ZOID_SOFTHEADER_LAST_PACKET | - (buffer->softheader.flags & ZOID_SOFTHEADER_ACK_PACKET); - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*) buffer->data); - - /* We are done with the current message. Clean up. */ - - if (buffer != packet_buffer) - __zoid_release_buffer(buffer->data); - else - NYI(); -} - - - -/* - * Enqueues a message in the send queue, to be sent back to a compute - * node later (or in another thread), in the handle_messages() function. - */ -static void -queue_message(struct zoid_buffer* buffer) -{ -#if 1 - if(buffer->softheader.zoid_id == ZOID_ID && !buffer->userbuf_out && - buffer->total_len <= TREE_DATA_SIZE) - enqueue_zoid_buf(high_priority_send_queue, buffer); - else -#endif - enqueue_zoid_buf(send_queue, buffer); -} - -/* - * Body of (multiple) worker threads. Takes a command from the head of the - * receive queue, processes it, and puts the reply at the tail of the send - * queue. - */ -void* -worker_thread_body(void* arg) -{ - struct thread_specific_data thread_data; - - /* Various callbacks from generated and user code can access this data. */ - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return NULL; - } - - for (;;) - { - struct zoid_buffer *cmd_buffer, *res_buffer; - char *result = NULL; - int result_len, total_len; - int release_cmd_buffer = 1; - -/* int sig; */ -/* pthread_mutex_lock(&poll_mutex); */ -/* sigwait(&sigusr1_set, &sig); */ -/* pthread_mutex_unlock(&poll_mutex); */ - - int r = semop(recv_q_sem, &down, 1); - if(r) { - perror("semop failed"); - exit(-1); - } - -/* int poll_cnt = 0; */ -/* while(!((volatile zoid_buf_pipe*)recv_queue)->first) { */ -/* if((poll_cnt += 1 << 24) == 0) */ -/* pthread_yield(); */ -/* } */ - - cmd_buffer = dequeue_zoid_buf(recv_queue); - - //printf("Launched worker thread! %d %p\n", sig, cmd_buffer); - - if(cmd_buffer == (struct zoid_buffer*)1) { -/* pthread_mutex_unlock(&poll_mutex); */ - printf("Stopping worker thread\n"); - return NULL; - } - - //assert(cmd_buffer); - if(!cmd_buffer) - continue; - -/* pthread_mutex_unlock(&poll_mutex); */ - - thread_data.userbuf = NULL; - - if(cmd_buffer->userbuf_out) { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - cmd_buffer->userbuf_out_cb(cmd_buffer->userbuf_out, - cmd_buffer->userbuf_out_priv); - cmd_buffer->userbuf_out = NULL; - - __zoid_release_buffer(cmd_buffer->data); - continue; - } - - if (!cmd_buffer->errnum) { - int command_id, header_id, function_id; - struct zoid_dispatch_entry* entry; - - if (cmd_buffer->softheader.zoid_id == ZOID_ID) - { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - /* The buffer always begins with a command id. */ - memcpy(&command_id, cmd_buffer->data, sizeof(command_id)); - - header_id = command_id >> 16; - function_id = command_id & 0xffff; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->header_id == header_id) - { - if (function_id >= 0 && - function_id < entry->array_size) - { - if (cmd_buffer->softheader.flags & - ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) - { - /* We need to invoke a special allocate - callback, not (yet) the argument unpacking - stub. */ - int buffer_len; - struct CNProc* cnproc = &cn_procs[cmd_buffer-> - softheader.pset_cpu_rank]; - - memcpy(&buffer_len, cmd_buffer->data + - sizeof(command_id), sizeof(buffer_len)); - - cmd_buffer->userbuf_in = - entry->dispatch_array[function_id]. - userbuf_allocate_cb(buffer_len); - - if (!cmd_buffer->userbuf_in) - { - /* User failed to provide us with a buffer. - Instead of an ACK, we will return an - error message. */ - cmd_buffer->errnum = ENOMEM; - break; - } - - cmd_buffer->userbuf_in_len = buffer_len; - cnproc->buffer = cmd_buffer; - cnproc->current_buf = cmd_buffer->userbuf_in; - - cmd_buffer->ack_sent = 1; - - flush_zoid_buf(cmd_buffer); - - schedule_ack(cmd_buffer, - &cmd_buffer->softheader); - - /* We are not done with this command yet, - so don't release the buffer! */ - release_cmd_buffer = 0; - - break; - } - - if (cmd_buffer->userbuf_in) - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len, - char* user_buffer) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len, - cmd_buffer->userbuf_in); - } - else - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len); - } - - if (!result) - { - /* An error here is most likely an - out-of-memory situation or a too large - reply buffer. */ - cmd_buffer->errnum = thread_data.errnum; - if (cmd_buffer->errnum == E2BIG) - cmd_buffer->excessive_size = - thread_data.last_excessive_size; - } - break; - } - fprintf(stderr, "Invalid function ID!\n"); - cmd_buffer->errnum = ENOSYS; - break; - } - if (!entry) - { - fprintf(stderr, "Server stub backend not loaded!\n"); - cmd_buffer->errnum = ENOSYS; - } - } - else - { - NYI(); - /* CIOD message. */ - struct CioHeader* cioheader = - (struct CioHeader*)&cmd_buffer->softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) - { - thread_data.calling_process_id = i; - break; - } - if (i == pset_proc_count && - cioheader->_messageCode == MFC_REQUESTRESET) - { - /* We can receive a REQUESTRESET message for an - unknown process, which is the second CPU of a - node running in CO node. We assign the message - to the primary CPU. */ - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == - cioheader->_treeAddress) - { - thread_data.calling_process_id = i; - break; - } - } - assert(i != pset_proc_count); - - result = handle_ciod_message(cioheader, cmd_buffer->data, - &result_len, i); - total_len = result_len; - } - } /* if (!cmd_buffer->errnum) */ - - /* An error here can be either inherited from the receiving thread - (most likely an out-of-memory condition) or caused by call - function (most likely too large result buffer needed). */ - if (cmd_buffer->errnum) - { - /* Currently, three different error values are possible: - ENOMEM -- out of memory when allocating either the - command or result buffer. - E2BIG -- the command sent was too large (shouldn't happen, - since client checks that as well) or the result - would have been too large. cnproc->excessive_size - contains the excessive size value. - ENOSYS -- the stub backend for the requested function is - not loaded. - We manufacture an error reply here and it gets sent by - the standard reply sending code below. */ - result = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - if (cmd_buffer->errnum == E2BIG) - { - result_len = sizeof(int); - memcpy(result, &cmd_buffer->excessive_size, sizeof(int)); - } - else - result_len = 0; - total_len = result_len; - } - - /* There are currently two cases where result == NULL without there - being any error: - - CIOD command that requires no response, - - partially received input userbuf, so we invoked the allocation - callback, not the actual argument-unpacking one. */ - if (result) - { - res_buffer = get_zoid_buffer(result); - - res_buffer->result_len = result_len; - res_buffer->total_len = total_len; - - res_buffer->errnum = cmd_buffer->errnum; - res_buffer->excessive_size = cmd_buffer->excessive_size; - - if ((res_buffer->userbuf_out = thread_data.userbuf)) - { - res_buffer->userbuf_out_cb = thread_data.userbuf_cb; - res_buffer->userbuf_out_priv = thread_data.userbuf_priv; - } - - memcpy(&res_buffer->softheader, &cmd_buffer->softheader, - sizeof(res_buffer->softheader)); - - queue_message(res_buffer); - } - - if (cmd_buffer != packet_buffer && release_cmd_buffer) - __zoid_release_buffer(cmd_buffer->data); - - } /* for (;;) */ -} - -int unix_open(const char *pathname /* in:str */, - int flags /* in:obj */, - mode_t mode /* in:obj */) __attribute__((weak)); -int unix_close(int fd /* in:obj */) __attribute__((weak)); -ssize_t unix_write(int fd /* in:obj */, - const void *buf /* in:arr:size=+1:zerocopy */, - size_t count /* in:obj */) __attribute__((weak)); - -/* - * Processes the CIOD-specific messages. These are generated by the kernel, - * on application exit or crash. - */ -static char* -handle_ciod_message(struct CioHeader* cioheader, char* data, int* result_len, - int pset_cpu_rank) -{ - struct MTC_ReplyWriteCore reply; - char* result; - - switch (cioheader->_messageCode) - { - case MFC_REQUESTEXIT: - { - struct MFC_RequestExit* request = (struct MFC_RequestExit*)data; - fprintf(stderr, "ABNORMAL exit request from process %d, " - "code %d (%s)\n", cn_procs[pset_cpu_rank].pid, - request->s.status, - (request->s.reason == EXITED ? "exited" : "killed")); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYEXIT; - *result_len = 0; - break; -#endif - return NULL; - } - - case MFC_REQUESTRESET: - fprintf(stderr, "ABNORMAL reset request from node %d cpu %d\n", - cioheader->_treeAddress, cioheader->_cpu); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYRESET; - *result_len = 0; - break; -#endif - return NULL; - - case MFC_REQUESTWRITECORE: - { - struct MFC_RequestWriteCore* request = - (struct MFC_RequestWriteCore*)data; - char buffer[20]; - int fd; - - fprintf(stderr, "ABNORMAL core dump request from process %d\n", - cn_procs[pset_cpu_rank].pid); - - sprintf(buffer, "core.%d", cn_procs[pset_cpu_rank].pid); - - if (&unix_open) - fd = unix_open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600); - else - { - if ((fd = open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600)) < 0) - fd = -errno; - } - - if (fd < 0) - fprintf(stderr, "Error opening file %s: %s\n", buffer, - strerror(-fd)); - else - { - if (&unix_write) - unix_write(fd, data + request->s.offset, request->s.size); - else - write(fd, data + request->s.offset, request->s.size); - - if (&unix_close) - unix_close(fd); - else - close(fd); - } - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); - - if (fd >= 0) - { - reply.s.rc = 0; - reply.s.errnum = 0; - } - else - { - reply.s.rc = -1; - reply.s.errnum = EACCES; /* Who cares what it was anyway. */ - } - cioheader->_messageCode = MFC_REPLYWRITECORE; - *result_len = sizeof(reply); - break; - } - } - - cioheader->_packetIndex = 0; - cioheader->_packetTotal = 1; - - result = __zoid_allocate_buffer(*result_len); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - memcpy(result, &reply, *result_len); - - return result; -} - -/* - * Part of user API. Returns a (thread-local) pset-local process id. - */ -int -__zoid_calling_process_id(void) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*)pthread_getspecific(thread_specific_key); - return thread_data->calling_process_id; -} - -/* - * Part of user API. Registers an output userbuf and a callback to - * be called when the buffer is no longer needed. - */ -void -__zoid_register_userbuf(void* userbuf, - void (*callback)(void* userbuf, void* priv), - void* priv) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - thread_data->userbuf = userbuf; - thread_data->userbuf_cb = callback; - thread_data->userbuf_priv = priv; -} - -/* - * Part of user API. Sends standard output/error message to the right place. - */ -int -__zoid_send_output(int pid, int fd, const char* buffer, int len) -{ - struct CiodOutputHeader header; - int ret; - - if (pthread_mutex_lock(&output_mutex)) - return -1; - - if (ciod_streams_socket == -1) - ret = write(fd, buffer, len); - else - { - header.fd = fd + 1; - header.cpu = cn_procs[pid].cpu; - header.node = cn_procs[pid].p2p_addr; - header.rank = cn_procs[pid].pid; - header.len = len; - - if (write(ciod_streams_socket, &header, sizeof(header)) != - sizeof(header)) - { - ret = -1; - } - else - ret = write(ciod_streams_socket, buffer, len); - } - - if (pthread_mutex_unlock(&output_mutex)) - return -1; - - return ret; -} - -void *ciod_thread_body(void* arg) { - int signum; - struct timeval tv; - double last_packet_received; - struct CioHeader cio; - int i; - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - /* We only support one command at the moment: kill. */ - for (;;) - { - int cmd; - - if (read(ciod_control_socket, &cmd, sizeof(cmd)) != sizeof(cmd)) - perror("read"); - else if (cmd != 4) - fprintf(stderr, "Unknown command read: 0x%x\n", cmd); - else - { - if (read(ciod_control_socket, &signum, sizeof(signum)) != - sizeof(signum)) - { - perror("read"); - } - else - break; - } - } - - /* Disable thread cancellation before we try to acquire any mutexes. */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - - /* We set this flag even before we send anything, for one simple reason: - so that pthread_mutex_lock below can succeed. worker threads check - for this flag and release the receive_queue_mutex if it is set, so that - we can get it. */ - sent_kill_packet = 1; - - enter_critical_section(&tree_locks); - - /* We are going to kill the job. We do that by sending a special kill - packet that wakes up the CNK, which takes over and kills the processes. - There is one but: there might be some ZOID messages in the network, and - if CNK or CIOD see them, they might act weird. - There is no fool-proof solution to this problem. What we do is to - read all the messages that we can on this side. We throw these away; - we are killing the job anyway, so who cares. Hopefully this action - will quiesce the network (every sender waits for a reply before it can - continue), so that we can then wake up the CNK relatively safely. */ - - /* Suck all the packets from the network first. */ - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - for (;;) - { - BGLTreeStatusRegister status; - - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if (status.recpktcnt == 0) - { - gettimeofday(&tv, NULL); - /* If we haven't received anything in five seconds, assume we - are done. */ - if (tv.tv_sec + tv.tv_usec * 1e-6 - last_packet_received > 5) - break; - } - else - { - while (status.recpktcnt--) - { - BGLTreePacketHardHeader hardheader; - BGLTreeFIFO_recv(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - } - } - - /* Now, kill the processes. */ - - cio._dataSize = 0; - cio._messageCode = MTC_KILL; - cio._packetTotal = 1; - cio._packetIndex = 0; - - ((struct MTC_Kill*)packet_buffer->data)->s.signum = signum; - - enter_critical_section(&pending_exit_locks); - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].status == PROC_STATUS_RUNNING) - { - BGLTreeStatusRegister status; - BGLTreePacketHardHeader hardheader; - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 1, cn_procs[i].p2p_addr); - cio._cpu = cn_procs[i].cpu; - cio._rankInCnodes = cn_procs[i].pset_rank; - cio._treeAddress = cn_procs[i].p2p_addr; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&cio, - (BGLQuad*)packet_buffer->data); - - pending_exit_requests--; - cn_procs[i].status = PROC_STATUS_EXIT; - } - - leave_critical_section(&pending_exit_locks); - - leave_critical_section(&tree_locks); - - return NULL; -} - -/* - * Called after a job has finished to clean up any allocated buffers, - * reset state variables, etc. - */ -void cleanup_traffic(void) -{ -#if 1 - NYI(); -#else - struct zoid_buffer* buf; - - while (send_queue_first) - { - buf = send_queue_first; - send_queue_first = send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (high_priority_send_queue_first) - { - buf = high_priority_send_queue_first; - high_priority_send_queue_first = high_priority_send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (receive_queue_first) - { - buf = *receive_queue_first; - *receive_queue_first = (*receive_queue_first)->next; - __zoid_release_buffer(buf->data); - } - - while (ack_queue_first) - { - buf = ack_queue_first; - ack_queue_first = ack_queue_first->next; - __zoid_release_buffer(buf->data); - } - - currently_receiving_large_message = 0; -#endif -} diff --git a/3rdparty/zoid/daemon.2cores/handle_traffic.c.signal b/3rdparty/zoid/daemon.2cores/handle_traffic.c.signal deleted file mode 100644 index 9bac9888c20bdb25cdf869459541b9cef13d5b8b..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/handle_traffic.c.signal +++ /dev/null @@ -1,1416 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: handle_traffic.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For pthread_yield. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <signal.h> -#include <sys/time.h> -#include <unistd.h> - -#include <bglmemmap.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" - -static BGLQuad* recv_data_cb(void* priv, BGLQuad* softheader); -static void queue_message(struct zoid_buffer* buffer); -static char* handle_ciod_message(struct CioHeader* cioheader, char* data, - int* result_len, int pset_cpu_rank); -static void high_priority_send(struct zoid_buffer *buffer); -static void send_packet(void); - -zoid_buf_pipe *recv_queue; -lock_pair recv_queue_locks; - -zoid_buf_pipe *send_queue; -lock_pair send_queue_locks; - -zoid_buf_pipe *high_priority_send_queue; -lock_pair high_priority_send_queue_locks; - -zoid_buf_pipe *ack_queue; -lock_pair ack_queue_locks; - -extern sigset_t sigusr1_set; - -/* Indicates whether any more messages that need an ACK should wait. */ -static int currently_receiving_large_message = 0; - -#define NYI() assert(0) - -/* - * If there is no large message currently being received, schedules the - * given message to be received (by sending an ACK to the source). - */ -static void schedule_ack(struct zoid_buffer* buffer, - struct ZoidSoftHeader* softheader) { - char* ack_buff; - struct zoid_buffer* ack_buffer; - - ack_buff = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!ack_buff) - /* Out of memory. MT-unsafe! */ - ack_buff = packet_buffer->data; - - ack_buffer = get_zoid_buffer(ack_buff); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_buffer->softheader = *softheader; - ack_buffer->softheader.flags = ZOID_SOFTHEADER_ACK_PACKET; - - enqueue_zoid_buf(ack_queue, ack_buffer); -} - - -#if 0 -/* - * CIOD-packet-specific receiving code. Handles message acknowledgements. - */ -static void -receive_ciod_packet(struct CNProc* cnproc, struct CioHeader* cioheader) -{ -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received CIOD packet: _cpu %d, " - "_rankInCnodes %d, _reserved %d, _dataSize %d, " - "_treeAddress %d, _messageCode %d, _packetTotal %d, " - "_packetIndex %d\n", cioheader->_cpu, - cioheader->_rankInCnodes, cioheader->_reserved, - cioheader->_dataSize, cioheader->_treeAddress, - cioheader->_messageCode, cioheader->_packetTotal, - cioheader->_packetIndex); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); -#endif - if (cioheader->_packetIndex == 0) - { - /* First packet. */ - cnproc->msg_length = cioheader->_packetTotal * TREE_DATA_SIZE; - - if (cioheader->_packetTotal > 1) - { - /* CIOD protocol is rendezvous-based. For multi-packet - messages, first packet must be acknowledged before the - rest are sent. Create this acknowledgement here. */ - char* buffer; - struct zoid_buffer* ack_buffer; - struct CioHeader ack_cioheader; - - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - ack_buffer = get_zoid_buffer(buffer); - ack_buffer->result_len = ack_buffer->total_len = 0; - - ack_cioheader = *cioheader; - ack_cioheader._messageCode = MTC_ACK; - ack_cioheader._packetTotal = 1; - memcpy(&ack_buffer->softheader, &ack_cioheader, - sizeof(ack_cioheader)); - - /* Let the regular reply sending code do the rest. */ - queue_message(ack_buffer); - } - } -} -#endif - - -static void receive_zoid_packet(struct CNProc *cnproc, - struct ZoidSoftHeader *softheader) { -#if 0 - int i, *pkt = (int*)cnproc->current_buf; - fprintf(stderr, "Received ZOID packet from %d, flags %d, " - "len %d @ %p\n", softheader->pset_cpu_rank, - softheader->flags, softheader->msg_length, cnproc->buffer); - for (i = 0; i < 240 / 4; i++) { - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if(softheader->flags & ZOID_SOFTHEADER_FIRST_PACKET) { - /* Check if this is really the first message. */ - if(cnproc->current_buf != cnproc->buffer->data) { - fprintf(stderr, "First packet received from %d while " - "reading another message\n", - softheader->pset_cpu_rank); - /* Nothing sensible to do, so let's just try to - continue. */ - } - - cnproc->msg_length = softheader->msg_length; - } else if(cnproc->current_buf == cnproc->buffer->data && - !cnproc->buffer->errnum) { - /* Received an unexpected packet without FIRST set. */ - fprintf(stderr, "Received unexpected packet from %d\n", - softheader->pset_cpu_rank); - } - - if(softheader->msg_length != cnproc->msg_length) - fprintf(stderr, "Message length mismatch from %d, found %d, " - "expecting %d\n", softheader->pset_cpu_rank, - softheader->msg_length, cnproc->msg_length); - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* See recv_data_cb. We used an auxiliary buffer. Copy the data - where it belongs. */ - int len; - len = cnproc->buffer->userbuf_in_len - (cnproc->current_buf - - cnproc->buffer->userbuf_in); - if (len > TREE_DATA_SIZE) - len = TREE_DATA_SIZE; - memcpy(cnproc->current_buf, packet_buffer->data, len); - } - - if ((softheader->flags & ZOID_SOFTHEADER_NEED_ACK_PACKET)) { - cnproc->buffer->ack_sent = 1; - schedule_ack(cnproc->buffer, softheader); - } -} - - - -static int -handle_received_packet(struct CNProc* cnproc, - struct ZoidSoftHeader* softheader) -{ - struct CioHeader* cioheader; - - cioheader = (softheader->zoid_id == ZOID_ID ? - NULL : (struct CioHeader*)softheader); - - /* If an error has been marked, we are going to drop the data anyway, - and we probably don't have enough buffer space to hold it, so don't - even try to place it neatly in the buffer. */ - if (!cnproc->buffer->errnum) - cnproc->current_buf += TREE_DATA_SIZE; - - /* Is this the last packet of a message? */ - if (cioheader ? - cioheader->_packetIndex == cioheader->_packetTotal - 1 : - softheader->flags & ZOID_SOFTHEADER_LAST_PACKET) { - if (!cnproc->buffer->errnum) { - - /* Check if we have read all that we were supposed to. */ - if (cnproc->buffer->userbuf_in) { - if (cnproc->current_buf - cnproc->buffer->userbuf_in < - cnproc->buffer->userbuf_in_len){ - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes of userbuf, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->userbuf_in, - cnproc->buffer->userbuf_in_len); - } - } else { - if (cnproc->current_buf - cnproc->buffer->data < - cnproc->msg_length) { - fprintf(stderr, "Received last packet from %d but read " - "only %d bytes, expected %d\n", - softheader->pset_cpu_rank, - cnproc->current_buf - cnproc->buffer->data, - cnproc->msg_length); - } - } - } - - if (!cioheader && (softheader->flags & ZOID_SOFTHEADER_ASSERT_PACKET)) { - fprintf(stderr, "Process %d detected an internal error in " - "communication code!\n", - cn_procs[softheader->pset_cpu_rank].pid); - fprintf(stderr, "%s\n", cnproc->buffer->data); - - /* Core dump message will follow; clean up for it. */ - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 0; - } - - if(!cioheader && *(int*)cnproc->buffer->data == ZOID_TERMINATING_ID) { -#if 0 - fprintf(stderr, "Exit request from process %d\n", - cn_procs[softheader->pset_cpu_rank].pid); -#endif - if (cnproc->buffer != packet_buffer) - __zoid_release_buffer(cnproc->buffer->data); - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - enter_critical_section(&pending_exit_locks); - - cnproc->status = PROC_STATUS_EXIT; - pending_exit_requests--; - - leave_critical_section(&pending_exit_locks); - - return 0; - } - - /* A buffer is complete. Queue it. */ - - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - if (cnproc->buffer->ack_sent) - currently_receiving_large_message = 0; - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if(softheader->flags & ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) { - /* We are done reading the first (non-userbuf) part of the input - userbuf message. We will have to invoke the user allocation - callback now. Behave as if the message is complete: we will be - able to recognize that it's not by the above flag in the - softheader. */ - memcpy(&cnproc->buffer->softheader, softheader, sizeof(*softheader)); - - enqueue_zoid_buf(recv_queue, cnproc->buffer); - - cnproc->buffer = NULL; - cnproc->current_buf = NULL; - - return 1; - } - - if (cnproc->buffer->userbuf_in ? - cnproc->current_buf - cnproc->buffer->userbuf_in >= - cnproc->buffer->userbuf_in_len : - cnproc->current_buf - cnproc->buffer->data >= cnproc->msg_length) { - /* We've read more than we were supposed to without receiving - the end-of-message packet! */ - - fprintf(stderr, "Message from %d longer than declared %d\n", - softheader->pset_cpu_rank, cnproc->msg_length + - cnproc->buffer->userbuf_in_len); - - /* Nothing better to do than to prevent a buffer overrun... */ - cnproc->current_buf -= TREE_DATA_SIZE; - } - - return 0; -} - - - -static BGLQuad* recv_data_cb(void *priv, BGLQuad *softheader) { - struct CNProc* cnproc = NULL; - struct CNProc** cnproc_ptr = priv; - struct ZoidSoftHeader* zsh=(struct ZoidSoftHeader*)softheader; - int msglen; - BGLQuad *ret, *dummy; - - if(zsh->zoid_id == ZOID_ID) { - if (zsh->pset_cpu_rank < pset_size * (vn_mode ? 2 : 1) && - pset_rank_mapping[zsh->pset_cpu_rank] != -1) { - zsh->pset_cpu_rank = pset_rank_mapping[zsh->pset_cpu_rank]; - cnproc = &cn_procs[zsh->pset_cpu_rank]; - *cnproc_ptr = cnproc; - } else { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - - msglen = zsh->msg_length; - - if(zsh->flags & ZOID_SOFTHEADER_ASSERT_PACKET) { - /* This is a special one-packet message. If anything was being - read, forget about it and reset the buffer. */ - if(cnproc->buffer) { - if(cnproc->buffer->ack_sent) { - currently_receiving_large_message = 0; - cnproc->buffer->ack_sent = 0; - } - cnproc->current_buf = cnproc->buffer->data; - } - } - } else { - /* Assume it's a message from CNK (CIOD protocol). */ - struct CioHeader* cioheader = (struct CioHeader*)softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) { - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - - if(i == pset_proc_count) { - if(cioheader->_messageCode == MFC_REQUESTRESET) { - /* We can receive a REQUESTRESET message for an unknown - process, which is the second CPU of a node running in CO - node. We assign the message to the primary CPU. */ - for(i = 0; i < pset_proc_count; i++) { - if(cn_procs[i].p2p_addr == cioheader->_treeAddress) { - cnproc = &cn_procs[i]; - *cnproc_ptr = cnproc; - break; - } - } - if(i == pset_proc_count) { - /* Invalid message. */ - NYI(); - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } else { - NYI(); - /* Invalid message. */ - *cnproc_ptr = NULL; - return (BGLQuad*)packet_buffer->data; - } - } - - msglen = cioheader->_packetTotal * TREE_DATA_SIZE; - } - - if (!cnproc->buffer) { - /* First packet of a new message. Initialize things. */ - char* buffer = __zoid_allocate_buffer(msglen); - if(buffer) { - cnproc->buffer = get_zoid_buffer(buffer); - } else { - NYI(); - /* Either the requested size is too large, or we are out of RAM. - In either case, we won't be able to handle the request. - We will read the command "whereever" (preferably into a - process-specific buffer, otherwise a global one (*not* - MT-safe), and flag an error so that the receive_thread knows - not to pass this to worker threads. */ - struct thread_specific_data* thread_data; - thread_data = (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - if (thread_data->errnum == E2BIG) - /* Command was too large. */ - buffer = __zoid_allocate_buffer(TREE_PACKET_SIZE); - - if (!buffer) - /* Out of memory. MT-unsafe! */ - buffer = packet_buffer->data; - - cnproc->buffer = get_zoid_buffer(buffer); - - cnproc->buffer->errnum = thread_data->errnum; - if (thread_data->errnum == E2BIG) - cnproc->buffer->excessive_size = - thread_data->last_excessive_size; - } - - cnproc->current_buf = buffer; - } - - if (((unsigned int)cnproc->current_buf & 0xf) || - (cnproc->buffer->userbuf_in && - cnproc->current_buf + TREE_DATA_SIZE > - cnproc->buffer->userbuf_in + cnproc->buffer->userbuf_in_len)) { - /* Either condition can only take place for input userbuf, because - zoid-allocated buffers are always properly aligned and rounded - up. Use an auxiliary buffer; we will copy back from it in - receive_zoid_packet(). */ - ret = (BGLQuad*)packet_buffer->data; - } else { - ret = (BGLQuad*)cnproc->current_buf; - } - - /* Allocate L1 cache lines that we are going to write entirely, - so that they are not needlessly read from main memory */ - dummy = ret; - asm volatile( - "\taddi %0,%0,16\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0; addi %0,%0,32\n" - "\tdcbz 0,%0\n" - : "+b" (dummy) - ); - - return ret; -} - - -static int receive_packet(void) { - struct CNProc* cnproc; - BGLTreePacketHardHeader hardheader; - char softheader_buf[sizeof(struct ZoidSoftHeader) + 0xf]; - struct ZoidSoftHeader *softheader = (struct ZoidSoftHeader*) - (((unsigned int)softheader_buf + 0xf) & ~0xf); - - BGLTreeFIFO_recvF(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)softheader, - &recv_data_cb, &cnproc); - - if(!cnproc) { - /* Invalid message. */ - int hhdr; - memcpy(&hhdr, &hardheader, sizeof(hardheader)); - fprintf(stderr, "Invalid packet read, hardheader %08x, " - "softheader %08x %08x %08x %08x\n", - hhdr, ((BGLQuad*)softheader)->w0, - ((BGLQuad*)softheader)->w1, ((BGLQuad*)softheader)->w2, - ((BGLQuad*)softheader)->w3); - return 0; - } - - if (softheader->zoid_id == ZOID_ID) - receive_zoid_packet(cnproc, softheader); - else - NYI(); - - return handle_received_packet(cnproc, softheader); -} - - - - -pthread_mutex_t poll_mutex = PTHREAD_MUTEX_INITIALIZER; -extern int *sent_signals, *recv_signals; -pid_t ppid; - - -void bglco_loop() { - struct zoid_buffer *buffer; - BGLTreeStatusRegister status; - - syscall(268); - - ppid = getppid(); - - enter_critical_section(&tree_locks); - - while((volatile int)pending_exit_requests > 0) { - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.recpktcnt > 0) { - if(receive_packet()) { - //printf("Got a complete msg, wake a worker thread...\n"); - //(*sent_signals)++; - int r = sigqueue(ppid, SIGRTMIN+1, (union sigval)0); - if(r) { - perror("sigqueue failed"); - exit(-1); - } - } - } - - status = *(volatile BGLTreeStatusRegister*)(vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if(status.injpktcnt < 8) { - if(!currently_receiving_large_message && - ((volatile zoid_buf_pipe*)ack_queue)->first) { - buffer = dequeue_zoid_buf(ack_queue); - if(buffer) { - high_priority_send(buffer); - currently_receiving_large_message = 1; - } - } else if(((volatile zoid_buf_pipe*)high_priority_send_queue)->first) { - buffer = dequeue_zoid_buf(high_priority_send_queue); - if(buffer) - high_priority_send(buffer); - } else if(((volatile zoid_buf_pipe*)send_queue)->first) { - send_packet(); - } - } - - if (sent_kill_packet) { - printf("WE NEED TO STOP!!!!!!\n"); - break; - } - } - - leave_critical_section(&tree_locks); - - syscall(269); -} - - - -/* - * Sends a single packet to a compute node. - * WARNING: this function is stateful. It internally keeps track of - * the progress of sending the first message from send_queue. - * This function assumes that it won't be called simultaneously by multiple - * threads. Currently, the receive_queue_mutex in handle_messages() ensures - * of that. - */ -static void send_packet(void) { - static struct zoid_buffer* buffer = NULL; - /* The remaining static variables are only valid if buffer != NULL. */ - static BGLTreePacketHardHeader hardheader; - static char* result_current; - static int result_remaining; - static struct CioHeader* cioheader; - static int copying_userbuf; - char* data_buf; - - if (!buffer) { - - enter_critical_section(send_queue->locks); - buffer = ((volatile zoid_buf_pipe*)send_queue)->first; - leave_critical_section(send_queue->locks); - - if (!buffer) - return; - - if (buffer->softheader.zoid_id == ZOID_ID) { - cioheader = NULL; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET; - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - } else { - /* CIOD message. */ - cioheader = (struct CioHeader*)&buffer->softheader; - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cioheader->_treeAddress); - cioheader->_packetIndex = 0; - } - - result_current = buffer->data; - result_remaining = (buffer->userbuf_out ? buffer->result_len : - buffer->total_len); - copying_userbuf = 0; - } - - /* Since we round up the buffers to full packet boundary when allocating, - we never have to copy to intermediate ones... */ - - /* Is this the last packet? */ - if (!cioheader && result_remaining <= TREE_DATA_SIZE && - (!buffer->userbuf_out || copying_userbuf)) { - buffer->softheader.flags |= ZOID_SOFTHEADER_LAST_PACKET; - } - -#if 0 - { - int i, *pkt = (int*)result_current; - fprintf(stderr, "Sending back ZOID packet to %d, flags %d, len %d, @ %p\n", - buffer->softheader.pset_cpu_rank, buffer->softheader.flags, - buffer->softheader.msg_length, buffer); - for (i = 0; i < 240 / 4; i++) - fprintf(stderr, "%08x%c", pkt[i], ((i + 1) % 8 && - i != 240 / 4 - 1 ? - ' ' : '\n')); - } -#endif - - if (((unsigned int)result_current & 0xf) || - (copying_userbuf && result_remaining < TREE_DATA_SIZE)) { - data_buf = packet_buffer->data; - memcpy(data_buf, result_current, result_remaining < TREE_DATA_SIZE ? - result_remaining : TREE_DATA_SIZE); - } else { - data_buf = result_current; - } - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*)data_buf); - - result_remaining -= TREE_DATA_SIZE; - - if (!cioheader) - buffer->softheader.flags = 0; - else - cioheader->_packetIndex++; - - - if (result_remaining <= 0) { - if (buffer->userbuf_out && !copying_userbuf) { - /* If there is a userbuf output part, we end up here prematurely, - as soon as the zoid part of the buffer is sent. Adjust the - pointer and length and send the user buffer next. */ - - result_current = buffer->userbuf_out; - result_remaining += buffer->total_len - buffer->result_len; - copying_userbuf = 1; - } else { - dequeue_zoid_buf(send_queue); - - /* We are done with the current message. Clean up. */ - if (buffer->userbuf_out) { - enqueue_zoid_buf(recv_queue, buffer); - int r = sigqueue(ppid, SIGRTMIN+1, (union sigval)0); - if(r) { - perror("sigqueue failed"); - exit(-1); - } - } else if (buffer != packet_buffer) { - __zoid_release_buffer(buffer->data); - } - buffer = NULL; - } - } else { - result_current += TREE_DATA_SIZE; - } -} - - -/* - * Send a single-packet message that bypasses the normal send queue. - */ -static void high_priority_send(struct zoid_buffer *buffer) { - BGLTreePacketHardHeader hardheader; - - assert(buffer->softheader.zoid_id == ZOID_ID); - assert(!buffer->userbuf_out); - assert(buffer->total_len <= TREE_DATA_SIZE); - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 0, cn_procs[buffer->softheader. - pset_cpu_rank].p2p_addr); - /* softheader->zoid_id is already filled in correctly. */ - buffer->softheader.pset_cpu_rank = - pset_rank_mapping_rev[buffer->softheader.pset_cpu_rank]; - buffer->softheader.flags = ZOID_SOFTHEADER_FIRST_PACKET | - ZOID_SOFTHEADER_LAST_PACKET | - (buffer->softheader.flags & ZOID_SOFTHEADER_ACK_PACKET); - buffer->softheader.msg_length = buffer->result_len; - buffer->softheader.errnum = buffer->errnum; - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&buffer->softheader, - (BGLQuad*) buffer->data); - - /* We are done with the current message. Clean up. */ - - if (buffer != packet_buffer) - __zoid_release_buffer(buffer->data); - else - NYI(); -} - - - -/* - * Enqueues a message in the send queue, to be sent back to a compute - * node later (or in another thread), in the handle_messages() function. - */ -static void -queue_message(struct zoid_buffer* buffer) -{ -#if 1 - if(buffer->softheader.zoid_id == ZOID_ID && !buffer->userbuf_out && - buffer->total_len <= TREE_DATA_SIZE) - enqueue_zoid_buf(high_priority_send_queue, buffer); - else -#endif - enqueue_zoid_buf(send_queue, buffer); -} - -/* - * Body of (multiple) worker threads. Takes a command from the head of the - * receive queue, processes it, and puts the reply at the tail of the send - * queue. - */ -void* -worker_thread_body(void* arg) -{ - struct thread_specific_data thread_data; - - /* Various callbacks from generated and user code can access this data. */ - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return NULL; - } - - for (;;) - { - struct zoid_buffer *cmd_buffer, *res_buffer; - char *result = NULL; - int result_len, total_len; - int release_cmd_buffer = 1; - - int sig; - pthread_mutex_lock(&poll_mutex); - sigwait(&sigusr1_set, &sig); - pthread_mutex_unlock(&poll_mutex); - - -/* int poll_cnt = 0; */ -/* while(!((volatile zoid_buf_pipe*)recv_queue)->first) { */ -/* if((poll_cnt += 1 << 24) == 0) */ -/* pthread_yield(); */ -/* } */ - - cmd_buffer = dequeue_zoid_buf(recv_queue); - - //printf("Launched worker thread! %d %p\n", sig, cmd_buffer); - - if(cmd_buffer == (struct zoid_buffer*)1) { -/* pthread_mutex_unlock(&poll_mutex); */ - printf("Stopping worker thread\n"); - return NULL; - } - - //assert(cmd_buffer); - if(!cmd_buffer) - continue; - -/* pthread_mutex_unlock(&poll_mutex); */ - - thread_data.userbuf = NULL; - - if(cmd_buffer->userbuf_out) { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - cmd_buffer->userbuf_out_cb(cmd_buffer->userbuf_out, - cmd_buffer->userbuf_out_priv); - cmd_buffer->userbuf_out = NULL; - - __zoid_release_buffer(cmd_buffer->data); - continue; - } - - if (!cmd_buffer->errnum) { - int command_id, header_id, function_id; - struct zoid_dispatch_entry* entry; - - if (cmd_buffer->softheader.zoid_id == ZOID_ID) - { - thread_data.calling_process_id = - cmd_buffer->softheader.pset_cpu_rank; - - /* The buffer always begins with a command id. */ - memcpy(&command_id, cmd_buffer->data, sizeof(command_id)); - - header_id = command_id >> 16; - function_id = command_id & 0xffff; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->header_id == header_id) - { - if (function_id >= 0 && - function_id < entry->array_size) - { - if (cmd_buffer->softheader.flags & - ZOID_SOFTHEADER_INPUT_USERBUF_PACKET) - { - /* We need to invoke a special allocate - callback, not (yet) the argument unpacking - stub. */ - int buffer_len; - struct CNProc* cnproc = &cn_procs[cmd_buffer-> - softheader.pset_cpu_rank]; - - memcpy(&buffer_len, cmd_buffer->data + - sizeof(command_id), sizeof(buffer_len)); - - cmd_buffer->userbuf_in = - entry->dispatch_array[function_id]. - userbuf_allocate_cb(buffer_len); - - if (!cmd_buffer->userbuf_in) - { - /* User failed to provide us with a buffer. - Instead of an ACK, we will return an - error message. */ - cmd_buffer->errnum = ENOMEM; - break; - } - - cmd_buffer->userbuf_in_len = buffer_len; - cnproc->buffer = cmd_buffer; - cnproc->current_buf = cmd_buffer->userbuf_in; - - cmd_buffer->ack_sent = 1; - - flush_zoid_buf(cmd_buffer); - - schedule_ack(cmd_buffer, - &cmd_buffer->softheader); - - /* We are not done with this command yet, - so don't release the buffer! */ - release_cmd_buffer = 0; - - break; - } - - if (cmd_buffer->userbuf_in) - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len, - char* user_buffer) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len, - cmd_buffer->userbuf_in); - } - else - { - char* (*function_ptr)(char* command_buffer, - int* result_len, - int* total_len) = - entry->dispatch_array[function_id]. - function_ptr; - - result = function_ptr(((char*)cmd_buffer->data) - + sizeof(command_id), - &result_len, &total_len); - } - - if (!result) - { - /* An error here is most likely an - out-of-memory situation or a too large - reply buffer. */ - cmd_buffer->errnum = thread_data.errnum; - if (cmd_buffer->errnum == E2BIG) - cmd_buffer->excessive_size = - thread_data.last_excessive_size; - } - break; - } - fprintf(stderr, "Invalid function ID!\n"); - cmd_buffer->errnum = ENOSYS; - break; - } - if (!entry) - { - fprintf(stderr, "Server stub backend not loaded!\n"); - cmd_buffer->errnum = ENOSYS; - } - } - else - { - NYI(); - /* CIOD message. */ - struct CioHeader* cioheader = - (struct CioHeader*)&cmd_buffer->softheader; - int i; - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == cioheader->_treeAddress && - cn_procs[i].cpu == cioheader->_cpu) - { - thread_data.calling_process_id = i; - break; - } - if (i == pset_proc_count && - cioheader->_messageCode == MFC_REQUESTRESET) - { - /* We can receive a REQUESTRESET message for an - unknown process, which is the second CPU of a - node running in CO node. We assign the message - to the primary CPU. */ - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].p2p_addr == - cioheader->_treeAddress) - { - thread_data.calling_process_id = i; - break; - } - } - assert(i != pset_proc_count); - - result = handle_ciod_message(cioheader, cmd_buffer->data, - &result_len, i); - total_len = result_len; - } - } /* if (!cmd_buffer->errnum) */ - - /* An error here can be either inherited from the receiving thread - (most likely an out-of-memory condition) or caused by call - function (most likely too large result buffer needed). */ - if (cmd_buffer->errnum) - { - /* Currently, three different error values are possible: - ENOMEM -- out of memory when allocating either the - command or result buffer. - E2BIG -- the command sent was too large (shouldn't happen, - since client checks that as well) or the result - would have been too large. cnproc->excessive_size - contains the excessive size value. - ENOSYS -- the stub backend for the requested function is - not loaded. - We manufacture an error reply here and it gets sent by - the standard reply sending code below. */ - result = __zoid_allocate_buffer(TREE_PACKET_SIZE); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - if (cmd_buffer->errnum == E2BIG) - { - result_len = sizeof(int); - memcpy(result, &cmd_buffer->excessive_size, sizeof(int)); - } - else - result_len = 0; - total_len = result_len; - } - - /* There are currently two cases where result == NULL without there - being any error: - - CIOD command that requires no response, - - partially received input userbuf, so we invoked the allocation - callback, not the actual argument-unpacking one. */ - if (result) - { - res_buffer = get_zoid_buffer(result); - - res_buffer->result_len = result_len; - res_buffer->total_len = total_len; - - res_buffer->errnum = cmd_buffer->errnum; - res_buffer->excessive_size = cmd_buffer->excessive_size; - - if ((res_buffer->userbuf_out = thread_data.userbuf)) - { - res_buffer->userbuf_out_cb = thread_data.userbuf_cb; - res_buffer->userbuf_out_priv = thread_data.userbuf_priv; - } - - memcpy(&res_buffer->softheader, &cmd_buffer->softheader, - sizeof(res_buffer->softheader)); - - queue_message(res_buffer); - } - - if (cmd_buffer != packet_buffer && release_cmd_buffer) - __zoid_release_buffer(cmd_buffer->data); - - } /* for (;;) */ -} - -int unix_open(const char *pathname /* in:str */, - int flags /* in:obj */, - mode_t mode /* in:obj */) __attribute__((weak)); -int unix_close(int fd /* in:obj */) __attribute__((weak)); -ssize_t unix_write(int fd /* in:obj */, - const void *buf /* in:arr:size=+1:zerocopy */, - size_t count /* in:obj */) __attribute__((weak)); - -/* - * Processes the CIOD-specific messages. These are generated by the kernel, - * on application exit or crash. - */ -static char* -handle_ciod_message(struct CioHeader* cioheader, char* data, int* result_len, - int pset_cpu_rank) -{ - struct MTC_ReplyWriteCore reply; - char* result; - - switch (cioheader->_messageCode) - { - case MFC_REQUESTEXIT: - { - struct MFC_RequestExit* request = (struct MFC_RequestExit*)data; - fprintf(stderr, "ABNORMAL exit request from process %d, " - "code %d (%s)\n", cn_procs[pset_cpu_rank].pid, - request->s.status, - (request->s.reason == EXITED ? "exited" : "killed")); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYEXIT; - *result_len = 0; - break; -#endif - return NULL; - } - - case MFC_REQUESTRESET: - fprintf(stderr, "ABNORMAL reset request from node %d cpu %d\n", - cioheader->_treeAddress, cioheader->_cpu); - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); -#if 0 - cioheader->_messageCode = MTC_REPLYRESET; - *result_len = 0; - break; -#endif - return NULL; - - case MFC_REQUESTWRITECORE: - { - struct MFC_RequestWriteCore* request = - (struct MFC_RequestWriteCore*)data; - char buffer[20]; - int fd; - - fprintf(stderr, "ABNORMAL core dump request from process %d\n", - cn_procs[pset_cpu_rank].pid); - - sprintf(buffer, "core.%d", cn_procs[pset_cpu_rank].pid); - - if (&unix_open) - fd = unix_open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600); - else - { - if ((fd = open(buffer, O_CREAT | O_WRONLY | O_TRUNC, 0600)) < 0) - fd = -errno; - } - - if (fd < 0) - fprintf(stderr, "Error opening file %s: %s\n", buffer, - strerror(-fd)); - else - { - if (&unix_write) - unix_write(fd, data + request->s.offset, request->s.size); - else - write(fd, data + request->s.offset, request->s.size); - - if (&unix_close) - unix_close(fd); - else - close(fd); - } - - enter_critical_section(&pending_exit_locks); - - if (cn_procs[pset_cpu_rank].status == PROC_STATUS_RUNNING) - pending_exit_requests--; - cn_procs[pset_cpu_rank].status = PROC_STATUS_EXIT_ABNORMAL; - abnormal_msg_received = 1; - - leave_critical_section(&pending_exit_locks); - - if (fd >= 0) - { - reply.s.rc = 0; - reply.s.errnum = 0; - } - else - { - reply.s.rc = -1; - reply.s.errnum = EACCES; /* Who cares what it was anyway. */ - } - cioheader->_messageCode = MFC_REPLYWRITECORE; - *result_len = sizeof(reply); - break; - } - } - - cioheader->_packetIndex = 0; - cioheader->_packetTotal = 1; - - result = __zoid_allocate_buffer(*result_len); - if (!result) - /* Out of memory. MT-unsafe! */ - result = packet_buffer->data; - memcpy(result, &reply, *result_len); - - return result; -} - -/* - * Part of user API. Returns a (thread-local) pset-local process id. - */ -int -__zoid_calling_process_id(void) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*)pthread_getspecific(thread_specific_key); - return thread_data->calling_process_id; -} - -/* - * Part of user API. Registers an output userbuf and a callback to - * be called when the buffer is no longer needed. - */ -void -__zoid_register_userbuf(void* userbuf, - void (*callback)(void* userbuf, void* priv), - void* priv) -{ - struct thread_specific_data* thread_data = - (struct thread_specific_data*) - pthread_getspecific(thread_specific_key); - - thread_data->userbuf = userbuf; - thread_data->userbuf_cb = callback; - thread_data->userbuf_priv = priv; -} - -/* - * Part of user API. Sends standard output/error message to the right place. - */ -int -__zoid_send_output(int pid, int fd, const char* buffer, int len) -{ - struct CiodOutputHeader header; - int ret; - - if (pthread_mutex_lock(&output_mutex)) - return -1; - - if (ciod_streams_socket == -1) - ret = write(fd, buffer, len); - else - { - header.fd = fd + 1; - header.cpu = cn_procs[pid].cpu; - header.node = cn_procs[pid].p2p_addr; - header.rank = cn_procs[pid].pid; - header.len = len; - - if (write(ciod_streams_socket, &header, sizeof(header)) != - sizeof(header)) - { - ret = -1; - } - else - ret = write(ciod_streams_socket, buffer, len); - } - - if (pthread_mutex_unlock(&output_mutex)) - return -1; - - return ret; -} - -void *ciod_thread_body(void* arg) { - int signum; - struct timeval tv; - double last_packet_received; - struct CioHeader cio; - int i; - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - /* We only support one command at the moment: kill. */ - for (;;) - { - int cmd; - - if (read(ciod_control_socket, &cmd, sizeof(cmd)) != sizeof(cmd)) - perror("read"); - else if (cmd != 4) - fprintf(stderr, "Unknown command read: 0x%x\n", cmd); - else - { - if (read(ciod_control_socket, &signum, sizeof(signum)) != - sizeof(signum)) - { - perror("read"); - } - else - break; - } - } - - /* Disable thread cancellation before we try to acquire any mutexes. */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - - /* We set this flag even before we send anything, for one simple reason: - so that pthread_mutex_lock below can succeed. worker threads check - for this flag and release the receive_queue_mutex if it is set, so that - we can get it. */ - sent_kill_packet = 1; - - enter_critical_section(&tree_locks); - - /* We are going to kill the job. We do that by sending a special kill - packet that wakes up the CNK, which takes over and kills the processes. - There is one but: there might be some ZOID messages in the network, and - if CNK or CIOD see them, they might act weird. - There is no fool-proof solution to this problem. What we do is to - read all the messages that we can on this side. We throw these away; - we are killing the job anyway, so who cares. Hopefully this action - will quiesce the network (every sender waits for a reply before it can - continue), so that we can then wake up the CNK relatively safely. */ - - /* Suck all the packets from the network first. */ - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - for (;;) - { - BGLTreeStatusRegister status; - - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - - if (status.recpktcnt == 0) - { - gettimeofday(&tv, NULL); - /* If we haven't received anything in five seconds, assume we - are done. */ - if (tv.tv_sec + tv.tv_usec * 1e-6 - last_packet_received > 5) - break; - } - else - { - while (status.recpktcnt--) - { - BGLTreePacketHardHeader hardheader; - BGLTreeFIFO_recv(vc0 + BGL_MEM_TREE_HDROUT_OFFSET, - vc0 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - gettimeofday(&tv, NULL); - last_packet_received = tv.tv_sec + tv.tv_usec * 1e-6; - } - } - - /* Now, kill the processes. */ - - cio._dataSize = 0; - cio._messageCode = MTC_KILL; - cio._packetTotal = 1; - cio._packetIndex = 0; - - ((struct MTC_Kill*)packet_buffer->data)->s.signum = signum; - - enter_critical_section(&pending_exit_locks); - - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].status == PROC_STATUS_RUNNING) - { - BGLTreeStatusRegister status; - BGLTreePacketHardHeader hardheader; - - BGLTreePacketHardHeader_InitP2P(&hardheader, PACKET_CLASS_CIO, - 1, cn_procs[i].p2p_addr); - cio._cpu = cn_procs[i].cpu; - cio._rankInCnodes = cn_procs[i].pset_rank; - cio._treeAddress = cn_procs[i].p2p_addr; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - - BGLTreeFIFO_sendH(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)&cio, - (BGLQuad*)packet_buffer->data); - - pending_exit_requests--; - cn_procs[i].status = PROC_STATUS_EXIT; - } - - leave_critical_section(&pending_exit_locks); - - leave_critical_section(&tree_locks); - - return NULL; -} - -/* - * Called after a job has finished to clean up any allocated buffers, - * reset state variables, etc. - */ -void cleanup_traffic(void) -{ -#if 1 - NYI(); -#else - struct zoid_buffer* buf; - - while (send_queue_first) - { - buf = send_queue_first; - send_queue_first = send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (high_priority_send_queue_first) - { - buf = high_priority_send_queue_first; - high_priority_send_queue_first = high_priority_send_queue_first->next; - __zoid_release_buffer(buf->data); - } - - while (receive_queue_first) - { - buf = *receive_queue_first; - *receive_queue_first = (*receive_queue_first)->next; - __zoid_release_buffer(buf->data); - } - - while (ack_queue_first) - { - buf = ack_queue_first; - ack_queue_first = ack_queue_first->next; - __zoid_release_buffer(buf->data); - } - - currently_receiving_large_message = 0; -#endif -} diff --git a/3rdparty/zoid/daemon.2cores/init.c b/3rdparty/zoid/daemon.2cores/init.c deleted file mode 120000 index 42671099da90234a8a563d0b594ae1adc62e396f..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/init.c +++ /dev/null @@ -1 +0,0 @@ -init.c.fastsema \ No newline at end of file diff --git a/3rdparty/zoid/daemon.2cores/init.c.fastsema b/3rdparty/zoid/daemon.2cores/init.c.fastsema deleted file mode 100644 index 30fbd9ddd8a21b0af9c8d2ea42dc2c758cf4aa74..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/init.c.fastsema +++ /dev/null @@ -1,1037 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: init.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <grp.h> -#include <pthread.h> -#include <signal.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/wait.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/ipc.h> -#include <sys/sem.h> - -#include <bglmemmap.h> -#include <bglpersonality.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" -#include "zoid_mapping.h" -#include "bglco.h" - - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ -void *bglco_thread_body(void *); - -void *sram; -void *lockbox; -BGL_Barrier *barrier; - -pid_t child_pid; -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - - -/* Maximum size of a command or reply. In case of "userbuf" commands or - replies, it only applies to the non-userbuf portion. - - There are two limits, because we support two distinct buffer sizes. This - helps save memory, since we need a separate buffers for command and reply, - and normally at most one of them is "large". */ -int max_buffer_size_1 = 4096; -int max_buffer_size_2 = 4096*1024 + 1024; - -/* If the number of tree packets needed to send a command (each packet - carries a max. of 240 data bytes) exceeds ack_threshold, an acknowledgement - from the server will be required (after the first packet). - - This is set to 0, which disable acknowledgements for all but input userbuf - functions. Experiments have shown that the performance is best then. - - Setting it to a higher value (8 seems like a good choice, since that's the - hardware FIFO size on BG/L) enables the acknowledgements. This slows the - frequent-but-short commands down, but it does considerably improve the - fairness (CNs that are closer to the ION in the tree topology do not get - an unfair share of the bandwidth then). -*/ -static int ack_threshold = 0; - -BGLPersonality personality; - -int my_p2p_addr; - -void *vc0, *vc1; - -struct zoid_buffer* packet_buffer; -int total_proc_count; -int pset_size; -int vn_mode; -int pset_proc_count; -int pending_exit_requests; -int abnormal_msg_received; - -struct CNProc* cn_procs; - -int ciod_pid; - -struct zoid_dispatch_entry* dispatch_entries = NULL; - -pthread_key_t thread_specific_key; -static pthread_t* worker_threads; -static pthread_t ciod_thread; -pthread_mutex_t ack_queue_mutex; -pthread_mutex_t output_mutex; - -lock_pair pending_exit_locks; -lock_pair tree_locks; -int *sent_signals; -int *recv_signals; - -char mpi_mapping[5]; -/* Pset ranks may be non-contiguous, because they are always calculated in - the XYZ mapping, while the actual mapping could be different. We use - this array to make them contiguous, which is easier to deal with. */ -int pset_rank_mapping[128]; -/* On the wire we still use the XYZ mapping, so we also need to be able to - translate backwards. */ -int pset_rank_mapping_rev[128]; - -/* Socket connections to ciodb, taken from ciod. */ -int ciod_control_socket, ciod_streams_socket; -/* Identifies whether an interrupting kill packet has been sent by the ciod - thread. */ -int sent_kill_packet; - -/* - * Suspend CIOD, but first, if possible, get the control and stream sockets - * from it (this requires that our ciod_preload.so stub is loaded into it). - */ -static int -suspend_ciod(void) -{ - int fd; - char buf[20]; - int n; - int transfer_sock; - struct sockaddr_un addr; - - ciod_control_socket = ciod_streams_socket = -1; - sent_kill_packet = 0; - - if ((fd = open("/var/run/ciod.440.pid", O_RDONLY)) < 0) - { - /* No CIOD? How was this job started then?! */ - fprintf(stderr, "Failed to find a running CIOD!\n"); - return 1; - } - - if ((n = read(fd, buf, sizeof(buf))) < 1) - { - perror("read ciod pid"); - return 1; - } - - close(fd); - - buf[n - 1] = '\0'; - if ((ciod_pid = strtol(buf, NULL, 10)) < 1) - { - fprintf(stderr, "Invalid ciod pid\n"); - return 1; - } - - if (!(transfer_sock = socket(PF_UNIX, SOCK_STREAM, 0))) - { - perror("create unix domain socket"); - return 1; - } - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, "/var/tmp/zoid.socket"); - if (connect(transfer_sock, (struct sockaddr*)&addr, sizeof(addr))) - { - fprintf(stderr, "Modified CIOD not found. " - "Will print job's output below:\n\n"); - - if (kill(ciod_pid, SIGSTOP) < 0) - { - perror("suspend ciod"); - return 1; - } - } - else - { - struct iovec iov; - struct msghdr msg = {0}; - int fds[2]; - char msgbuf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr *cmsg; - char tmp; - - if (kill(ciod_pid, SIGUSR1) < 0) - { - perror("suspend ciod"); - return 1; - } - - iov.iov_base = &tmp; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = msgbuf; - msg.msg_controllen = sizeof(msgbuf); - - do - { - n = recvmsg(transfer_sock, &msg, 0); - } while (n < 0 && errno == EAGAIN); - - if (n < 0) - { - perror("recvmsg"); - return 1; - } - - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) - { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - ciod_control_socket = fds[0]; - ciod_streams_socket = fds[1]; - break; - } - } - } - - close(transfer_sock); - - return 0; -} - -int -obtain_ciod_credentials(void) -{ - FILE* status; - char buf[1024]; - int euid = -1, egid = -1; - gid_t groups[512]; /* Ids are space-separated, so there can be at most half - as many as the buffer size, less in practice. */ - int ngroups = 0; - char cwd[4097]; - - sprintf(buf, "/proc/%d", ciod_pid); - - if (chdir(buf) < 0) - { - perror("chdir ciod proc"); - return 1; - } - - /* Parse CIOD's /proc/pid/status to obtain uid/gid info. */ - if (!(status = fopen("status", "r"))) - { - perror("fopen status"); - return 1; - } - - while (fgets(buf, sizeof(buf), status)) - { - if (strncmp(buf, "Uid:", strlen("Uid:")) == 0) - { - if (sscanf(buf, "Uid: %*d %d", &euid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Gid:", strlen("Gid:")) == 0) - { - if (sscanf(buf, "Gid: %*d %d", &egid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Groups:", strlen("Groups:")) == 0) - { - char* bptr; - - if (buf[strlen(buf) - 1] != '\n') - { - /* We provide a 1K buffer. Apparently, it wasn't enough. - Too bad -- we do the easy thing and bail out. Obviously, - if this ever gets triggered, the buffer size can be - increased, or the code made to handle arbitrarily long - input lines. */ - fprintf(stderr, "Implementation limit reached\n"); - return 1; - } - - for (bptr = buf + strlen("Groups:"), ngroups = 0; *bptr;) - { - char* new_bptr; - - groups[ngroups] = strtol(bptr, &new_bptr, 10); - if (new_bptr == bptr) - break; - ngroups++; - bptr = new_bptr; - } - } - } - fclose(status); - - if (euid == -1 || egid == -1 || ngroups < 1) - { - fprintf(stderr, "Expected data not found in /proc/pid/status\n"); - return 1; - } - - /* Now get the initial current directory. */ - { - int n; - - if ((n = readlink("cwd", cwd, sizeof(cwd) - 1)) < 0) - { - perror("read ciod cwd"); - return 1; - } - cwd[n] = '\0'; - } - - /* We have all the data we need. Adjust our process to match CIOD. */ - if (chdir(cwd) < 0) - { - perror("initial chdir"); - return 1; - } - if (setgroups(ngroups, groups) < 0) - { - perror("setgroups"); - return 1; - } - if (setegid(egid) < 0) - { - perror("setegid"); - return 1; - } - if (seteuid(euid) < 0) - { - perror("seteuid"); - return 1; - } - - return 0; -} - -/* - * Called from the backend shared objects to register functions on the server - * side. - */ -void __zoid_register_functions(int header_id, - struct dispatch_array* dispatch_array, - int array_size, void (*init_func)(int), - void (*fini_func)(void)) -{ - struct zoid_dispatch_entry* entry; - - if (!(entry = malloc(sizeof(*entry)))) - { - fprintf(stderr, "Not enough memory!\n"); - return; - } - - entry->header_id = header_id; - entry->dispatch_array = dispatch_array; - entry->array_size = array_size; - entry->init_func = init_func; - entry->fini_func = fini_func; - - entry->next = dispatch_entries; - dispatch_entries = entry; -} - -/* - * Calculates the number of processes that are actually expected in the - * current pset. This is non-trivial because the psets need not be fully - * filled in, and the mapping can be influenced by the user. - * We also fill in two arrays to help with the mapping back and forth later. - */ -int -calculate_pset_size(void) -{ - int x_origin, y_origin, z_origin; - int x_size, y_size, z_size, t_size; - int t; - int pset_size, pset_rank; - - /* Get the dimensions of the pset. */ - x_size = BGLPersonality_xPsetSize(&personality); - y_size = BGLPersonality_yPsetSize(&personality); - z_size = BGLPersonality_zPsetSize(&personality); - t_size = /* vn_mode ? 2 : 1 */ 2; /* We assume vn_mode so that also the - t==1 entries are initialized. */ - - /* Get the coordinates of the first node of a pset. */ - x_origin = BGLPersonality_xPsetOrigin(&personality); - y_origin = BGLPersonality_yPsetOrigin(&personality); - z_origin = BGLPersonality_zPsetOrigin(&personality); - - /* Check which CPUs in the pset are occupied. - Do it in the XYZT order, which is the order used by pset_rank. */ - pset_size = 0; - pset_rank = 0; - for (t = 0; t < t_size; t++) - { - int z; - for (z = z_origin; z < z_origin + z_size; z++) - { - int y; - for (y = y_origin; y < y_origin + y_size; y++) - { - int x; - for (x = x_origin; x < x_origin + x_size; x++) - { - unsigned rank; - - if (__zoid_mapping_to_rank(x, y, z, t, &rank) == 0) - { - pset_rank_mapping[pset_rank] = pset_size; - pset_rank_mapping_rev[pset_size] = pset_rank; - pset_size++; - } - else - pset_rank_mapping[pset_rank] = -1; - - pset_rank++; - } - } - } - } - - return pset_size; -} - -#define STRINGIFY(x) #x -#define TOSTRING(x) STRINGIFY(x) - -static int -analyze_cmdline(int argc, char* argv[]) -{ - int c; - - if (argc == 2 && strcmp(argv[1], "--version") == 0) - { - fprintf(stderr, "zoid version 1.0" -#ifdef ZEPTO_VERSION - " (ZeptoOS version " TOSTRING(ZEPTO_VERSION) ")" -#endif - "\n"); - exit(0); - } - - while ((c = getopt(argc, argv, "a:b:m:")) != -1) - { - switch (c) - { - char* str; - - case 'a': - ack_threshold = strtol(optarg, &str, 10); - if (*str || ack_threshold < 0) - { - fprintf(stderr, - "zoid: invalid acknowledgement threshold!\n"); - return 1; - } - break; - - case 'b': - max_buffer_size_1 = strtol(optarg, &str, 10); - if ((*str && *str != ':') || - max_buffer_size_1 < TREE_DATA_SIZE) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - if (*str) - { - /* Skip ':'. */ - str++; - max_buffer_size_2 = strtol(str, &str, 10); - if (*str || max_buffer_size_2 < TREE_DATA_SIZE || - max_buffer_size_2 < max_buffer_size_1) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - } - else - max_buffer_size_2 = max_buffer_size_1; - break; - - case 'm': - for (str = strtok(optarg, ":"); str; str = strtok(NULL, ":")) - if (!dlopen(str, RTLD_LAZY | RTLD_GLOBAL)) - { - fprintf(stderr, "zoid: failed to open %s: %s\n", str, - dlerror()); - return 1; - } - break; - - default: - fprintf(stderr, - "Usage: %s [-a <ack_threshold>] [-b <buffer_size>]\n", - argv[0]); - return 1; - } - } - - return 0; -} - -int -main(int argc, char* argv[]) -{ - int fd_pers, fd0, fd1, fd_mem; - struct thread_specific_data thread_data = {-1}; - - if (analyze_cmdline(argc, argv)) - return 1; - - if ((fd_pers = open("/proc/personality", O_RDONLY)) < 0) - { - perror("open /proc/personality"); - fprintf(stderr, "Please run me on an I/O node!\n"); - return 1; - } - if (read(fd_pers, &personality, sizeof(personality)) != - sizeof(personality)) - { - perror("read personality"); - return 1; - } - close(fd_pers); - - my_p2p_addr = BGLPersonality_treeP2PAddr(&personality); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - fd_mem = open("/dev/mem", O_RDWR); - if(fd_mem < 0) { - perror("open /dev/mem"); - return -1; - } - - sram = mmap(0, BGL_MEM_SRAM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_SRAM_PHYS); - if(sram == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - - lockbox = mmap(0, BGL_MEM_LOCKBOX_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_LOCKBOX_PHYS); - if(lockbox == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - close(fd_mem); - - recv_queue = sram + 9*1024; - init_lock_pair(&recv_queue_locks); - init_zoid_buf_pipe(recv_queue, &recv_queue_locks); - - send_queue = sram + 9*1024 + sizeof(zoid_buf_pipe); - init_lock_pair(&send_queue_locks); - init_zoid_buf_pipe(send_queue, &send_queue_locks); - - high_priority_send_queue = sram + 9*1024 + 2*sizeof(zoid_buf_pipe); - init_lock_pair(&high_priority_send_queue_locks); - init_zoid_buf_pipe(high_priority_send_queue, &high_priority_send_queue_locks); - - ack_queue = sram + 9*1024 + 3*sizeof(zoid_buf_pipe); - init_lock_pair(&ack_queue_locks); - init_zoid_buf_pipe(ack_queue, &ack_queue_locks); - - barrier = lockbox + BARRIER_OFFSET(10); - - init_lock_pair(&pending_exit_locks); - init_lock_pair(&tree_locks); - -#if defined L1_CONSISTENCY_IN_SOFTWARE - memset(l1flusher, 0, sizeof(l1flusher)); -#endif - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - /* Open both virtual channels. */ - fd0 = open("/dev/tree0", O_RDWR); - if (fd0 < 0) - { - perror("open /dev/tree0"); - return 1; - } - fd1 = open("/dev/tree1", O_RDWR); - if (fd1 < 0) - { - perror("open /dev/tree1"); - return 1; - } - vc0 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0); - if (vc0 == MAP_FAILED) - { - perror("mmap vc0"); - return 1; - } - vc1 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0); - if (vc1 == MAP_FAILED) - { - perror("mmap vc1"); - return 1; - } - close(fd0); - close(fd1); - - max_buffer_size_1 = TREE_BUFFER_ROUNDUP(max_buffer_size_1); - max_buffer_size_2 = TREE_BUFFER_ROUNDUP(max_buffer_size_2); - - allocater_init(); - - { - void* packet_buffer_ptr = &packet_buffer; - if (posix_memalign(packet_buffer_ptr, 32, - sizeof(*packet_buffer) + TREE_PACKET_SIZE)) - { - perror("allocate aligned memory"); - return 1; - } - } - - if (pthread_key_create(&thread_specific_key, 0)) - { - perror("create thread key"); - return 1; - } - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return 1; - } - if (pthread_mutex_init(&ack_queue_mutex, NULL)) - { - perror("create acknowledgement queue mutex"); - return 1; - } - if (pthread_mutex_init(&output_mutex, NULL)) - { - perror("create output mutex"); - return 1; - } - - /* A single iteration of this loop handles a complete job, from - initialization to termination. */ - for (;;) - { - int init_recv, i; - BGLTreePacketHardHeader hardheader; - BGLTreeStatusRegister status; - struct InitMsgReply init_msg_rep; - - packet_buffer->userbuf_out = NULL; - packet_buffer->userbuf_in = NULL; - packet_buffer->ack_sent = 0; - - pset_size = BGLPersonality_numNodesInPset(&personality); - /* This is only a very rough estimate. There might be fewer processes - if the partition is not fully occupied, or more if we are in VN - mode. */ - pset_proc_count = pset_size; - - /* Receive init messages from all processes. As a side effect, obtain - the number of processes the job consists of. */ - for (init_recv = 0; init_recv < pset_proc_count; init_recv++) - { - int pset_cpu_rank; - struct InitMsg* init_msg; - struct timespec ts = {0, 10000000}; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - - /* In case ZOID is invoked even for standard, CIOD jobs, - we want to minimize its impact. So to prevent busy-looping, - we sleep for 0.01 sec. */ - if (status.recpktcnt == 0) - nanosleep(&ts, NULL); - } while (status.recpktcnt == 0); - - BGLTreeFIFO_recv(vc1 + BGL_MEM_TREE_HDROUT_OFFSET, - vc1 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - if (hardheader.p2p.pclass != PACKET_CLASS_CIO || - !hardheader.p2p.p2p) - { - fprintf(stderr, - "Unexpected message received during init phase!\n"); - return 1; - } - - init_msg = (struct InitMsg*)packet_buffer->data; - - /* If this is the first message, read the total number of compute - node processes. */ - if (!init_recv) - { - int i; - - total_proc_count = init_msg->total_proc; -#if 0 - fprintf(stderr, "Job consists of %d processes\n", - total_proc_count); -#endif - strcpy(mpi_mapping, init_msg->mapping); - vn_mode = init_msg->vn_mode; - - if (__zoid_mapping_init(mpi_mapping, total_proc_count, vn_mode, - &personality) == 0) - pset_proc_count = calculate_pset_size(); - else - abort(); -#if 0 - fprintf(stderr, "Expecting %d init msgs\n", pset_proc_count); -#endif - //cn_procs = zoid_alloc(pset_proc_count * sizeof(*cn_procs)); - cn_procs = (struct CNProc*)(sram + 10*1024); - assert(cn_procs); - - for (i = 0; i < pset_proc_count; i++) - { - cn_procs[i].buffer = NULL; - cn_procs[i].current_buf = NULL; - } - } - - if (init_msg->pset_cpu_rank >= pset_size * (vn_mode ? 2 : 1) || - pset_rank_mapping[init_msg->pset_cpu_rank] == -1) - { - fprintf(stderr, "Init message has invalid rank %d\n", - init_msg->pset_cpu_rank); - return 1; - } - - pset_cpu_rank = pset_rank_mapping[init_msg->pset_cpu_rank]; - cn_procs[pset_cpu_rank].pid = init_msg->pid; - cn_procs[pset_cpu_rank].p2p_addr = init_msg->p2p_addr; - cn_procs[pset_cpu_rank].pset_rank = init_msg->pset_rank; - cn_procs[pset_cpu_rank].cpu = init_msg->cpu; - cn_procs[pset_cpu_rank].status = PROC_STATUS_RUNNING; -#if 0 - fprintf(stderr, "Received init message from proc %d\n", - cn_procs[pset_cpu_rank].pid); -#endif - } /* for (init_recv) */ - pending_exit_requests = pset_proc_count; -#if 0 - fprintf(stderr, "Received all msgs, suspending CIOD...\n"); -#endif - - /* Ugly hack warning! - - Here's what seems to be happening: even if a job consists of just - one process, all compute nodes belonging to the partition where the - job will run are booted. Those nodes that don't have any processes - to run immediately send a REQUESTRESET message to CIOD. What can - happen is that nodes with a process on them are faster than those - without and send the ZOID init message first. In that case ZOID - suspends CIOD before CIOD receives the REQUESTRESET messages. - Those messages are then read by ZOID, which spits out warning - messages about invalid packets coming from unknown sources (jobs - still succeed, though). - - The sleep below is an attempt to avoid this race condition. It - gives CIOD extra time to receive the REQUESTRESET messages. It - actually seems to work, believe it or not. */ - sleep(2); - - if (suspend_ciod()) - return 1; - - /* Send an ACK. */ - - init_msg_rep.max_buffer_size = max_buffer_size_2; - init_msg_rep.ack_threshold = ack_threshold; - memcpy(packet_buffer->data, &init_msg_rep, sizeof(init_msg_rep)); - - BGLTreePacketHardHeader_InitGlobal(&hardheader, PACKET_CLASS_CIO, 0, - BGLTreeCombineOp_NONE, 0, 1); - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc1 + BGL_MEM_TREE_HDRIN_OFFSET, - vc1 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - /* At this point, CIOD is suspended, so we can take over its - responsibilities. First, though, we need to change user/group ID, - as well as current working directory (it looks like it initially - points to where the job was submitted from, which is convenient). */ - if (obtain_ciod_credentials()) - return 1; - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->init_func) - entry->init_func(pset_proc_count); - } - - int r = syscall(269, 0); /* bglco_fast_sem */ - if(r) { - perror("bglco_fast_sem failed"); - exit(-1); - } - - - worker_threads = malloc(pset_proc_count * sizeof(*worker_threads)); - assert(worker_threads); - - for (i = 0; i < pset_proc_count; i++) - if (pthread_create(&worker_threads[i], NULL, worker_thread_body, - NULL)) - { - perror("create worker thread"); - return 1; - } - - if (ciod_control_socket != -1) - if (pthread_create(&ciod_thread, NULL, ciod_thread_body, NULL)) - { - perror("create ciod thread"); - return 1; - } - - { - /* Can we do without this ??? */ - volatile char *ptr; - extern char __data_start, _end; - - for (ptr = &__data_start; ptr < &_end; ptr += 4096) - *ptr = *ptr; - } - -#if defined L1_CONSISTENCY_IN_SOFTWARE - flush_L1_all(); -#endif - - child_pid = fork(); - if(child_pid == 0) { - bglco_loop(); - return 0; - } else if(child_pid < 0) { - perror("fork failed"); - return 1; - } - - /* All the activities are performed by worker threads at this point. - We just wait for these threads to finish. */ - - waitpid(child_pid, NULL, 0); - - enter_critical_section(recv_queue->locks); - recv_queue->first = (struct zoid_buffer*)1; - leave_critical_section(recv_queue->locks); - - /* HACK */ - for(i = 0; i < pset_proc_count; i++) { - int r = syscall(269, 1); /* bglco_fast_sem */ - if(r) { - perror("bglco_fast_sem failed"); - exit(-1); - } - } - - for(i = 0; i < pset_proc_count; i++) - pthread_join(worker_threads[i], NULL); - - /* Job finished -- clean up. */ - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->fini_func) - entry->fini_func(); - } - - if (ciod_control_socket != -1) - { - if (!sent_kill_packet) - pthread_cancel(ciod_thread); - pthread_join(ciod_thread, NULL); - close(ciod_control_socket); - } - if (ciod_streams_socket != -1) - close(ciod_streams_socket); - - if (abnormal_msg_received) - { - fprintf(stderr, "BECAUSE OF ABNORMAL MESSAGES ABOVE, CIOD WILL BE " - "KILLED.\n"); - kill(ciod_pid, SIGCONT); - sleep(5); - kill(ciod_pid, SIGTERM); - return 1; - } - else - { -#if 0 - fprintf(stderr, "All processes terminated normally; " - "attempting to resume CIOD...\n"); -#endif - if (!sent_kill_packet) - { - /* First send an ACK to the processes, so that they can - terminate. */ - - BGLTreePacketHardHeader_InitGlobal(&hardheader, - PACKET_CLASS_CIO, - 0, BGLTreeCombineOp_NONE, - 0, 1); - /* We don't care what's in the packet_buffer. */ - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - - kill(ciod_pid, SIGCONT); - } -#if 0 - { - extern int poll_hist[201]; - int i; - - fprintf(stderr, "\n\nPoll histogram:\n"); - for (i = 1; i < sizeof(poll_hist) / sizeof(poll_hist[0]); i++) - if (poll_hist[i]) - fprintf(stderr, "%d %d\n", i, poll_hist[i]); - } -#endif - - /* One of the things we don't free are zoid message buffers. I'm - simply worried the repeated freeing and allocation of these large - memory areas could result in memory fragmentation. */ - - cleanup_traffic(); - - free(worker_threads); - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].buffer) - __zoid_release_buffer(cn_procs[i].buffer->data); - //free(cn_procs); - - if (seteuid(0) < 0) - { - perror("seteuid"); - return 1; - } - if (setegid(0) < 0) - { - perror("seteuid"); - return 1; - } - } /* for (;;) */ - - /* The code below is unreachable and is only provided for the sake of - completeness. */ - - free(packet_buffer); - - while (dispatch_entries) - { - struct zoid_dispatch_entry* entry = dispatch_entries; - dispatch_entries = dispatch_entries->next; - free(entry); - } - - return 0; -} - - - -void init_lock_pair(lock_pair *locks) { - static int next_lock = 0; - - if (pthread_mutex_init(&(locks->pt_mutex), NULL)) { - perror("lock_pair_init"); - exit(-1); - } - locks->hw_mutex = lockbox + MUTEX_OFFSET(next_lock++); - BGL_Mutex_Release(locks->hw_mutex); -} - - - -inline void enter_critical_section(lock_pair *locks) { - if(pthread_mutex_lock(&(locks->pt_mutex))) - assert(0); - BGL_Mutex_Acquire(locks->hw_mutex); -} - - - -inline void leave_critical_section(lock_pair *locks) { - BGL_Mutex_Release(locks->hw_mutex); - if(pthread_mutex_unlock(&(locks->pt_mutex))) - assert(0); -} diff --git a/3rdparty/zoid/daemon.2cores/init.c.polling b/3rdparty/zoid/daemon.2cores/init.c.polling deleted file mode 100644 index 3e75c0eccbda3118c25574788bfffde9d9484a1e..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/init.c.polling +++ /dev/null @@ -1,1025 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: init.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <grp.h> -#include <pthread.h> -#include <signal.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/wait.h> -#include <unistd.h> - -#include <bglmemmap.h> -#include <bglpersonality.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" -#include "zoid_mapping.h" -#include "bglco.h" - - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ -void *bglco_thread_body(void *); - -void *sram; -void *lockbox; -BGL_Barrier *barrier; - -pid_t child_pid; -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - - -/* Maximum size of a command or reply. In case of "userbuf" commands or - replies, it only applies to the non-userbuf portion. - - There are two limits, because we support two distinct buffer sizes. This - helps save memory, since we need a separate buffers for command and reply, - and normally at most one of them is "large". */ -int max_buffer_size_1 = 4096; -int max_buffer_size_2 = 4096*1024 + 1024; - -/* If the number of tree packets needed to send a command (each packet - carries a max. of 240 data bytes) exceeds ack_threshold, an acknowledgement - from the server will be required (after the first packet). - - This is set to 0, which disable acknowledgements for all but input userbuf - functions. Experiments have shown that the performance is best then. - - Setting it to a higher value (8 seems like a good choice, since that's the - hardware FIFO size on BG/L) enables the acknowledgements. This slows the - frequent-but-short commands down, but it does considerably improve the - fairness (CNs that are closer to the ION in the tree topology do not get - an unfair share of the bandwidth then). -*/ -static int ack_threshold = 0; - -BGLPersonality personality; - -int my_p2p_addr; - -void *vc0, *vc1; - -struct zoid_buffer* packet_buffer; -int total_proc_count; -int pset_size; -int vn_mode; -int pset_proc_count; -int pending_exit_requests; -int abnormal_msg_received; - -struct CNProc* cn_procs; - -int ciod_pid; - -struct zoid_dispatch_entry* dispatch_entries = NULL; - -pthread_key_t thread_specific_key; -static pthread_t* worker_threads; -static pthread_t ciod_thread; -pthread_mutex_t ack_queue_mutex; -pthread_mutex_t output_mutex; - -lock_pair pending_exit_locks; -lock_pair tree_locks; -int *sent_signals; -int *recv_signals; - -char mpi_mapping[5]; -/* Pset ranks may be non-contiguous, because they are always calculated in - the XYZ mapping, while the actual mapping could be different. We use - this array to make them contiguous, which is easier to deal with. */ -int pset_rank_mapping[128]; -/* On the wire we still use the XYZ mapping, so we also need to be able to - translate backwards. */ -int pset_rank_mapping_rev[128]; - -/* Socket connections to ciodb, taken from ciod. */ -int ciod_control_socket, ciod_streams_socket; -/* Identifies whether an interrupting kill packet has been sent by the ciod - thread. */ -int sent_kill_packet; - -sigset_t sigusr1_set; - -char l1flusher[32*1024] __attribute__ ((aligned (32))); - -/* - * Suspend CIOD, but first, if possible, get the control and stream sockets - * from it (this requires that our ciod_preload.so stub is loaded into it). - */ -static int -suspend_ciod(void) -{ - int fd; - char buf[20]; - int n; - int transfer_sock; - struct sockaddr_un addr; - - ciod_control_socket = ciod_streams_socket = -1; - sent_kill_packet = 0; - - if ((fd = open("/var/run/ciod.440.pid", O_RDONLY)) < 0) - { - /* No CIOD? How was this job started then?! */ - fprintf(stderr, "Failed to find a running CIOD!\n"); - return 1; - } - - if ((n = read(fd, buf, sizeof(buf))) < 1) - { - perror("read ciod pid"); - return 1; - } - - close(fd); - - buf[n - 1] = '\0'; - if ((ciod_pid = strtol(buf, NULL, 10)) < 1) - { - fprintf(stderr, "Invalid ciod pid\n"); - return 1; - } - - if (!(transfer_sock = socket(PF_UNIX, SOCK_STREAM, 0))) - { - perror("create unix domain socket"); - return 1; - } - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, "/var/tmp/zoid.socket"); - if (connect(transfer_sock, (struct sockaddr*)&addr, sizeof(addr))) - { - fprintf(stderr, "Modified CIOD not found. " - "Will print job's output below:\n\n"); - - if (kill(ciod_pid, SIGSTOP) < 0) - { - perror("suspend ciod"); - return 1; - } - } - else - { - struct iovec iov; - struct msghdr msg = {0}; - int fds[2]; - char msgbuf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr *cmsg; - char tmp; - - if (kill(ciod_pid, SIGUSR1) < 0) - { - perror("suspend ciod"); - return 1; - } - - iov.iov_base = &tmp; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = msgbuf; - msg.msg_controllen = sizeof(msgbuf); - - do - { - n = recvmsg(transfer_sock, &msg, 0); - } while (n < 0 && errno == EAGAIN); - - if (n < 0) - { - perror("recvmsg"); - return 1; - } - - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) - { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - ciod_control_socket = fds[0]; - ciod_streams_socket = fds[1]; - break; - } - } - } - - close(transfer_sock); - - return 0; -} - -int -obtain_ciod_credentials(void) -{ - FILE* status; - char buf[1024]; - int euid = -1, egid = -1; - gid_t groups[512]; /* Ids are space-separated, so there can be at most half - as many as the buffer size, less in practice. */ - int ngroups = 0; - char cwd[4097]; - - sprintf(buf, "/proc/%d", ciod_pid); - - if (chdir(buf) < 0) - { - perror("chdir ciod proc"); - return 1; - } - - /* Parse CIOD's /proc/pid/status to obtain uid/gid info. */ - if (!(status = fopen("status", "r"))) - { - perror("fopen status"); - return 1; - } - - while (fgets(buf, sizeof(buf), status)) - { - if (strncmp(buf, "Uid:", strlen("Uid:")) == 0) - { - if (sscanf(buf, "Uid: %*d %d", &euid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Gid:", strlen("Gid:")) == 0) - { - if (sscanf(buf, "Gid: %*d %d", &egid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Groups:", strlen("Groups:")) == 0) - { - char* bptr; - - if (buf[strlen(buf) - 1] != '\n') - { - /* We provide a 1K buffer. Apparently, it wasn't enough. - Too bad -- we do the easy thing and bail out. Obviously, - if this ever gets triggered, the buffer size can be - increased, or the code made to handle arbitrarily long - input lines. */ - fprintf(stderr, "Implementation limit reached\n"); - return 1; - } - - for (bptr = buf + strlen("Groups:"), ngroups = 0; *bptr;) - { - char* new_bptr; - - groups[ngroups] = strtol(bptr, &new_bptr, 10); - if (new_bptr == bptr) - break; - ngroups++; - bptr = new_bptr; - } - } - } - fclose(status); - - if (euid == -1 || egid == -1 || ngroups < 1) - { - fprintf(stderr, "Expected data not found in /proc/pid/status\n"); - return 1; - } - - /* Now get the initial current directory. */ - { - int n; - - if ((n = readlink("cwd", cwd, sizeof(cwd) - 1)) < 0) - { - perror("read ciod cwd"); - return 1; - } - cwd[n] = '\0'; - } - - /* We have all the data we need. Adjust our process to match CIOD. */ - if (chdir(cwd) < 0) - { - perror("initial chdir"); - return 1; - } - if (setgroups(ngroups, groups) < 0) - { - perror("setgroups"); - return 1; - } - if (setegid(egid) < 0) - { - perror("setegid"); - return 1; - } - if (seteuid(euid) < 0) - { - perror("seteuid"); - return 1; - } - - return 0; -} - -/* - * Called from the backend shared objects to register functions on the server - * side. - */ -void __zoid_register_functions(int header_id, - struct dispatch_array* dispatch_array, - int array_size, void (*init_func)(int), - void (*fini_func)(void)) -{ - struct zoid_dispatch_entry* entry; - - if (!(entry = malloc(sizeof(*entry)))) - { - fprintf(stderr, "Not enough memory!\n"); - return; - } - - entry->header_id = header_id; - entry->dispatch_array = dispatch_array; - entry->array_size = array_size; - entry->init_func = init_func; - entry->fini_func = fini_func; - - entry->next = dispatch_entries; - dispatch_entries = entry; -} - -/* - * Calculates the number of processes that are actually expected in the - * current pset. This is non-trivial because the psets need not be fully - * filled in, and the mapping can be influenced by the user. - * We also fill in two arrays to help with the mapping back and forth later. - */ -int -calculate_pset_size(void) -{ - int x_origin, y_origin, z_origin; - int x_size, y_size, z_size, t_size; - int t; - int pset_size, pset_rank; - - /* Get the dimensions of the pset. */ - x_size = BGLPersonality_xPsetSize(&personality); - y_size = BGLPersonality_yPsetSize(&personality); - z_size = BGLPersonality_zPsetSize(&personality); - t_size = /* vn_mode ? 2 : 1 */ 2; /* We assume vn_mode so that also the - t==1 entries are initialized. */ - - /* Get the coordinates of the first node of a pset. */ - x_origin = BGLPersonality_xPsetOrigin(&personality); - y_origin = BGLPersonality_yPsetOrigin(&personality); - z_origin = BGLPersonality_zPsetOrigin(&personality); - - /* Check which CPUs in the pset are occupied. - Do it in the XYZT order, which is the order used by pset_rank. */ - pset_size = 0; - pset_rank = 0; - for (t = 0; t < t_size; t++) - { - int z; - for (z = z_origin; z < z_origin + z_size; z++) - { - int y; - for (y = y_origin; y < y_origin + y_size; y++) - { - int x; - for (x = x_origin; x < x_origin + x_size; x++) - { - unsigned rank; - - if (__zoid_mapping_to_rank(x, y, z, t, &rank) == 0) - { - pset_rank_mapping[pset_rank] = pset_size; - pset_rank_mapping_rev[pset_size] = pset_rank; - pset_size++; - } - else - pset_rank_mapping[pset_rank] = -1; - - pset_rank++; - } - } - } - } - - return pset_size; -} - -#define STRINGIFY(x) #x -#define TOSTRING(x) STRINGIFY(x) - -static int -analyze_cmdline(int argc, char* argv[]) -{ - int c; - - if (argc == 2 && strcmp(argv[1], "--version") == 0) - { - fprintf(stderr, "zoid version 1.0" -#ifdef ZEPTO_VERSION - " (ZeptoOS version " TOSTRING(ZEPTO_VERSION) ")" -#endif - "\n"); - exit(0); - } - - while ((c = getopt(argc, argv, "a:b:m:")) != -1) - { - switch (c) - { - char* str; - - case 'a': - ack_threshold = strtol(optarg, &str, 10); - if (*str || ack_threshold < 0) - { - fprintf(stderr, - "zoid: invalid acknowledgement threshold!\n"); - return 1; - } - break; - - case 'b': - max_buffer_size_1 = strtol(optarg, &str, 10); - if ((*str && *str != ':') || - max_buffer_size_1 < TREE_DATA_SIZE) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - if (*str) - { - /* Skip ':'. */ - str++; - max_buffer_size_2 = strtol(str, &str, 10); - if (*str || max_buffer_size_2 < TREE_DATA_SIZE || - max_buffer_size_2 < max_buffer_size_1) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - } - else - max_buffer_size_2 = max_buffer_size_1; - break; - - case 'm': - for (str = strtok(optarg, ":"); str; str = strtok(NULL, ":")) - if (!dlopen(str, RTLD_LAZY | RTLD_GLOBAL)) - { - fprintf(stderr, "zoid: failed to open %s: %s\n", str, - dlerror()); - return 1; - } - break; - - default: - fprintf(stderr, - "Usage: %s [-a <ack_threshold>] [-b <buffer_size>]\n", - argv[0]); - return 1; - } - } - - return 0; -} - -int -main(int argc, char* argv[]) -{ - int fd_pers, fd0, fd1, fd_mem; - struct thread_specific_data thread_data = {-1}; - - if (analyze_cmdline(argc, argv)) - return 1; - - if ((fd_pers = open("/proc/personality", O_RDONLY)) < 0) - { - perror("open /proc/personality"); - fprintf(stderr, "Please run me on an I/O node!\n"); - return 1; - } - if (read(fd_pers, &personality, sizeof(personality)) != - sizeof(personality)) - { - perror("read personality"); - return 1; - } - close(fd_pers); - - my_p2p_addr = BGLPersonality_treeP2PAddr(&personality); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - fd_mem = open("/dev/mem", O_RDWR); - if(fd_mem < 0) { - perror("open /dev/mem"); - return -1; - } - - sram = mmap(0, BGL_MEM_SRAM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_SRAM_PHYS); - if(sram == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - - lockbox = mmap(0, BGL_MEM_LOCKBOX_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_LOCKBOX_PHYS); - if(lockbox == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - close(fd_mem); - - recv_queue = sram + 9*1024; - init_lock_pair(&recv_queue_locks); - init_zoid_buf_pipe(recv_queue, &recv_queue_locks); - - send_queue = sram + 9*1024 + sizeof(zoid_buf_pipe); - init_lock_pair(&send_queue_locks); - init_zoid_buf_pipe(send_queue, &send_queue_locks); - - high_priority_send_queue = sram + 9*1024 + 2*sizeof(zoid_buf_pipe); - init_lock_pair(&high_priority_send_queue_locks); - init_zoid_buf_pipe(high_priority_send_queue, &high_priority_send_queue_locks); - - ack_queue = sram + 9*1024 + 3*sizeof(zoid_buf_pipe); - init_lock_pair(&ack_queue_locks); - init_zoid_buf_pipe(ack_queue, &ack_queue_locks); - - sent_signals = sram + 9*1024 + 3 * sizeof(zoid_buf_pipe); - recv_signals = sent_signals + 1; - *sent_signals = 0; - *recv_signals = 0; - - barrier = lockbox + BARRIER_OFFSET(10); - - init_lock_pair(&pending_exit_locks); - init_lock_pair(&tree_locks); - - memset(l1flusher, 0, sizeof(l1flusher)); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - /* Open both virtual channels. */ - fd0 = open("/dev/tree0", O_RDWR); - if (fd0 < 0) - { - perror("open /dev/tree0"); - return 1; - } - fd1 = open("/dev/tree1", O_RDWR); - if (fd1 < 0) - { - perror("open /dev/tree1"); - return 1; - } - vc0 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0); - if (vc0 == MAP_FAILED) - { - perror("mmap vc0"); - return 1; - } - vc1 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0); - if (vc1 == MAP_FAILED) - { - perror("mmap vc1"); - return 1; - } - close(fd0); - close(fd1); - - max_buffer_size_1 = TREE_BUFFER_ROUNDUP(max_buffer_size_1); - max_buffer_size_2 = TREE_BUFFER_ROUNDUP(max_buffer_size_2); - - allocater_init(); - - { - void* packet_buffer_ptr = &packet_buffer; - if (posix_memalign(packet_buffer_ptr, 16, - sizeof(*packet_buffer) + TREE_PACKET_SIZE)) - { - perror("allocate aligned memory"); - return 1; - } - } - - if (pthread_key_create(&thread_specific_key, 0)) - { - perror("create thread key"); - return 1; - } - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return 1; - } - if (pthread_mutex_init(&ack_queue_mutex, NULL)) - { - perror("create acknowledgement queue mutex"); - return 1; - } - if (pthread_mutex_init(&output_mutex, NULL)) - { - perror("create output mutex"); - return 1; - } - - sigemptyset(&sigusr1_set); - sigaddset(&sigusr1_set, SIGRTMIN+1); - sigprocmask(SIG_BLOCK, &sigusr1_set, NULL); - - /* A single iteration of this loop handles a complete job, from - initialization to termination. */ - for (;;) - { - int init_recv, i; - BGLTreePacketHardHeader hardheader; - BGLTreeStatusRegister status; - struct InitMsgReply init_msg_rep; - - packet_buffer->userbuf_out = NULL; - packet_buffer->userbuf_in = NULL; - packet_buffer->ack_sent = 0; - - pset_size = BGLPersonality_numNodesInPset(&personality); - /* This is only a very rough estimate. There might be fewer processes - if the partition is not fully occupied, or more if we are in VN - mode. */ - pset_proc_count = pset_size; - - /* Receive init messages from all processes. As a side effect, obtain - the number of processes the job consists of. */ - for (init_recv = 0; init_recv < pset_proc_count; init_recv++) - { - int pset_cpu_rank; - struct InitMsg* init_msg; - struct timespec ts = {0, 10000000}; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - - /* In case ZOID is invoked even for standard, CIOD jobs, - we want to minimize its impact. So to prevent busy-looping, - we sleep for 0.01 sec. */ - if (status.recpktcnt == 0) - nanosleep(&ts, NULL); - } while (status.recpktcnt == 0); - - BGLTreeFIFO_recv(vc1 + BGL_MEM_TREE_HDROUT_OFFSET, - vc1 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - if (hardheader.p2p.pclass != PACKET_CLASS_CIO || - !hardheader.p2p.p2p) - { - fprintf(stderr, - "Unexpected message received during init phase!\n"); - return 1; - } - - init_msg = (struct InitMsg*)packet_buffer->data; - - /* If this is the first message, read the total number of compute - node processes. */ - if (!init_recv) - { - int i; - - total_proc_count = init_msg->total_proc; -#if 0 - fprintf(stderr, "Job consists of %d processes\n", - total_proc_count); -#endif - strcpy(mpi_mapping, init_msg->mapping); - vn_mode = init_msg->vn_mode; - - if (__zoid_mapping_init(mpi_mapping, total_proc_count, vn_mode, - &personality) == 0) - pset_proc_count = calculate_pset_size(); - else - abort(); -#if 0 - fprintf(stderr, "Expecting %d init msgs\n", pset_proc_count); -#endif - //cn_procs = zoid_alloc(pset_proc_count * sizeof(*cn_procs)); - cn_procs = (struct CNProc*)(sram + 10*1024); - assert(cn_procs); - - for (i = 0; i < pset_proc_count; i++) - { - cn_procs[i].buffer = NULL; - cn_procs[i].current_buf = NULL; - } - } - - if (init_msg->pset_cpu_rank >= pset_size * (vn_mode ? 2 : 1) || - pset_rank_mapping[init_msg->pset_cpu_rank] == -1) - { - fprintf(stderr, "Init message has invalid rank %d\n", - init_msg->pset_cpu_rank); - return 1; - } - - pset_cpu_rank = pset_rank_mapping[init_msg->pset_cpu_rank]; - cn_procs[pset_cpu_rank].pid = init_msg->pid; - cn_procs[pset_cpu_rank].p2p_addr = init_msg->p2p_addr; - cn_procs[pset_cpu_rank].pset_rank = init_msg->pset_rank; - cn_procs[pset_cpu_rank].cpu = init_msg->cpu; - cn_procs[pset_cpu_rank].status = PROC_STATUS_RUNNING; -#if 0 - fprintf(stderr, "Received init message from proc %d\n", - cn_procs[pset_cpu_rank].pid); -#endif - } /* for (init_recv) */ - pending_exit_requests = pset_proc_count; -#if 0 - fprintf(stderr, "Received all msgs, suspending CIOD...\n"); -#endif - - /* Ugly hack warning! - - Here's what seems to be happening: even if a job consists of just - one process, all compute nodes belonging to the partition where the - job will run are booted. Those nodes that don't have any processes - to run immediately send a REQUESTRESET message to CIOD. What can - happen is that nodes with a process on them are faster than those - without and send the ZOID init message first. In that case ZOID - suspends CIOD before CIOD receives the REQUESTRESET messages. - Those messages are then read by ZOID, which spits out warning - messages about invalid packets coming from unknown sources (jobs - still succeed, though). - - The sleep below is an attempt to avoid this race condition. It - gives CIOD extra time to receive the REQUESTRESET messages. It - actually seems to work, believe it or not. */ - sleep(2); - - if (suspend_ciod()) - return 1; - - /* Send an ACK. */ - - init_msg_rep.max_buffer_size = max_buffer_size_2; - init_msg_rep.ack_threshold = ack_threshold; - memcpy(packet_buffer->data, &init_msg_rep, sizeof(init_msg_rep)); - - BGLTreePacketHardHeader_InitGlobal(&hardheader, PACKET_CLASS_CIO, 0, - BGLTreeCombineOp_NONE, 0, 1); - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc1 + BGL_MEM_TREE_HDRIN_OFFSET, - vc1 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - /* At this point, CIOD is suspended, so we can take over its - responsibilities. First, though, we need to change user/group ID, - as well as current working directory (it looks like it initially - points to where the job was submitted from, which is convenient). */ - if (obtain_ciod_credentials()) - return 1; - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->init_func) - entry->init_func(pset_proc_count); - } - - worker_threads = malloc(pset_proc_count * sizeof(*worker_threads)); - assert(worker_threads); - - for (i = 0; i < pset_proc_count; i++) - if (pthread_create(&worker_threads[i], NULL, worker_thread_body, - NULL)) - { - perror("create worker thread"); - return 1; - } - - if (ciod_control_socket != -1) - if (pthread_create(&ciod_thread, NULL, ciod_thread_body, NULL)) - { - perror("create ciod thread"); - return 1; - } - - child_pid = fork(); - if(child_pid == 0) { - bglco_loop(); - return 0; - } else if(child_pid < 0) { - perror("fork failed"); - return 1; - } - - /* All the activities are performed by worker threads at this point. - We just wait for these threads to finish. */ - - waitpid(child_pid, NULL, 0); - - enter_critical_section(recv_queue->locks); - recv_queue->first = (struct zoid_buffer*)1; - leave_critical_section(recv_queue->locks); - - /* HACK */ -/* for(i = 0; i < pset_proc_count; i++) { */ -/* int r = sigqueue(getpid(), SIGRTMIN+1, (union sigval)0); */ -/* if(r) { */ -/* perror("sigqueue failed"); */ -/* exit(-1); */ -/* } */ -/* } */ - - for(i = 0; i < pset_proc_count; i++) - pthread_join(worker_threads[i], NULL); - - /* Job finished -- clean up. */ - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->fini_func) - entry->fini_func(); - } - - if (ciod_control_socket != -1) - { - if (!sent_kill_packet) - pthread_cancel(ciod_thread); - pthread_join(ciod_thread, NULL); - close(ciod_control_socket); - } - if (ciod_streams_socket != -1) - close(ciod_streams_socket); - - if (abnormal_msg_received) - { - fprintf(stderr, "BECAUSE OF ABNORMAL MESSAGES ABOVE, CIOD WILL BE " - "KILLED.\n"); - kill(ciod_pid, SIGCONT); - sleep(5); - kill(ciod_pid, SIGTERM); - return 1; - } - else - { -#if 0 - fprintf(stderr, "All processes terminated normally; " - "attempting to resume CIOD...\n"); -#endif - if (!sent_kill_packet) - { - /* First send an ACK to the processes, so that they can - terminate. */ - - BGLTreePacketHardHeader_InitGlobal(&hardheader, - PACKET_CLASS_CIO, - 0, BGLTreeCombineOp_NONE, - 0, 1); - /* We don't care what's in the packet_buffer. */ - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - - kill(ciod_pid, SIGCONT); - } -#if 0 - { - extern int poll_hist[201]; - int i; - - fprintf(stderr, "\n\nPoll histogram:\n"); - for (i = 1; i < sizeof(poll_hist) / sizeof(poll_hist[0]); i++) - if (poll_hist[i]) - fprintf(stderr, "%d %d\n", i, poll_hist[i]); - } -#endif - - /* One of the things we don't free are zoid message buffers. I'm - simply worried the repeated freeing and allocation of these large - memory areas could result in memory fragmentation. */ - - cleanup_traffic(); - - free(worker_threads); - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].buffer) - __zoid_release_buffer(cn_procs[i].buffer->data); - //free(cn_procs); - - if (seteuid(0) < 0) - { - perror("seteuid"); - return 1; - } - if (setegid(0) < 0) - { - perror("seteuid"); - return 1; - } - } /* for (;;) */ - - /* The code below is unreachable and is only provided for the sake of - completeness. */ - - free(packet_buffer); - - while (dispatch_entries) - { - struct zoid_dispatch_entry* entry = dispatch_entries; - dispatch_entries = dispatch_entries->next; - free(entry); - } - - return 0; -} - - - -void init_lock_pair(lock_pair *locks) { - static int next_lock = 0; - - if (pthread_mutex_init(&(locks->pt_mutex), NULL)) { - perror("lock_pair_init"); - exit(-1); - } - locks->hw_mutex = lockbox + MUTEX_OFFSET(next_lock++); - BGL_Mutex_Release(locks->hw_mutex); -} - - - -inline void enter_critical_section(lock_pair *locks) { - if(pthread_mutex_lock(&(locks->pt_mutex))) - assert(0); - BGL_Mutex_Acquire(locks->hw_mutex); -} - - - -inline void leave_critical_section(lock_pair *locks) { - BGL_Mutex_Release(locks->hw_mutex); - if(pthread_mutex_unlock(&(locks->pt_mutex))) - assert(0); -} diff --git a/3rdparty/zoid/daemon.2cores/init.c.sema b/3rdparty/zoid/daemon.2cores/init.c.sema deleted file mode 100644 index 570ac6974b306666b37e504013411383b526b617..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/init.c.sema +++ /dev/null @@ -1,1027 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: init.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <grp.h> -#include <pthread.h> -#include <signal.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/wait.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/ipc.h> -#include <sys/sem.h> - -#include <bglmemmap.h> -#include <bglpersonality.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" -#include "zoid_mapping.h" -#include "bglco.h" - - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ -void *bglco_thread_body(void *); - -void *sram; -void *lockbox; -BGL_Barrier *barrier; - -pid_t child_pid; -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - - -/* Maximum size of a command or reply. In case of "userbuf" commands or - replies, it only applies to the non-userbuf portion. - - There are two limits, because we support two distinct buffer sizes. This - helps save memory, since we need a separate buffers for command and reply, - and normally at most one of them is "large". */ -int max_buffer_size_1 = 4096; -int max_buffer_size_2 = 4096*1024 + 1024; - -/* If the number of tree packets needed to send a command (each packet - carries a max. of 240 data bytes) exceeds ack_threshold, an acknowledgement - from the server will be required (after the first packet). - - This is set to 0, which disable acknowledgements for all but input userbuf - functions. Experiments have shown that the performance is best then. - - Setting it to a higher value (8 seems like a good choice, since that's the - hardware FIFO size on BG/L) enables the acknowledgements. This slows the - frequent-but-short commands down, but it does considerably improve the - fairness (CNs that are closer to the ION in the tree topology do not get - an unfair share of the bandwidth then). -*/ -static int ack_threshold = 0; - -BGLPersonality personality; - -int my_p2p_addr; - -void *vc0, *vc1; - -struct zoid_buffer* packet_buffer; -int total_proc_count; -int pset_size; -int vn_mode; -int pset_proc_count; -int pending_exit_requests; -int abnormal_msg_received; - -struct CNProc* cn_procs; - -int ciod_pid; - -struct zoid_dispatch_entry* dispatch_entries = NULL; - -pthread_key_t thread_specific_key; -static pthread_t* worker_threads; -static pthread_t ciod_thread; -pthread_mutex_t ack_queue_mutex; -pthread_mutex_t output_mutex; - -lock_pair pending_exit_locks; -lock_pair tree_locks; -int *sent_signals; -int *recv_signals; - -char mpi_mapping[5]; -/* Pset ranks may be non-contiguous, because they are always calculated in - the XYZ mapping, while the actual mapping could be different. We use - this array to make them contiguous, which is easier to deal with. */ -int pset_rank_mapping[128]; -/* On the wire we still use the XYZ mapping, so we also need to be able to - translate backwards. */ -int pset_rank_mapping_rev[128]; - -/* Socket connections to ciodb, taken from ciod. */ -int ciod_control_socket, ciod_streams_socket; -/* Identifies whether an interrupting kill packet has been sent by the ciod - thread. */ -int sent_kill_packet; - -char l1flusher[32*1024] __attribute__ ((aligned (32))); - -int recv_q_sem; -struct sembuf up = {0, 1, 0}; -struct sembuf down = {0, -1, 0}; - -/* - * Suspend CIOD, but first, if possible, get the control and stream sockets - * from it (this requires that our ciod_preload.so stub is loaded into it). - */ -static int -suspend_ciod(void) -{ - int fd; - char buf[20]; - int n; - int transfer_sock; - struct sockaddr_un addr; - - ciod_control_socket = ciod_streams_socket = -1; - sent_kill_packet = 0; - - if ((fd = open("/var/run/ciod.440.pid", O_RDONLY)) < 0) - { - /* No CIOD? How was this job started then?! */ - fprintf(stderr, "Failed to find a running CIOD!\n"); - return 1; - } - - if ((n = read(fd, buf, sizeof(buf))) < 1) - { - perror("read ciod pid"); - return 1; - } - - close(fd); - - buf[n - 1] = '\0'; - if ((ciod_pid = strtol(buf, NULL, 10)) < 1) - { - fprintf(stderr, "Invalid ciod pid\n"); - return 1; - } - - if (!(transfer_sock = socket(PF_UNIX, SOCK_STREAM, 0))) - { - perror("create unix domain socket"); - return 1; - } - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, "/var/tmp/zoid.socket"); - if (connect(transfer_sock, (struct sockaddr*)&addr, sizeof(addr))) - { - fprintf(stderr, "Modified CIOD not found. " - "Will print job's output below:\n\n"); - - if (kill(ciod_pid, SIGSTOP) < 0) - { - perror("suspend ciod"); - return 1; - } - } - else - { - struct iovec iov; - struct msghdr msg = {0}; - int fds[2]; - char msgbuf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr *cmsg; - char tmp; - - if (kill(ciod_pid, SIGUSR1) < 0) - { - perror("suspend ciod"); - return 1; - } - - iov.iov_base = &tmp; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = msgbuf; - msg.msg_controllen = sizeof(msgbuf); - - do - { - n = recvmsg(transfer_sock, &msg, 0); - } while (n < 0 && errno == EAGAIN); - - if (n < 0) - { - perror("recvmsg"); - return 1; - } - - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) - { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - ciod_control_socket = fds[0]; - ciod_streams_socket = fds[1]; - break; - } - } - } - - close(transfer_sock); - - return 0; -} - -int -obtain_ciod_credentials(void) -{ - FILE* status; - char buf[1024]; - int euid = -1, egid = -1; - gid_t groups[512]; /* Ids are space-separated, so there can be at most half - as many as the buffer size, less in practice. */ - int ngroups = 0; - char cwd[4097]; - - sprintf(buf, "/proc/%d", ciod_pid); - - if (chdir(buf) < 0) - { - perror("chdir ciod proc"); - return 1; - } - - /* Parse CIOD's /proc/pid/status to obtain uid/gid info. */ - if (!(status = fopen("status", "r"))) - { - perror("fopen status"); - return 1; - } - - while (fgets(buf, sizeof(buf), status)) - { - if (strncmp(buf, "Uid:", strlen("Uid:")) == 0) - { - if (sscanf(buf, "Uid: %*d %d", &euid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Gid:", strlen("Gid:")) == 0) - { - if (sscanf(buf, "Gid: %*d %d", &egid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Groups:", strlen("Groups:")) == 0) - { - char* bptr; - - if (buf[strlen(buf) - 1] != '\n') - { - /* We provide a 1K buffer. Apparently, it wasn't enough. - Too bad -- we do the easy thing and bail out. Obviously, - if this ever gets triggered, the buffer size can be - increased, or the code made to handle arbitrarily long - input lines. */ - fprintf(stderr, "Implementation limit reached\n"); - return 1; - } - - for (bptr = buf + strlen("Groups:"), ngroups = 0; *bptr;) - { - char* new_bptr; - - groups[ngroups] = strtol(bptr, &new_bptr, 10); - if (new_bptr == bptr) - break; - ngroups++; - bptr = new_bptr; - } - } - } - fclose(status); - - if (euid == -1 || egid == -1 || ngroups < 1) - { - fprintf(stderr, "Expected data not found in /proc/pid/status\n"); - return 1; - } - - /* Now get the initial current directory. */ - { - int n; - - if ((n = readlink("cwd", cwd, sizeof(cwd) - 1)) < 0) - { - perror("read ciod cwd"); - return 1; - } - cwd[n] = '\0'; - } - - /* We have all the data we need. Adjust our process to match CIOD. */ - if (chdir(cwd) < 0) - { - perror("initial chdir"); - return 1; - } - if (setgroups(ngroups, groups) < 0) - { - perror("setgroups"); - return 1; - } - if (setegid(egid) < 0) - { - perror("setegid"); - return 1; - } - if (seteuid(euid) < 0) - { - perror("seteuid"); - return 1; - } - - return 0; -} - -/* - * Called from the backend shared objects to register functions on the server - * side. - */ -void __zoid_register_functions(int header_id, - struct dispatch_array* dispatch_array, - int array_size, void (*init_func)(int), - void (*fini_func)(void)) -{ - struct zoid_dispatch_entry* entry; - - if (!(entry = malloc(sizeof(*entry)))) - { - fprintf(stderr, "Not enough memory!\n"); - return; - } - - entry->header_id = header_id; - entry->dispatch_array = dispatch_array; - entry->array_size = array_size; - entry->init_func = init_func; - entry->fini_func = fini_func; - - entry->next = dispatch_entries; - dispatch_entries = entry; -} - -/* - * Calculates the number of processes that are actually expected in the - * current pset. This is non-trivial because the psets need not be fully - * filled in, and the mapping can be influenced by the user. - * We also fill in two arrays to help with the mapping back and forth later. - */ -int -calculate_pset_size(void) -{ - int x_origin, y_origin, z_origin; - int x_size, y_size, z_size, t_size; - int t; - int pset_size, pset_rank; - - /* Get the dimensions of the pset. */ - x_size = BGLPersonality_xPsetSize(&personality); - y_size = BGLPersonality_yPsetSize(&personality); - z_size = BGLPersonality_zPsetSize(&personality); - t_size = /* vn_mode ? 2 : 1 */ 2; /* We assume vn_mode so that also the - t==1 entries are initialized. */ - - /* Get the coordinates of the first node of a pset. */ - x_origin = BGLPersonality_xPsetOrigin(&personality); - y_origin = BGLPersonality_yPsetOrigin(&personality); - z_origin = BGLPersonality_zPsetOrigin(&personality); - - /* Check which CPUs in the pset are occupied. - Do it in the XYZT order, which is the order used by pset_rank. */ - pset_size = 0; - pset_rank = 0; - for (t = 0; t < t_size; t++) - { - int z; - for (z = z_origin; z < z_origin + z_size; z++) - { - int y; - for (y = y_origin; y < y_origin + y_size; y++) - { - int x; - for (x = x_origin; x < x_origin + x_size; x++) - { - unsigned rank; - - if (__zoid_mapping_to_rank(x, y, z, t, &rank) == 0) - { - pset_rank_mapping[pset_rank] = pset_size; - pset_rank_mapping_rev[pset_size] = pset_rank; - pset_size++; - } - else - pset_rank_mapping[pset_rank] = -1; - - pset_rank++; - } - } - } - } - - return pset_size; -} - -#define STRINGIFY(x) #x -#define TOSTRING(x) STRINGIFY(x) - -static int -analyze_cmdline(int argc, char* argv[]) -{ - int c; - - if (argc == 2 && strcmp(argv[1], "--version") == 0) - { - fprintf(stderr, "zoid version 1.0" -#ifdef ZEPTO_VERSION - " (ZeptoOS version " TOSTRING(ZEPTO_VERSION) ")" -#endif - "\n"); - exit(0); - } - - while ((c = getopt(argc, argv, "a:b:m:")) != -1) - { - switch (c) - { - char* str; - - case 'a': - ack_threshold = strtol(optarg, &str, 10); - if (*str || ack_threshold < 0) - { - fprintf(stderr, - "zoid: invalid acknowledgement threshold!\n"); - return 1; - } - break; - - case 'b': - max_buffer_size_1 = strtol(optarg, &str, 10); - if ((*str && *str != ':') || - max_buffer_size_1 < TREE_DATA_SIZE) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - if (*str) - { - /* Skip ':'. */ - str++; - max_buffer_size_2 = strtol(str, &str, 10); - if (*str || max_buffer_size_2 < TREE_DATA_SIZE || - max_buffer_size_2 < max_buffer_size_1) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - } - else - max_buffer_size_2 = max_buffer_size_1; - break; - - case 'm': - for (str = strtok(optarg, ":"); str; str = strtok(NULL, ":")) - if (!dlopen(str, RTLD_LAZY | RTLD_GLOBAL)) - { - fprintf(stderr, "zoid: failed to open %s: %s\n", str, - dlerror()); - return 1; - } - break; - - default: - fprintf(stderr, - "Usage: %s [-a <ack_threshold>] [-b <buffer_size>]\n", - argv[0]); - return 1; - } - } - - return 0; -} - -int -main(int argc, char* argv[]) -{ - int fd_pers, fd0, fd1, fd_mem; - struct thread_specific_data thread_data = {-1}; - - if (analyze_cmdline(argc, argv)) - return 1; - - if ((fd_pers = open("/proc/personality", O_RDONLY)) < 0) - { - perror("open /proc/personality"); - fprintf(stderr, "Please run me on an I/O node!\n"); - return 1; - } - if (read(fd_pers, &personality, sizeof(personality)) != - sizeof(personality)) - { - perror("read personality"); - return 1; - } - close(fd_pers); - - my_p2p_addr = BGLPersonality_treeP2PAddr(&personality); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - fd_mem = open("/dev/mem", O_RDWR); - if(fd_mem < 0) { - perror("open /dev/mem"); - return -1; - } - - sram = mmap(0, BGL_MEM_SRAM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_SRAM_PHYS); - if(sram == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - - lockbox = mmap(0, BGL_MEM_LOCKBOX_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_LOCKBOX_PHYS); - if(lockbox == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - close(fd_mem); - - recv_queue = sram + 9*1024; - init_lock_pair(&recv_queue_locks); - init_zoid_buf_pipe(recv_queue, &recv_queue_locks); - - send_queue = sram + 9*1024 + sizeof(zoid_buf_pipe); - init_lock_pair(&send_queue_locks); - init_zoid_buf_pipe(send_queue, &send_queue_locks); - - high_priority_send_queue = sram + 9*1024 + 2*sizeof(zoid_buf_pipe); - init_lock_pair(&high_priority_send_queue_locks); - init_zoid_buf_pipe(high_priority_send_queue, &high_priority_send_queue_locks); - - ack_queue = sram + 9*1024 + 3*sizeof(zoid_buf_pipe); - init_lock_pair(&ack_queue_locks); - init_zoid_buf_pipe(ack_queue, &ack_queue_locks); - - barrier = lockbox + BARRIER_OFFSET(10); - - init_lock_pair(&pending_exit_locks); - init_lock_pair(&tree_locks); - - memset(l1flusher, 0, sizeof(l1flusher)); - - recv_q_sem = semget(IPC_PRIVATE, 1, 0666); - if(recv_q_sem < 0) { - perror("semget failed"); - return 1; - } - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - /* Open both virtual channels. */ - fd0 = open("/dev/tree0", O_RDWR); - if (fd0 < 0) - { - perror("open /dev/tree0"); - return 1; - } - fd1 = open("/dev/tree1", O_RDWR); - if (fd1 < 0) - { - perror("open /dev/tree1"); - return 1; - } - vc0 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0); - if (vc0 == MAP_FAILED) - { - perror("mmap vc0"); - return 1; - } - vc1 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0); - if (vc1 == MAP_FAILED) - { - perror("mmap vc1"); - return 1; - } - close(fd0); - close(fd1); - - max_buffer_size_1 = TREE_BUFFER_ROUNDUP(max_buffer_size_1); - max_buffer_size_2 = TREE_BUFFER_ROUNDUP(max_buffer_size_2); - - allocater_init(); - - { - void* packet_buffer_ptr = &packet_buffer; - if (posix_memalign(packet_buffer_ptr, 16, - sizeof(*packet_buffer) + TREE_PACKET_SIZE)) - { - perror("allocate aligned memory"); - return 1; - } - } - - if (pthread_key_create(&thread_specific_key, 0)) - { - perror("create thread key"); - return 1; - } - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return 1; - } - if (pthread_mutex_init(&ack_queue_mutex, NULL)) - { - perror("create acknowledgement queue mutex"); - return 1; - } - if (pthread_mutex_init(&output_mutex, NULL)) - { - perror("create output mutex"); - return 1; - } - - /* A single iteration of this loop handles a complete job, from - initialization to termination. */ - for (;;) - { - int init_recv, i; - BGLTreePacketHardHeader hardheader; - BGLTreeStatusRegister status; - struct InitMsgReply init_msg_rep; - - packet_buffer->userbuf_out = NULL; - packet_buffer->userbuf_in = NULL; - packet_buffer->ack_sent = 0; - - pset_size = BGLPersonality_numNodesInPset(&personality); - /* This is only a very rough estimate. There might be fewer processes - if the partition is not fully occupied, or more if we are in VN - mode. */ - pset_proc_count = pset_size; - - /* Receive init messages from all processes. As a side effect, obtain - the number of processes the job consists of. */ - for (init_recv = 0; init_recv < pset_proc_count; init_recv++) - { - int pset_cpu_rank; - struct InitMsg* init_msg; - struct timespec ts = {0, 10000000}; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - - /* In case ZOID is invoked even for standard, CIOD jobs, - we want to minimize its impact. So to prevent busy-looping, - we sleep for 0.01 sec. */ - if (status.recpktcnt == 0) - nanosleep(&ts, NULL); - } while (status.recpktcnt == 0); - - BGLTreeFIFO_recv(vc1 + BGL_MEM_TREE_HDROUT_OFFSET, - vc1 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - if (hardheader.p2p.pclass != PACKET_CLASS_CIO || - !hardheader.p2p.p2p) - { - fprintf(stderr, - "Unexpected message received during init phase!\n"); - return 1; - } - - init_msg = (struct InitMsg*)packet_buffer->data; - - /* If this is the first message, read the total number of compute - node processes. */ - if (!init_recv) - { - int i; - - total_proc_count = init_msg->total_proc; -#if 0 - fprintf(stderr, "Job consists of %d processes\n", - total_proc_count); -#endif - strcpy(mpi_mapping, init_msg->mapping); - vn_mode = init_msg->vn_mode; - - if (__zoid_mapping_init(mpi_mapping, total_proc_count, vn_mode, - &personality) == 0) - pset_proc_count = calculate_pset_size(); - else - abort(); -#if 0 - fprintf(stderr, "Expecting %d init msgs\n", pset_proc_count); -#endif - //cn_procs = zoid_alloc(pset_proc_count * sizeof(*cn_procs)); - cn_procs = (struct CNProc*)(sram + 10*1024); - assert(cn_procs); - - for (i = 0; i < pset_proc_count; i++) - { - cn_procs[i].buffer = NULL; - cn_procs[i].current_buf = NULL; - } - } - - if (init_msg->pset_cpu_rank >= pset_size * (vn_mode ? 2 : 1) || - pset_rank_mapping[init_msg->pset_cpu_rank] == -1) - { - fprintf(stderr, "Init message has invalid rank %d\n", - init_msg->pset_cpu_rank); - return 1; - } - - pset_cpu_rank = pset_rank_mapping[init_msg->pset_cpu_rank]; - cn_procs[pset_cpu_rank].pid = init_msg->pid; - cn_procs[pset_cpu_rank].p2p_addr = init_msg->p2p_addr; - cn_procs[pset_cpu_rank].pset_rank = init_msg->pset_rank; - cn_procs[pset_cpu_rank].cpu = init_msg->cpu; - cn_procs[pset_cpu_rank].status = PROC_STATUS_RUNNING; -#if 0 - fprintf(stderr, "Received init message from proc %d\n", - cn_procs[pset_cpu_rank].pid); -#endif - } /* for (init_recv) */ - pending_exit_requests = pset_proc_count; -#if 0 - fprintf(stderr, "Received all msgs, suspending CIOD...\n"); -#endif - - /* Ugly hack warning! - - Here's what seems to be happening: even if a job consists of just - one process, all compute nodes belonging to the partition where the - job will run are booted. Those nodes that don't have any processes - to run immediately send a REQUESTRESET message to CIOD. What can - happen is that nodes with a process on them are faster than those - without and send the ZOID init message first. In that case ZOID - suspends CIOD before CIOD receives the REQUESTRESET messages. - Those messages are then read by ZOID, which spits out warning - messages about invalid packets coming from unknown sources (jobs - still succeed, though). - - The sleep below is an attempt to avoid this race condition. It - gives CIOD extra time to receive the REQUESTRESET messages. It - actually seems to work, believe it or not. */ - sleep(2); - - if (suspend_ciod()) - return 1; - - /* Send an ACK. */ - - init_msg_rep.max_buffer_size = max_buffer_size_2; - init_msg_rep.ack_threshold = ack_threshold; - memcpy(packet_buffer->data, &init_msg_rep, sizeof(init_msg_rep)); - - BGLTreePacketHardHeader_InitGlobal(&hardheader, PACKET_CLASS_CIO, 0, - BGLTreeCombineOp_NONE, 0, 1); - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc1 + BGL_MEM_TREE_HDRIN_OFFSET, - vc1 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - /* At this point, CIOD is suspended, so we can take over its - responsibilities. First, though, we need to change user/group ID, - as well as current working directory (it looks like it initially - points to where the job was submitted from, which is convenient). */ - if (obtain_ciod_credentials()) - return 1; - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->init_func) - entry->init_func(pset_proc_count); - } - - worker_threads = malloc(pset_proc_count * sizeof(*worker_threads)); - assert(worker_threads); - - for (i = 0; i < pset_proc_count; i++) - if (pthread_create(&worker_threads[i], NULL, worker_thread_body, - NULL)) - { - perror("create worker thread"); - return 1; - } - - if (ciod_control_socket != -1) - if (pthread_create(&ciod_thread, NULL, ciod_thread_body, NULL)) - { - perror("create ciod thread"); - return 1; - } - - child_pid = fork(); - if(child_pid == 0) { - bglco_loop(); - return 0; - } else if(child_pid < 0) { - perror("fork failed"); - return 1; - } - - /* All the activities are performed by worker threads at this point. - We just wait for these threads to finish. */ - - waitpid(child_pid, NULL, 0); - - enter_critical_section(recv_queue->locks); - recv_queue->first = (struct zoid_buffer*)1; - leave_critical_section(recv_queue->locks); - - /* HACK */ - for(i = 0; i < pset_proc_count; i++) { - int r = semop(recv_q_sem, &up, 1); - if(r) { - perror("semop failed"); - exit(-1); - } - } - - for(i = 0; i < pset_proc_count; i++) - pthread_join(worker_threads[i], NULL); - - /* Job finished -- clean up. */ - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->fini_func) - entry->fini_func(); - } - - if (ciod_control_socket != -1) - { - if (!sent_kill_packet) - pthread_cancel(ciod_thread); - pthread_join(ciod_thread, NULL); - close(ciod_control_socket); - } - if (ciod_streams_socket != -1) - close(ciod_streams_socket); - - if (abnormal_msg_received) - { - fprintf(stderr, "BECAUSE OF ABNORMAL MESSAGES ABOVE, CIOD WILL BE " - "KILLED.\n"); - kill(ciod_pid, SIGCONT); - sleep(5); - kill(ciod_pid, SIGTERM); - return 1; - } - else - { -#if 0 - fprintf(stderr, "All processes terminated normally; " - "attempting to resume CIOD...\n"); -#endif - if (!sent_kill_packet) - { - /* First send an ACK to the processes, so that they can - terminate. */ - - BGLTreePacketHardHeader_InitGlobal(&hardheader, - PACKET_CLASS_CIO, - 0, BGLTreeCombineOp_NONE, - 0, 1); - /* We don't care what's in the packet_buffer. */ - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - - kill(ciod_pid, SIGCONT); - } -#if 0 - { - extern int poll_hist[201]; - int i; - - fprintf(stderr, "\n\nPoll histogram:\n"); - for (i = 1; i < sizeof(poll_hist) / sizeof(poll_hist[0]); i++) - if (poll_hist[i]) - fprintf(stderr, "%d %d\n", i, poll_hist[i]); - } -#endif - - /* One of the things we don't free are zoid message buffers. I'm - simply worried the repeated freeing and allocation of these large - memory areas could result in memory fragmentation. */ - - cleanup_traffic(); - - free(worker_threads); - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].buffer) - __zoid_release_buffer(cn_procs[i].buffer->data); - //free(cn_procs); - - if (seteuid(0) < 0) - { - perror("seteuid"); - return 1; - } - if (setegid(0) < 0) - { - perror("seteuid"); - return 1; - } - } /* for (;;) */ - - /* The code below is unreachable and is only provided for the sake of - completeness. */ - - free(packet_buffer); - - while (dispatch_entries) - { - struct zoid_dispatch_entry* entry = dispatch_entries; - dispatch_entries = dispatch_entries->next; - free(entry); - } - - return 0; -} - - - -void init_lock_pair(lock_pair *locks) { - static int next_lock = 0; - - if (pthread_mutex_init(&(locks->pt_mutex), NULL)) { - perror("lock_pair_init"); - exit(-1); - } - locks->hw_mutex = lockbox + MUTEX_OFFSET(next_lock++); - BGL_Mutex_Release(locks->hw_mutex); -} - - - -inline void enter_critical_section(lock_pair *locks) { - if(pthread_mutex_lock(&(locks->pt_mutex))) - assert(0); - BGL_Mutex_Acquire(locks->hw_mutex); -} - - - -inline void leave_critical_section(lock_pair *locks) { - BGL_Mutex_Release(locks->hw_mutex); - if(pthread_mutex_unlock(&(locks->pt_mutex))) - assert(0); -} diff --git a/3rdparty/zoid/daemon.2cores/init.c.signal b/3rdparty/zoid/daemon.2cores/init.c.signal deleted file mode 100644 index eae35c4786401437c7f5623da18bc87260507341..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/init.c.signal +++ /dev/null @@ -1,1025 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id: init.c,v 1.20 2007/06/27 15:10:45 iskra Exp $ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#define _GNU_SOURCE /* For posix_memalign. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <grp.h> -#include <pthread.h> -#include <signal.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/wait.h> -#include <unistd.h> - -#include <bglmemmap.h> -#include <bglpersonality.h> - -#include "bgl.h" -#include "zoid.h" -#include "zoid_protocol.h" -#include "zoid_api.h" -#include "zoid_mapping.h" -#include "bglco.h" - - -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ -void *bglco_thread_body(void *); - -void *sram; -void *lockbox; -BGL_Barrier *barrier; - -pid_t child_pid; -/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - - -/* Maximum size of a command or reply. In case of "userbuf" commands or - replies, it only applies to the non-userbuf portion. - - There are two limits, because we support two distinct buffer sizes. This - helps save memory, since we need a separate buffers for command and reply, - and normally at most one of them is "large". */ -int max_buffer_size_1 = 4096; -int max_buffer_size_2 = 4096*1024 + 1024; - -/* If the number of tree packets needed to send a command (each packet - carries a max. of 240 data bytes) exceeds ack_threshold, an acknowledgement - from the server will be required (after the first packet). - - This is set to 0, which disable acknowledgements for all but input userbuf - functions. Experiments have shown that the performance is best then. - - Setting it to a higher value (8 seems like a good choice, since that's the - hardware FIFO size on BG/L) enables the acknowledgements. This slows the - frequent-but-short commands down, but it does considerably improve the - fairness (CNs that are closer to the ION in the tree topology do not get - an unfair share of the bandwidth then). -*/ -static int ack_threshold = 0; - -BGLPersonality personality; - -int my_p2p_addr; - -void *vc0, *vc1; - -struct zoid_buffer* packet_buffer; -int total_proc_count; -int pset_size; -int vn_mode; -int pset_proc_count; -int pending_exit_requests; -int abnormal_msg_received; - -struct CNProc* cn_procs; - -int ciod_pid; - -struct zoid_dispatch_entry* dispatch_entries = NULL; - -pthread_key_t thread_specific_key; -static pthread_t* worker_threads; -static pthread_t ciod_thread; -pthread_mutex_t ack_queue_mutex; -pthread_mutex_t output_mutex; - -lock_pair pending_exit_locks; -lock_pair tree_locks; -int *sent_signals; -int *recv_signals; - -char mpi_mapping[5]; -/* Pset ranks may be non-contiguous, because they are always calculated in - the XYZ mapping, while the actual mapping could be different. We use - this array to make them contiguous, which is easier to deal with. */ -int pset_rank_mapping[128]; -/* On the wire we still use the XYZ mapping, so we also need to be able to - translate backwards. */ -int pset_rank_mapping_rev[128]; - -/* Socket connections to ciodb, taken from ciod. */ -int ciod_control_socket, ciod_streams_socket; -/* Identifies whether an interrupting kill packet has been sent by the ciod - thread. */ -int sent_kill_packet; - -sigset_t sigusr1_set; - -char l1flusher[32*1024] __attribute__ ((aligned (32))); - -/* - * Suspend CIOD, but first, if possible, get the control and stream sockets - * from it (this requires that our ciod_preload.so stub is loaded into it). - */ -static int -suspend_ciod(void) -{ - int fd; - char buf[20]; - int n; - int transfer_sock; - struct sockaddr_un addr; - - ciod_control_socket = ciod_streams_socket = -1; - sent_kill_packet = 0; - - if ((fd = open("/var/run/ciod.440.pid", O_RDONLY)) < 0) - { - /* No CIOD? How was this job started then?! */ - fprintf(stderr, "Failed to find a running CIOD!\n"); - return 1; - } - - if ((n = read(fd, buf, sizeof(buf))) < 1) - { - perror("read ciod pid"); - return 1; - } - - close(fd); - - buf[n - 1] = '\0'; - if ((ciod_pid = strtol(buf, NULL, 10)) < 1) - { - fprintf(stderr, "Invalid ciod pid\n"); - return 1; - } - - if (!(transfer_sock = socket(PF_UNIX, SOCK_STREAM, 0))) - { - perror("create unix domain socket"); - return 1; - } - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, "/var/tmp/zoid.socket"); - if (connect(transfer_sock, (struct sockaddr*)&addr, sizeof(addr))) - { - fprintf(stderr, "Modified CIOD not found. " - "Will print job's output below:\n\n"); - - if (kill(ciod_pid, SIGSTOP) < 0) - { - perror("suspend ciod"); - return 1; - } - } - else - { - struct iovec iov; - struct msghdr msg = {0}; - int fds[2]; - char msgbuf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr *cmsg; - char tmp; - - if (kill(ciod_pid, SIGUSR1) < 0) - { - perror("suspend ciod"); - return 1; - } - - iov.iov_base = &tmp; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = msgbuf; - msg.msg_controllen = sizeof(msgbuf); - - do - { - n = recvmsg(transfer_sock, &msg, 0); - } while (n < 0 && errno == EAGAIN); - - if (n < 0) - { - perror("recvmsg"); - return 1; - } - - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) - { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - ciod_control_socket = fds[0]; - ciod_streams_socket = fds[1]; - break; - } - } - } - - close(transfer_sock); - - return 0; -} - -int -obtain_ciod_credentials(void) -{ - FILE* status; - char buf[1024]; - int euid = -1, egid = -1; - gid_t groups[512]; /* Ids are space-separated, so there can be at most half - as many as the buffer size, less in practice. */ - int ngroups = 0; - char cwd[4097]; - - sprintf(buf, "/proc/%d", ciod_pid); - - if (chdir(buf) < 0) - { - perror("chdir ciod proc"); - return 1; - } - - /* Parse CIOD's /proc/pid/status to obtain uid/gid info. */ - if (!(status = fopen("status", "r"))) - { - perror("fopen status"); - return 1; - } - - while (fgets(buf, sizeof(buf), status)) - { - if (strncmp(buf, "Uid:", strlen("Uid:")) == 0) - { - if (sscanf(buf, "Uid: %*d %d", &euid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Gid:", strlen("Gid:")) == 0) - { - if (sscanf(buf, "Gid: %*d %d", &egid) != 1) - { - fprintf(stderr, "Error parsing /proc/pid/status\n"); - return 1; - } - } - else if (strncmp(buf, "Groups:", strlen("Groups:")) == 0) - { - char* bptr; - - if (buf[strlen(buf) - 1] != '\n') - { - /* We provide a 1K buffer. Apparently, it wasn't enough. - Too bad -- we do the easy thing and bail out. Obviously, - if this ever gets triggered, the buffer size can be - increased, or the code made to handle arbitrarily long - input lines. */ - fprintf(stderr, "Implementation limit reached\n"); - return 1; - } - - for (bptr = buf + strlen("Groups:"), ngroups = 0; *bptr;) - { - char* new_bptr; - - groups[ngroups] = strtol(bptr, &new_bptr, 10); - if (new_bptr == bptr) - break; - ngroups++; - bptr = new_bptr; - } - } - } - fclose(status); - - if (euid == -1 || egid == -1 || ngroups < 1) - { - fprintf(stderr, "Expected data not found in /proc/pid/status\n"); - return 1; - } - - /* Now get the initial current directory. */ - { - int n; - - if ((n = readlink("cwd", cwd, sizeof(cwd) - 1)) < 0) - { - perror("read ciod cwd"); - return 1; - } - cwd[n] = '\0'; - } - - /* We have all the data we need. Adjust our process to match CIOD. */ - if (chdir(cwd) < 0) - { - perror("initial chdir"); - return 1; - } - if (setgroups(ngroups, groups) < 0) - { - perror("setgroups"); - return 1; - } - if (setegid(egid) < 0) - { - perror("setegid"); - return 1; - } - if (seteuid(euid) < 0) - { - perror("seteuid"); - return 1; - } - - return 0; -} - -/* - * Called from the backend shared objects to register functions on the server - * side. - */ -void __zoid_register_functions(int header_id, - struct dispatch_array* dispatch_array, - int array_size, void (*init_func)(int), - void (*fini_func)(void)) -{ - struct zoid_dispatch_entry* entry; - - if (!(entry = malloc(sizeof(*entry)))) - { - fprintf(stderr, "Not enough memory!\n"); - return; - } - - entry->header_id = header_id; - entry->dispatch_array = dispatch_array; - entry->array_size = array_size; - entry->init_func = init_func; - entry->fini_func = fini_func; - - entry->next = dispatch_entries; - dispatch_entries = entry; -} - -/* - * Calculates the number of processes that are actually expected in the - * current pset. This is non-trivial because the psets need not be fully - * filled in, and the mapping can be influenced by the user. - * We also fill in two arrays to help with the mapping back and forth later. - */ -int -calculate_pset_size(void) -{ - int x_origin, y_origin, z_origin; - int x_size, y_size, z_size, t_size; - int t; - int pset_size, pset_rank; - - /* Get the dimensions of the pset. */ - x_size = BGLPersonality_xPsetSize(&personality); - y_size = BGLPersonality_yPsetSize(&personality); - z_size = BGLPersonality_zPsetSize(&personality); - t_size = /* vn_mode ? 2 : 1 */ 2; /* We assume vn_mode so that also the - t==1 entries are initialized. */ - - /* Get the coordinates of the first node of a pset. */ - x_origin = BGLPersonality_xPsetOrigin(&personality); - y_origin = BGLPersonality_yPsetOrigin(&personality); - z_origin = BGLPersonality_zPsetOrigin(&personality); - - /* Check which CPUs in the pset are occupied. - Do it in the XYZT order, which is the order used by pset_rank. */ - pset_size = 0; - pset_rank = 0; - for (t = 0; t < t_size; t++) - { - int z; - for (z = z_origin; z < z_origin + z_size; z++) - { - int y; - for (y = y_origin; y < y_origin + y_size; y++) - { - int x; - for (x = x_origin; x < x_origin + x_size; x++) - { - unsigned rank; - - if (__zoid_mapping_to_rank(x, y, z, t, &rank) == 0) - { - pset_rank_mapping[pset_rank] = pset_size; - pset_rank_mapping_rev[pset_size] = pset_rank; - pset_size++; - } - else - pset_rank_mapping[pset_rank] = -1; - - pset_rank++; - } - } - } - } - - return pset_size; -} - -#define STRINGIFY(x) #x -#define TOSTRING(x) STRINGIFY(x) - -static int -analyze_cmdline(int argc, char* argv[]) -{ - int c; - - if (argc == 2 && strcmp(argv[1], "--version") == 0) - { - fprintf(stderr, "zoid version 1.0" -#ifdef ZEPTO_VERSION - " (ZeptoOS version " TOSTRING(ZEPTO_VERSION) ")" -#endif - "\n"); - exit(0); - } - - while ((c = getopt(argc, argv, "a:b:m:")) != -1) - { - switch (c) - { - char* str; - - case 'a': - ack_threshold = strtol(optarg, &str, 10); - if (*str || ack_threshold < 0) - { - fprintf(stderr, - "zoid: invalid acknowledgement threshold!\n"); - return 1; - } - break; - - case 'b': - max_buffer_size_1 = strtol(optarg, &str, 10); - if ((*str && *str != ':') || - max_buffer_size_1 < TREE_DATA_SIZE) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - if (*str) - { - /* Skip ':'. */ - str++; - max_buffer_size_2 = strtol(str, &str, 10); - if (*str || max_buffer_size_2 < TREE_DATA_SIZE || - max_buffer_size_2 < max_buffer_size_1) - { - fprintf(stderr, "zoid: invalid maximum buffer size!\n"); - return 1; - } - } - else - max_buffer_size_2 = max_buffer_size_1; - break; - - case 'm': - for (str = strtok(optarg, ":"); str; str = strtok(NULL, ":")) - if (!dlopen(str, RTLD_LAZY | RTLD_GLOBAL)) - { - fprintf(stderr, "zoid: failed to open %s: %s\n", str, - dlerror()); - return 1; - } - break; - - default: - fprintf(stderr, - "Usage: %s [-a <ack_threshold>] [-b <buffer_size>]\n", - argv[0]); - return 1; - } - } - - return 0; -} - -int -main(int argc, char* argv[]) -{ - int fd_pers, fd0, fd1, fd_mem; - struct thread_specific_data thread_data = {-1}; - - if (analyze_cmdline(argc, argv)) - return 1; - - if ((fd_pers = open("/proc/personality", O_RDONLY)) < 0) - { - perror("open /proc/personality"); - fprintf(stderr, "Please run me on an I/O node!\n"); - return 1; - } - if (read(fd_pers, &personality, sizeof(personality)) != - sizeof(personality)) - { - perror("read personality"); - return 1; - } - close(fd_pers); - - my_p2p_addr = BGLPersonality_treeP2PAddr(&personality); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - fd_mem = open("/dev/mem", O_RDWR); - if(fd_mem < 0) { - perror("open /dev/mem"); - return -1; - } - - sram = mmap(0, BGL_MEM_SRAM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_SRAM_PHYS); - if(sram == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - - lockbox = mmap(0, BGL_MEM_LOCKBOX_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_mem, BGL_MEM_LOCKBOX_PHYS); - if(lockbox == MAP_FAILED) { - perror("mmap failed"); - return -1; - } - close(fd_mem); - - recv_queue = sram + 9*1024; - init_lock_pair(&recv_queue_locks); - init_zoid_buf_pipe(recv_queue, &recv_queue_locks); - - send_queue = sram + 9*1024 + sizeof(zoid_buf_pipe); - init_lock_pair(&send_queue_locks); - init_zoid_buf_pipe(send_queue, &send_queue_locks); - - high_priority_send_queue = sram + 9*1024 + 2*sizeof(zoid_buf_pipe); - init_lock_pair(&high_priority_send_queue_locks); - init_zoid_buf_pipe(high_priority_send_queue, &high_priority_send_queue_locks); - - ack_queue = sram + 9*1024 + 3*sizeof(zoid_buf_pipe); - init_lock_pair(&ack_queue_locks); - init_zoid_buf_pipe(ack_queue, &ack_queue_locks); - - sent_signals = sram + 9*1024 + 3 * sizeof(zoid_buf_pipe); - recv_signals = sent_signals + 1; - *sent_signals = 0; - *recv_signals = 0; - - barrier = lockbox + BARRIER_OFFSET(10); - - init_lock_pair(&pending_exit_locks); - init_lock_pair(&tree_locks); - - memset(l1flusher, 0, sizeof(l1flusher)); - - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/ - - /* Open both virtual channels. */ - fd0 = open("/dev/tree0", O_RDWR); - if (fd0 < 0) - { - perror("open /dev/tree0"); - return 1; - } - fd1 = open("/dev/tree1", O_RDWR); - if (fd1 < 0) - { - perror("open /dev/tree1"); - return 1; - } - vc0 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0); - if (vc0 == MAP_FAILED) - { - perror("mmap vc0"); - return 1; - } - vc1 = mmap(NULL, 1023, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0); - if (vc1 == MAP_FAILED) - { - perror("mmap vc1"); - return 1; - } - close(fd0); - close(fd1); - - max_buffer_size_1 = TREE_BUFFER_ROUNDUP(max_buffer_size_1); - max_buffer_size_2 = TREE_BUFFER_ROUNDUP(max_buffer_size_2); - - allocater_init(); - - { - void* packet_buffer_ptr = &packet_buffer; - if (posix_memalign(packet_buffer_ptr, 16, - sizeof(*packet_buffer) + TREE_PACKET_SIZE)) - { - perror("allocate aligned memory"); - return 1; - } - } - - if (pthread_key_create(&thread_specific_key, 0)) - { - perror("create thread key"); - return 1; - } - if (pthread_setspecific(thread_specific_key, &thread_data)) - { - perror("setting thread-specific data"); - return 1; - } - if (pthread_mutex_init(&ack_queue_mutex, NULL)) - { - perror("create acknowledgement queue mutex"); - return 1; - } - if (pthread_mutex_init(&output_mutex, NULL)) - { - perror("create output mutex"); - return 1; - } - - sigemptyset(&sigusr1_set); - sigaddset(&sigusr1_set, SIGRTMIN+1); - sigprocmask(SIG_BLOCK, &sigusr1_set, NULL); - - /* A single iteration of this loop handles a complete job, from - initialization to termination. */ - for (;;) - { - int init_recv, i; - BGLTreePacketHardHeader hardheader; - BGLTreeStatusRegister status; - struct InitMsgReply init_msg_rep; - - packet_buffer->userbuf_out = NULL; - packet_buffer->userbuf_in = NULL; - packet_buffer->ack_sent = 0; - - pset_size = BGLPersonality_numNodesInPset(&personality); - /* This is only a very rough estimate. There might be fewer processes - if the partition is not fully occupied, or more if we are in VN - mode. */ - pset_proc_count = pset_size; - - /* Receive init messages from all processes. As a side effect, obtain - the number of processes the job consists of. */ - for (init_recv = 0; init_recv < pset_proc_count; init_recv++) - { - int pset_cpu_rank; - struct InitMsg* init_msg; - struct timespec ts = {0, 10000000}; - - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - - /* In case ZOID is invoked even for standard, CIOD jobs, - we want to minimize its impact. So to prevent busy-looping, - we sleep for 0.01 sec. */ - if (status.recpktcnt == 0) - nanosleep(&ts, NULL); - } while (status.recpktcnt == 0); - - BGLTreeFIFO_recv(vc1 + BGL_MEM_TREE_HDROUT_OFFSET, - vc1 + BGL_MEM_TREE_DATAOUT_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - if (hardheader.p2p.pclass != PACKET_CLASS_CIO || - !hardheader.p2p.p2p) - { - fprintf(stderr, - "Unexpected message received during init phase!\n"); - return 1; - } - - init_msg = (struct InitMsg*)packet_buffer->data; - - /* If this is the first message, read the total number of compute - node processes. */ - if (!init_recv) - { - int i; - - total_proc_count = init_msg->total_proc; -#if 0 - fprintf(stderr, "Job consists of %d processes\n", - total_proc_count); -#endif - strcpy(mpi_mapping, init_msg->mapping); - vn_mode = init_msg->vn_mode; - - if (__zoid_mapping_init(mpi_mapping, total_proc_count, vn_mode, - &personality) == 0) - pset_proc_count = calculate_pset_size(); - else - abort(); -#if 0 - fprintf(stderr, "Expecting %d init msgs\n", pset_proc_count); -#endif - //cn_procs = zoid_alloc(pset_proc_count * sizeof(*cn_procs)); - cn_procs = (struct CNProc*)(sram + 10*1024); - assert(cn_procs); - - for (i = 0; i < pset_proc_count; i++) - { - cn_procs[i].buffer = NULL; - cn_procs[i].current_buf = NULL; - } - } - - if (init_msg->pset_cpu_rank >= pset_size * (vn_mode ? 2 : 1) || - pset_rank_mapping[init_msg->pset_cpu_rank] == -1) - { - fprintf(stderr, "Init message has invalid rank %d\n", - init_msg->pset_cpu_rank); - return 1; - } - - pset_cpu_rank = pset_rank_mapping[init_msg->pset_cpu_rank]; - cn_procs[pset_cpu_rank].pid = init_msg->pid; - cn_procs[pset_cpu_rank].p2p_addr = init_msg->p2p_addr; - cn_procs[pset_cpu_rank].pset_rank = init_msg->pset_rank; - cn_procs[pset_cpu_rank].cpu = init_msg->cpu; - cn_procs[pset_cpu_rank].status = PROC_STATUS_RUNNING; -#if 0 - fprintf(stderr, "Received init message from proc %d\n", - cn_procs[pset_cpu_rank].pid); -#endif - } /* for (init_recv) */ - pending_exit_requests = pset_proc_count; -#if 0 - fprintf(stderr, "Received all msgs, suspending CIOD...\n"); -#endif - - /* Ugly hack warning! - - Here's what seems to be happening: even if a job consists of just - one process, all compute nodes belonging to the partition where the - job will run are booted. Those nodes that don't have any processes - to run immediately send a REQUESTRESET message to CIOD. What can - happen is that nodes with a process on them are faster than those - without and send the ZOID init message first. In that case ZOID - suspends CIOD before CIOD receives the REQUESTRESET messages. - Those messages are then read by ZOID, which spits out warning - messages about invalid packets coming from unknown sources (jobs - still succeed, though). - - The sleep below is an attempt to avoid this race condition. It - gives CIOD extra time to receive the REQUESTRESET messages. It - actually seems to work, believe it or not. */ - sleep(2); - - if (suspend_ciod()) - return 1; - - /* Send an ACK. */ - - init_msg_rep.max_buffer_size = max_buffer_size_2; - init_msg_rep.ack_threshold = ack_threshold; - memcpy(packet_buffer->data, &init_msg_rep, sizeof(init_msg_rep)); - - BGLTreePacketHardHeader_InitGlobal(&hardheader, PACKET_CLASS_CIO, 0, - BGLTreeCombineOp_NONE, 0, 1); - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc1 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc1 + BGL_MEM_TREE_HDRIN_OFFSET, - vc1 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - - /* At this point, CIOD is suspended, so we can take over its - responsibilities. First, though, we need to change user/group ID, - as well as current working directory (it looks like it initially - points to where the job was submitted from, which is convenient). */ - if (obtain_ciod_credentials()) - return 1; - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->init_func) - entry->init_func(pset_proc_count); - } - - worker_threads = malloc(pset_proc_count * sizeof(*worker_threads)); - assert(worker_threads); - - for (i = 0; i < pset_proc_count; i++) - if (pthread_create(&worker_threads[i], NULL, worker_thread_body, - NULL)) - { - perror("create worker thread"); - return 1; - } - - if (ciod_control_socket != -1) - if (pthread_create(&ciod_thread, NULL, ciod_thread_body, NULL)) - { - perror("create ciod thread"); - return 1; - } - - child_pid = fork(); - if(child_pid == 0) { - bglco_loop(); - return 0; - } else if(child_pid < 0) { - perror("fork failed"); - return 1; - } - - /* All the activities are performed by worker threads at this point. - We just wait for these threads to finish. */ - - waitpid(child_pid, NULL, 0); - - enter_critical_section(recv_queue->locks); - recv_queue->first = (struct zoid_buffer*)1; - leave_critical_section(recv_queue->locks); - - /* HACK */ - for(i = 0; i < pset_proc_count; i++) { - int r = sigqueue(getpid(), SIGRTMIN+1, (union sigval)0); - if(r) { - perror("sigqueue failed"); - exit(-1); - } - } - - for(i = 0; i < pset_proc_count; i++) - pthread_join(worker_threads[i], NULL); - - /* Job finished -- clean up. */ - - { - struct zoid_dispatch_entry* entry; - - for (entry = dispatch_entries; entry; entry = entry->next) - if (entry->fini_func) - entry->fini_func(); - } - - if (ciod_control_socket != -1) - { - if (!sent_kill_packet) - pthread_cancel(ciod_thread); - pthread_join(ciod_thread, NULL); - close(ciod_control_socket); - } - if (ciod_streams_socket != -1) - close(ciod_streams_socket); - - if (abnormal_msg_received) - { - fprintf(stderr, "BECAUSE OF ABNORMAL MESSAGES ABOVE, CIOD WILL BE " - "KILLED.\n"); - kill(ciod_pid, SIGCONT); - sleep(5); - kill(ciod_pid, SIGTERM); - return 1; - } - else - { -#if 0 - fprintf(stderr, "All processes terminated normally; " - "attempting to resume CIOD...\n"); -#endif - if (!sent_kill_packet) - { - /* First send an ACK to the processes, so that they can - terminate. */ - - BGLTreePacketHardHeader_InitGlobal(&hardheader, - PACKET_CLASS_CIO, - 0, BGLTreeCombineOp_NONE, - 0, 1); - /* We don't care what's in the packet_buffer. */ - do - { - status = *(volatile BGLTreeStatusRegister*) - (vc0 + BGL_MEM_TREE_STATUS0_OFFSET); - } while (status.injpktcnt > 7); - BGLTreeFIFO_send(vc0 + BGL_MEM_TREE_HDRIN_OFFSET, - vc0 + BGL_MEM_TREE_DATAIN_OFFSET, - &hardheader, (BGLQuad*)packet_buffer->data); - } - - kill(ciod_pid, SIGCONT); - } -#if 0 - { - extern int poll_hist[201]; - int i; - - fprintf(stderr, "\n\nPoll histogram:\n"); - for (i = 1; i < sizeof(poll_hist) / sizeof(poll_hist[0]); i++) - if (poll_hist[i]) - fprintf(stderr, "%d %d\n", i, poll_hist[i]); - } -#endif - - /* One of the things we don't free are zoid message buffers. I'm - simply worried the repeated freeing and allocation of these large - memory areas could result in memory fragmentation. */ - - cleanup_traffic(); - - free(worker_threads); - for (i = 0; i < pset_proc_count; i++) - if (cn_procs[i].buffer) - __zoid_release_buffer(cn_procs[i].buffer->data); - //free(cn_procs); - - if (seteuid(0) < 0) - { - perror("seteuid"); - return 1; - } - if (setegid(0) < 0) - { - perror("seteuid"); - return 1; - } - } /* for (;;) */ - - /* The code below is unreachable and is only provided for the sake of - completeness. */ - - free(packet_buffer); - - while (dispatch_entries) - { - struct zoid_dispatch_entry* entry = dispatch_entries; - dispatch_entries = dispatch_entries->next; - free(entry); - } - - return 0; -} - - - -void init_lock_pair(lock_pair *locks) { - static int next_lock = 0; - - if (pthread_mutex_init(&(locks->pt_mutex), NULL)) { - perror("lock_pair_init"); - exit(-1); - } - locks->hw_mutex = lockbox + MUTEX_OFFSET(next_lock++); - BGL_Mutex_Release(locks->hw_mutex); -} - - - -inline void enter_critical_section(lock_pair *locks) { - if(pthread_mutex_lock(&(locks->pt_mutex))) - assert(0); - BGL_Mutex_Acquire(locks->hw_mutex); -} - - - -inline void leave_critical_section(lock_pair *locks) { - BGL_Mutex_Release(locks->hw_mutex); - if(pthread_mutex_unlock(&(locks->pt_mutex))) - assert(0); -} diff --git a/3rdparty/zoid/daemon.2cores/zoid.h b/3rdparty/zoid/daemon.2cores/zoid.h deleted file mode 100644 index c6822708add4764a48ebf50b812d686e4c8501a7..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/zoid.h +++ /dev/null @@ -1,196 +0,0 @@ -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#ifndef ZOIDD_H -#define ZOIDD_H - -#include "bgl.h" -#include "zoid_protocol.h" -#include "bglco.h" - -#include <pthread.h> - -#undef L1_CONSISTENCY_IN_SOFTWARE - -struct CNProc -{ - /* Info data. */ - int pid; - uint16_t p2p_addr; - uint8_t pset_rank; - uint8_t cpu; - - /* Message currently being received. */ - int msg_length; - struct zoid_buffer* buffer; - char* current_buf; - - /* Termination info */ - char status; -} QUAD_ALIGN; - -#define PROC_STATUS_RUNNING 0 -#define PROC_STATUS_EXIT 1 -#define PROC_STATUS_EXIT_ABNORMAL 2 - -struct thread_specific_data -{ - int calling_process_id; - int errnum; - int last_excessive_size; - - /* Support for output userbuf. */ - void* userbuf; - /* The following two fields are only valid if userbuf != NULL. */ - void (*userbuf_cb)(void* userbuf, void* priv); - void* userbuf_priv; -}; - -/* WARNING! When modifying this structure, ensure that softheader and data - remain quad-aligned! */ -struct zoid_buffer -{ - struct zoid_buffer* next; - volatile unsigned int size; - - /* Declared and actual result length. */ - int result_len; - int total_len; - - /* Error indicators. */ - int errnum; - int excessive_size; - - /* Support for output userbuf. */ - void* userbuf_out; - void (*userbuf_out_cb)(void* userbuf, void* priv); - void* userbuf_out_priv; - - /* Support for input userbuf. */ - char* userbuf_in; - int userbuf_in_len; - - /* Support for input flow control. */ - char ack_sent; - - /* Quad-alignment required for the remaining fields. */ - char pad[3]; - - struct ZoidSoftHeader softheader; - - char data[0]; /* Variable size */ -} QUAD_ALIGN; - -struct zoid_dispatch_entry -{ - struct zoid_dispatch_entry* next; - int header_id; - struct dispatch_array* dispatch_array; - int array_size; - void (*init_func)(int pset_proc_count); - void (*fini_func)(void); -}; - -typedef struct lock_pair_s { - pthread_mutex_t pt_mutex; - BGL_Mutex *hw_mutex; -} lock_pair; - -typedef struct zoid_buf_pipe_s { - struct zoid_buffer *volatile first; /* volatile added by John */ - struct zoid_buffer *volatile last; /* volatile added by John */ - lock_pair *locks; -} zoid_buf_pipe; - -struct zoid_buffer* get_zoid_buffer(void* buffer); - -void* worker_thread_body(void* arg); -void* ciod_thread_body(void* arg); -void bglco_loop(void); - -void cleanup_traffic(void); - -void allocater_init(void); - -extern void *sram; -extern void *lockbox; - -extern void* vc0; -extern struct zoid_buffer* packet_buffer; -extern struct CNProc* cn_procs; -extern int pset_size; -extern int vn_mode; -extern int pset_proc_count; -extern int max_buffer_size_1, max_buffer_size_2; -extern int pending_exit_requests; -extern int abnormal_msg_received; - -extern struct zoid_dispatch_entry* dispatch_entries; - -extern pthread_key_t thread_specific_key; -extern pthread_mutex_t ack_queue_mutex; -extern pthread_mutex_t output_mutex; - -extern int pset_rank_mapping[128]; -extern int pset_rank_mapping_rev[128]; - -extern int ciod_control_socket; -extern int ciod_streams_socket; -extern int sent_kill_packet; - -extern zoid_buf_pipe *recv_queue; -extern lock_pair recv_queue_locks; - -extern zoid_buf_pipe *send_queue; -extern lock_pair send_queue_locks; - -extern zoid_buf_pipe *high_priority_send_queue; -extern lock_pair high_priority_send_queue_locks; - -extern zoid_buf_pipe *ack_queue; -extern lock_pair ack_queue_locks; - -extern lock_pair pending_exit_locks; -extern lock_pair tree_locks; - -extern void init_lock_pair(lock_pair *); -extern inline void enter_critical_section(lock_pair*); -extern inline void leave_critical_section(lock_pair*); - -extern void init_zoid_buf_pipe(zoid_buf_pipe*, lock_pair*); -extern void enqueue_zoid_buf(zoid_buf_pipe*, struct zoid_buffer*); -extern struct zoid_buffer *dequeue_zoid_buf(zoid_buf_pipe*); - - -#if defined L1_CONSISTENCY_IN_SOFTWARE -extern char l1flusher[32768]; -extern void flush_L1_all(); -extern void flush_L1_region(void *addr, unsigned int size); -extern void flush_zoid_buf(struct zoid_buffer *buffer); -#endif - -#endif diff --git a/3rdparty/zoid/daemon.2cores/zoid.map b/3rdparty/zoid/daemon.2cores/zoid.map deleted file mode 100644 index 53e1c4b6c347da5870d2d0055ce790e5fcd2b278..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/zoid.map +++ /dev/null @@ -1,34 +0,0 @@ -#*************************************************************************** -# ZEPTOOS:zepto-info -# This file is part of ZeptoOS: The Small Linux for Big Computers. -# See www.mcs.anl.gov/zeptoos for more information. -# ZEPTOOS:zepto-info -# -# ZEPTOOS:zepto-fillin -# $Id: zoid.map,v 1.2 2007/06/27 15:10:45 iskra Exp $ -# ZeptoOS_Version: 1.2 -# ZeptoOS_Heredity: FOSS_ORIG -# ZeptoOS_License: GPL -# ZEPTOOS:zepto-fillin -# -# ZEPTOOS:zepto-gpl -# Copyright: Argonne National Laboratory, Department of Energy, -# and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 -# ZeptoOS License: GPL -# -# This software is free. See the file ZeptoOS/misc/license.GPL -# for complete details on your rights to copy, modify, and use this -# software. -# ZEPTOOS:zepto-gpl -#*************************************************************************** - -{ -global: - __zoid_register_functions; - __zoid_calling_process_id; - __zoid_register_userbuf; - __zoid_send_output; - __zoid_allocate_buffer; - __zoid_alloc; - __zoid_free; -}; diff --git a/3rdparty/zoid/daemon.2cores/zoid_mapping.c b/3rdparty/zoid/daemon.2cores/zoid_mapping.c deleted file mode 100644 index 2c8034ee9b324e0bce56a9a0fabcf2ab8489c967..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/daemon.2cores/zoid_mapping.c +++ /dev/null @@ -1,221 +0,0 @@ -/* Please note that this file is shared between ZOID and GLIBC! */ - -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#include <ctype.h> -#include <stdio.h> -#include <string.h> - -#include <bglpersonality.h> - -struct zoid_mapping -{ - char coord; /* 'x', 'y', 'z', or 't' (or '\0', if not in use). */ - int size; -}; - -static struct zoid_mapping zoid_mapping[4]; -static int proc_count; -static int vn_mode; - -/* - * Initialize mapping between x/y/z/t coordinates and MPI rank. - * FIXME! We don't support mappings provided by the user to mpirun using - * -mapfile argument. - */ -int -__zoid_mapping_init(const char* mapping, int arg_proc_count, int arg_vn_mode, - const BGLPersonality* personality) -{ - int x_used = 0, y_used = 0, z_used = 0, t_used = 0; - int i; - - proc_count = arg_proc_count; - vn_mode = arg_vn_mode; - - /* We allow this to be larger, so that "XYZT" works in coprocessor - mode. */ - if (strlen(mapping) < 3 + vn_mode) - { - //fprintf(stderr, "Invalid BGLMPI_MAPPING\n"); - return 1; - } - - /* Fill in the "mapping" array in the order specified by the user. */ - for (i = 0; i < 3 + vn_mode && *mapping; i++) - { - switch (tolower(*mapping)) - { - case 'x': - zoid_mapping[i].size = personality->xSize; - x_used++; - break; - - case 'y': - zoid_mapping[i].size = personality->ySize; - y_used++; - break; - - case 'z': - zoid_mapping[i].size = personality->zSize; - z_used++; - break; - - case 't': - if (!vn_mode) - { - /* If we are not in VN mode, we should ignore T. - We don't flag it as an error, to support "TXYZ" in - coprocessor mode. - This is a bit of a hack... */ - mapping++; - i--; - continue; - } - zoid_mapping[i].size = 2; - t_used++; - break; - - default: - //fprintf(stderr, "Invalid BGLMPI_MAPPING\n"); - return 1; - } - - zoid_mapping[i].coord = tolower(*mapping++); - } - - if (x_used != 1 || y_used !=1 || z_used !=1 || - (vn_mode && t_used != 1)) - { - //fprintf(stderr, "Invalid BGLMPI_MAPPING\n"); - return 1; - } - - if (i == 3) - { - /* Make sure to mark unused as such (required if switching from VN - to CO mode). */ - zoid_mapping[3].coord = '\0'; - zoid_mapping[3].size = 0; - } - - return 0; -} - -int -__zoid_mapping_to_coord(unsigned mpi_rank, - unsigned *x, unsigned *y, unsigned *z, unsigned *t) -{ - int rank; - int i; - - if (mpi_rank < 0 || mpi_rank >= proc_count) - return -1; - - rank = 0; - for (i = 0; i < sizeof(zoid_mapping) / sizeof(zoid_mapping[0]) && - zoid_mapping[i].coord; i++) - { - int new_coord; - - new_coord = mpi_rank % zoid_mapping[i].size; - mpi_rank /= zoid_mapping[i].size; - - switch (zoid_mapping[i].coord) - { - case 'x': - *x = new_coord; - break; - case 'y': - *y = new_coord; - break; - case 'z': - *z = new_coord; - break; - case 't': - *t = new_coord; - break; - } - } - - if (!vn_mode) - *t = 0; - - return 0; -} - -int -__zoid_mapping_to_rank(unsigned x, unsigned y, unsigned z, unsigned t, - unsigned *mpi_rank) -{ - int rank; - int i, multiplier; - - if (!vn_mode && t != 0) - return -1; - - rank = 0; - multiplier = 1; - for (i = 0; i < sizeof(zoid_mapping) / sizeof(zoid_mapping[0]) && - zoid_mapping[i].coord; i++) - { - int new_coord; - - switch (zoid_mapping[i].coord) - { - case 'x': - new_coord = x; - break; - case 'y': - new_coord = y; - break; - case 'z': - new_coord = z; - break; - case 't': - new_coord = t; - break; - default: - /* This is just to shut down a compiler warning. */ - new_coord = 0; - } - - if (new_coord < 0 || new_coord >= zoid_mapping[i].size) - return -1; - - rank += new_coord * multiplier; - multiplier *= zoid_mapping[i].size; - - if (rank >= proc_count) - return -1; - } - - *mpi_rank = rank; - - return 0; -} diff --git a/3rdparty/zoid/include/bgl.h b/3rdparty/zoid/include/bgl.h deleted file mode 100644 index 7360b1a66b3f49c20a0277cb8dbb45203ca7eddb..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/include/bgl.h +++ /dev/null @@ -1,228 +0,0 @@ -/* Please note that this file is shared between ZOID and GLIBC! */ - -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#ifndef BGL_H -#define BGL_H - -#include <inttypes.h> - -/* Alignment required by the double hammer. */ -#define QUAD_ALIGN __attribute__((aligned(16))) - -/* Double hammer's unit of operation. */ -typedef struct -{ - unsigned int w0; - unsigned int w1; - unsigned int w2; - unsigned int w3; -} -BGLQuad QUAD_ALIGN; - -#define TREE_PACKET_SIZE 256 -#define TREE_DATA_SIZE 240 -/* Rounds up size to the multiple of *data* size of the packet, i.e. 240 bytes. - Used to calculate sizes of buffers, because we always read complete packets - from the network. */ -#define TREE_BUFFER_ROUNDUP(s) (((s) + TREE_DATA_SIZE - 1) / TREE_DATA_SIZE * \ - TREE_DATA_SIZE) - -/* 4-byte hardware header sent along with the 256-byte packet on the - tree/collective network. */ - -/* Version for point-to-point communication. */ -struct BGLTreePacketP2PHardHeader -{ - unsigned pclass:4; - unsigned p2p:1; - unsigned irq:1; - unsigned p2paddr:24; - unsigned chksum:2; -}; - -/* Version for collective communication. */ -struct BGLTreePacketGlobalHeader -{ - unsigned pclass:4; - unsigned p2p:1; - unsigned irq:1; - unsigned opcode:3; - unsigned opsize:7; - unsigned tag:14; - unsigned chksum:2; -}; - -typedef union -{ - struct BGLTreePacketP2PHardHeader p2p; - struct BGLTreePacketGlobalHeader global; -} -BGLTreePacketHardHeader; - -/* Values of "opcode" field in BGLTreePacketGlobalHeader. */ -typedef enum -{ - BGLTreeCombineOp_NONE = 0, - BGLTreeCombineOp_OR = 1, - BGLTreeCombineOp_AND = 2, - BGLTreeCombineOp_XOR = 3, - BGLTreeCombineOp_MAX = 5, - BGLTreeCombineOp_ADD = 6 -} BGLTreeCombineOp; - -/* Routing class ("pclass" field of hardware headers) encompassing a single - pset (an I/O node and its 8-64 compute nodes). */ -#define PACKET_CLASS_CIO 0 - - -/* Tree/collective network hardware status register, for each virtual channel. - Allows to find out if there is a message to be received, and also if - sending a message is safe (don't send if injection counters are >= 8). */ -typedef struct -{ - unsigned injpktcnt:4; - unsigned injquadcnt:4; - unsigned dummy0:4; - unsigned injhdrcnt:4; - unsigned recpktcnt:4; - unsigned recquadcnt:4; - unsigned dummy1:3; - unsigned intheader:1; - unsigned rechdrcnt:4; -} BGLTreeStatusRegister; - -/* Structure used as the software header for packets exchanged between the - compute node processes and CIOD. */ -struct CioHeader -{ - uint8_t _cpu; /* 0 or 1 */ - uint8_t _rankInCnodes; /* obtained using BGLPersonality_rankInPset() */ - uint8_t _reserved; - uint8_t _dataSize; - uint16_t _treeAddress; - uint16_t _messageCode; /* MFC_ or MTC_ constants, e.g. MTC_KILL */ - uint32_t _packetTotal; - uint32_t _packetIndex; -}; - -#define PAD(x) char _pad[240 - (x)] - -#define MTC_ACK 0xffff - -/* A sample packet sent from CIOD to a compute node. */ -#define MTC_KILL 0xfff0 -struct MTC_Kill -{ - struct S_MTC_Kill - { - unsigned signum; - } s; - PAD(sizeof(struct S_MTC_Kill)); -} QUAD_ALIGN; - -#define MFC_REQUESTEXIT 4 -struct MFC_RequestExit -{ - struct S_MFC_RequestExit - { - enum Reason {EXITED = 0, SIGNALED = 1} reason; - int status; - } s; - PAD(sizeof(struct S_MFC_RequestExit)); -} QUAD_ALIGN; - -#define MTC_REPLYEXIT MFC_REQUESTEXIT -/* No meaningful data is passed in this one. */ - -#define MFC_REQUESTRESET 56 -/* No meaningful data is passed in this one. */ - -#define MTC_REPLYRESET MFC_REQUESTRESET -/* No meaningful data is passed in this one. */ - -#define MFC_REQUESTWRITECORE 57 -struct MFC_RequestWriteCore -{ - struct S_MFC_RequestWriteCore - { - int size; - unsigned int offset; - } s; - PAD(sizeof(struct S_MFC_RequestWriteCore)); -} QUAD_ALIGN; - -#define MFC_REPLYWRITECORE MFC_REQUESTWRITECORE -struct MTC_ReplyWriteCore -{ - struct S_MTC_ReplyWriteCore - { - int rc; - int errnum; - } s; - PAD(sizeof(struct S_MTC_ReplyWriteCore)); -} QUAD_ALIGN; - -/* Header sent with stdout/stderr from ciod to the service node. */ -struct CiodOutputHeader -{ - int fd; - int cpu; - int node; - int rank; - int len; -}; - -/* Prototypes of low-level communication functions from libdevices. */ -void BGLTreePacketHardHeader_InitP2P(BGLTreePacketHardHeader* header, - unsigned classroute, int irq, int dst); - -void BGLTreePacketHardHeader_InitGlobal(BGLTreePacketHardHeader* header, - unsigned classroute, int irq, - BGLTreeCombineOp opfunc, - int operandsize, int tag); - -void BGLTreeFIFO_send(BGLTreePacketHardHeader* hdfifo, BGLQuad* datafifo, - BGLTreePacketHardHeader* hheader, BGLQuad* data); - -void BGLTreeFIFO_sendH(BGLTreePacketHardHeader* hdfifo, BGLQuad* datafifo, - BGLTreePacketHardHeader* hheader, BGLQuad* softheader, - BGLQuad* data); - -void BGLTreeFIFO_recv(BGLTreePacketHardHeader* hdfifo, BGLQuad* datafifo, - BGLTreePacketHardHeader* hheader, BGLQuad* data); - -void BGLTreeFIFO_recvH(BGLTreePacketHardHeader* hdfifo, BGLQuad* datafifo, - BGLTreePacketHardHeader* hheader, BGLQuad* softheader, - BGLQuad* data); - -void BGLTreeFIFO_recvF(BGLTreePacketHardHeader* hdfifo, BGLQuad* datafifo, - BGLTreePacketHardHeader* hheader, BGLQuad* softheader, - BGLQuad* (*func)(void* priv, BGLQuad* softheader), - void* priv); -#endif diff --git a/3rdparty/zoid/include/bglco.h b/3rdparty/zoid/include/bglco.h deleted file mode 100644 index f4d76125251cad410eac6e9a158c1dff01cafecb..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/include/bglco.h +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef __BGLCO_H__ -#define __BGLCO_H__ - -#define BGL_MEM_SRAM_SIZE (16*1024) -#define BGL_MEM_SRAM_PHYS 0xFFFFC000 - -#define BGL_MEM_LOCKBOX_SIZE 4096 -#define BGL_MEM_LOCKBOX_PHYS 0xD0000000 - -#define BARRIER_OFFSET(nr) (sizeof(struct BGL_Barrier) * nr) -#define MUTEX_OFFSET(nr) (sizeof(struct BGL_Mutex) * nr) - -#define L1_CACHE_LINE_SIZE 32 - - -void bglco_init(void); - - - -/* -------------------------------------------------------------------- */ -/* Lockbox functions */ -/* -------------------------------------------------------------------- */ - -/* A hardware lock used to build mutexes and barriers. This is a - * hardware-mapped data structure: do not add or remove fields. - */ -typedef struct BGL_Lock { - volatile unsigned _word0; /* mutex */ - volatile unsigned _word1; /* query */ - volatile unsigned _word2; /* barrier */ - volatile unsigned _word3; -} BGL_Lock; - - -/* A hardware lock used as a mutex. This is a hardware-mapped data - * structure: do not add or remove fields. Do not access these fields - * directly, use the BGL_Mutex_XXX() functions below. - */ -typedef struct BGL_Mutex { - BGL_Lock _lock[1]; -} BGL_Mutex; - - -/* A set of hardware locks used as a barrier. This is a - * hardware-mapped data structure: do not add or remove fields. Do not - * access these fields directly, use the BGL_Barrier_XXX() functions - * below. - */ -typedef struct BGL_Barrier { - BGL_Lock _lock[8]; -} BGL_Barrier; - - - - -/* Ensure that order-of-execution for memory and lockbox accesses is - * respected. - */ -static inline void BGL_Lock_Msync(void) { - asm volatile("sync" ::: "memory"); -} - - -/* -------------------------------------------------------------------- */ -/* Mutex operations */ -/* -------------------------------------------------------------------- */ - -/* Acquire a mutex, if it's not currently held by the other processor. - * Returned: 1=success, 0=failure Hardware side effect: atomically - * stores a 1 if success - */ -static inline unsigned BGL_Mutex_Try(BGL_Mutex *p) { - BGL_Lock_Msync(); - unsigned rc = p->_lock[0]._word0 == 0; - BGL_Lock_Msync(); - return rc; -} - -/* Acquire a mutex, spinning if necessary until the other processor - * releases it. Hardware side effect: atomically stores a 1 - */ -static inline void BGL_Mutex_Acquire(BGL_Mutex *p) { - BGL_Lock_Msync(); - while(p->_lock[0]._word0); - BGL_Lock_Msync(); -} - -/* Release a mutex. - * Hardware side effect: atomically stores a 0 - */ -static inline void BGL_Mutex_Release(BGL_Mutex *p) { - BGL_Lock_Msync(); - p->_lock[0]._word0 = 0; - BGL_Lock_Msync(); -} - -/* Is mutex currently locked? - * Returned: 1=yes 0=no - */ -static inline unsigned BGL_Mutex_Is_Locked(BGL_Mutex *p) { - return p->_lock[0]._word1; -} - - -/* -------------------------------------------------------------------- */ -/* Barrier operations */ -/* -------------------------------------------------------------------- */ - -/* Lower other processor's barrier, allowing him to pass through it. - */ -static inline void BGL_Barrier_Lower(BGL_Barrier *p) { - BGL_Lock_Msync(); - p->_lock[0]._word2 = 0; - BGL_Lock_Msync(); -} - -/* Is this processor currently blocked from passing through a barrier? - * Returned: 1=yes 0=no - */ -static inline unsigned BGL_Barrier_Is_Raised(BGL_Barrier *p) { - return p->_lock[0]._word2; -} - -/* Enter/leave a barrier. - */ -static inline void BGL_Barrier_Pass(BGL_Barrier *p) { - BGL_Lock_Msync(); - p->_lock[0]._word2 = 0; - while (p->_lock[0]._word2); - BGL_Lock_Msync(); -} - - -#endif /* __BGLCO_H__ */ - - diff --git a/3rdparty/zoid/include/zoid_api.h b/3rdparty/zoid/include/zoid_api.h deleted file mode 100644 index 92699b85fd059162ea5c1f23d190a9673c9f42b8..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/include/zoid_api.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Please note that this file is shared between ZOID and GLIBC! */ - -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#ifndef ZOID_API_H -#define ZOID_API_H - -#include <stdlib.h> - -struct dispatch_array -{ - void* function_ptr; - void* (*userbuf_allocate_cb)(int len); -}; - -#define __zoid_0c_in_buf(start, current) \ - ((start) - sizeof(int) + \ - ((current) - ((start) - sizeof(int)) + 239) / 240 * 240) - -#define __zoid_0c_buf_align(size) \ - (((size) + 239) / 240 * 240) - -#define __zoid_0c_out_buf_hdr1 sizeof(int) - -#define __zoid_0c_out_buf_hdr2 (2 * sizeof(int)) - -#define __zoid_0c_out_buf_ftr(_i, _cnt) ((_i) < (_cnt) - 1 ? sizeof(int) : 0) - -/* CNK-side calls. */ -int __zoid_submit_command(char* buffer, int cmd_len, int max_res_len, - int userbuf, const void* arr2d, const void* arr, - int arr_cnt, int arr_el_size, void* out_arr); -int __zoid_error(void); -int __zoid_excessive_size(void); - -/* Daemon-side calls. */ -void __zoid_register_functions(int header_id, - struct dispatch_array* dispatch_array, - int array_size, void (*init_func)(int), - void (*fini_func)(void)); -int __zoid_calling_process_id(void); -void __zoid_register_userbuf(void* userbuf, - void (*callback)(void* userbuf, void* priv), - void* priv); -int __zoid_send_output(int pid, int fd, const char* buffer, int len); - -/* Both CNK- and daemon-side calls. */ -void* __zoid_allocate_buffer(size_t size); -void __zoid_release_buffer(void* buffer); - -void *__zoid_alloc(size_t size); -void __zoid_free(void* addr); -#endif diff --git a/3rdparty/zoid/include/zoid_mapping.h b/3rdparty/zoid/include/zoid_mapping.h deleted file mode 100644 index 35161f7caf874a9dc87b9b315699c2f13ad3d7e0..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/include/zoid_mapping.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Please note that this file is shared between ZOID and GLIBC! */ - -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#ifndef ZOID_MAPPING_H -#define ZOID_MAPPING_H - -#ifdef __bglpersonality_h__ -int __zoid_mapping_init(const char* mapping, int arg_proc_count, - int arg_vn_mode, const BGLPersonality* personality); -#endif -int __zoid_mapping_to_coord(unsigned mpi_rank, unsigned *x, unsigned *y, - unsigned *z, unsigned *t); - -int __zoid_mapping_to_rank(unsigned x, unsigned y, unsigned z, unsigned t, - unsigned *mpi_rank); -#endif diff --git a/3rdparty/zoid/include/zoid_protocol.h b/3rdparty/zoid/include/zoid_protocol.h deleted file mode 100644 index 2b2df26d18870bde912b01dd9cfa24c1a7abe160..0000000000000000000000000000000000000000 --- a/3rdparty/zoid/include/zoid_protocol.h +++ /dev/null @@ -1,157 +0,0 @@ -/* Please note that this file is shared between ZOID and GLIBC! */ - -/****************************************************************************/ -/* ZEPTOOS:zepto-info */ -/* This file is part of ZeptoOS: The Small Linux for Big Computers. - * See www.mcs.anl.gov/zeptoos for more information. - */ -/* ZEPTOOS:zepto-info */ -/* */ -/* ZEPTOOS:zepto-fillin */ -/* $Id$ - * ZeptoOS_Version: 1.2 - * ZeptoOS_Heredity: FOSS_ORIG - * ZeptoOS_License: GPL - */ -/* ZEPTOOS:zepto-fillin */ -/* */ -/* ZEPTOOS:zepto-gpl */ -/* Copyright: Argonne National Laboratory, Department of Energy, - * and UChicago Argonne, LLC. 2004, 2005, 2006, 2007 - * ZeptoOS License: GPL - * - * This software is free. See the file ZeptoOS/misc/license.GPL - * for complete details on your rights to copy, modify, and use this - * software. - */ -/* ZEPTOOS:zepto-gpl */ -/****************************************************************************/ - -#ifndef ZOID_PROTOCOL_H -#define ZOID_PROTOCOL_H - -/* - Description of the ZOID protocol: - - The protocol over the tree network is based on packets of 256 bytes. - Each packet contains a 16-byte soft header of type "ZoidSoftHeader", leaving - 240 bytes for effective data. - - There are differences between commands and replies, and zero-copy support - introduces additional complications. - - Commands have a simpler structure than replies. Each command begins with - a mixed control-data packet. That packet starts with a 32-bit command id, - filled in by the automatically generated command stub. The ID is followed - by command-specific arguments (data). The arguments are unaligned. - - If there is a zero-copy input, it is passed in separate data packets, after - the main command-data packets. There is no control data for zero-copy input, - because all necessary information is provided in the preceding command-data. - Each chunk of zero-copy data (i.e. each line of 2-D array) begins at a new - packet, to ease the copying from user buffers. - The soft header "msg_length" field includes the input zero-copy data. - - Plain replies (those with no zero-copy results) are even simpler than - commands. They contain no command ID, only the data filled in by the - automatically generated command stub. - - Replies with zero-copy results have the most complex structure. They start - with a 32-bit integer specifying offset where the zero-copy data starts. - The offset is followed by standard stub-generated data. Zero-copy data - contains all control information necessary to fill in the provided target - buffer. The offset points at a 32-bit integer specifying the size along - the major dimension. If he array is 1-D, that size will be "-1". The size - is normally attached to the last reply-data packet, since it does not need - to be aligned. The major dimension size is followed by the size of the - first minor dimension (or simply the array size for 1-D arrays). That size - is always in bytes. User data lines are passed aligned to full packet - boundary, like with zero-copy input. However, each line, with the exception - of the last one, is followed by a 32-bit integer specifying the byte size of - the next line. For replies, the soft header "msg_length" field does *not* - include the zero-copy data; it only includes the two 32-bit integers - following the reply-data. - - Why are zero-copy replies so much different from zero-copy commands? - Because for commands, a single buffer can be allocated on the server for the - whole command, including the zero-copy data. For replies, on the other hand, - multiple buffers must be used on the client: one provided by the ZOID - infrastructure and one for user data. - - There is an optional support for rendezvous (for commands). If a command - packet contains the NEED_ACK_PACKET flag, the server must reply with - ACK_PACKET before the rest of the command is sent. This improves fairness, - as nodes closer to the I/O node in the collective topology lose that - advantage then. If message acknowledgements are enabled, the NEED_ACK flag - is normally set for the first packet of a long (larger than 8 packets) - command, and the server sends out the acknowledgements one at a time, so - that the nodes don't need to share the bandwidth. - - The "userbuf" flag introduces some comparatively minor changes to the - protocol. There are *no* changes for output userbuf. For input userbuf, - the order of packing of function arguments changes so that the argument - specifying the array length is always the first one, right after the - command id. Putting it at this predictable place makes it easy to obtain - the size of the buffer to be allocated by the callback. In addition to that, - the zero-copy part is not included in the softheader's msg_length field. - Also, input userbuf commands *always* require message acknowledgements. - Instead of NEED_ACK_PACKET, a separate INPUT_USERBUF_PACKET flag is used, - and it is set not on the first packet of a message, but on the last packet - of the non-userbuf portion of the message. - */ - -/* Structure of the initial messages sent from compute node processes to zoid - on VC1. These do not have a soft header. */ -struct InitMsg -{ - int pset_cpu_rank; - int p2p_addr; - int pid; - int pset_rank; - int cpu; - int vn_mode; - int total_proc; - char mapping[5]; -}; - -/* Structure sent back from ION to CNs. Again, no soft header. */ -struct InitMsgReply -{ - int max_buffer_size; - int ack_threshold; -}; - -/* The tree/collective network soft header. To preserve optimal alignment, - it needs to be 16 bytes. We might add more fields in the future, if we - add async/collective support. */ -struct ZoidSoftHeader -{ - uint32_t zoid_id; - uint16_t pset_cpu_rank; - uint16_t flags; - uint32_t msg_length; - uint32_t errnum; /* Used in replies from zoid ION daemon only. */ -} QUAD_ALIGN; - -/* Bits in "flags" above. */ -#define ZOID_SOFTHEADER_FIRST_PACKET (1 << 0) -#define ZOID_SOFTHEADER_LAST_PACKET (1 << 1) -/* Indicates a special one-packet emergency message that can occur even - in the middle of another message. */ -#define ZOID_SOFTHEADER_ASSERT_PACKET (1 << 2) -/* Indicates that the client will only send the rest of the message after - the server sends it an ACK_PACKET. */ -#define ZOID_SOFTHEADER_NEED_ACK_PACKET (1 << 3) -#define ZOID_SOFTHEADER_ACK_PACKET (1 << 4) -/* Sent in the middle of input userbuf message, indicates the last packet - of the non-userbuf part of the message. Implies NEED_ACK_PACKET. */ -#define ZOID_SOFTHEADER_INPUT_USERBUF_PACKET (1 << 5) - -/* Special-purpose command to inform the zoid daemon that a process is - about to terminate. */ -#define ZOID_TERMINATING_ID ~0 - -/* Unique ZOID identifier. */ -#define ZOID_ID ('Z' << 24 | 'O' << 16 | 'I' << 8 | 'D') - -#endif