Initial commit

This commit is contained in:
2025-03-31 13:25:04 +02:00
commit f6ec631d33
7 changed files with 1045 additions and 0 deletions

627
src/superflow.c Normal file
View File

@@ -0,0 +1,627 @@
// SPDX-FileCopyrightText: 2025 Ämin Baumeler <amin@indyfac.ch>
//
// SPDX-License-Identifier: GPL-3.0-or-later
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_VERTICES 9 // Maximal number of vetices in the causal structure
#define MAX_ORDER 65535 // Maximal number of vetices (< 2^16) in the superflow.
#define MAX_SIZE 100000 // Maximal number of arcs in the superflow
#define MODE_OUTPUT 0x00 // Operation mode that prints the superflow
#define MODE_DECIDE \
0x01 // Operation mode that decide if all leafs of the supreflow are trivial
/**
* Search superflow for the same causal structure. If found, return its number.
* Otherwise, return zeor. u_int16_t
**/
u_int16_t
insuperflow(const u_int16_t sf_order, const u_int8_t sf_cs_order[MAX_ORDER],
const u_int8_t sf_cs_vertices[MAX_ORDER][MAX_VERTICES],
const u_int8_t sf_cs[MAX_ORDER][MAX_VERTICES * MAX_VERTICES],
const u_int8_t order2, const u_int8_t parents2labels[MAX_VERTICES],
const u_int8_t parents2variation[MAX_VERTICES * MAX_VERTICES]) {
for (u_int16_t i = 0; i < sf_order; i++) {
// Skip if the number of vertices differ
if (sf_cs_order[i] != order2)
continue;
// Skip if the vertex labels differ
int samelabels = 1;
for (int j = 0; j < order2; j++) {
if (sf_cs_vertices[i][j] != parents2labels[j]) {
samelabels = 0;
break;
}
}
if (!samelabels)
continue;
// Skip if the degress differ
int samedegrees = 1;
for (int j = 0; j < order2; j++) {
if (sf_cs[i][j * order2] != parents2variation[j * order2]) {
samedegrees = 0;
break;
}
}
if (!samedegrees)
continue;
// Skip if the arcs differ
int samearcs = 1;
for (int j = 0; j < order2; j++) {
const int degree = parents2variation[j * order2];
for (int k = 0; k < degree; k++) {
int parentfound = 0;
const u_int8_t parent = sf_cs[i][j * order2 + 1 + k];
for (int l = 0; l < degree; l++) {
const u_int8_t parent2 = parents2variation[j * order2 + 1 + l];
if (parent == parent2) {
parentfound = 1;
break;
}
}
if (!parentfound) {
samearcs = 0;
break;
}
}
if (!samearcs)
break;
}
if (!samearcs)
continue;
// Causal structure found
return i;
}
return 0;
}
/**
* Find set of arcs that may be influenced by removing a source. In other words,
* collect all arcs that point to any child of `source_vertex`.
*
* \param order: Order of causal structure
* \param parents: Graph data
* \param source_vertex: Source vertex we remove
* \param arcs: Pointer to array with all the arcs
*
* \return Cardinality of the set of manipulatable arcs.
**/
int arc_set(const u_int8_t order, const u_int8_t *parents,
const u_int8_t source_vertex, u_int16_t *arcs) {
int cnt = 0;
// First, we find vertices that have `source_vertex` as a parent, i.e., all
// children of `source_vertex`.
for (u_int8_t child = 0; child < order; child++) {
int relevant = 0;
if (child == source_vertex)
continue;
for (int i = 0; i < parents[child * order]; i++) {
const u_int8_t v = parents[child * order + 1 + i];
if (v == source_vertex) {
relevant = 1;
break;
}
}
if (!relevant)
continue;
// The vertex `child` is a child of `source_vertex`
// Second, collect all arcs that point to this child
for (int i = 0; i < parents[child * order]; i++) {
const u_int8_t v = parents[child * order + 1 + i];
if (v == source_vertex)
continue;
arcs[cnt++] = (child << 8 | v);
}
}
return cnt;
}
/**
* Find source vertices.
**/
int source_vertices(const u_int8_t order,
const u_int8_t parents[MAX_VERTICES * MAX_VERTICES],
u_int8_t sources[MAX_VERTICES]) {
int cnt = 0;
for (int i = 0; i < order; i++) {
if (parents[i * order])
continue;
sources[cnt++] = i;
}
return cnt;
}
/**
* Find nontrivial leafs with source.
*
* \param sf_order: Number of causal structures in the superflow.
* \param sf_cs_order: Pointer to array that contains the order of each causal
* structure in the superflow.
* \param sf_cs: Pointer to the digraphs (causal structures) in the
* superflow.
* \param sf_cs_isleaf: Pointer to characteristic array that stores a 1 if the
* causdal structures is a leaf, and a 0 otherwise.
* \param sf_ntlws: Pointer to array with indeces of the digraphs that are
* ntlws. This is the only datastructure that is modified.
* \param leafwosource: Pointer to integer that will be set to 1 if we find a
* source-free leaf.
* \return Number of ntlws.
**/
int nontrivial_leafs_with_source(
const u_int16_t sf_order, const u_int8_t *sf_cs_order,
const u_int8_t sf_cs[MAX_ORDER][MAX_VERTICES * MAX_VERTICES],
const u_int8_t *sf_cs_isleaf, u_int16_t *sf_ntlws, u_int8_t *leafwosource) {
int cnt = 0;
for (int i = 0; i < sf_order; i++) {
// Skip if it is not a leaf
if (!sf_cs_isleaf[i])
continue;
// Skip if it is trivial
if (sf_cs_order[i] == 1)
continue;
// Check source
const u_int8_t *parents = sf_cs[i];
int j;
for (j = 0; j < sf_cs_order[i]; j++) {
if (parents[j * sf_cs_order[i]] == 0) {
// We found a source
sf_ntlws[cnt] = i;
cnt++;
break;
}
}
if (j == sf_cs_order[i] && !(*leafwosource))
// There is a sourcefree leaf.
*leafwosource = 1;
}
return cnt;
}
/**
* Depth-first search for SOC test
**/
int dfs_isSOC(const u_int8_t order, const u_int8_t *parents,
const u_int8_t *useless, u_int8_t *path, const u_int8_t path_len,
u_int8_t *visited, u_int8_t *reached, u_int8_t *isdag) {
const u_int8_t vertex = path[path_len - 1]; // Current vertex
for (int i = 0; i < parents[vertex * order]; i++) {
const u_int8_t nextv = parents[vertex * order + 1 + i];
if (useless[nextv])
continue;
path[path_len] = nextv;
if (!visited[nextv]) {
visited[nextv] = 1;
reached[nextv] = 1;
if (!dfs_isSOC(order, parents, useless, path, path_len + 1, visited,
reached, isdag))
return 0;
visited[nextv] = 0;
} else {
// We found a cycle: `parent <- .... <- parent`
// Find siblings
if (*isdag)
*isdag = 0;
u_int8_t marked[order];
for (int j = 0; j < order; j++)
marked[j] = 0;
int siblingsfound = 0;
for (int j = path_len - 1; j == path_len - 1 || path[j + 1] != nextv;
j--) {
const u_int8_t u = path[j];
for (int k = 0; k < parents[u * order]; k++) {
const u_int8_t w = parents[u * order + 1 + k];
if (marked[w]) {
siblingsfound = 1;
break;
}
marked[w] = 1;
}
if (siblingsfound)
break;
}
if (!siblingsfound)
return 0;
}
}
return 1;
}
/**
* Decide whether digraph is a siblings-on-cycles graph.
*
* There are faster algorithms than what is implemented here. See, for instance,
* the networkx package for python, which contains an implementation of an
* algorithm that finds each chordless cycle exactly once. Their implementation
* is based on the paper **On enumerating chordless circuits in directed** by
* Dias, Castonguay, Longo, and Jradi. The implementation provided here is
* trivial. We traverse the digraph in depth-first fashion. The `path` array
* stores the path walked, and `path_len`, the number of steps done. The
* `visited` array marks every vertex on the path. In the walk, whenever we hit
* a vertex on the path, i.e., a marked vertex in `visited`, then we have found
* a directed cycle. Whenever we find a directed cycle, we check whether it has
* a pair of siblings. The `reached` array marks all vertices seen in the
* search. Once we complete a depth-first search from some starting vertex `v`,
* we mark all vertices reached as useless. Herefore, we use the array
* `useless`. While searching, whenever we hit a useless vertex, we can stop the
* search, and take a different path. For a little performance benefit, we
* initially mark all source vertices as useless. Note that the graph is given
* by the in-adjacencies, i.e., by the parents, of each vertex. So, the paths we
* walk are _backwards._
*
* \param order: Order of digraph
* \param parents: Pointer to array of size order x order.
* \param dag: Pointer to integer that will be set to 1 if the graph is a DAG,
* and to 0 if it conatins cycles.
* \return 1 if SOC, and 0 otherwise.
**/
int isSOC(const u_int8_t order, const u_int8_t *parents, u_int8_t *isdag) {
// Mark sinks as useless for cycle detection, and hence for SOC decision.
u_int8_t useless[order];
for (int i = 0; i < order; i++) {
if (parents[i * order] == 0)
useless[i] = 1;
else
useless[i] = 0;
}
u_int8_t reached[order];
for (int i = 0; i < order; i++)
reached[i] = 0;
u_int8_t path[order + 1]; // Store backpath
u_int8_t visited[order]; // Mark vertices on the path
*isdag = 1;
for (int i = 0; i < order; i++) {
if (useless[i])
continue;
// Preparing to walk
for (int j = 0; j < order; j++)
visited[j] = 0;
path[0] = i;
visited[i] = 1;
reached[i] = 1;
if (!dfs_isSOC(order, parents, useless, path, 1, visited, reached, isdag))
return 0;
// Copy reached vertices over to `useless` array. No reason to restart
// there, and whenever we will hit a previously reached vertex, we can also
// stop the search.
for (int i = 0; i < order; i++) {
if (reached[i])
useless[i] = 1;
}
}
return 1;
}
/**
* Main entry point of program.
*/
int main(int argc, char *argv[]) {
// Parse command-line arguments
u_int8_t mode = 0xFF;
if (argc == 2 && !strcmp(argv[1], "-o"))
mode = MODE_OUTPUT;
else if (argc == 2 && !strcmp(argv[1], "-d"))
mode = MODE_DECIDE;
if (mode == 0xFF) {
fprintf(
stderr,
"Usage: %s [-o | -d]\n"
"Reads adjacency vector of a simple digraph from stdin, and\n"
" -o [o]utputs the superflow to stdout,\n"
" -d [d]ecides if the superflow has nontrivial leafs.\n"
"\n"
"All adjacency vectors are encoded as follows. An entry 1 in\n"
"coordinate (u,v) denotes the presence of the arc u<-v. An entry 0 at\n"
"said coordinate, denotes the absence of the arc u<-v. The\n"
"coordinates are ordered lexicographically, with the entries (u,u)\n"
"missing.\n"
"\n"
"When using the -o option, the output as follows:\n"
"<adjacency vector of superflow digraph>\n"
"<vertex lables of causal structure 0>:<adjacency vector 0>\n"
"<vertex labels of causal structure 1>:<adjacency vector 1>\n"
"...\n"
"<vertex labels of causal structure k>:<adjacency vector k>\n"
"\n"
"When using the -d option, the output is informative. Additionally,\n"
"the return code is 0 if all leafs are trivial, and 1 otherwise.\n"
"\n"
"LIMITATION: The input graph has at most %d vertices.\n",
argv[0], MAX_VERTICES);
return -1;
}
// EO Parse command-line arguments
// Initialize superflow
u_int16_t sf_order; // Number of vertices in superflow. If we exceed
// MAX_ORDER, then we abort.
u_int32_t sf_size; // Number of arcs in superflow.
u_int32_t sf[MAX_SIZE]; // Storage of superflow. Each arc is a pair of
// unsigned 16bit integers (u,v) representeing an arc
// u -> v. If we exceed `MAX_SIZE`, then we abort.
u_int8_t sf_cs_order[MAX_ORDER]; // Stores the order of each [c]ausal
// [s]tructure in the superflow.
u_int8_t sf_cs_vertices[MAX_ORDER]
[MAX_VERTICES]; // List of vertex labls present in the
// respective causal structure.
u_int8_t
sf_cs[MAX_ORDER]
[MAX_VERTICES *
MAX_VERTICES]; // Each MAX_VERTICES * MAX_VERTICES block contains a
// causal structure in the superflow. The order of
// the causal structure is stored in `sf_cs_order`.
// The data is stored as follows:
// [ number of parents of vertex 0 | parent 0 | parent 1 | ... |
// number of parents of vertex 1 | parent 0 | parent 1 | ... |
// ...
// number of parents of vertex k-1 | parent 0 | parent 1 | ... ]
// where each "line" is of length 1+order.
u_int8_t sf_cs_isleaf[MAX_ORDER]; // Has a 1 if the causal structure is a
// leaf, and a zero otherwise.
for (int i = 0; i < MAX_ORDER; i++)
sf_cs_isleaf[i] = 0;
u_int16_t sf_num_ntlws; // Number of [n]on[t]rivial [l]eafs [w]ith [s]ource
u_int16_t sf_ntlws[MAX_ORDER]; // Stores the nontrivial-leafs-with-source's
// indeces of the superflow
// neighbors: [ number of neighbors of vertex 0 | neighbor | neighbor |
// ...
// | number of neighbors of vertex 1 | neighbor | neighbor | ...
// ]
// EO Initialize superflow
// Parse stdin
char adjvect[MAX_VERTICES * (MAX_VERTICES - 1)];
int tot = -1;
if ((tot = read(STDIN_FILENO, adjvect, MAX_VERTICES * (MAX_VERTICES - 1))) <
0) {
fprintf(stderr, "ERROR: Failed to read adjacency vector from stdin.\n");
return -1;
}
int n = -1; // Since the order is restrited to a low number, we can derive it
// without computing the squareroot that requires extra libraries.
for (int i = 1; i <= MAX_VERTICES; i++) {
if (i * (i - 1) == tot) {
n = i;
break;
}
}
if (n == -1) {
fprintf(stderr, "ERROR: Input is malformed.\n");
return -1;
}
for (int i = 0; i < MAX_VERTICES; i++)
sf_cs[0][i * n] = 0;
int u = 0;
int v = 1;
for (int i = 0; i < n * (n - 1); i++) {
// u <- v
// ASCII to integers: 48 = "0", 49 = "1"
if (adjvect[i] - 48) {
const int xx = ++(sf_cs[0][u * n]);
sf_cs[0][u * n + xx] = v;
}
if (++v == u)
v++;
if (v == n) {
u++;
v = 0;
}
}
// EO Parse stdin
// Update superflow
sf_cs_order[0] = n;
for (int i = 0; i < n; i++)
sf_cs_vertices[0][i] = i;
sf_order = 1;
sf_size = 0;
sf_cs_isleaf[0] = 1;
sf_num_ntlws = 0;
sf_ntlws[0] = 0;
u_int8_t sources[MAX_VERTICES]; // Array of source vertices in a causal
// structure to iterate over.
u_int16_t arcs[MAX_VERTICES *
MAX_VERTICES]; // Array of arcs to iterate over all subsets
u_int8_t order2; // Order of reduced causal structure
u_int8_t parents2labels[MAX_VERTICES]; // Labels of the vertices in the
// reduced causal structure
u_int8_t parents2[MAX_VERTICES * MAX_VERTICES]; // Graph data of reduced
// causal structure (clean)
u_int8_t parents2variation[MAX_VERTICES *
MAX_VERTICES]; // Graph data of reduced causal
// structure with arcs added
u_int8_t leafwosource = 0;
u_int8_t isdag;
while ((sf_num_ntlws = nontrivial_leafs_with_source(
sf_order, sf_cs_order, sf_cs, sf_cs_isleaf, sf_ntlws,
&leafwosource))) {
if (mode == MODE_DECIDE && leafwosource) {
printf("Found a source-free leaf.\n");
return 1;
}
for (int leafi = 0; leafi < sf_num_ntlws; leafi++) {
const u_int16_t leaf = sf_ntlws[leafi];
const u_int8_t order = sf_cs_order[leaf];
const u_int8_t *parents = sf_cs[leaf];
const int source_cnt = source_vertices(order, parents, sources);
for (int j = 0; j < source_cnt; j++) {
const u_int8_t source_vertex = sources[j];
// Remove `source_vertex` from graph
// Previous vertices: 0 | 1 | 2 | 3 | source=4 | 5 | ...
// New vertices: 0 | 1 | 2 | 3 | 4:=5 | ...
order2 = order - 1;
for (int i = 0; i < order2; i++)
parents2[i * order2] = 0;
// Prepare base causal structure without `source_vertex` and without any
// arc pointing towards any child of the `source_vertex`. These arcs are
// in `arcs`.
// Compute the set of arcs that may be influenced by the source
const int arc_cnt = arc_set(order, parents, source_vertex, arcs);
for (u_int8_t v = 0; v < order; v++) {
if (v == source_vertex)
continue;
for (int i = 0; i < parents[v * order]; i++) {
const u_int8_t w = parents[v * order + i + 1];
if (w == source_vertex)
continue;
// Check if it is in `arcs` or not
int in_arcs = 0;
for (int k = 0; k < arc_cnt; k++) {
u_int8_t a = (u_int8_t)(arcs[k] >> 8);
u_int8_t b = (u_int8_t)arcs[k];
if (a == v && b == w) {
in_arcs = 1;
break;
}
}
if (in_arcs)
continue;
//
const u_int8_t v_newlabel = v < source_vertex ? v : v - 1;
const u_int8_t w_newlabel = w < source_vertex ? w : w - 1;
parents2[v_newlabel * order2 + ++parents2[v_newlabel * order2]] =
w_newlabel;
}
}
// Fill in vertex names
for (int i = 0; i < order2; i++)
parents2labels[i] =
sf_cs_vertices[leaf][i >= source_vertex ? i + 1 : i];
// Iterate over all subsets
for (int mask = 0; mask < (1 << arc_cnt); mask++) {
// Copy base causal structure
memcpy(parents2variation, parents2, order2 * order2);
// Add arcs
for (int i = 0; i < arc_cnt; i++) {
// Skip arc if it is not selected by our mask
if (!(mask & (1 << i)))
continue;
// Parse
u_int8_t a = (u_int8_t)(arcs[i] >> 8);
u_int8_t b = (u_int8_t)arcs[i];
// Adjust label
if (a >= source_vertex)
a--;
if (b >= source_vertex)
b--;
// Add
parents2variation[a * order2 + ++parents2variation[a * order2]] = b;
}
// Skip if this is not a SOC
if (!isSOC(order2, parents2variation, &isdag))
continue;
// The node `leaf` is not a leaf anymore, but has the current graph as
// its child.
sf_cs_isleaf[leaf] = 0;
// If the graph is a DAG, then any downward-path leads to trivial
// leafs. So, in the mode `MODE_DECIDE`, we can safely ignore
// everything that follows from this graph.
if (mode == MODE_DECIDE && isdag)
continue;
// If this causal structure is already in the superflow, connect it.
// Otherwise, add it, connect it, and update leaf information.
const u_int16_t idx =
insuperflow(sf_order, sf_cs_order, sf_cs_vertices, sf_cs, order2,
parents2labels, parents2variation);
if (idx) {
sf[sf_size] = (leaf << 16) | idx;
sf_size++;
if (sf_size > MAX_SIZE) {
fprintf(stderr,
"ERROR: Our superflow exceeded the size limit %d, on "
"input %s\n",
MAX_SIZE, adjvect);
return -1;
}
} else {
sf_cs_order[sf_order] = order2;
memcpy(sf_cs_vertices[sf_order], parents2labels, order2);
memcpy(sf_cs[sf_order], parents2variation, order2 * order2);
sf_cs_isleaf[sf_order] = 1;
sf[sf_size] = (leaf << 16) | sf_order;
sf_size++;
if (sf_size > MAX_SIZE) {
fprintf(stderr,
"ERROR: Our superflow exceeded the size limit %d.\n",
MAX_SIZE);
return -1;
}
sf_order++;
if (sf_order > MAX_ORDER) {
fprintf(stderr,
"ERROR: Our superflow exceeded the order limit %d.\n",
MAX_ORDER);
return -1;
}
}
}
}
}
}
if (mode == MODE_DECIDE) {
if (leafwosource) {
printf("Found a source-free leaf.\n");
return 1;
}
printf("No source-free leaf found.\n");
return 0;
}
// Print results
// u_int8_t sf_adj[sf_order][sf_order];
u_int8_t(*sf_adj)[sf_order] = malloc(sizeof(u_int8_t[sf_order][sf_order]));
if (!sf_adj) {
fprintf(
stderr,
"ERROR: Failed to allocate sufficient memory adjacency matrix: %lu\n",
sizeof(u_int8_t[sf_order][sf_order]));
return -1;
}
for (u_int16_t i = 0; i < sf_order; i++)
for (u_int16_t j = 0; j < sf_order; j++)
sf_adj[i][j] = 0;
for (int i = 0; i < sf_size; i++) {
const u_int16_t a = (u_int16_t)sf[i];
const u_int16_t b = (u_int16_t)(sf[i] >> 16);
sf_adj[a][b] = 1;
}
// First, print superflow digraph
for (u_int16_t a = 0; a < sf_order; a++) {
for (u_int16_t b = 0; b < sf_order; b++) {
if (a == b)
continue;
printf("%d", sf_adj[a][b]);
}
}
free(sf_adj);
printf("\n");
// Next, print causal structures
u_int8_t adjmat[MAX_VERTICES][MAX_VERTICES];
for (int i = 0; i < sf_order; i++) {
const u_int8_t order = sf_cs_order[i];
const u_int8_t *cs = sf_cs[i];
// Populate adjacency matrix
for (u_int8_t v = 0; v < order; v++) {
for (u_int8_t w = 0; w < order; w++)
adjmat[v][w] = 0;
for (int j = 0; j < cs[v * order]; j++) {
const u_int8_t w = cs[v * order + 1 + j];
adjmat[v][w] = 1;
}
}
// Print vertex labels
for (int j = 0; j < order; j++)
printf("%d", sf_cs_vertices[i][j]);
printf(":");
// Print graph
for (int a = 0; a < order; a++) {
for (int b = 0; b < order; b++) {
if (a == b)
continue;
printf("%d", adjmat[a][b]);
}
}
printf("\n");
}
return 0;
}