// SPDX-FileCopyrightText: 2023 Ämin Baumeler and // Eleftherios-Ermis Tselentis // // SPDX-License-Identifier: GPL-3.0-or-later #include #include #include #include /*** * Print the graph's adjacency matrix ***/ void dumpgraph(int n, const int *children, const int *childrenlen) { int adj[n*n]; for (int i = 0; i < n*n; i++) adj[i] = 0; for (int a = 0; a < n; a++) { for (int bidx = 0; bidx < childrenlen[a]; bidx++) { const int b = children[a*n+bidx]; adj[a*n+b] = 1; } } // Print printf("{"); for (int a = 0; a < n; a++) { if (a) printf(","); printf("{"); printf("%d", adj[a*n+0]); for (int b = 1; b < n; b++) printf(",%d", adj[a*n+b]); printf("}"); } printf("}\n"); } /*** * Print the graph as graphviz command ***/ void dumpgv(int n, int graphnr, const int *children, const int *childrenlen) { printf("strict digraph G%d {node[shape=point];", graphnr); for (int v = 0; v < n; v++) { for (int i = 0; i < childrenlen[v]; i++) { const int w = children[v*n+i]; printf("G%dN%d -> G%dN%d", graphnr, v, graphnr, w); if (childrenlen[v] >= 2) printf(" [color=\"red\"]"); printf(";"); } } printf("}\n"); return; } /*** * Block size of our datastructure. ***/ int blocksize(int n) { int res = (n+1)*n; for (int i = 0; i < n; i++) res *= n-i; return res; } /*** * Recrusive bi-directional graph traversal, used to check connectivity. ***/ void traverse(int vertex, int *visited, int n, const int *parents, \ const int *parentslen, const int *children, const int *childrenlen) { visited[vertex] = 1; for (int i = 0; i < parentslen[vertex]; i++) { int u = parents[n*vertex + i]; if (visited[u] == 0) traverse(u, visited, n, parents, parentslen, children, childrenlen); } for (int i = 0; i < childrenlen[vertex]; i++) { int u = children[n*vertex + i]; if (visited[u] == 0) traverse(u, visited, n, parents, parentslen, children, childrenlen); } } /*** * Return 1 if the graph is connected, and 0 otherwise. ***/ int isconnected(int n, const int *parents, const int *parentslen, \ const int *children, const int *childrenlen) { // Early check: Is there a node with 0 total degree? for (int i = 0; i < n; i++) if (parentslen[i]+childrenlen[i] == 0) return 0; // Check connectivity int visited[n]; for (int i = 0; i < n; i++) visited[i] = 0; // Start traversal traverse(0, visited, n, parents, parentslen, children, childrenlen); for (int i = 0; i < n; i++) if (!visited[i]) return 0; return 1; } /*** * Interpret graph number `graph' as a simple digraph, and populate the arrays `parents', `parentslen', `children', `childrenlen' * The number `graph' is understood as the adjacency matrix with the diagonal removed (no self loops). ***/ void graphnrtolists(int n, unsigned long long int graph, int *parents, \ int *parentslen, int *children, int *childrenlen) { for (int i = 0; i < n; i++) { parentslen[i] = 0; childrenlen[i] = 0; } int row = 0; int col = 1; // We cannot start at (0,0); it does not exist unsigned long long g = graph; for (int i = 0; i < n*(n-1); i++) { int val = g%2; g = g/2; if (val) { children[row*n+childrenlen[row]] = col; childrenlen[row]++; parents[col*n+parentslen[col]] = row; parentslen[col]++; } col++; if (col == row) col++; if (col == n) { col = 0; row++; } } } /*** * Find the tail in `path' of length `pathlen' that includes a cycle, and save the canonical representation. * The canonical representation is the listing of the nodes of the cycle starting from the smallest one. ***/ void canonical_cycle(int n, int *cycle, int *cyclelen, const int *path, \ int pathlen) { int startidx; int pathhascycle = 0; const int last = path[pathlen-1]; // This is the last node for (int k = 0; k < pathlen-1; k++) { if (path[k] == last) { // We have found the first occurance of node `last' on the path // The cycle is the following part of path: path[startidx], // path[startidx+1], ..., path[pathlen-2], and therefore has // length pathlen-startidx-1 startidx = k; (*cyclelen) = pathlen-startidx-1; pathhascycle = 1; break; } } if (!pathhascycle) { // Sanity check. This should NEVER happen! fprintf(stderr, "ERROR: You provided a path to `canonical_cycle' that does NOT contain a cycle as its tail! I'm returning now.\n"); return; } // Generate canonical representation // First, find smallest node on cycle int smallestidx = -1; int smallest = n; for (int k = startidx; k < pathlen-1; k++) { if (smallest > path[k]) { smallest = path[k]; smallestidx = k; } } // Now, copy cycle to `cycle' starting from the smallest // This is the index on where to write on the array `cycle' int j = 0; for (int i = smallestidx;; i++) { // Stop filling in, if we reached the end if (j == *cyclelen) break; // If we reach the end of the path, jump to the start if (i == pathlen-1) i = startidx; // Write at position `j' the element from the cycle on the path at // position `i`' cycle[j] = path[i]; // Increase index to write on `cycle' j++; } } /*** * Return 1 if the cycle is not in `cycles', otherwise 0. ***/ int cycleisnew(int n, const int *c, int clen, const int *cycles, \ const int *cyclescnt) { const int cnt = cyclescnt[clen]; const int bs = blocksize(n); // Iterate over all cycles of the same length for (int i = 0; i < cnt; i++) { int j; const int startidx = clen*bs + i*clen; // We start here for (j = 0; j < clen; j++) // Go position by position if (cycles[startidx+j] != c[j]) break; // They differ at position j if (j == clen) // We didn't break return 0; } return 1; } /*** * Save the given cycle `c' of length `clen' in `cycles'. ***/ void savecycle(int n, const int *c, int clen, int *cycles, int *cyclescnt) { const int m = cyclescnt[clen]; // There are `m' `clen'-cycles const int bs = blocksize(n); const int startidx = clen*bs + m*clen; // We start writing here for (int i = 0; i < clen; i++) cycles[startidx+i] = c[i]; cyclescnt[clen]++; // Increase the counter that holds // the number of `clen'-cycles } /*** * Recursive depth-first-search to find cycles in the graph specified by `children' and `childrenlen'. * The cycles are stored in `cycles' and `cyclescnt'. * Parameter `useless': n-array where useless nodes are marked * Parameter `path': Array of nodes that represent a path on the graph * Parameter `pathlen': Lenght of `path' * Parameter `visited': n-array to mark visited nodes * Parameter `n': Number of nodes * Parameter `found': Pointer to integer to count the number of cycles found ***/ void dfs(const int *useless, int *path, int pathlen, int *visited, int n, \ const int *children, const int *childrenlen, int *cycles, int *cyclescnt, \ unsigned int *found) { int cycle[n]; // Temporary store for cycle int cyclelen; // Length of temporarilly stored cycle // We go on duing a dfs from node `vertex' const int vertex = path[pathlen-1]; for (int i = 0; i < childrenlen[vertex]; i++) { const int u = children[n*vertex + i]; // Enter node `u' if (useless[u]) continue; // This one is useless if (visited[u] == 0) { // Not visited yet visited[u] = 1; // Mark as visited path[pathlen] = u; // Append to path, and re-enter // recursion... dfs(useless, path, pathlen+1, visited, n, children, childrenlen, cycles, \ cyclescnt, found); visited[u] = 0; // Undo marking, we are going to // take another route } else { // We have reached a node on the path, i.e., we have found a cycle! // The cycle is somewhere along the path // path[0], path[1], ... path[pathlen-1] // More specifically, node `u' is somewhere there. // To detect this, we first place `u' on the path. // The method `canonical_cycle' will then use this information to find // and return the canonical form of the cycle. path[pathlen] = u; // Find cycle and generate the canonical representation. canonical_cycle(n, cycle, &cyclelen, path, pathlen+1); // Is it new? If yes, save if (cycleisnew(n, cycle, cyclelen, cycles, cyclescnt)) { // Increase the number of found cycles by 1 (*found)++; // Save cycle savecycle(n, cycle, cyclelen, cycles, cyclescnt); } // else dont do anything... } } return; } /*** * Find cycles in the `n'-nodes graph specified by the arrays `children' and `childrenlen', and return the number of cycles found. * The cycles are stored in `cycles' and `cyclescnt'. * This uses a recursive depth-first-search. ***/ int find_cycles(int *cycles, int *cyclescnt, int n, const int *children, \ const int *childrenlen) { int visited[n]; // Some nodes are useless, mark them as such int useless[n]; // We use n+1 here because we will "close" the path to form a cycle, // i.e., one node will be repeated. int path[n+1]; unsigned int found = 0; // Store total number of cycles found // Initialize cyclescnt to zero for (int i = 0; i < n+1; i++) cyclescnt[i] = 0; // Mark nodes without children as useless for (int i = 0; i < n; i++) { useless[i] = (childrenlen[i] == 0) ? 1 : 0; } // Iterate over nodes for (int vertex = 0; vertex < n; vertex++) { if (useless[vertex]) continue; for (int i = 0; i < n; i++) visited[i] = 0; // Mark all as univisited // We start at `vertex' path[0] = vertex; // Mark `vertex' as visited. If we revisit a node, we found a cycle. visited[vertex] = 1; // Enter depth-first-search recursion dfs(useless, path, 1, visited, n, children, childrenlen, cycles, \ cyclescnt, &found); // We processed this. Every cycle that goes through `vertex' has been found. useless[vertex] = 1; } return found; } /*** * gissoc tests wheter the graph provided is a SOC or not. * Parameter `n': Number of nodes * Parameter `parents': Pointer to list of parents per node * Parameter `parentslen': Pointer to list of number of parents per node * Parameter `children': Pointer to list of children per node * Parameter `childrenlen': Pointer to list of number of children per node * Parameter `cycles': Pointer to list of cycles in the graph * Parameter `cyclescnt': Pointer to list of cycles count * * This function retruns 0 if the graph is NOT a SOC, and 1 otherwise. * * A SOC (Siblings-on-Cycles) is a simple digraph where every cycle has siblings, i.e., nodes with common parents. ***/ int gissoc(int n, const int *parents, const int *parentslen, \ const int *children, const int *childrenlen, int *cycles, int *cyclescnt) { // We store the cycles in groups of the cycle lenghts (length of cycle is the // number of edges, or equivalentelly, the number of distinct nodes) // Since the graph is a simple digraph, the smallest cycles have length 2. // cycles[0], cycles[1] is the first cycle of length 2 // cycles[2], cycles[3] is the second cycle of length 2 // etc. // There are at most (n choose 2) cycles of length 2. // There are at most (n choose n/2) cycles of length n/2. This is upper // bounded by 2^n/sqrt(pi*n/2) < 2^n; this is the largest binomial // coefficient. // Since generally n will be small, we just use this bound for all groups. // So, the cycles with length k are saved at and after cycles[k*(2^n)]. // Each block has size 2^n. /* Early and saftey check for self loops. SOCs dont have self-loops, and we * should never generate them. Anyhow, test */ for (int i = 0; i < n; i++) { int num = childrenlen[i]; for (int cidx = 0; cidx < num; cidx++) if (i == children[n*i+cidx]) { fprintf(stderr, "ERROR: THIS GRAPH HAS A SELF LOOP (node %d)\n", i); return 0; } } // Check SOC property const int bs = blocksize(n); // Iterate over cycle length; min length is 2, max length is n // then iterate over cycle of length `len' // then iterate over nodes of that cycle. // In that iteration process, mark all parents. // If a marked parent is marked again, we have found a common parent. for (int len = 2; len < n+1; len++) { // Number of cycles of length `len' const int cnt = cyclescnt[len]; // Iterate over cycles for (int i = 0; i < cnt; i++) { // The cycle we look at (length=`len', index=`i') is saved starting from // index `startidx' const int startidx = len*bs+len*i; // Initialize marks int marked[n]; for (int k = 0; k < n; k++) marked[k] = 0; // Flag: Have we found a common parent? int commonparentfound = 0; // Iterate over cycle for (int k = 0; k < len; k++) { const int vertex = cycles[startidx+k]; for (int pidx = 0; pidx < parentslen[vertex]; pidx++) { const int parent = parents[n*vertex+pidx]; // Check if `parent' is a common parent, else mark if (marked[parent]) commonparentfound = 1; else marked[parent] = 1; // We have found a common parent, break out of the `pidx' loop if (commonparentfound) break; } // We have found a common parent, break out of the for loop over `k' if (commonparentfound) break; } // On cycle `i' we did NOT find a common parent, this is not a SOC if (!commonparentfound) return 0; } } // If all test pass, we have found a SOC return 1; } /*** * Number of grandparents, and number of grandchildren ***/ int grandparentslen(int node, const int *parents, const int *parentslen) { int tot = 0; for (int i = 0; i < parentslen[node]; i++) tot += parentslen[i]; return tot; } int grandchildrenlen(int node, const int *children, const int *childrenlen) { int tot = 0; for (int i = 0; i < childrenlen[node]; i++) tot += childrenlen[i]; return tot; } /*** * Speed-up by ignoring all graphs that do not satisfy some ordering of out- and in-degrees. * If the graph does not satisfy this `canonical' form, then another ismorphic to it (it's simply a permutation of the nodes) * will be degree-ordered. ***/ int isdegreeordered(int n, const int *parents, const int *parentslen, \ const int *children, const int *childrenlen) { // Decreasingling order by in-degree, then out-degree, then ... for (int i = 0; i < n-1; i++) { if (parentslen[i] > parentslen[i+1]) { return 0; } else if (parentslen[i] == parentslen[i+1]) { if (childrenlen[i] > childrenlen[i+1]) { return 0; } else if (childrenlen[i] == childrenlen[i+1]) { if (grandparentslen(i, parents, parentslen) > \ grandparentslen(i+1, parents, parentslen)) { return 0; } else if (grandparentslen(i, parents, parentslen) == \ grandparentslen(i+1, parents, parentslen)) { if (grandchildrenlen(i, children, childrenlen) > \ grandchildrenlen(i+1, children, childrenlen)) { return 0; } } } } } return 1; } /* Random nubers */ #if RAND_MAX/256 >= 0xFFFFFFFFFFFFFF #define LOOP_COUNT 1 #elif RAND_MAX/256 >= 0xFFFFFF #define LOOP_COUNT 2 #elif RAND_MAX/256 >= 0x3FFFF #define LOOP_COUNT 3 #elif RAND_MAX/256 >= 0x1FF #define LOOP_COUNT 4 #else #define LOOP_COUNT 5 #endif u_int64_t rand_uint64(unsigned long long max) { u_int64_t r = 0; for (int i=LOOP_COUNT; i > 0; i--) { r = r*(RAND_MAX + (u_int64_t)1) + rand(); } return r%max; } /* End of random numbers */ int main(int argc, char *argv[]) { // Parse command-line arguments int n = -1; int NONDAGONLY = 0; int NOSINK = 0; int NOSOURCE = 0; int RANDOM = 0; int GRAPHVIZ = 0; int UNKOPTION = 0; int ALL = 0; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-n") == 0 && i+1 < argc) { n = atoi(argv[i+1]); i++; } else if (strcmp(argv[i], "-r") == 0 && i+1 < argc) { RANDOM = atoi(argv[i+1]); i++; } else if (strcmp(argv[i], "-c") == 0) { NONDAGONLY = 1; } else if (strcmp(argv[i], "--no-sink") == 0) { NOSINK = 1; } else if (strcmp(argv[i], "--no-source") == 0) { NOSOURCE = 1; } else if (strcmp(argv[i], "--graphviz") == 0) { GRAPHVIZ = 1; } else if (strcmp(argv[i], "--all") == 0) { ALL = 1; } else { UNKOPTION = 1; break; } } if (UNKOPTION || n <= 1) { fprintf(stderr, "Usage: %s -n [-r ] [--graphviz] [FILTER ...]\n", argv[0]); fprintf(stderr, " -n Generate SOCs with `order' connected nodes\n"); fprintf(stderr, " -r Pick directed graphs at random, and exit after having found `num' SOCs\n"); fprintf(stderr, " --graphviz Output SOCs in Graphviz format, arcs of common parents are highlighted\n"); fprintf(stderr, " --all Allow for disconnected SOCs and disable the degree-order filter (see below)\n"); fprintf(stderr, "\n"); fprintf(stderr, "[FILTER] Consider only simple directed graphs ...\n"); fprintf(stderr, " -c ... that are cyclic (i.e., not DAGs)\n"); fprintf(stderr, " --no-sink ... without sink nodes (this logically implies -c)\n"); fprintf(stderr, " --no-source ... without source nodes (also this logically implies -c)\n"); fprintf(stderr, "\n"); fprintf(stderr, "This program prints the found SOCs as adjacency matrices to stdout, unless --graphviz has been specified.\n"); fprintf(stderr, "To exclude (some) of the isomorphic SOCs, it uses a degree-order filter, unless --all is specified.\n"); return -1; } // EO Parse command-line arguments // Setup datastructures int parents[n*n]; // Each node i can have at most n-1 parents. These are listed at parents[n*i+k] for 0<=k 64) { fprintf(stderr, "Too many graphs to enumarate with an unsiged long long integer (number of node pairs = %d)\n", \ m); return -1; } unsigned long long max = 1L << m; // Largest `graphnumber' const int padlen = (int)((float)m/3.322)+1; // Convert log 2 to log 10 int len = 0; time_t t0 = time(NULL); time_t t = time(NULL); srand((unsigned) time(&t)); // EO Initiate graph enumeration fprintf(stderr, "Generating SOCs with %d nodes\n", n); if (RANDOM) fprintf(stderr, " Picking graphs at random\n"); if (NONDAGONLY) fprintf(stderr, " Filter: Omitting DAGs\n"); if (NOSINK) fprintf(stderr, " Filter: Omitting graphs with sink nodes\n"); if (NOSOURCE) fprintf(stderr, " Filter: Omitting graphs with source nodes\n"); // We will always omit the graph with `graphnumber' 0; // it is unintersting anyway unsigned long long graphnumber = 0; unsigned long long graphschecked = 0; for (; graphnumber < max && (!RANDOM || len < RANDOM);) { const time_t now = time(NULL); // Print status every second if (now > t) { t = now; const int deltat = now - t0; const float rate = (float)graphschecked/(float)deltat; const float SOCrate = (float)len/(float)deltat; const float percentage = (RANDOM == 0) ? \ 100*(float)graphnumber/(float)max : 100*(float)len/(float)RANDOM; const float ETC = (RANDOM == 0) ? \ ((float)(max - graphschecked)/(rate))/3600 : \ ((float)(RANDOM - len)/(SOCrate))/3600; fprintf(stderr, "\r%6.2f%% %*llu/%llu (%i SOCs found, %d seconds, %*.2f graphs/s, %*.2f SOCs/s, %*.2fh estimated time left)", \ percentage, padlen, graphnumber, max, len, deltat, padlen+3, rate, \ padlen+3, SOCrate, padlen, ETC); fflush(stderr); } // Convert graph index `graphnumber' to the lists parents, children, // parentslen, childrenlen graphnrtolists(n, graphnumber, parents, parentslen, children, childrenlen); // Increase checked counter and prepare for next iteration graphschecked++; if (RANDOM) graphnumber = rand_uint64(max); else graphnumber++; // EO Increase checked counter and prepare for next iteration // If enabled, compare against degree options if (NOSINK || NOSOURCE) { // Get some degree properties int minindegree = n; int minoutdegree = n; for (int node = 0; node < n; node++) { if (minindegree > parentslen[node]) minindegree = parentslen[node]; if (minoutdegree > childrenlen[node]) minoutdegree = childrenlen[node]; } if (NOSINK && minoutdegree == 0) continue; if (NOSOURCE && minindegree == 0) continue; } // Check whether this graph is canonical or not; continue if graph has not // proper ordering of degress (there is an isomorphic graphs to this one // that has been or will be checked) if (!ALL && !isdegreeordered(n, parents, parentslen, children, childrenlen)) continue; // Ignore graphs that are not connected if (!ALL && !isconnected(n, parents, parentslen, children, childrenlen)) continue; // Find cycles const int num_cycles = find_cycles(cycles, cyclescnt, n, children, \ childrenlen); // If enabled, ignore DAGs if (NONDAGONLY && num_cycles == 0) continue; // Test the Siblings-On-Cycles property // A DAG is trivially a SOC if (num_cycles > 0 && !gissoc(n, parents, parentslen, children, \ childrenlen, cycles, cyclescnt)) continue; // We have found a SOC len++; if (GRAPHVIZ) dumpgv(n, graphnumber, children, childrenlen); else dumpgraph(n, children, childrenlen); } const int deltat = time(NULL) - t0; const float rate = (deltat == 0) ? graphschecked : \ (float)graphschecked/(float)deltat; const float SOCrate = (deltat == 0) ? len : (float)len/(float)deltat; fprintf(stderr, "\r%6.2f%% %*llu/%llu (%i SOCs found, %d seconds, %*.2f graphs/s, %*.2f SOCs/s, %*.2fh estimated time left)", \ 100.00, padlen, graphnumber, max, len, deltat, padlen+3, rate, padlen+3, \ SOCrate, padlen, 0.00); fprintf(stderr, "\nFound %d SOCs in %d seconds\n", len, deltat); // Free manually allocated memory free(cycles); return 0; }