Skip to content

Commit

Permalink
Polarisation update; added greedy algo; removed modularity/EI
Browse files Browse the repository at this point in the history
  • Loading branch information
leifeld committed Jan 11, 2025
1 parent d9ef035 commit f0bda2f
Show file tree
Hide file tree
Showing 13 changed files with 694 additions and 492 deletions.
2 changes: 1 addition & 1 deletion dna/src/main/java/dna/Dna.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public class Dna {
public static Dna dna;
public static Logger logger;
public static Sql sql;
public static final String date = "2025-01-10";
public static final String date = "2025-01-11";
public static final String version = "3.0.11.4";
public static final String operatingSystem = System.getProperty("os.name");
public static File workingDirectory = null;
Expand Down
34 changes: 16 additions & 18 deletions dna/src/main/java/dna/HeadlessDna.java
Original file line number Diff line number Diff line change
Expand Up @@ -623,23 +623,23 @@ public void rTimeWindow(String networkType, String statementType, String variabl
* @param invertSources boolean indicating whether the document-level source values should be included (= {@code true}) rather than excluded.
* @param invertSections boolean indicating whether the document-level section values should be included (= {@code true}) rather than excluded.
* @param invertTypes boolean indicating whether the document-level type values should be included (= {@code true}) rather than excluded.
* @param k The number of clusters or factions, for example 2 for bi-polarisation.
* @param numParents The number of cluster solutions (i.e., parents) to generate in each iteration, for example 30 or 50.
* @param iterations For how many generations should the genetic algorithm run at most? This is the maximal number of generations through which optimisation should be attempted. Will be lower if early convergence is detected. A suggested starting value is 1000.
* @param elitePercentage The share of cluster solutions in each parent generation that is copied into the children generation without changes, between 0.0 and 1.0, usually around 0.1.
* @param mutationPercentage The probability with which each bit in any cluster solution is selected for mutation after the cross-over step. For example 0.1 to select 10% of the nodes to swap their memberships.
* @param qualityFunction The quality function to evaluate cluster solutions. Supported values are "modularity", "eiIndex", and "absdiff".
* @param normaliseMatrices boolean indicating whether the network matrices should be normalised before computing the polarisation score.
* @param randomSeed The random seed to use for the random number generator. Pass 0 for random behaviour.
* @return A PolarisationResultTimeSeries object containing the results of the genetic algorithm for each time step and iteration.
* @param algorithm The algorithm to maximise polarisation at each time step. Can be "greedy" (for a greedy algorithm) or "genetic" (for a genetic algorithm).
* @param normaliseScores boolean indicating whether the polarisation scores should be normalised by dividing them by their theoretical maximum within a given network. This removes the effect of greater activity (possibly due to participation by more actors or more statements per actor) on the polarisation scores and focuses solely on structure given the edge mass in the network. Without normalisation, time periods with more actors and activity will show elevated polarisation of the network even when the degree of division over concepts stays constant.
* @param numClusters The number of clusters or factions k, for example 2 for bi-polarisation.
* @param numParents Only for the genetic algorithm: The number of cluster solutions (i.e., parents) to generate in each iteration, for example 30 or 50.
* @param numIterations Only for the genetic algorithm: For how many generations should the genetic algorithm run at most? This is the maximal number of generations through which optimisation should be attempted. Will be lower if early convergence is detected. A suggested starting value is 1000.
* @param elitePercentage Only for the genetic algorithm: The share of cluster solutions in each parent generation that is copied into the children generation without changes, between 0.0 and 1.0, usually around 0.1.
* @param mutationPercentage Only for the genetic algorithm: The probability with which each bit in any cluster solution is selected for mutation after the cross-over step. For example 0.1 to select 10% of the nodes to swap their memberships.
* @param randomSeed Only for the genetic algorithm: The random seed to use for the random number generator. Pass 0 for random behaviour.
* @return A PolarisationResultTimeSeries object containing the results of the algorithm for each time step and iteration.
*/
public PolarisationResultTimeSeries rPolarisation(String statementType, String variable1, boolean variable1Document,
String variable2, boolean variable2Document, String qualifier, String duplicates, String startDate,
String stopDate, String timeWindow, int windowSize, String kernel, boolean indentTime,
String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources,
String[] excludeSections, String[] excludeTypes, boolean invertValues, boolean invertAuthors,
boolean invertSources, boolean invertSections, boolean invertTypes, int k, int numParents, int iterations,
double elitePercentage, double mutationPercentage, String qualityFunction, boolean normaliseMatrices,
boolean invertSources, boolean invertSections, boolean invertTypes, String algorithm, boolean normaliseScores,
int numClusters, int numParents, int numIterations, double elitePercentage, double mutationPercentage,
long randomSeed) {

// step 1: preprocess arguments
Expand Down Expand Up @@ -667,13 +667,11 @@ public PolarisationResultTimeSeries rPolarisation(String statementType, String v
}
}

Polarisation polarisation = new Polarisation(st, variable1, variable1Document, variable2,
variable2Document, qualifier, false, duplicates,
ldtStart, ldtStop, timeWindow, windowSize, map, excludeAuthors, excludeSources,
excludeSections, excludeTypes, invertValues, invertAuthors, invertSources,
invertSections, invertTypes, kernel, indentTime, k, numParents,
iterations, elitePercentage, mutationPercentage, qualityFunction, normaliseMatrices,
randomSeed);
Polarisation polarisation = new Polarisation(st, variable1, variable1Document, variable2, variable2Document,
qualifier, false, duplicates, ldtStart, ldtStop, timeWindow, windowSize, kernel, indentTime,
map, excludeAuthors, excludeSources, excludeSections, excludeTypes, invertValues, invertAuthors,
invertSources, invertSections, invertTypes, algorithm, normaliseScores, numClusters, numParents,
numIterations, elitePercentage, mutationPercentage, randomSeed);

return polarisation.getResults();
}
Expand Down
2 changes: 0 additions & 2 deletions dna/src/main/java/dna/export/Exporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import model.*;
import org.apache.commons.math3.linear.EigenDecomposition;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.analysis.function.Log;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.jdom.Attribute;
import org.jdom.Comment;
Expand All @@ -27,7 +26,6 @@
import org.ojalgo.matrix.decomposition.Eigenvalue;

import java.io.*;
import java.lang.reflect.Array;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.Period;
Expand Down
Loading

0 comments on commit f0bda2f

Please sign in to comment.