-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
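Fix the java.lang.RuntimeException thrown at stage 1 when running the IJDPS paper experiments. It is caused by incorrect character encoding of Swedish names and place names: non-ASCII letters reach the string measure as the Unicode replacement character U+FFFD (char value 65533, e.g. in the surname shown below as "H��GLUND"), which SparseProbabilityMeasure.stringToSparseArray (called from SED) rejects as an incorrect char.
See the stack trace below: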
Caused by: java.lang.RuntimeException: exception comparing fields SURNAME and FATHER_SURNAME in records
{"ORIGINAL_ID":"","STANDARDISED_ID":"7209387","FORENAME":"CATHARINA ULRICA","CHANGED_FORENAME":"","SURNAME":"JONSSON","CHANGED_SURNAME":"","SEX":"F","FATHER_FORENAME":"U. JON","FATHER_SURNAME":"JONSSON","MOTHER_FORENAME":"CAJSA LISA","MOTHER_SURNAME":"","MOTHER_MAIDEN_SURNAME":"ANDSDR","CHANGED_MOTHER_MAIDEN_SURNAME":"","MOTHER_OCCUPATION":"","FATHER_OCCUPATION":"BD","YEAR_OF_REGISTRATION":"----","ENTRY":"","REGISTRATION_DISTRICT_SUFFIX":"","REGISTRATION_DISTRICT_NUMBER":"","CORRECTED_ENTRY":"","IMAGE_QUALITY":"","BIRTH_DAY":"27","BIRTH_MONTH":"02","BIRTH_YEAR":"1826","BIRTH_ADDRESS":"burtr��sk","ILLEGITIMATE_INDICATOR":"","ADOPTION":"","PARENTS_DAY_OF_MARRIAGE":"03","PARENTS_MONTH_OF_MARRIAGE":"02","PARENTS_YEAR_OF_MARRIAGE":"1822","PARENTS_PLACE_OF_MARRIAGE":"----","PLACE_OF_BIRTH":"","INFORMANT_DID_NOT_SIGN":"","INFORMANT":"","FAMILY":"","DEATH":"","FORENAME_CLEAN":"","SURNAME_CLEAN":"","FATHER_FORENAME_CLEAN":"","FATHER_SURNAME_CLEAN":"","MOTHER_FORENAME_CLEAN":"","MOTHER_SURNAME_CLEAN":"","CHILD_IDENTITY":"500572","MOTHER_IDENTITY":"496783","FATHER_IDENTITY":"497630","DEATH_RECORD_IDENTITY":"","PARENT_MARRIAGE_RECORD_IDENTITY":"50294213","FATHER_BIRTH_RECORD_IDENTITY":"7202743","MOTHER_BIRTH_RECORD_IDENTITY":"7201884","MARRIAGE_RECORD_IDENTITY1":"","MARRIAGE_RECORD_IDENTITY2":"","MARRIAGE_RECORD_IDENTITY3":"","MARRIAGE_RECORD_IDENTITY4":"","MARRIAGE_RECORD_IDENTITY5":"","MARRIAGE_RECORD_IDENTITY6":"","MARRIAGE_RECORD_IDENTITY7":"","MARRIAGE_RECORD_IDENTITY8":"","IMMIGRATION_GENERATION":""}
and
{"ORIGINAL_ID":"","STANDARDISED_ID":"670348","FORENAME":"JOHAN OSKAR","CHANGED_FORENAME":"","SURNAME":"H��GLUND","CHANGED_SURNAME":"","SEX":"M","FATHER_FORENAME":"JOHANNES","FATHER_SURNAME":"H��GLUND","MOTHER_FORENAME":"JOHANNA VILHELMINA","MOTHER_SURNAME":"","MOTHER_MAIDEN_SURNAME":"LARSDOTTER","CHANGED_MOTHER_MAIDEN_SURNAME":"","MOTHER_OCCUPATION":"","FATHER_OCCUPATION":"BN","YEAR_OF_REGISTRATION":"----","ENTRY":"","REGISTRATION_DISTRICT_SUFFIX":"","REGISTRATION_DISTRICT_NUMBER":"","CORRECTED_ENTRY":"","IMAGE_QUALITY":"","BIRTH_DAY":"30","BIRTH_MONTH":"11","BIRTH_YEAR":"1889","BIRTH_ADDRESS":"skellefte��","ILLEGITIMATE_INDICATOR":"","ADOPTION":"","PARENTS_DAY_OF_MARRIAGE":"26","PARENTS_MONTH_OF_MARRIAGE":"02","PARENTS_YEAR_OF_MARRIAGE":"1889","PARENTS_PLACE_OF_MARRIAGE":"----","PLACE_OF_BIRTH":"","INFORMANT_DID_NOT_SIGN":"","INFORMANT":"","FAMILY":"","DEATH":"","FORENAME_CLEAN":"","SURNAME_CLEAN":"","FATHER_FORENAME_CLEAN":"","FATHER_SURNAME_CLEAN":"","MOTHER_FORENAME_CLEAN":"","MOTHER_SURNAME_CLEAN":"","CHILD_IDENTITY":"290146","MOTHER_IDENTITY":"410570","FATHER_IDENTITY":"505415","DEATH_RECORD_IDENTITY":"","PARENT_MARRIAGE_RECORD_IDENTITY":"50292731","FATHER_BIRTH_RECORD_IDENTITY":"549161","MOTHER_BIRTH_RECORD_IDENTITY":"7219632","MARRIAGE_RECORD_IDENTITY1":"","MARRIAGE_RECORD_IDENTITY2":"","MARRIAGE_RECORD_IDENTITY3":"","MARRIAGE_RECORD_IDENTITY4":"","MARRIAGE_RECORD_IDENTITY5":"","MARRIAGE_RECORD_IDENTITY6":"","MARRIAGE_RECORD_IDENTITY7":"","MARRIAGE_RECORD_IDENTITY8":"","IMMIGRATION_GENERATION":""}
at uk.ac.standrews.cs.population_linkage.compositeMeasures.LXPMeasure.throwExceptionWithDebug (LXPMeasure.java:97)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.LXPMeasure.sumOfFieldDistances (LXPMeasure.java:62)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.MeanOfFieldDistancesNormalised.calculateDistance (MeanOfFieldDistancesNormalised.java:73)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.MeanOfFieldDistancesNormalised.calculateDistance (MeanOfFieldDistancesNormalised.java:35)
at uk.ac.standrews.cs.utilities.measures.coreConcepts.Measure.distance (Measure.java:45)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.processRecordFromFirstSource (ThresholdAnalysis.java:473)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.processBlock (ThresholdAnalysis.java:452)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.run (ThresholdAnalysis.java:425)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis.lambda$run$0 (ThresholdAnalysis.java:203)
at java.lang.Thread.run (Thread.java:1583)
Caused by: java.lang.RuntimeException: incorrect char in SED: � charval: 65533 from string: H��GLUND
at uk.ac.standrews.cs.utilities.measures.implementation.SparseProbabilityMeasure.stringToSparseArray (SparseProbabilityMeasure.java:44)
at uk.ac.standrews.cs.utilities.measures.SED.calculateDistance (SED.java:53)
at uk.ac.standrews.cs.utilities.measures.SED.calculateDistance (SED.java:25)
at uk.ac.standrews.cs.utilities.measures.coreConcepts.Measure.distance (Measure.java:45)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.LXPMeasure.sumOfFieldDistances (LXPMeasure.java:59)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.MeanOfFieldDistancesNormalised.calculateDistance (MeanOfFieldDistancesNormalised.java:73)
at uk.ac.standrews.cs.population_linkage.compositeMeasures.MeanOfFieldDistancesNormalised.calculateDistance (MeanOfFieldDistancesNormalised.java:35)
at uk.ac.standrews.cs.utilities.measures.coreConcepts.Measure.distance (Measure.java:45)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.processRecordFromFirstSource (ThresholdAnalysis.java:473)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.processBlock (ThresholdAnalysis.java:452)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis$Run.run (ThresholdAnalysis.java:425)
at uk.ac.standrews.cs.population_linkage.groundTruth.ThresholdAnalysis.lambda$run$0 (ThresholdAnalysis.java:203)
at java.lang.Thread.run (Thread.java:1583)
Metadata
Metadata
Assignees
Labels
No labels