diff --git a/.env.example b/.env.example
index 6ffa8da..a8661a9 100644
--- a/.env.example
+++ b/.env.example
@@ -1,4 +1,5 @@
EXCEL_FILE_URL=https://pubfiles.pagasa.dost.gov.ph/pagasaweb/files/climate/tendayweatheroutlook/day1.xlsx
DEFAULT_EXCEL_FILE_URL=https://pubfiles.pagasa.dost.gov.ph/pagasaweb/files/climate/tendayweatheroutlook/day1.xlsx
SHEETJS_COLUMN=__EMPTY
-SORT_ALPHABETICAL=1
\ No newline at end of file
+SORT_ALPHABETICAL=1
+SPECIAL_CHARACTERS=├â┬▒:ñ,â:
\ No newline at end of file
diff --git a/README.md b/README.md
index 33f35bb..62d9671 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
## ph-municipalities
-**ph-municipalities** have **npm scripts** that allow interactive querying of Philippines municipalities included in one or more provinces or from a whole region, with an option of writing them to JSON files from the command line.
+**ph-municipalities** has **NPM scripts** that allow interactive querying of Philippine municipalities included in one or more provinces or from a whole region, with an option of writing them to JSON files from the command line.
It uses `/data/day1.xlsx` (downloaded and stored as of this 20220808) from PAGASA's [10-day weather forecast excel files](https://www.pagasa.dost.gov.ph/climate/climate-prediction/10-day-climate-forecast) as the default data source.
-It also asks users to key in the download URL of a remote excel file should they want to use another excel file for a new and updated data source.
+It also asks users to key in the download URL of a remote PAGASA 10-Day weather forecast excel file should they want to use another excel file for a new and updated data source.
Extracted municipalities are written in JSON files following the format:
@@ -81,8 +81,9 @@ The following dependencies are used for this project. Feel free to use other dep
| Variable Name | Description |
| ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| EXCEL_FILE_URL | (Optional) Remote excel file's download URL.
If provided, the excel file will be downloaded and saved on the specified `pathToFile` local filesystem location during the `ExcelFile` class initialization.
Read on [Usage](#usage) for more information. |
- | SHEETJS_COLUMN | Column name read by [sheetjs](https://sheetjs.com/) in an excel file.
This column contains the municipality and province names following the string pattern
`"municipalityName (provinceName)"`
Default value is `__EMPTY` |
- | SORT_ALPHABETICAL | Arranges the municipality names in alphabetical order.
Default value is `1`. Set to `0` to use the ordering as read from the Excel file. |
+ | SHEETJS_COLUMN | Column name read by [sheetjs](https://sheetjs.com/) in an excel file.
This column contains the municipality and province names following the string pattern
`"municipalityName (provinceName)"`
Default value is `__EMPTY`|
+ | SORT_ALPHABETICAL | Arranges the municipality names in alphabetical order.
Default value is `1`. Set to `0` to use the ordering as read from the Excel file. |
+ | SPECIAL_CHARACTERS | Key-value pairs of special characters or garbled text and their normalized text conversions, delimited by the `":"` character.
Multiple key-value pairs are delimited by the `","` character.
If a special character key's value is an empty string, write it as, e.g., `"some-garbled-text:"` |
## Available Scripts
diff --git a/package-lock.json b/package-lock.json
index 405df42..7a4a93c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "ph-municipalities",
- "version": "1.0.9",
+ "version": "1.0.10",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "ph-municipalities",
- "version": "1.0.9",
+ "version": "1.0.10",
"license": "ISC",
"dependencies": {
"dotenv": "^16.0.1",
diff --git a/package.json b/package.json
index 0419f1c..92e784e 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "ph-municipalities",
- "version": "1.0.9",
+ "version": "1.0.10",
"description": "List and write the `municipalities` of Philippines provinces or regions into JSON files",
"main": "index.js",
"scripts": {
diff --git a/src/classes/excel/index.js b/src/classes/excel/index.js
index 7e91dc8..070db38 100644
--- a/src/classes/excel/index.js
+++ b/src/classes/excel/index.js
@@ -188,6 +188,52 @@ class ExcelFile {
return /[a-zA-z] *\([^)]*\) */.test(str)
}
+ /**
+ * Checks if a string contains special characters
+ * @param {String} str - String to check
+ * @returns {Bool}
+ */
+ static hasSpecialChars (str) {
+ /* eslint-disable no-control-regex */
+ const regex = /[^\x00-\x7F]/g
+ return regex.test(str)
+ }
+
+ /**
+ * Cleans/removes default-known special characters and garbled text defined in config from string.
+ * @param {String} str - String to clean
+ * @returns {String} - Clean string
+ */
+ static removeGarbledText (str) {
+ // Known garbled special text
+ let charMap = {
+ '├â┬▒': 'ñ', // Replace "├â┬▒" with "ñ"
+ â: '' // Remove "â"
+ }
+
+ // Other special characters from config
+ const specialChars = (process.env.SPECIAL_CHARACTERS?.split(',') ?? [])
+ .reduce((list, item) => {
+ const [key, value] = item.split(':')
+
+ return {
+ ...list,
+ ...((key || value) && { [key]: value ?? '' })
+ }
+ }, {})
+
+ charMap = {
+ ...charMap,
+ ...specialChars
+ }
+
+ for (const [key, value] of Object.entries(charMap)) {
+ str = str.replace(new RegExp(key, 'g'), value)
+ }
+
+ return str
+ }
+
/**
* Extract the municipality name from a string following the pattern:
* "municipalityName (provinceName)"
@@ -267,7 +313,11 @@ class ExcelFile {
acc[item.province] = []
}
- acc[item.province].push(item.municipality)
+ const cleanText = ExcelFile.hasSpecialChars(item.municipality)
+ ? ExcelFile.removeGarbledText(item.municipality)
+ : item.municipality
+
+ acc[item.province].push(cleanText)
// Sort municipality names alphabetically
if (process.env.SORT_ALPHABETICAL === '1') {