diff --git a/.env.example b/.env.example index 6ffa8da..a8661a9 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,5 @@ EXCEL_FILE_URL=https://pubfiles.pagasa.dost.gov.ph/pagasaweb/files/climate/tendayweatheroutlook/day1.xlsx DEFAULT_EXCEL_FILE_URL=https://pubfiles.pagasa.dost.gov.ph/pagasaweb/files/climate/tendayweatheroutlook/day1.xlsx SHEETJS_COLUMN=__EMPTY -SORT_ALPHABETICAL=1 \ No newline at end of file +SORT_ALPHABETICAL=1 +SPECIAL_CHARACTERS=├â┬▒:ñ,â: \ No newline at end of file diff --git a/README.md b/README.md index 33f35bb..62d9671 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ ## ph-municipalities -**ph-municipalities** have **npm scripts** that allow interactive querying of Philippines municipalities included in one or more provinces or from a whole region, with an option of writing them to JSON files from the command line. +**ph-municipalities** has **NPM scripts** that allow interactive querying of Philippines municipalities included in one or more provinces or from a whole region, with an option of writing them to JSON files from the command line. It uses `/data/day1.xlsx` (downloaded and stored as of this 20220808) from PAGASA's [10-day weather forecast excel files](https://www.pagasa.dost.gov.ph/climate/climate-prediction/10-day-climate-forecast) as the default data source. -It also asks users to key in the download URL of a remote excel file should they want to use another excel file for a new and updated data source. +It also asks users to key in the download URL of a remote PAGASA 10-Day weather forecast excel file should they want to use another excel file for a new and updated data source. Extracted municipalities are written in JSON files following the format: @@ -81,8 +81,9 @@ The following dependencies are used for this project. 
Feel free to use other dep | Variable Name | Description | | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | EXCEL_FILE_URL | (Optional) Remote excel file's download URL.
If provided, the excel file will be downloaded and saved on the specified `pathToFile` local filesystem location during the `ExcelFile` class initialization.
Read on [Usage](#usage) for more information. | - | SHEETJS_COLUMN | Column name read by [sheetjs](https://sheetjs.com/) in an excel file.
This column contains the municipality and province names following the string pattern
`"municipalityName (provinceName)"`
Default value is `__EMPTY` | - | SORT_ALPHABETICAL | Arranges the municipality names in alphabetical order.
Default value is `1`. Set to `0` to use the ordering as read from the Excel file. | + | SHEETJS_COLUMN | Column name read by [sheetjs](https://sheetjs.com/) in an excel file.
This column contains the municipality and province names following the string pattern
`"municipalityName (provinceName)"`
Default value is `__EMPTY`| + | SORT_ALPHABETICAL | Arranges the municipality names in alphabetical order.
Default value is `1`. Set to `0` to use the ordering as read from the Excel file. | + | SPECIAL_CHARACTERS | Key-value pairs of special characters or garbled text and their normalized text conversions, delimited by the `":"` character.
Multiple key-value pairs are delimited by the `","` character.
If a special character key's value is an empty string, leave the value blank after the delimiter, e.g.: `"some-garbled-text:"` | ## Available Scripts diff --git a/package-lock.json b/package-lock.json index 405df42..7a4a93c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "ph-municipalities", - "version": "1.0.9", + "version": "1.0.10", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "ph-municipalities", - "version": "1.0.9", + "version": "1.0.10", "license": "ISC", "dependencies": { "dotenv": "^16.0.1", diff --git a/package.json b/package.json index 0419f1c..92e784e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ph-municipalities", - "version": "1.0.9", + "version": "1.0.10", "description": "List and write the `municipalities` of Philippines provinces or regions into JSON files", "main": "index.js", "scripts": { diff --git a/src/classes/excel/index.js b/src/classes/excel/index.js index 7e91dc8..070db38 100644 --- a/src/classes/excel/index.js +++ b/src/classes/excel/index.js @@ -188,6 +188,52 @@ class ExcelFile { return /[a-zA-z] *\([^)]*\) */.test(str) } + /** + * Checks if a string contains special characters + * @param {String} str - String to check + * @returns {Bool} + */ + static hasSpecialChars (str) { + /* eslint-disable no-control-regex */ + const regex = /[^\x00-\x7F]/g + return regex.test(str) + } + + /** + * Cleans/removes default-known special characters and garbled text defined in config from string. + * @param {String} str - String to clean + * @returns {String} - Clean string + */ + static removeGarbledText (str) { + // Known garbled special text + let charMap = { + '├â┬▒': 'ñ', // Replace "├â┬▒" with "ñ" + â: '' // Remove "â" + } + + // Other special characters from config + const specialChars = (process.env.SPECIAL_CHARACTERS?.split(',') ?? []) + .reduce((list, item) => { + const [key, value] = item.split(':') + + return { + ...list, + ...((key || value) && { [key]: value ?? 
'' }) + } + }, {}) + + charMap = { + ...charMap, + ...specialChars + } + + for (const [key, value] of Object.entries(charMap)) { + str = str.replace(new RegExp(key, 'g'), value) + } + + return str + } + /** * Extract the municipality name from a string following the pattern: * "municipalityName (provinceName)" @@ -267,7 +313,11 @@ class ExcelFile { acc[item.province] = [] } - acc[item.province].push(item.municipality) + const cleanText = ExcelFile.hasSpecialChars(item.municipality) + ? ExcelFile.removeGarbledText(item.municipality) + : item.municipality + + acc[item.province].push(cleanText) // Sort municipality names alphabetically if (process.env.SORT_ALPHABETICAL === '1') {