
Commit

999999999_54872.py (#37, #39): first attempt to load all public P-Codes to SQLite (still with some off-by-one error)
fititnt committed May 24, 2022
1 parent b1801b5 commit e10c58e
Showing 3 changed files with 147 additions and 23 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -1,11 +1,14 @@
# Lexicographī sine fīnibus
**[working draft] The Etica.AI + HXL-CPLP [monorepo](https://en.wikipedia.org/wiki/Monorepo) with public-domain automation scripts for [practical lexicography](https://en.wikipedia.org/wiki/Lexicography) on selected topics. The goal is both to compile existing translations ([such as Wikidata](https://www.wikidata.org/wiki/Wikidata:Licensing)) and to prepare for new terminology translation initiatives.**

<!--
More about this at:
- https://github.com/EticaAI/numerordinatio/issues/5
- https://numerordinatio.etica.ai/
Namespace explanations at [officinam/](officinam/).
-->
Namespace explanations at [HXL-CPLP-Vocab_Auxilium-Humanitarium-API/1603_1_1](https://docs.google.com/spreadsheets/d/1ih3ouvx_n8W5ntNcYBqoyZ2NRMdaA0LRg5F9mGriZm4/edit#gid=2095477004).

## Disclaimers

67 changes: 53 additions & 14 deletions officinam/999999999/0/999999999_7200235.py
@@ -125,7 +125,10 @@
'-9:#meta+id|-8:#country+code+v_iso3|-7:#country+code+v_iso2'
Work with local COD-AB index (levels) . . . . . . . . . . . . . . . . . . . . .
{0} --methodus='cod_ab_index_levels'
{0} --methodus='cod_ab_index_levels' --punctum-separato-ad-tab
{0} --methodus='cod_ab_index_levels' --sine-capite \
--cum-columnis='#item+conceptum+numerordinatio'
Process XLSXs from external sources . . . . . . . . . . . . . . . . . . . . . .
{0} --methodus=xlsx_metadata 999999/1603/45/16/xlsx/ago.xlsx
@@ -541,6 +544,17 @@ def make_args(self, hxl_output=True):
const=True,
default=False
)
# sine (+ ablative) https://en.wiktionary.org/wiki/sine#Latin
# capite, s, n, ablativus, https://en.wiktionary.org/wiki/caput#Latin
parser.add_argument(
'--sine-capite',
help='Output without header',
metavar="sine_capite",
dest="sine_capite",
action='store_const',
const=True,
default=False
)
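For readers less familiar with this argparse pattern: `store_const` with `const=True` / `default=False` makes `--sine-capite` behave as a plain boolean switch, so the rest of the CLI only has to test `pyargs.sine_capite`. A minimal self-contained sketch of the same pattern (illustrative, not the repository's code):

```python
import argparse

parser = argparse.ArgumentParser()
# Same pattern as the diff above: the presence of the flag stores True.
parser.add_argument(
    '--sine-capite',
    help='Output without header',
    dest='sine_capite',
    action='store_const',
    const=True,
    default=False
)

print(parser.parse_args([]).sine_capite)                 # False
print(parser.parse_args(['--sine-capite']).sine_capite)  # True
```

`action='store_true'` would be the shorter equivalent; the `metavar` passed in the diff has no effect on the help output, since a const action takes no value.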

# parser.add_argument(
# # '--venandum-insectum-est, --debug',
@@ -761,6 +775,9 @@ def execute_cli(self, pyargs, stdin=STDIN, _stdout=sys.stdout,
data_json_len, data_json_len_uniq, _path))
return self.EXIT_OK

if pyargs.sine_capite:
caput = None

csv_imprimendo(caput, data, punctum_separato)

return self.EXIT_OK
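`csv_imprimendo` is the repository's own printing helper and its body is not part of this diff. Assuming it is a thin wrapper around `csv.writer` that only emits `caput` as a header row when it is not None, the `caput = None` convention used above would work roughly like this sketch (names suffixed with `_sketch` are illustrative, not from the repo):

```python
import csv
import sys
from typing import List, Optional


def csv_imprimendo_sketch(
        caput: Optional[list],
        data: List[list],
        punctum_separato: str = ',') -> None:
    """Hypothetical stand-in for csv_imprimendo: header only if caput given."""
    scriptor = csv.writer(sys.stdout, delimiter=punctum_separato)
    if caput is not None:
        scriptor.writerow(caput)
    scriptor.writerows(data)


# With --sine-capite the caller sets caput = None, so only data rows appear.
csv_imprimendo_sketch(None, [['1603:45:16:24:0', '24', 'AGO', 'AO']])
```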
@@ -814,6 +831,8 @@ def execute_cli(self, pyargs, stdin=STDIN, _stdout=sys.stdout,
if pyargs.methodus == 'xlsx_ad_csv':
xlsx.praeparatio()
caput, data = xlsx.imprimere()
if pyargs.sine_capite:
caput = None
csv_imprimendo(caput, data, punctum_separato=punctum_separato)

xlsx.finis()
@@ -841,6 +860,8 @@ def execute_cli(self, pyargs, stdin=STDIN, _stdout=sys.stdout,
# print(type(caput), caput)
# print(type(data), data)
# raise NotImplementedError('test test')
if pyargs.sine_capite:
caput = None
csv_imprimendo(caput, data, punctum_separato=punctum_separato)

# print()
@@ -1008,27 +1029,45 @@ def hxltm_carricato__cod_ab_levels(
Returns:
Tuple[list, list]: caput and data, expanded to one row per country and COD-AB level
"""
columnae = [
caput_novo = ['#item+conceptum+numerordinatio']
caput_cum_columnis = [
'#country+code+v_unm49',
'#meta+source+cod_ab_level',
'#country+code+v_iso3',
'#country+code+v_iso2'
]
# print(' ooi')
data_novis = []

caput, data = hxltm_cum_aut_sine_columnis_simplicibus(
caput, data, columnae)
# _ordo_novo = []
caput, data, caput_cum_columnis)

numerordinatio_praefixo = numerordinatio_neo_separatum(
numerordinatio_praefixo, ':')
caput_novo = ['#item+conceptum+numerordinatio']
caput_novo.extend(caput)
data_novis = []

caput_novo.extend(caput_cum_columnis)

data.sort(key=lambda linea: int(linea[0]))

_numerordinatio__done = []

for linea in data:
linea_novae = []
linea_novae.append('{0}:{1}:{2}'.format(
numerordinatio_praefixo, linea[0], linea[1]
))
linea_novae.extend(linea)
data_novis.append(linea_novae)
for cod_ab_level in range(0, int(linea[1])):
linea_novae = []
numerordinatio = '{0}:{1}:{2}'.format(
numerordinatio_praefixo, linea[0], cod_ab_level
)

if numerordinatio in _numerordinatio__done:
continue

_numerordinatio__done.append(numerordinatio)
linea_novae.append(numerordinatio)
linea_novae.append(linea[0])
linea_novae.append(cod_ab_level)
linea_novae.append(linea[2])
linea_novae.append(linea[3])
# linea_novae.extend(linea)
data_novis.append(linea_novae)

# raise NotImplementedError
# return caput, data
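To make the new expansion logic concrete, here is a toy reproduction of the loop above with two index rows for the same country (UN m49 24, declaring levels 2 and 3). The data and prefix are illustrative only. Note that `range(0, int(linea[1]))` stops one short of the declared level, which may well be the off-by-one error acknowledged in the commit message:

```python
numerordinatio_praefixo = '1603:45:16'
# Column order follows caput_cum_columnis: v_unm49, cod_ab_level, v_iso3, v_iso2
data = [
    ['24', '2', 'AGO', 'AO'],
    ['24', '3', 'AGO', 'AO'],
]

data_novis = []
_numerordinatio__done = []

data.sort(key=lambda linea: int(linea[0]))

for linea in data:
    for cod_ab_level in range(0, int(linea[1])):
        numerordinatio = '{0}:{1}:{2}'.format(
            numerordinatio_praefixo, linea[0], cod_ab_level)
        if numerordinatio in _numerordinatio__done:
            continue
        _numerordinatio__done.append(numerordinatio)
        data_novis.append(
            [numerordinatio, linea[0], cod_ab_level, linea[2], linea[3]])

for linea_novae in data_novis:
    print(linea_novae)
# ['1603:45:16:24:0', '24', 0, 'AGO', 'AO']
# ['1603:45:16:24:1', '24', 1, 'AGO', 'AO']
# ['1603:45:16:24:2', '24', 2, 'AGO', 'AO']
# No row is ever emitted for level 3, even though the second index row declares it.
```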
100 changes: 91 additions & 9 deletions officinam/999999999/1603_45_16.sh
@@ -100,8 +100,6 @@ bootstrap_1603_45_16__all() {
echo " LIST HERE <${opus_temporibus_temporarium}>"
echo ""



# while IFS=, read -r iso3 source_url; do
{
# remove read -r to not skip first line
@@ -131,7 +129,6 @@ bootstrap_1603_45_16__all() {
continue
fi


# echo "numerordinatio_praefixo $numerordinatio_praefixo"
# bootstrap_1603_45_16__item "1603_45_16_24" "24" "AGO" "AO" "3" "1" "0"
bootstrap_1603_45_16__item "$numerordinatio_praefixo" "$unm49" "$v_iso3" "$v_iso2" "$cod_ab_level_max" "1" "0"
@@ -142,6 +139,94 @@ bootstrap_1603_45_16__all() {

}

#######################################
# Load the public P-Code index (1603_45_16) into the apothecae data stores
# (datapackage.json, SQLite; PostgreSQL and RDF still TODO) using
# 999999999_7200235.py and 1603_1.py
#
# @TODO: potentially use more than one source (such as IBGE data for BRA)
#        instead of only data taken directly from OCHA
#
# Globals:
# ROOTDIR
#
# Arguments:
# est_meta_datapackage
# est_tabulae_sqlite
# est_tabulae_postgresql
# est_graphicus_rdf
#
# Outputs:
# Convert files
#######################################
bootstrap_1603_45_16__apothecae() {
# objectivum_iso3661p1a3="${1:-""}"
est_meta_datapackage="${1:-""}"
est_tabulae_sqlite="${2:-""}"
est_tabulae_postgresql="${3:-""}"
est_graphicus_rdf="${4:-""}"
# est_postgresql="${2:-""}"

nomen="1603_45_16"

# echo "${FUNCNAME[0]} ... [$objectivum_iso3661p1a3]"
echo "${FUNCNAME[0]} ... [@TODO]"
opus_temporibus_temporarium="${ROOTDIR}/999999/0/1603_45_16.apothecae.todo.txt"
objectivum_archivum_datapackage="apothecae~${nomen}.datapackage.json"
objectivum_archivum_sqlite="apothecae~${nomen}.sqlite"
# apothecae.datapackage.json
# set -x
"${ROOTDIR}/999999999/0/999999999_7200235.py" \
--methodus='cod_ab_index_levels' \
--sine-capite \
--cum-columnis='#item+conceptum+numerordinatio' \
>"${opus_temporibus_temporarium}"
# set +x

## 2022-05-23: we will skip Lesotho (LSO) for now, as we cannot extract a
## numeric part from its admin 1 P-codes (it uses 3-letter codes)
# admin1Name_en admin1Pcode
# Maseru LSA
# Butha-Buthe LSB
# Leribe LSC
# (...)
sed -i '/1603:45:16:426:0/d' "${opus_temporibus_temporarium}"
sed -i '/1603:45:16:426:1/d' "${opus_temporibus_temporarium}"
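The numeric extraction itself happens elsewhere in the tooling; assuming it strips the 2-letter prefix from each P-code and parses the remainder as an integer, the Lesotho rows fail as sketched below (the function and its logic are an assumption for illustration, not the repository's code):

```python
def pcode_numerus_sketch(pcode: str, praefixo: str) -> int:
    """Hypothetical extraction: drop the prefix, parse the rest as an integer."""
    return int(pcode[len(praefixo):])


print(pcode_numerus_sketch('BR11', 'BR'))  # 11 -> a typical numeric admin P-code

try:
    pcode_numerus_sketch('LSA', 'LS')      # Maseru: the suffix is 'A', not a number
except ValueError as erratum:
    print(erratum)  # invalid literal for int() with base 10: 'A'
```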

echo ""
echo " LIST HERE <${opus_temporibus_temporarium}>"
echo ""

if [ -n "$est_meta_datapackage" ]; then
set -x
"${ROOTDIR}/999999999/0/1603_1.py" \
--methodus='data-apothecae' \
--data-apothecae-ex-archivo="${opus_temporibus_temporarium}" \
--data-apothecae-ad="$objectivum_archivum_datapackage"
set +x
fi

if [ -n "$est_tabulae_sqlite" ]; then
set -x
"${ROOTDIR}/999999999/0/1603_1.py" \
--methodus='data-apothecae' \
--data-apothecae-ex-archivo="${opus_temporibus_temporarium}" \
--data-apothecae-ad="$objectivum_archivum_sqlite"
set +x
fi

if [ -n "$est_tabulae_postgresql" ]; then
echo "est_tabulae_postgresql requires specify connection"
echo "skiping for now..."
fi

if [ -n "$est_graphicus_rdf" ]; then
echo "TODO est_graphicus_rdf"
fi

# ./999999999/0/1603_1.py --methodus='data-apothecae' --data-apothecae-ex-archivo='999999/0/apothecae-list.txt' --data-apothecae-ad='apothecae.datapackage.json'

}
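Once 1603_1.py has written `apothecae~1603_45_16.sqlite`, a quick check of row counts per table is one way to spot the off-by-one mentioned in the commit message. The table layout produced by 1603_1.py is not documented in this diff, so the sketch below discovers the table names instead of assuming them:

```python
import sqlite3

con = sqlite3.connect('apothecae~1603_45_16.sqlite')

# List whatever tables 1603_1.py actually created.
tabulae = [linea[0] for linea in con.execute(
    "SELECT name FROM sqlite_master WHERE type = 'table'")]

# Row count per table: a missing admin level usually shows up as a short table.
for nomen_tabulae in tabulae:
    numerus = con.execute(
        'SELECT count(*) FROM "{0}"'.format(nomen_tabulae)).fetchone()[0]
    print(nomen_tabulae, numerus)

con.close()
```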

#######################################
# Convert the XLSXs to intermediate formats on 999999/1603/45/16 using
# 999999999_7200235.py to 1603/45/16/{cod_ab_level}/
@@ -205,7 +290,7 @@ bootstrap_1603_45_16__item() {

echo "cod_ab_levels $cod_ab_level_max"

for ((i=0;i<=cod_ab_level_max;i++)); do
for ((i = 0; i <= cod_ab_level_max; i++)); do
cod_level="$i"
if [ "$_iso3661p1a3_lower" == "bra" ] && [ "$cod_level" == "2" ]; then
echo ""
@@ -816,12 +901,10 @@ __temp_download_external_cod_data() {
# __temp_download_external_cod_data
# exit 1

bootstrap_1603_45_16__all
# bootstrap_1603_45_16__all
# bootstrap_999999_1603_45_16_neo ""
# bootstrap_999999_1603_45_16_neo "BRA"

# bootstrap_1603_45_16__item "76" "BRA"
# bootstrap_1603_45_16__item "1603_45_16_24" "24" "AGO" "AO" "1" "0"
bootstrap_1603_45_16__apothecae "1" "1" "" ""
exit 1

echo "after here is old scripts that need to be refatored"
@@ -957,7 +1040,6 @@ set +x
# rapper -g 999999/0/ibge_un_adm2.no1.skos.ttl
# rapper --output dot --guess 999999/0/ibge_un_adm2.no1.skos.ttl


#### @TODO: population --------------------------------------------------------
# https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Countries_sorted_by_population
# https://w.wiki/5CDt
