Skip to content

Commit 37fb67b

Browse files
committed
Adding scripts for 2023 ACS 5-year
1 parent bbb8c6f commit 37fb67b

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Download the 2023 ACS 5-year table-based summary file into DATA_DIR:
#   - the zipped data tables (saved as AllTables.zip, then unpacked in place)
#   - the geography file (Geos20235YR.txt)
#   - the table shells (ACS20235YR_Table_Shells.txt)
#
# Needs sudo (for apt-get) and network access. Exits 1 on the first failed
# directory change, download or extraction so a later import step never runs
# against partial data.

DATA_DIR=/home/ubuntu/data/acs2023_5yr
BASE_URL="https://census-backup.b-cdn.net/programs-surveys/acs/summary_file/2023/table-based-SF"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Download URL $1 to file $2.
# --fail makes curl return non-zero on an HTTP error (4xx/5xx) instead of
# saving the server's error page under the destination name.
fetch() {
  local url=$1
  local dest=$2
  curl -fL "$url" -o "$dest" || die "Failed to download ${url}"
}

mkdir -p "$DATA_DIR" || die "Failed to create ${DATA_DIR}"
# unzip below extracts into the current directory, so a failed cd must stop us.
cd "$DATA_DIR" || die "Failed to change directory to ${DATA_DIR}"

# Grab the 2023 ACS 5 year
# Package installation is best-effort: if the tools are genuinely missing the
# checked curl/unzip calls below will fail and abort the script.
sudo apt-get -y install unzip curl \
  || printf '%s\n' "Warning: apt-get install of unzip/curl failed; continuing." >&2

fetch "${BASE_URL}/data/5YRData/5YRData.zip" \
  "${DATA_DIR}/AllTables.zip"
fetch "${BASE_URL}/documentation/Geos20235YR.txt" \
  "${DATA_DIR}/Geos20235YR.txt"
fetch "${BASE_URL}/documentation/ACS20235YR_Table_Shells.txt" \
  "${DATA_DIR}/ACS20235YR_Table_Shells.txt"

unzip -q "${DATA_DIR}/AllTables.zip" || die "Failed to unzip ${DATA_DIR}/AllTables.zip"

table_based/03_import_acs_2023_5yr.sh

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
#!/bin/bash
#
# Import the 2023 ACS 5-year table-based release into PostgreSQL.
#
# Required environment:
#   PGURI  connection string for the PostgreSQL server to operate on.
#
# Steps: clone the census-postgres import scripts, recreate the schema,
# create the geoheader and data tables, load the geoheader and every
# per-table CSV found in DATA_DIR, then create the views.
# Exits 1 on the first step that fails.

SCHEMA_NAME="acs2023_5yr"
DATA_DIR="/home/ubuntu/data/acs2023_5yr"
REPO_DIR="/home/ubuntu/census-postgres"

export PGOPTIONS='--client-min-messages=warning'

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Run the SQL file $1 (relative to the current directory) against PGURI,
# stopping at the first SQL error.
run_sql_file() {
  psql "$PGURI" -v ON_ERROR_STOP=1 -q -f "$1"
}

# COPY the CSV file $1 (with header row) into table $2 of SCHEMA_NAME.
# The file is attached with a redirection rather than `cat |` so that a
# missing or unreadable file makes the command fail instead of silently
# loading zero rows.
copy_csv() {
  local csv=$1
  local table=$2
  psql "$PGURI" -v ON_ERROR_STOP=1 -q \
    -c "COPY ${SCHEMA_NAME}.${table} FROM STDIN WITH ENCODING 'UTF8' CSV HEADER;" \
    < "$csv"
}

# Print the table name encoded in data file path $1: the second
# "-"-separated field of the file name with its .csv suffix removed.
table_name_for() {
  basename "$1" .csv | cut -d "-" -f 2
}

# Quoted so that a connection string containing spaces is tested correctly.
if [[ -z "${PGURI:-}" ]]; then
  die "You must set PGURI environment variable to the connection string for the PostgreSQL server to operate on."
fi

# Clone the import scripts from git
echo "Cloning git repo"
rm -rf -- "$REPO_DIR"
cd /home/ubuntu || die "Failed to change directory to /home/ubuntu."
# Best-effort install; a missing git is caught by the checked clone below.
sudo apt-get -y install git
git clone https://github.com/censusreporter/census-postgres.git \
  || die "Failed to clone git repo."

# The SQL files below are referenced by relative path, so this cd must succeed.
cd "${REPO_DIR}/${SCHEMA_NAME}" \
  || die "Failed to change directory to ${REPO_DIR}/${SCHEMA_NAME}."

# Create the schema
# Drop the tables - can't just use DROP SCHEMA because of out of shared memory issue
echo "Dropping tables"
run_sql_file drop_schema_tables.sql || die "Failed dropping tables."

echo "Dropping schema"
psql "$PGURI" -c "DROP SCHEMA IF EXISTS ${SCHEMA_NAME} CASCADE; CREATE SCHEMA ${SCHEMA_NAME};" \
  || die "Failed dropping or creating schema."

# Create import tables
echo "Creating geoheader"
run_sql_file create_geoheader.sql || die "Failed creating geoheader."

echo "Creating core data import tables"
run_sql_file create_tables.sql || die "Failed creating data import tables."

# Slurp in the actual data
echo "Fixing GEO_ID in geoheaders"
# NOTE(review): the next step reads Geos20235YR.csv, so fix_geoids.py
# presumably writes a .csv next to its input - confirm against the script.
python3 "${REPO_DIR}/meta-scripts/fix_geoids.py" "${DATA_DIR}/Geos20235YR.txt" \
  || die "Failed fixing geoids."

echo "Importing geoheader"
copy_csv "${DATA_DIR}/Geos20235YR.csv" geoheader \
  || die "Failed importing geoheader."

echo "Fixing GEO_ID in datafiles"
python3 "${REPO_DIR}/meta-scripts/fix_geoids.py" "${DATA_DIR}" \
  || die "Failed fixing geoids."

# don't catch the geoheader file now that it also has a .csv suffix
data_files=("${DATA_DIR}"/acs*.csv)
# An unmatched glob stays as the literal pattern; fail clearly in that case
# rather than trying to import a file named "acs*.csv".
[[ -e "${data_files[0]}" ]] \
  || die "No data files matching ${DATA_DIR}/acs*.csv were found."

echo "Inserting into tables"
for csv in "${data_files[@]}"; do
  table=$(table_name_for "$csv")
  # Tables whose names start with b98/b99 are skipped; every other file is
  # loaded into the matching "<table>_moe" table.
  if ! [[ "$table" =~ ^[b][9][89] ]]; then
    table="${table}_moe"
    copy_csv "$csv" "$table" || die "Failed inserting into tables at $table."
  fi
done

echo "Creating views"
run_sql_file create_views.sql || die "Failed creating views."

# List the name of every table that was imported (b98/b99 excluded).
for csv in "${data_files[@]}"; do
  table=$(table_name_for "$csv")
  if ! [[ "$table" =~ ^[b][9][89] ]]; then
    printf '%s\n' "$table"
  fi
done

0 commit comments

Comments
 (0)