File:  [GNUtrition Sources] / gnutrition / gnutrition-updatedb.in
Revision 1.1: download - view: text, annotated - select for diffs
Fri May 8 03:23:59 2026 UTC (12 days, 11 hours ago) by asm
Branches: MAIN
CVS tags: HEAD
Migration from Git with 0.33rc1 changes.

#!/bin/bash
# SPDX-License-Identifier: GPL-3.0-or-later
#
# $Id: gnutrition-updatedb.in,v 1.1 2026/05/08 03:23:59 asm Exp $
#
# gnutrition-updatedb.in - Download USDA food data and build a SQLite database
#
# Copyright (C) 2026 Free Software Foundation, Inc.
#
# Author: Jason Self <jself@gnu.org>
#         Anton McClure <asm@gnu.org>

set -e

dir="@datadir@/gnutrition"
mkdir -p "$dir" && \
cd "$dir" && \

# Check for write permissions
touch @datadir@/gnutrition/food.db && \

# Check for required commands
missing=()
for cmd in wget sha512sum libreoffice sqlite3; do
    if ! command -v "$cmd" > /dev/null 2>&1; then
        missing+=("$cmd")
    fi
done

if [ ${#missing[@]} -ne 0 ]; then
    echo "Error: The following required commands are not available:" >&2
    for cmd in "${missing[@]}"; do
        echo "  - $cmd (please install it)" >&2
    done
    exit 1
fi

# Download the Excel files
BASE_URL="https://www.ars.usda.gov/ARSUserFiles/80400530/apps"

wget -O "2019-2020 FNDDS At A Glance - Foods and Beverages.xlsx" \
    "${BASE_URL}/2019-2020%20FNDDS%20At%20A%20Glance%20-%20Foods%20and%20Beverages.xlsx"

wget -O "2019-2020 FNDDS At A Glance - Portions and Weights.xlsx" \
    "${BASE_URL}/2019-2020%20FNDDS%20At%20A%20Glance%20-%20Portions%20and%20Weights.xlsx"

wget -O "2019-2020 FNDDS At A Glance - FNDDS Nutrient Values.xlsx" \
    "${BASE_URL}/2019-2020%20FNDDS%20At%20A%20Glance%20-%20FNDDS%20Nutrient%20Values.xlsx"

wget -O "FPED_1720.xls" \
    "${BASE_URL}/FPED_1720.xls"

# Verify SHA-512 hashes
echo "Verifying file integrity..."

if ! sha512sum -c <<'EOF'
a2d99aa0e4761e5fed5dd9e5b2003ece4c8f61c954ddabbdacef816abb15c2647d2f1b1cfef8bed98af5a20d4af6d2a34c3e80fd271e33bdf728797f72c18e0c  2019-2020 FNDDS At A Glance - Foods and Beverages.xlsx
7da5e96923f92bf94d0420d899b86004fd86c69a5a7bb2273b02ee21147e827b63af37b8e80525c91b680dd6ed035cfad677cf27dd80f1ae8c10ed0e8e49a0f8  2019-2020 FNDDS At A Glance - Portions and Weights.xlsx
43ee5a5661499b41e621622a4fdffd071805f86839e0577654316a8f21952ba69d56594e0437b77aac32391d90df053a2c036fd7067e2e6b2cad5c5e0576edc1  2019-2020 FNDDS At A Glance - FNDDS Nutrient Values.xlsx
38b46ba1972d700de4d1625acfd0260c518b64850fa0c8251dc7e960ad09f008915ed4fa935074b19fc83cc09abee756dc7dd62a4c487b2a2ec7b4bf196bd683  FPED_1720.xls
EOF
then
    echo "Error: SHA-512 hash verification failed. Downloaded files may be corrupted." >&2
    exit 1
fi

echo "All hashes verified successfully."

# Convert Excel files to CSV using LibreOffice
echo "Converting Excel files to CSV..."

for file in *.xls *.xlsx; do
    libreoffice --headless --convert-to csv "$file"
done

# The USDA "At A Glance" Excel files contain 1-2 rows of report title
# and date information before the actual column headers (e.g., Food code,
# Main food description). These extra rows must be removed so that the
# first row of each CSV contains the column names; otherwise SQLite will
# import the title text as a data row or fail on type mismatches.
for csv in "2019-2020 FNDDS At A Glance - Foods and Beverages.csv" \
           "2019-2020 FNDDS At A Glance - Portions and Weights.csv" \
           "2019-2020 FNDDS At A Glance - FNDDS Nutrient Values.csv"; do
    sed -i '1,2d' "$csv"
done

echo "Conversion complete."

# Build the SQLite database
echo "Building SQLite database..."

rm -f food.db

sqlite3 food.db <<'SQL'
.mode csv

-- Import foods table
.import '2019-2020 FNDDS At A Glance - Foods and Beverages.csv' foods

-- Import portions table
.import '2019-2020 FNDDS At A Glance - Portions and Weights.csv' portions

-- Import nutrients table
.import '2019-2020 FNDDS At A Glance - FNDDS Nutrient Values.csv' nutrients

-- Import points table
.import 'FPED_1720.csv' points
SQL

echo "SQLite database food.db created successfully."

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>