improved speed, added local data lists, some cleaning

This commit is contained in:
2025-09-10 22:50:41 +02:00
parent f35461cc50
commit 56b8c73297
9 changed files with 336 additions and 280 deletions

View File

@@ -6,25 +6,38 @@
# ./releasegroup/radix(uuid)/musicbrainz.json # Release group information
# ./releasegroup/radix(uuid)/releases.json # List of all releases in release group
# ./release/radix(uuid)/musicbrainz.json # Release information with tracklist etc.
# Cache root directory and the directory name used for each object type.
CACHEDIR="$HOME/.cache/$APP_NAME"
TYPE_ARTIST="artist"
TYPE_RELEASEGROUP="releasegroup"
TYPE_RELEASE="release"

# Lowercase (shell-local, not exported) file names inside a cache entry.
artist_filename="musicbrainz.json"
artist_releasegroups_filename="releasegroups.json"
artist_discogs_filename="discogs.json"
artist_wikidata_filename="wikidata.json"
artist_enwikipedia_filename="enwikipedia.json"
releasegroup_filename="musicbrainz.json"
releasegroup_releases_filename="releases.json"
release_filename="musicbrainz.json"

# Exported configuration. CACHE_LOADED is exported together with the values,
# so a child process that inherits it skips this section and relies on the
# inherited environment instead.
if [ -z "${CACHE_LOADED:-}" ]; then
  export CACHEDIR="$HOME/.cache/$APP_NAME"
  export TYPE_ARTIST="artist"
  export TYPE_RELEASEGROUP="releasegroup"
  export TYPE_RELEASE="release"
  export ARTIST_FILENAME="musicbrainz.json"
  export ARTIST_RELEASEROUPS_FILENAME="releasegroups.json"
  export ARTIST_DISCOGS_FILENAME="discogs.json"
  export ARTIST_WIKIDATA_FILENAME="wikidata.json"
  export ARTIST_ENWIKIPEDIA_FILENAME="enwikipedia.json"
  export RELEASEGROUP_FILENAME="musicbrainz.json"
  export RELEASEGROUP_RELEASES_FILENAME="releases.json"
  export RELEASE_FILENAME="musicbrainz.json"
  export CACHE_LOADED=1
fi
# Radix transform directory name
# Print the radix directory path of a single identifier.
#
# argument $1: identifier (e.g. a MusicBrainz ID)
#
# Output has the form "<chars 1-4>/<chars 5-8>/<identifier>".
__radix() {
  # substr() is used instead of an empty field separator (-F ""), whose
  # behaviour POSIX leaves unspecified. printf replaces echo so that the
  # argument is never interpreted as an echo option.
  printf '%s\n' "$1" |
    awk '{ print substr($0, 1, 4) "/" substr($0, 5, 4) "/" $0 }'
}
# Radix transform directory names from stdin
# Print the radix directory path of every identifier read from stdin.
#
# Reads one identifier per line and prints, for each of them,
# "<chars 1-4>/<chars 5-8>/<identifier>".
__radix_batch() {
  # awk reads the function's stdin directly. substr() is used instead of an
  # empty field separator (-F ""), whose behaviour POSIX leaves unspecified.
  awk '{ print substr($0, 1, 4) "/" substr($0, 5, 4) "/" $0 }'
}
# Super wrapper
# argument $1: type
# argument $2: MusicBrainz ID
@@ -50,87 +63,87 @@ __put_json() {
## Artist
# Fetch the cached MusicBrainz entry of an artist.
#
# argument $1: MusicBrainz artist ID
#
# Delegates to __get_json (defined elsewhere in this file) with the artist
# type and $ARTIST_FILENAME. The helper is invoked exactly once, so its
# output is not duplicated.
cache_get_artist() {
  __get_json "$TYPE_ARTIST" "$1" "$ARTIST_FILENAME"
}
# Fetch the cached release-group list of an artist.
#
# argument $1: MusicBrainz artist ID
#
# Delegates to __get_json (defined elsewhere in this file) with the artist
# type and $ARTIST_RELEASEROUPS_FILENAME. The helper is invoked exactly
# once, so its output is not duplicated.
cache_get_artist_releasegroups() {
  __get_json "$TYPE_ARTIST" "$1" "$ARTIST_RELEASEROUPS_FILENAME"
}
# Fetch the cached Discogs entry of an artist.
#
# argument $1: MusicBrainz artist ID
#
# Delegates to __get_json (defined elsewhere in this file) with the artist
# type and $ARTIST_DISCOGS_FILENAME. The helper is invoked exactly once, so
# its output is not duplicated.
cache_get_artist_discogs() {
  __get_json "$TYPE_ARTIST" "$1" "$ARTIST_DISCOGS_FILENAME"
}
# Fetch the cached English Wikipedia entry of an artist.
#
# argument $1: MusicBrainz artist ID
#
# Delegates to __get_json (defined elsewhere in this file) with the artist
# type and $ARTIST_ENWIKIPEDIA_FILENAME. The helper is invoked exactly once,
# so its output is not duplicated.
cache_get_artist_enwikipedia() {
  __get_json "$TYPE_ARTIST" "$1" "$ARTIST_ENWIKIPEDIA_FILENAME"
}
# Fetch the cached Wikidata entry of an artist.
#
# argument $1: MusicBrainz artist ID
#
# Delegates to __get_json (defined elsewhere in this file) with the artist
# type and $ARTIST_WIKIDATA_FILENAME. The helper is invoked exactly once, so
# its output is not duplicated.
cache_get_artist_wikidata() {
  __get_json "$TYPE_ARTIST" "$1" "$ARTIST_WIKIDATA_FILENAME"
}
# Store the MusicBrainz entry of an artist, read from stdin.
#
# argument $1: MusicBrainz artist ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# artist type and $ARTIST_FILENAME. stdin is forwarded exactly once: a
# second pass would only see an already-drained stream.
cache_put_artist() {
  cat | __put_json "$TYPE_ARTIST" "$1" "$ARTIST_FILENAME"
}
# Store the release-group list of an artist, read from stdin.
#
# argument $1: MusicBrainz artist ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# artist type and $ARTIST_RELEASEROUPS_FILENAME. stdin is forwarded exactly
# once: a second pass would only see an already-drained stream.
cache_put_artist_releasegroups() {
  cat | __put_json "$TYPE_ARTIST" "$1" "$ARTIST_RELEASEROUPS_FILENAME"
}
# Append release groups, read from stdin, to an artist's cached list.
#
# argument $1: MusicBrainz artist ID
#
# stdin must hold a JSON document with a "release-groups" array; that array
# is appended to the "release-groups" array of the cached file. The cached
# file is only replaced when jq succeeds. The function's own exit status is
# that of the final cleanup, as before.
cache_append_artist_releasegroups() {
  tmpf=$(mktemp)
  cat >"$tmpf"
  updated=$(mktemp)
  f="$CACHEDIR/$TYPE_ARTIST/$(__radix "$1")/$ARTIST_RELEASEROUPS_FILENAME"
  # $JQ is expanded unquoted, as in the original call.
  # NOTE(review): confirm whether JQ is meant to carry extra options.
  if $JQ --slurpfile n "$tmpf" '."release-groups" += ($n[0]|."release-groups")' "$f" >"$updated"; then
    mv "$updated" "$f"
  else
    # Do not leave the scratch output behind when the merge fails.
    rm -f "$updated"
  fi
  rm -f "$tmpf"
}
# Store the Discogs entry of an artist, read from stdin.
#
# argument $1: MusicBrainz artist ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# artist type and $ARTIST_DISCOGS_FILENAME. stdin is forwarded exactly once:
# a second pass would only see an already-drained stream.
cache_put_artist_discogs() {
  cat | __put_json "$TYPE_ARTIST" "$1" "$ARTIST_DISCOGS_FILENAME"
}
# Store the English Wikipedia entry of an artist, read from stdin.
#
# argument $1: MusicBrainz artist ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# artist type and $ARTIST_ENWIKIPEDIA_FILENAME. stdin is forwarded exactly
# once: a second pass would only see an already-drained stream.
cache_put_artist_enwikipedia() {
  cat | __put_json "$TYPE_ARTIST" "$1" "$ARTIST_ENWIKIPEDIA_FILENAME"
}
# Store the Wikidata entry of an artist, read from stdin.
#
# argument $1: MusicBrainz artist ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# artist type and $ARTIST_WIKIDATA_FILENAME. stdin is forwarded exactly
# once: a second pass would only see an already-drained stream.
cache_put_artist_wikidata() {
  cat | __put_json "$TYPE_ARTIST" "$1" "$ARTIST_WIKIDATA_FILENAME"
}
## Release group
# Fetch the cached release group information.
#
# argument $1: MusicBrainz release-group ID
#
# Delegates to __get_json (defined elsewhere in this file) with the
# release-group type and $RELEASEGROUP_FILENAME. The helper is invoked
# exactly once, so its output is not duplicated.
cache_get_releasegroup() {
  __get_json "$TYPE_RELEASEGROUP" "$1" "$RELEASEGROUP_FILENAME"
}
# Fetch the cached list of all releases in a release group.
#
# argument $1: MusicBrainz release-group ID
#
# Delegates to __get_json (defined elsewhere in this file) with the
# release-group type and $RELEASEGROUP_RELEASES_FILENAME. The helper is
# invoked exactly once, so its output is not duplicated.
cache_get_releasegroup_releases() {
  __get_json "$TYPE_RELEASEGROUP" "$1" "$RELEASEGROUP_RELEASES_FILENAME"
}
# Store release group information, read from stdin.
#
# argument $1: MusicBrainz release-group ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# release-group type and $RELEASEGROUP_FILENAME. stdin is forwarded exactly
# once: a second pass would only see an already-drained stream.
cache_put_releasegroup() {
  cat | __put_json "$TYPE_RELEASEGROUP" "$1" "$RELEASEGROUP_FILENAME"
}
# Store the list of releases in a release group, read from stdin.
#
# argument $1: MusicBrainz release-group ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# release-group type and $RELEASEGROUP_RELEASES_FILENAME. stdin is forwarded
# exactly once: a second pass would only see an already-drained stream.
cache_put_releasegroup_releases() {
  cat | __put_json "$TYPE_RELEASEGROUP" "$1" "$RELEASEGROUP_RELEASES_FILENAME"
}
# Append releases, read from stdin, to a release group's cached list.
#
# argument $1: MusicBrainz release-group ID
#
# stdin must hold a JSON document with a "releases" array; that array is
# appended to the "releases" array of the cached file. The cached file is
# only replaced when jq succeeds. The function's own exit status is that of
# the final cleanup, as before.
cache_append_releasegroup_releases() {
  tmpf=$(mktemp)
  cat >"$tmpf"
  updated=$(mktemp)
  f="$CACHEDIR/$TYPE_RELEASEGROUP/$(__radix "$1")/$RELEASEGROUP_RELEASES_FILENAME"
  # $JQ is expanded unquoted, as in the original call.
  # NOTE(review): confirm whether JQ is meant to carry extra options.
  if $JQ --slurpfile n "$tmpf" '."releases" += ($n[0]|."releases")' "$f" >"$updated"; then
    mv "$updated" "$f"
  else
    # Do not leave the scratch output behind when the merge fails.
    rm -f "$updated"
  fi
  rm -f "$tmpf"
}
## Release
# Fetch the cached release information (with tracklist etc.).
#
# argument $1: MusicBrainz release ID
#
# Delegates to __get_json (defined elsewhere in this file) with the release
# type and $RELEASE_FILENAME. The helper is invoked exactly once, so its
# output is not duplicated.
cache_get_release() {
  __get_json "$TYPE_RELEASE" "$1" "$RELEASE_FILENAME"
}
# Store release information, read from stdin.
#
# argument $1: MusicBrainz release ID
#
# Pipes stdin into __put_json (defined elsewhere in this file) with the
# release type and $RELEASE_FILENAME. stdin is forwarded exactly once: a
# second pass would only see an already-drained stream.
cache_put_release() {
  cat | __put_json "$TYPE_RELEASE" "$1" "$RELEASE_FILENAME"
}
## Cache deletion
@@ -144,18 +157,35 @@ cache_delete_artist() {
# argument $2: MusicBrainz ID
# Check whether an object has an entry in the cache.
#
# argument $1: type ($TYPE_ARTIST, $TYPE_RELEASEGROUP or $TYPE_RELEASE)
# argument $2: MusicBrainz ID
#
# Returns 0 when __get_json yields non-empty output for the main file of the
# object, and 1 otherwise (including for an unknown type).
in_cache() {
  # Each type appears once: in a case statement only the first matching arm
  # runs, so repeated patterns would be dead code.
  case "$1" in
  "$TYPE_ARTIST") fn="$ARTIST_FILENAME" ;;
  "$TYPE_RELEASEGROUP") fn="$RELEASEGROUP_FILENAME" ;;
  "$TYPE_RELEASE") fn="$RELEASE_FILENAME" ;;
  *) return 1 ;;
  esac
  # The status of the test is the status of the function.
  [ -n "$(__get_json "$1" "$2" "$fn")" ]
}
# Print all cache paths to the files specified by their IDs
#
# @argument $1: type
#
# This method reads from stdin any number of MusicBrainz IDs of objects of the
# specified type, and prints the file paths.
# Map MusicBrainz IDs to the paths of their main cache files.
#
# argument $1: type ($TYPE_ARTIST, $TYPE_RELEASEGROUP or $TYPE_RELEASE)
#
# Reads one ID per line from stdin and prints, for each of them,
# "$CACHEDIR/<type>/<radix path>/<file name>". Returns 1 without reading
# stdin when the type is unknown.
cache_get_file_batch() {
  if [ "$1" = "$TYPE_ARTIST" ]; then
    fn="$ARTIST_FILENAME"
  elif [ "$1" = "$TYPE_RELEASEGROUP" ]; then
    fn="$RELEASEGROUP_FILENAME"
  elif [ "$1" = "$TYPE_RELEASE" ]; then
    fn="$RELEASE_FILENAME"
  else
    return 1
  fi
  # __radix_batch consumes the function's stdin directly; awk then wraps
  # every radix path with the cache directory and the file name.
  __radix_batch |
    awk -v prefix="$CACHEDIR/$1/" -v suffix="/$fn" '{ print prefix $0 suffix }'
}
# Print MusicBrainz ID associated to the file paths
#
# This reads from stdin any number of paths (one per line)
# Extract the MusicBrainz ID from cache file paths.
#
# Reads one path per line from stdin and prints the second-to-last
# "/"-separated component of each, i.e. the name of the directory holding
# the file. An empty input line yields an empty output line, so input and
# output stay aligned line by line.
cache_mbid_from_path_batch() {
  # An empty line has NF == 0; $(NF-1) would then reference field -1, which
  # awk rejects. awk reads the function's stdin directly.
  awk -F "/" '{ print (NF > 0 ? $(NF-1) : "") }'
}