forked from metabrainz/musicbrainz-docker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check-search-indexes
executable file
·168 lines (140 loc) · 4.58 KB
/
check-search-indexes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env bash

# Abort on any unhandled command failure and on use of an unset variable.
set -eu

# shellcheck source=admin/lib/common.inc.bash
source "$(dirname "${BASH_SOURCE[0]}")/lib/common.inc.bash"

# Usage text printed when help is requested.
# NOTE(review): SCRIPT_NAME is not set in this file; presumably it is
# provided by the library sourced above - confirm.
HELP="Usage: $SCRIPT_NAME all
or: $SCRIPT_NAME CORE... (for example: $SCRIPT_NAME artist release)
or: $SCRIPT_NAME --help
For each of MusicBrainz Solr cores/collections,
compare the count of existing documents in PostgreSQL,
with the count of indexed documents in Solr.
Notes:
The Docker Compose services 'db' and 'indexer' must be up."
# Build PostgreSQL queries to count existing documents by core
#
# 'queries' maps a core name to the SQL statement that returns, as a single
# value, the number of documents expected for that core.
declare -A queries

# The 'annotation' count is the sum, over every *_annotation table, of the
# number of distinct annotated entities in that table.
# UNION ALL (rather than UNION) is required here: UNION removes duplicate
# rows, so two tables that happen to hold the same number of annotated
# entities would be counted only once and the sum would be too small.
annotation_counts=""
for table in area artist event instrument label place \
  recording release release_group series work
do
  annotation_counts+="${annotation_counts:+ UNION ALL }"
  annotation_counts+="SELECT COUNT(DISTINCT $table) AS annotation_count"
  annotation_counts+=" FROM ${table}_annotation"
done
queries['annotation']="SELECT SUM(annotation_count) FROM ($annotation_counts) AS total"

# The 'cdstub' core is counted from the release_raw table.
queries['cdstub']="SELECT COUNT(id) FROM release_raw"

# Every other core is counted from the table of the same name; core names
# use dashes where table names use underscores (release_group -> release-group).
for table in area artist editor event instrument label place \
  recording release release_group series tag url work
do
  queries["${table//_/-}"]="SELECT COUNT(id) FROM $table"
done
# Parse arguments
#
# Accepted forms: a single help flag, the single word 'all', or one or
# more core names, each of which must be a key of 'queries'.
# On success 'cores' holds the list of cores to check.
if [[ $# -eq 0 ]]
then
  echo >&2 "$SCRIPT_NAME: missing argument"
  echo >&2 "Try '$SCRIPT_NAME --help' for usage."
  exit 64 # EX_USAGE
fi

declare -a cores=()

# Anchored on both ends so that only h/help, with any number of leading
# dashes, requests help; an unanchored pattern would also match any
# mistyped core name that merely contains the letter 'h'.
help_pattern='^-*h(elp)?$'

if [[ $# -eq 1 && $1 =~ $help_pattern ]]
then
  echo "$HELP"
  exit 0 # EX_OK
elif [[ $# -eq 1 && $1 == all ]]
then
  cores=( "${!queries[@]}" )
else
  for requested_core in "$@"
  do
    # Emptiness is tested first because an empty associative array
    # subscript is an error in bash.
    if [[ -z $requested_core || -z ${queries[$requested_core]-} ]]
    then
      echo >&2 "$SCRIPT_NAME: unrecognized core '$requested_core'"
      echo >&2 "Try '$SCRIPT_NAME --help' for usage."
      exit 64 # EX_USAGE
    fi
    cores+=( "$requested_core" )
  done
fi
# Check that required Docker Compose services are up

#######################################
# Exit with EX_UNAVAILABLE unless 'ps' reports the given Docker Compose
# service as running.
# Globals:   DOCKER_COMPOSE_CMD, SCRIPT_NAME, MB_DOCKER_ROOT (read)
# Arguments: $1 - Docker Compose service name
#            $2 - kind of documents that cannot be counted without it
#                 (used in the error message only)
# Outputs:   writes an explanation to stderr when the service is not up
#######################################
require_service_up() {
  local service=$1 document_kind=$2 service_containers

  # DOCKER_COMPOSE_CMD is left unquoted on purpose so that it may
  # word-split - presumably it can hold a multi-word command; confirm
  # against the sourced library.
  # '|| true': stderr is discarded here, so under 'set -e' a failing 'ps'
  # would otherwise end the script without any message at all.
  service_containers="$($DOCKER_COMPOSE_CMD ps "$service" 2>/dev/null)" || true

  # -E with a plain '|' is portable; '\|' in a basic regular expression
  # is a GNU extension.
  if ! grep -Eqw 'running|Up' <<<"$service_containers"
  then
    echo >&2 "$SCRIPT_NAME: cannot count $document_kind documents: " \
      "the Docker Compose service '$service' is not up"
    echo >&2 "Try '$DOCKER_COMPOSE_CMD up -d' from '$MB_DOCKER_ROOT'"
    exit 69 # EX_UNAVAILABLE
  fi
}

require_service_up db existing
require_service_up indexer indexed
# Count existing documents by core in PostgreSQL
#
# 'counts' maps each requested core to the value returned by its query.
declare -A counts
POSTGRES_USER=musicbrainz
POSTGRES_DATABASE=musicbrainz_db
for core in "${cores[@]}"
do
  # COPY ... TO STDOUT makes psql print the bare value only.
  copy_statement="COPY(${queries[$core]}) TO STDOUT"
  # Carriage returns are stripped from the output - presumably they come
  # from the terminal allocated by 'exec'; confirm.
  counts["$core"]=$(
    $DOCKER_COMPOSE_CMD exec db \
      psql -U "$POSTGRES_USER" -d "$POSTGRES_DATABASE" -c "$copy_statement" \
      | tr -d '\r'
  )
done
# Sort cores by ascending number of documents
#
# Emit one 'core count' line per entry of 'counts', order the lines
# numerically on the count, then keep only the core name of each line.
declare -a ascending_cores
mapfile -t ascending_cores < <(
  for core in "${!counts[@]}"
  do
    printf '%s %s\n' "$core" "${counts[$core]}"
  done | sort -n -k2 | cut -d ' ' -f 1
)
# Count indexed documents by core in Solr
#
# Inside the 'indexer' service, the list of cores is fetched from Solr as
# JSON and reduced to one 'core numDocs' line per core; those lines are
# read here into 'indexed_docs', keyed by core name. No sorting is needed
# since the result is a lookup table.
declare -A indexed_docs
# -r keeps backslashes literal; the first word is the core name and the
# remainder of the line is its document count.
while read -r core docs
do
  # Skip blank lines: an empty associative array subscript is an error.
  [[ -n $core ]] || continue
  # Drop any carriage return left at the end of the line.
  indexed_docs["$core"]="${docs//$'\r'/}"
done < <($DOCKER_COMPOSE_CMD exec indexer bash -c "
  wget -q -O - http://search:8983/v2/cores | python2 -c '
import sys, json
json_status = json.load(sys.stdin)[\"status\"]
for core in json_status:
    print core, json_status[core][\"index\"][\"numDocs\"]
'")
# Compare number of indexed docs with number of existing docs, by core
#
# Prints a table with one row per core, in ascending order of database
# count: status is OK when both counts are equal and -- otherwise. A core
# with no entry in 'indexed_docs' is reported with an index count of 0.
sep="|"
{
  echo "CORE${sep}STATUS${sep}INDEX${sep}DB"
  for core in "${ascending_cores[@]}"
  do
    existing=${counts[$core]}
    indexed=${indexed_docs[$core]:-0}
    status="--"
    if [[ $existing -eq $indexed ]]
    then
      status="OK"
    fi
    echo "$core$sep$status$sep$indexed$sep/$existing"
  done
} | column -t -s "$sep"
# vi: set et sts=2 sw=2 ts=2 :