From 36e39c5fe5983ba5aead999d97414d4fb3e9be38 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 4 Jan 2024 13:44:06 +0100 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=94=A8=20remove=20obsolete=20tables?= =?UTF-8?q?=20and=20country=20standardizer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteClient/AdminApp.tsx | 6 - adminSiteClient/CountryStandardizerPage.tsx | 697 ------------------ adminSiteServer/apiRouter.ts | 68 -- .../1704297774028-DropObsoleteTables.ts | 20 + 4 files changed, 20 insertions(+), 771 deletions(-) delete mode 100644 adminSiteClient/CountryStandardizerPage.tsx create mode 100644 db/migration/1704297774028-DropObsoleteTables.ts diff --git a/adminSiteClient/AdminApp.tsx b/adminSiteClient/AdminApp.tsx index 5b807cc6e4e..e7a579b9bf0 100644 --- a/adminSiteClient/AdminApp.tsx +++ b/adminSiteClient/AdminApp.tsx @@ -6,7 +6,6 @@ import { observer } from "mobx-react" import { ChartIndexPage } from "./ChartIndexPage.js" import { UsersIndexPage } from "./UsersIndexPage.js" import { DatasetsIndexPage } from "./DatasetsIndexPage.js" -import { CountryStandardizerPage } from "./CountryStandardizerPage.js" import { UserEditPage } from "./UserEditPage.js" import { VariableEditPage } from "./VariableEditPage.js" import { VariablesIndexPage } from "./VariablesIndexPage.js" @@ -244,11 +243,6 @@ export class AdminApp extends React.Component<{ /> )} /> - - @observable mapCountriesInputToOutput: Record - @observable autoMatchedCount: number = 0 - @observable parseError?: string - @observable findSimilarCountries: boolean = true - - constructor() { - this.countryEntriesMap = new Map() - this.rows = [] - this.mapCountriesInputToOutput = {} - } - - @computed get allCountries(): string[] { - const standardNames = Object.values( - this.mapCountriesInputToOutput - ).filter((value: string | undefined) => value !== undefined) as string[] - return uniq(sortBy(standardNames)) as string[] - } - - @computed get countryColumnIndex() { - const { rows } = this - if (rows.length === 0) { - return -1 - } - return rows[0].findIndex( - (columnName) => columnName.toLowerCase() === "country" - ) - } - - @computed get showDownloadOption() { - const { rows, validationError } = this - if (rows.length > 0 && validationError === undefined) { - return true - } - return false - } - - @computed get numCountries() { - return this.rows.length - 1 - } - - @computed get validationError(): string | undefined { - const { parseError } = this - if (parseError !== undefined) { - return `Could not parse file (error: ${parseError}). Check if it is a valid CSV file.` - } - - const { rows, countryColumnIndex } = this - if (rows.length === 0) return undefined - - if (countryColumnIndex < 0) { - return "Could not find a column name with the header 'Country'" - } - - return undefined - } - - @action.bound onFileUpload( - filename: string, - rows: string[][], - err: { message: string } | undefined, - similarityMatch: boolean - ) { - this.filename = filename - if (err) { - this.parseError = err.message - this.rows = [] - } else { - this.parseError = undefined - this.rows = rows - } - this.findSimilarCountries = similarityMatch - this.parseCSV() - } - - @action.bound onFormatChange( - countryMap: Record, - findSimilarCountries: boolean - ) { - this.mapCountriesInputToOutput = countryMap - this.findSimilarCountries = findSimilarCountries - this.parseCSV() - } - - @action.bound parseCSV() { - const { - rows, - countryColumnIndex, - mapCountriesInputToOutput, - findSimilarCountries, - } = this - - if (countryColumnIndex < 0) { - this.countryEntriesMap = new Map() - this.autoMatchedCount = 0 - return - } - - const entriesByCountry = new Map() - const countries = rows - .slice(1) // remove header row - .map((row: string[]) => - unidecode(row[countryColumnIndex] as string) - ) - .filter( - (country?: string) => country !== "" && country !== undefined - ) // exclude empty strings - - // for fuzzy-match, use the input and output values as target to improve matching potential - const inputCountries = Object.keys(mapCountriesInputToOutput).filter( - (key) => mapCountriesInputToOutput[key] !== undefined - ) - const outputCountries = inputCountries.map( - (key) => mapCountriesInputToOutput[key] - ) as string[] - const fuzz = FuzzySet(inputCountries.concat(outputCountries)) - - let autoMatched = 0 - - countries.map((country: string) => { - const outputCountry = - mapCountriesInputToOutput[country.toLowerCase()] - let approximatedMatches: string[] = [] - - if (outputCountry === undefined) { - if (findSimilarCountries) { - const fuzzMatches = fuzz.get(country) ?? [] - approximatedMatches = fuzzMatches - .map( - (fuzzyMatch: [number, string]) => - mapCountriesInputToOutput[fuzzyMatch[1]] || - fuzzyMatch[1] - ) - .filter((key) => key !== undefined) - approximatedMatches = uniq(approximatedMatches) - } - } else { - autoMatched += 1 - } - - const entry: CountryEntry = { - originalName: country, - standardizedName: outputCountry || undefined, - approximatedMatches: approximatedMatches, - selectedMatch: "", - customName: "", - } - entriesByCountry.set(country, entry) - }) - - this.countryEntriesMap = entriesByCountry - this.autoMatchedCount = autoMatched - } -} - -interface CountryEntry extends React.HTMLAttributes { - originalName: string - standardizedName?: string - approximatedMatches: string[] - selectedMatch?: string - customName?: string -} - -@observer -class CountryEntryRowRenderer extends React.Component<{ - entry: CountryEntry - allCountries: string[] - onUpdate: (value: string, inputCountry: string, isCustom: boolean) => void -}> { - @observable selectedStandardName!: string - - @computed get defaultOption() { - return "Select one" - } - - @computed get isMatched(): boolean { - const { entry } = this.props - - if (entry.standardizedName || entry.selectedMatch || entry.customName) - return true - else return false - } - - @computed get defaultValue() { - const { entry } = this.props - - if ( - entry.selectedMatch !== undefined && - entry.selectedMatch.length > 0 - ) { - return entry.selectedMatch - } - return this.defaultOption - } - - @action.bound onEntrySelected(selectedName: string) { - const { entry, onUpdate } = this.props - - onUpdate(selectedName, entry.originalName, false) - } - - render() { - const { entry, allCountries, onUpdate } = this.props - const { defaultOption, defaultValue, isMatched } = this - - const optgroups: SelectGroup[] = [] - - if (entry.approximatedMatches.length > 0) { - const options = entry.approximatedMatches.map((countryName) => ({ - value: countryName, - label: countryName, - })) - optgroups.push({ title: "Likely matches", options: options }) - } - - optgroups.push({ - title: "All standard names", - options: allCountries.map((countryName) => ({ - value: countryName, - label: countryName, - })), - }) - - return ( - - - - {entry.originalName} - - - {entry.standardizedName} - - - - - - onUpdate( - e.currentTarget.value, - entry.originalName, - true - ) - } - /> - - - ) - } -} - -@observer -export class CountryStandardizerPage extends React.Component { - static contextType = AdminAppContext - context!: AdminAppContextType - - fileUploader!: HTMLInputElement - - @observable countryList: CountryEntry[] = [] - @observable inputFormat: string = CountryNameFormat.NonStandardCountryName - @observable outputFormat: string = CountryNameFormat.OurWorldInDataName - @observable csv: CSV = new CSV() - @observable showAllRows: boolean = false - - @computed get shouldSaveSelection(): boolean { - if ( - this.inputFormat === CountryNameFormat.NonStandardCountryName && - this.outputFormat === CountryNameFormat.OurWorldInDataName - ) { - return true - } - return false - } - - @computed get displayMatchStatus() { - const { autoMatchedCount, numCountries, showDownloadOption } = this.csv - - if (!showDownloadOption) return
- - const columnName = CountryDefByKey[this.outputFormat].label - - let text = "" - let banner = "" - if (autoMatchedCount === numCountries) { - banner = "alert-success" - text = " All countries were auto-matched!" - } else { - banner = "alert-warning" - text = - " Some countries could not be matched. Either select a similar candidate from the dropdown (which will be saved back in the database) or enter a custom name." - } - text += - " The file you will download has a new column with the header '" + - columnName + - "'." - return ( -
- Status: - {text} -
- ) - } - - @action.bound onInputFormat(format: string) { - this.inputFormat = format - } - - @action.bound onOutputFormat(format: string) { - this.outputFormat = format - } - - @action.bound onChooseCSV({ target }: { target: HTMLInputElement }) { - const file = target.files && target.files[0] - if (!file) return - - const reader = new FileReader() - reader.onload = (e) => { - const csv = e?.target?.result - if (csv && typeof csv === "string") { - const res = Papa.parse(csv, { - delimiter: ",", - skipEmptyLines: true, - }) - this.csv.onFileUpload( - file.name, - res.data, - res.errors[0], - this.shouldSaveSelection - ) - } else console.error("Csv was not read correctly") - } - reader.readAsText(file) - } - - dispose!: IReactionDisposer - componentDidMount() { - // Fetch mapping from server when the input or output format changes - this.dispose = reaction( - () => [this.inputFormat, this.outputFormat], - () => this.fetchCountryMap() - ) - - this.fetchCountryMap() - } - - componentWillUnmount() { - this.dispose() - } - - async fetchCountryMap() { - const { inputFormat, outputFormat } = this - const { admin } = this.context - const results = await admin.getJSON( - `/api/countries.json?input=${inputFormat}&output=${outputFormat}` - ) - - runInAction(() => { - const countryMap: { [key: string]: string } = {} - results.countries.forEach( - (countryFormat: { input: string; output: unknown }) => { - if (countryFormat.input === null) return - countryMap[countryFormat.input.toLowerCase()] = toString( - countryFormat.output - ) - } - ) - - this.csv.onFormatChange(countryMap, this.shouldSaveSelection) - }) - } - - @computed get csvDataUri(): string { - if (this.outputCSV) return window.URL.createObjectURL(this.outputCSV) - else return "" - } - - @computed get csvFilename(): string { - const { csv } = this - - if (csv.filename === undefined) return "" - - return csv.filename.replace(".csv", "_country_standardized.csv") - } - - @computed get downloadTooltip(): string { - const { shouldSaveSelection } = this - - if (shouldSaveSelection) { - return "Downloading will save any custom selection for future ease" - } - return "" - } - - @computed get fileUploadLabel() { - const { csv } = this - - if (csv === undefined || csv.filename === undefined) { - return "Choose CSV file" - } - return csv.filename - } - - @computed get outputCSV() { - const { csv } = this - - if (csv === undefined || csv.validationError !== undefined) - return undefined - - const columnName = CountryDefByKey[this.outputFormat].label - const columnIndex = csv.countryColumnIndex + 1 - const outputRows: string[][] = [] - - // add a new column with the output country name - csv.rows.forEach((row, rowIndex) => { - let columnValue: string = "" - - if (rowIndex === 0) { - // Don't map header row - columnValue = columnName - } else { - // prioritize user selected name - const entry = csv.countryEntriesMap.get( - unidecode(row[csv.countryColumnIndex]) - ) - if (entry !== undefined) { - if ( - entry.customName !== undefined && - entry.customName.length > 0 - ) { - columnValue = entry.customName - } else if (entry.standardizedName !== undefined) { - columnValue = entry.standardizedName - } else if ( - entry.selectedMatch !== undefined && - entry.selectedMatch.length > 0 - ) { - columnValue = entry.selectedMatch - } - } - } - - const newRow = row.slice(0) - newRow.splice(columnIndex, 0, columnValue) - outputRows.push(newRow) - }) - - const strRows = outputRows.map((row) => - row.map((val) => csvEscape(val)).join(",") - ) - return new Blob([strRows.join("\n")], { type: "text/csv" }) - } - - @action.bound onUpdateRow( - value: string, - inputCountry: string, - isCustom: boolean - ) { - const { csv } = this - - const entry = csv.countryEntriesMap.get(inputCountry) as CountryEntry - console.log("updating " + inputCountry + " with " + value) - - if (isCustom) { - entry.customName = value === undefined ? "" : value - } else { - entry.selectedMatch = value - } - } - - // IE11 compatibility - @action.bound onDownload() { - const { shouldSaveSelection } = this - - if (shouldSaveSelection) { - this.onSave() - } - } - - @action.bound onToggleRows() { - this.showAllRows = !this.showAllRows - } - - @action.bound onSave() { - const { csv } = this - - const countries: Record = {} - let needToSave: boolean = false - - csv.countryEntriesMap.forEach((entry) => { - // ignore if there was a user entered a new name - if (entry.customName !== undefined && entry.customName.length > 0) { - console.log( - "not saving custom-name for entry " + entry.originalName - ) - } else if ( - entry.selectedMatch !== undefined && - entry.selectedMatch.length > 0 - ) { - needToSave = true - countries[entry.originalName] = entry.selectedMatch - } - }) - - if (needToSave) { - this.context.admin.requestJSON( - `/api/countries`, - { countries: countries }, - "POST" - ) - } - } - - @computed get entriesToShow(): CountryEntry[] { - if (this.csv === undefined) return [] - - const countries: CountryEntry[] = [] - this.csv.countryEntriesMap.forEach((entry) => { - if (this.showAllRows) { - countries.push(entry) - } else if (entry.standardizedName === undefined) { - countries.push(entry) - } - }) - return countries - } - - render() { - const { csv, entriesToShow } = this - const { showDownloadOption, validationError } = csv - - const allowedInputFormats = CountryNameFormatDefs.filter( - (c) => c.use_as_input - ) - const allowedOutputFormats = CountryNameFormatDefs.filter( - (c) => c.use_as_output - ) - - return ( - -
-
-

Country Standardizer Tool

-

- Upload a CSV file with countries. Select the current - input and desired output format. The tool will - attempt to find a match automatically for all - entries. If not, you will be able to select a - similar entry or use a new name. After which, you - can download the file that has a new column for your - output countries. -

-
-
- - -
- - Country has to be saved under a column named - 'Country' - -
- ({ - value: def.key, - label: def.label, - }))} - helpText="Choose the current format of the country names. If input format is other than the default, the tool won't attempt to find similar countries when there is no exact match." - data-step="1" - /> - ({ - value: def.key, - label: def.label, - }))} - helpText="Choose the desired format of the country names. If the chosen format is other than OWID name, the tool won't attempt to find similar countries when there is no exact match." - /> -
- {showDownloadOption ? ( - - {" "} - Download {this.csvFilename} - - ) : ( - - )} - -
- {validationError !== undefined ? ( -
- CSV Error: {validationError} -
- ) : ( -
- )} - {this.displayMatchStatus} -
-
- - - - - - - - - - - {entriesToShow.map((entry, i) => ( - - ))} - -
Original NameStandardized NamePotential Candidates (select below)Or enter a Custom Name
-
-
-
- ) - } -} diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 64789f6c2cb..74fd6c182c6 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -575,74 +575,6 @@ apiRouter.get( apiRouter.get("/topics.json", async (req: Request, res: Response) => ({ topics: await wpdb.getTopics(), })) - -apiRouter.get("/countries.json", async (req: Request, res: Response) => { - let rows = [] - - const input = req.query.input as string - const output = req.query.output as string - - if (input === CountryNameFormat.NonStandardCountryName) { - const outputColumn = CountryDefByKey[output].column_name - - rows = await db.queryMysql(` - SELECT country_name as input, ${outputColumn} as output - FROM country_name_tool_countryname ccn - LEFT JOIN country_name_tool_countrydata ccd on ccn.owid_country = ccd.id - LEFT JOIN country_name_tool_continent con on con.id = ccd.continent`) - } else { - const inputColumn = CountryDefByKey[input].column_name - const outputColumn = CountryDefByKey[output].column_name - - rows = await db.queryMysql( - `SELECT ${inputColumn} as input, ${outputColumn} as output - FROM country_name_tool_countrydata ccd - LEFT JOIN country_name_tool_continent con on con.id = ccd.continent` - ) - } - - return { - countries: rows, - } -}) - -apiRouter.post("/countries", async (req: Request, res: Response) => { - const countries = req.body.countries - - const mapOwidNameToId: any = {} - let owidRows = [] - - // find owid ID - const owidNames = Object.keys(countries).map((key) => countries[key]) - owidRows = await db.queryMysql( - `SELECT id, owid_name - FROM country_name_tool_countrydata - WHERE owid_name in (?) - `, - [owidNames] - ) - for (const row of owidRows) { - mapOwidNameToId[row.owid_name] = row.id - } - - // insert one by one (ideally do a bulk insert) - for (const country of Object.keys(countries)) { - const owidName = countries[country] - - console.log( - `adding ${country}, ${mapOwidNameToId[owidName]}, ${owidName}` - ) - - await db.execute( - `INSERT INTO country_name_tool_countryname (country_name, owid_country) - VALUES (?, ?)`, - [country, mapOwidNameToId[owidName]] - ) - } - - return { success: true } -}) - apiRouter.get( "/editorData/variables.json", async (req: Request, res: Response) => { diff --git a/db/migration/1704297774028-DropObsoleteTables.ts b/db/migration/1704297774028-DropObsoleteTables.ts new file mode 100644 index 00000000000..510ba0b2884 --- /dev/null +++ b/db/migration/1704297774028-DropObsoleteTables.ts @@ -0,0 +1,20 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class DropObsoleteTables1704297774028 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + // drop tables country_name_tool_continent, country_name_tool_countrydata, country_name_tool_countryname + await queryRunner.query( + `DROP TABLE IF EXISTS country_name_tool_continent` + ) + await queryRunner.query( + `DROP TABLE IF EXISTS country_name_tool_countrydata` + ) + await queryRunner.query( + `DROP TABLE IF EXISTS country_name_tool_countryname` + ) + // drop importer_importhistory + await queryRunner.query(`DROP TABLE IF EXISTS importer_importhistory`) + } + + public async down(queryRunner: QueryRunner): Promise {} +} From 009def8cdd0164be2aa8de49f282ef4381d2340f Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Sat, 27 Jan 2024 02:15:17 +0100 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=94=A8=20fix=20order=20of=20tables=20?= =?UTF-8?q?to=20drop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/migration/1704297774028-DropObsoleteTables.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/migration/1704297774028-DropObsoleteTables.ts b/db/migration/1704297774028-DropObsoleteTables.ts index 510ba0b2884..291cb9b6d8b 100644 --- a/db/migration/1704297774028-DropObsoleteTables.ts +++ b/db/migration/1704297774028-DropObsoleteTables.ts @@ -4,13 +4,13 @@ export class DropObsoleteTables1704297774028 implements MigrationInterface { public async up(queryRunner: QueryRunner): Promise { // drop tables country_name_tool_continent, country_name_tool_countrydata, country_name_tool_countryname await queryRunner.query( - `DROP TABLE IF EXISTS country_name_tool_continent` + `DROP TABLE IF EXISTS country_name_tool_countryname` ) await queryRunner.query( `DROP TABLE IF EXISTS country_name_tool_countrydata` ) await queryRunner.query( - `DROP TABLE IF EXISTS country_name_tool_countryname` + `DROP TABLE IF EXISTS country_name_tool_continent` ) // drop importer_importhistory await queryRunner.query(`DROP TABLE IF EXISTS importer_importhistory`) From d2e80413f44e873324e74e478a6915e82b42ba76 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 29 Jan 2024 09:36:39 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=90=9D=20eslint=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 4 ---- db/migration/1704297774028-DropObsoleteTables.ts | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 74fd6c182c6..5b21552520e 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -59,10 +59,6 @@ import { getVariableDataRoute, getVariableMetadataRoute, } from "@ourworldindata/grapher" -import { - CountryNameFormat, - CountryDefByKey, -} from "../adminSiteClient/CountryNameFormat.js" import { Dataset } from "../db/model/Dataset.js" import { User } from "../db/model/User.js" import { GdocPost } from "../db/model/Gdoc/GdocPost.js" diff --git a/db/migration/1704297774028-DropObsoleteTables.ts b/db/migration/1704297774028-DropObsoleteTables.ts index 291cb9b6d8b..718c6d166df 100644 --- a/db/migration/1704297774028-DropObsoleteTables.ts +++ b/db/migration/1704297774028-DropObsoleteTables.ts @@ -16,5 +16,7 @@ export class DropObsoleteTables1704297774028 implements MigrationInterface { await queryRunner.query(`DROP TABLE IF EXISTS importer_importhistory`) } - public async down(queryRunner: QueryRunner): Promise {} + public async down(_queryRunner: QueryRunner): Promise { + return + } }