diff --git a/adminSiteClient/PostsIndexPage.tsx b/adminSiteClient/PostsIndexPage.tsx index 723d1d728a1..52a0663ffca 100644 --- a/adminSiteClient/PostsIndexPage.tsx +++ b/adminSiteClient/PostsIndexPage.tsx @@ -34,9 +34,9 @@ interface PostIndexMeta { title: string type: string status: string - authors: string[] + authors: string[] | null slug: string - updatedAtInWordpress: string + updatedAtInWordpress: string | null tags: ChartTagJoin[] | null gdocSuccessorId: string | undefined gdocSuccessorPublished: boolean @@ -209,7 +209,7 @@ class PostRow extends React.Component { return ( {post.title || "(no title)"} - {post.authors.join(", ")} + {post.authors?.join(", ")} {post.type} {post.status} {post.slug} @@ -273,7 +273,7 @@ export class PostsIndexPage extends React.Component { post.title, post.slug, `${post.id}`, - post.authors.join(" "), + post.authors?.join(" "), ] ) return posts.filter(filterFn) diff --git a/adminSiteServer/adminRouter.tsx b/adminSiteServer/adminRouter.tsx index 6aa572cfd6f..b144559abd8 100644 --- a/adminSiteServer/adminRouter.tsx +++ b/adminSiteServer/adminRouter.tsx @@ -178,6 +178,14 @@ adminRouter.get("/posts/compare/:postId", async (req, res) => { "archieml", "archieml_update_statistics" ).from(db.knexTable(Post.postsTable).where({ id: postId })) + if ( + archieMlText.length === 0 || + archieMlText[0].archieml === null || + archieMlText[0].archieml_update_statistics === null + ) + throw new Error( + `Could not compare posts because archieml was not present in the database for ${postId}` + ) const archieMlJson = JSON.parse(archieMlText[0].archieml) as OwidGdocJSON const updateStatsJson = JSON.parse( archieMlText[0].archieml_update_statistics diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 74e39b446c5..c525b612a3f 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -37,7 +37,7 @@ import { OwidGdocInterface, parseIntOrUndefined, parseToOperation, - PostRow, + PostRowEnriched, PostRowWithGdocPublishStatus, SuggestedChartRevisionStatus, variableAnnotationAllowedColumnNamesAndTypes, @@ -48,7 +48,6 @@ import { DimensionProperty, TaggableType, ChartTagJoin, - sortBy, } from "@ourworldindata/utils" import { GrapherInterface, @@ -2323,7 +2322,7 @@ apiRouter.get("/posts.json", async (req) => { posts.slug as slug, status, updated_at_in_wordpress, - posts.authors, -- authors is a json array of objects with name and order + posts.authors, posts_tags_aggregated.tags as tags, gdocSuccessorId, gdocSuccessor.published as isGdocSuccessorPublished, @@ -2343,11 +2342,7 @@ apiRouter.get("/posts.json", async (req) => { tags: JSON.parse(row.tags), isGdocSuccessorPublished: !!row.isGdocSuccessorPublished, gdocSlugSuccessors: JSON.parse(row.gdocSlugSuccessors), - authors: row.authors - ? sortBy(JSON.parse(row.authors), "order").map( - (author) => author.author - ) - : [], + authors: JSON.parse(row.authors), })) return { posts: rows } @@ -2370,7 +2365,7 @@ apiRouter.get("/posts/:postId.json", async (req: Request, res: Response) => { .knexTable(postsTable) .where({ id: postId }) .select("*") - .first()) as PostRow | undefined + .first()) as PostRowEnriched | undefined return camelCaseProperties({ ...post }) }) @@ -2393,6 +2388,11 @@ apiRouter.post("/posts/:postId/createGdoc", async (req: Request) => { 400 ) } + if (post.archieml === null) + throw new JsonError( + `ArchieML was not present for post with id ${postId}`, + 500 + ) const tagsByPostId = await getTagsByPostId() const tags = tagsByPostId.get(postId)?.map(({ id }) => TagEntity.create({ id })) || diff --git a/baker/GrapherBaker.tsx b/baker/GrapherBaker.tsx index 85eec3c4d63..719c1407cb0 100644 --- a/baker/GrapherBaker.tsx +++ b/baker/GrapherBaker.tsx @@ -44,7 +44,7 @@ import { import * as db from "../db/db.js" import { glob } from "glob" import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js" -import { bySlug, getPostBySlug, parsePostAuthors } from "../db/model/Post.js" +import { getPostEnrichedBySlug } from "../db/model/Post.js" import { ChartTypeName, GrapherInterface } from "@ourworldindata/grapher" import workerpool from "workerpool" import ProgressBar from "progress" @@ -298,11 +298,11 @@ export async function renderDataPageV2({ citation, } } else { - const post = await bySlug(slug) + const post = await getPostEnrichedBySlug(slug) if (post) { - const authors = parsePostAuthors(post.authors) + const authors = post.authors const citation = getShortPageCitation( - authors, + authors ?? [], post.title, post.published_at ) @@ -359,7 +359,7 @@ export const renderPreviewDataPageOrGrapherPage = async ( const renderGrapherPage = async (grapher: GrapherInterface) => { const postSlug = urlToSlug(grapher.originUrl || "") - const post = postSlug ? await getPostBySlug(postSlug) : undefined + const post = postSlug ? await getPostEnrichedBySlug(postSlug) : undefined const relatedCharts = post && isWordpressDBEnabled ? await getRelatedCharts(post.id) diff --git a/baker/postUpdatedHook.ts b/baker/postUpdatedHook.ts index 7808961c062..19c590e24df 100644 --- a/baker/postUpdatedHook.ts +++ b/baker/postUpdatedHook.ts @@ -4,7 +4,12 @@ import parseArgs from "minimist" import { BAKE_ON_CHANGE } from "../settings/serverSettings.js" import { DeployQueueServer } from "./DeployQueueServer.js" import { exit } from "../db/cleanup.js" -import { PostRow, extractFormattingOptions } from "@ourworldindata/utils" +import { + PostRowEnriched, + extractFormattingOptions, + sortBy, + serializePostRow, +} from "@ourworldindata/utils" import * as wpdb from "../db/wpdb.js" import * as db from "../db/db.js" import { @@ -96,7 +101,10 @@ const syncPostToGrapher = async ( const wpPost = rows[0] const formattingOptions = extractFormattingOptions(wpPost.post_content) - + const authors: string[] = sortBy( + JSON.parse(wpPost.authors), + (item: { author: string; order: number }) => item.order + ).map((author: { author: string; order: number }) => author.author) const postRow = wpPost ? ({ id: wpPost.ID, @@ -114,14 +122,14 @@ const syncPostToGrapher = async ( wpPost.post_modified_gmt === zeroDateString ? "1970-01-01 00:00:00" : wpPost.post_modified_gmt, - authors: wpPost.authors, + authors: authors, excerpt: wpPost.post_excerpt, created_at_in_wordpress: wpPost.created_at === zeroDateString ? "1970-01-01 00:00:00" : wpPost.created_at, formattingOptions: formattingOptions, - } as PostRow) + } as PostRowEnriched) : undefined await db.knexInstance().transaction(async (transaction) => { @@ -134,11 +142,7 @@ const syncPostToGrapher = async ( ) postRow.content = contentWithBlocksInlined - const rowForDb = { - ...postRow, - // TODO: it's not nice that we have to stringify this here - formattingOptions: JSON.stringify(postRow.formattingOptions), - } + const rowForDb = serializePostRow(postRow) if (!existsInGrapher) await transaction.table(postsTable).insert(rowForDb) diff --git a/baker/siteRenderers.tsx b/baker/siteRenderers.tsx index f4e9289337a..0d39030fbf9 100644 --- a/baker/siteRenderers.tsx +++ b/baker/siteRenderers.tsx @@ -50,12 +50,12 @@ import { JsonError, KeyInsight, OwidGdocInterface, - PostRow, Url, IndexPost, mergePartialGrapherConfigs, OwidGdocType, extractFormattingOptions, + PostRowRaw, } from "@ourworldindata/utils" import { CountryProfileSpec } from "../site/countryProfileProjects.js" import { formatPost } from "./formatWordpressPost.js" @@ -435,7 +435,7 @@ export const entriesByYearPage = async (year?: number) => { .join("tags", { "tags.id": "post_tags.tag_id" }) .where({ "tags.name": "Entries" }) .select("title", "posts.slug", "published_at")) as Pick< - PostRow, + PostRowRaw, "title" | "slug" | "published_at" >[] diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts index 12bacc45be6..903b27ce5eb 100644 --- a/db/migrateWpPostsToArchieMl.ts +++ b/db/migrateWpPostsToArchieMl.ts @@ -8,6 +8,7 @@ import { OwidGdocType, RelatedChart, EnrichedBlockAllCharts, + parsePostAuthors, } from "@ourworldindata/utils" import * as Post from "./model/Post.js" import fs from "fs" @@ -18,7 +19,6 @@ import { adjustHeadingLevels, } from "./model/Gdoc/htmlToEnriched.js" import { getRelatedCharts, isPostCitable } from "./wpdb.js" -import { parsePostAuthors } from "./model/Post.js" // slugs from all the linear entries we want to migrate from @edomt const entries = new Set([ @@ -80,7 +80,7 @@ const migrate = async (): Promise => { const errors = [] await db.getConnection() - const posts = await Post.select( + const rawPosts = await Post.select( "id", "slug", "title", @@ -94,8 +94,14 @@ const migrate = async (): Promise => { "featured_image" ).from(db.knexTable(Post.postsTable)) //.where("id", "=", "24808")) - for (const post of posts) { + for (const postRaw of rawPosts) { try { + const post = { + ...postRaw, + authors: postRaw.authors + ? parsePostAuthors(postRaw.authors) + : null, + } const isEntry = entries.has(post.slug) const text = post.content let relatedCharts: RelatedChart[] = [] @@ -109,11 +115,10 @@ const migrate = async (): Promise => { ) if ( shouldIncludeMaxAsAuthor && + post.authors && !post.authors.includes("Max Roser") ) { - const authorsJson = JSON.parse(post.authors) - authorsJson.push({ author: "Max Roser", order: Infinity }) - post.authors = JSON.stringify(authorsJson) + post.authors.push("Max Roser") } // We don't get the first and last nodes if they are comments. @@ -196,9 +201,9 @@ const migrate = async (): Promise => { body: archieMlBodyElements, toc: [], title: post.title, - subtitle: post.excerpt, - excerpt: post.excerpt, - authors: parsePostAuthors(post.authors), + subtitle: post.excerpt ?? "", + excerpt: post.excerpt ?? "", + authors: post.authors ?? [], "featured-image": post.featured_image.split("/").at(-1), dateline: dateline, // TODO: this discards block level elements - those might be needed? @@ -263,7 +268,7 @@ const migrate = async (): Promise => { } } } catch (e) { - console.error("Caught an exception", post.id) + console.error("Caught an exception", postRaw.id) errors.push(e) } } diff --git a/db/model/Post.ts b/db/model/Post.ts index eebee925001..98ec20601ab 100644 --- a/db/model/Post.ts +++ b/db/model/Post.ts @@ -1,14 +1,18 @@ import * as db from "../db.js" import { Knex } from "knex" -import { PostRow, sortBy } from "@ourworldindata/utils" +import { + PostRowEnriched, + PostRowRaw, + parsePostRow, +} from "@ourworldindata/utils" export const postsTable = "posts" -export const table = "posts" - -export const select = ( +export const select = ( ...args: K[] -): { from: (query: Knex.QueryBuilder) => Promise[]> } => ({ +): { + from: (query: Knex.QueryBuilder) => Promise[]> +} => ({ from: (query) => query.select(...args) as any, }) @@ -46,17 +50,6 @@ export const setTags = async ( ) }) -export const bySlug = async (slug: string): Promise => - (await db.knexTable("posts").where({ slug: slug }))[0] - -/** The authors field in the posts table is a json column that contains an array of - { order: 1, authors: "Max Mustermann" } like records. This function parses the - string and returns a simple string array of author names in the correct order */ -export const parsePostAuthors = (authorsJson: string): string[] => { - const authors = JSON.parse(authorsJson) - return sortBy(authors, ["order"]).map((author) => author.author) -} - export const setTagsForPost = async ( postId: number, tagIds: number[] @@ -71,7 +64,15 @@ export const setTagsForPost = async ( ) }) -export const getPostBySlug = async ( +export const getPostRawBySlug = async ( slug: string -): Promise => +): Promise => (await db.knexTable("posts").where({ slug: slug }))[0] + +export const getPostEnrichedBySlug = async ( + slug: string +): Promise => { + const post = await getPostRawBySlug(slug) + if (!post) return undefined + return parsePostRow(post) +} diff --git a/db/syncPostsToGrapher.ts b/db/syncPostsToGrapher.ts index bfaa700dc91..560edba5e2a 100644 --- a/db/syncPostsToGrapher.ts +++ b/db/syncPostsToGrapher.ts @@ -8,7 +8,9 @@ import { extractFormattingOptions, groupBy, keyBy, - PostRow, + PostRowEnriched, + sortBy, + serializePostRow, } from "@ourworldindata/utils" import { postsTable, select } from "./model/Post.js" import { PostLink } from "./model/PostLink.js" @@ -60,7 +62,7 @@ function buildReplacerFunction( ) => { const block = blocks[matches["id"].toString()] return block - ? `` + + ? `\n` + block.post_content : "" } @@ -116,7 +118,7 @@ export const postLinkCompareStringGenerator = (item: PostLink): string => `${item.linkType} - ${item.target} - ${item.hash} - ${item.queryString}` export function getLinksToAddAndRemoveForPost( - post: PostRow, + post: PostRowEnriched, existingLinksForPost: PostLink[], content: string, postId: number @@ -285,6 +287,10 @@ const syncPostsToGrapher = async (): Promise => { const toInsert = rows.map((post: any) => { const content = post.post_content as string const formattingOptions = extractFormattingOptions(content) + const authors: string[] = sortBy( + JSON.parse(post.authors), + (item: { author: string; order: number }) => item.order + ).map((author: { author: string; order: number }) => author.author) return { id: post.ID, @@ -302,13 +308,13 @@ const syncPostsToGrapher = async (): Promise => { post.post_modified_gmt === zeroDateString ? "1970-01-01 00:00:00" : post.post_modified_gmt, - authors: post.authors, + authors: authors, excerpt: post.post_excerpt, created_at_in_wordpress: post.created_at === zeroDateString ? null : post.created_at, formattingOptions: formattingOptions, } - }) as PostRow[] + }) as PostRowEnriched[] const postLinks = await PostLink.find() const postLinksById = groupBy(postLinks, (link: PostLink) => link.sourceId) @@ -333,11 +339,7 @@ const syncPostsToGrapher = async (): Promise => { await t.whereIn("id", toDelete).delete().from(postsTable) for (const row of toInsert) { - const rowForDb = { - ...row, - // TODO: it's not nice that we have to stringify this here - formattingOptions: JSON.stringify(row.formattingOptions), - } + const rowForDb = serializePostRow(row) if (doesExistInGrapher[row.id]) await t .update(rowForDb) diff --git a/db/wpdb.ts b/db/wpdb.ts index e752c45c511..e09539590cf 100644 --- a/db/wpdb.ts +++ b/db/wpdb.ts @@ -38,7 +38,6 @@ import { uniqBy, sortBy, DataPageRelatedResearch, - isString, OwidGdocType, Tag, } from "@ourworldindata/utils" @@ -794,22 +793,13 @@ export const getRelatedResearchAndWritingForVariable = async ( const allSortedRelatedResearch = sorted.map((post) => { const parsedAuthors = JSON.parse(post.authors) - // The authors in the gdocs table are just a list of strings, but in the wp_posts table - // they are a list of objects with an "author" key and an "order" key. We want to normalize this so that - // we can just use the same code to display the authors in both cases. - let authors - if (parsedAuthors.length > 0 && !isString(parsedAuthors[0])) { - authors = sortBy(parsedAuthors, (author) => author.order).map( - (author: any) => author.author - ) - } else authors = parsedAuthors const parsedTags = post.tags !== "" ? JSON.parse(post.tags) : [] return { title: post.title, url: `/${post.postSlug}`, variantName: "", - authors, + authors: parsedAuthors, imageUrl: post.thumbnail, tags: parsedTags, } diff --git a/packages/@ourworldindata/utils/src/dbTypes/Posts.ts b/packages/@ourworldindata/utils/src/dbTypes/Posts.ts new file mode 100644 index 00000000000..3f421b6c82b --- /dev/null +++ b/packages/@ourworldindata/utils/src/dbTypes/Posts.ts @@ -0,0 +1,38 @@ +import { + FormattingOptions, + OwidArticleBackportingStatistics, + OwidGdocInterface, + WP_PostType, +} from "../owidTypes.js" + +export interface PostRowPlainFields { + id: number + title: string + slug: string + type: WP_PostType + status: string + content: string + published_at: Date | null + updated_at: Date | null + updated_at_in_wordpress: Date | null + gdocSuccessorId: string | null + excerpt: string | null + created_at_in_wordpress: Date | null + featured_image: string +} + +export interface PostRowUnparsedFields { + authors: string | null + formattingOptions: string | null + archieml: string | null + archieml_update_statistics: string | null +} + +export interface PostRowParsedFields { + authors: string[] | null + formattingOptions: FormattingOptions | null + archieml: OwidGdocInterface | null + archieml_update_statistics: OwidArticleBackportingStatistics | null +} +export type PostRowRaw = PostRowPlainFields & PostRowUnparsedFields +export type PostRowEnriched = PostRowPlainFields & PostRowParsedFields diff --git a/packages/@ourworldindata/utils/src/dbTypes/PostsUtilities.ts b/packages/@ourworldindata/utils/src/dbTypes/PostsUtilities.ts new file mode 100644 index 00000000000..63c7a510c14 --- /dev/null +++ b/packages/@ourworldindata/utils/src/dbTypes/PostsUtilities.ts @@ -0,0 +1,44 @@ +import { FormattingOptions } from "../owidTypes.js" +import { PostRowEnriched, PostRowRaw } from "./Posts.js" + +export function parsePostFormattingOptions( + formattingOptions: string +): FormattingOptions { + return JSON.parse(formattingOptions) +} + +export function parsePostAuthors(authors: string): string[] { + const authorsJson = JSON.parse(authors) + return authorsJson +} + +export function parsePostArchieml(archieml: string): any { + // TODO: validation would be nice here + return JSON.parse(archieml) +} + +export function parsePostRow(postRow: PostRowRaw): PostRowEnriched { + return { + ...postRow, + authors: postRow.authors ? parsePostAuthors(postRow.authors) : null, + formattingOptions: postRow.formattingOptions + ? parsePostFormattingOptions(postRow.formattingOptions) + : null, + archieml: postRow.archieml ? parsePostArchieml(postRow.archieml) : null, + archieml_update_statistics: postRow.archieml_update_statistics + ? JSON.parse(postRow.archieml_update_statistics) + : null, + } +} + +export function serializePostRow(postRow: PostRowEnriched): PostRowRaw { + return { + ...postRow, + authors: JSON.stringify(postRow.authors), + formattingOptions: JSON.stringify(postRow.formattingOptions), + archieml: JSON.stringify(postRow.archieml), + archieml_update_statistics: JSON.stringify( + postRow.archieml_update_statistics + ), + } +} diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index e9a5a72b06b..2c4a789b75d 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -123,7 +123,6 @@ export { type PositionMap, type PostReference, type PostRestApi, - type PostRow, type PrimitiveType, type RawBlockAllCharts, type RawBlockAdditionalCharts, @@ -239,6 +238,21 @@ export { type DisplaySource, } from "./owidTypes.js" +export { + parsePostFormattingOptions, + parsePostAuthors, + parsePostRow, + serializePostRow, +} from "./dbTypes/PostsUtilities.js" + +export { + type PostRowParsedFields, + type PostRowPlainFields, + type PostRowUnparsedFields, + type PostRowEnriched, + type PostRowRaw, +} from "./dbTypes/Posts.js" + export { pairs, type NoUndefinedValues, diff --git a/packages/@ourworldindata/utils/src/owidTypes.ts b/packages/@ourworldindata/utils/src/owidTypes.ts index c6f02cb42c4..6dc8be995f3 100644 --- a/packages/@ourworldindata/utils/src/owidTypes.ts +++ b/packages/@ourworldindata/utils/src/owidTypes.ts @@ -4,6 +4,7 @@ import { gdocUrlRegex } from "./GdocsConstants.js" import { OwidOrigin } from "./OwidOrigin.js" import { OwidSource } from "./OwidSource.js" import { OwidProcessingLevel } from "./OwidVariable.js" +import { PostRowRaw } from "./dbTypes/Posts.js" // todo: remove when we ditch Year and YearIsDay export const EPOCH_DATE = "2020-01-21" @@ -165,28 +166,7 @@ export interface TocHeadingWithTitleSupertitle extends TocHeading { supertitle?: string } -// todo; remove -export interface PostRow { - id: number - title: string - slug: string - type: WP_PostType - status: string - content: string - published_at: Date | null - updated_at: Date | null - updated_at_in_wordpress: Date | null - archieml: string - archieml_update_statistics: string - gdocSuccessorId: string | null - authors: string - excerpt: string - created_at_in_wordpress: Date | null - featured_image: string - formattingOptions: FormattingOptions -} - -export interface PostRowWithGdocPublishStatus extends PostRow { +export interface PostRowWithGdocPublishStatus extends PostRowRaw { isGdocPublished: boolean } diff --git a/site/EntriesByYearPage.tsx b/site/EntriesByYearPage.tsx index 493f26b3086..9781d30c51d 100644 --- a/site/EntriesByYearPage.tsx +++ b/site/EntriesByYearPage.tsx @@ -1,11 +1,11 @@ import React from "react" -import { dayjs, groupBy, PostRow } from "@ourworldindata/utils" +import { dayjs, groupBy, PostRowEnriched } from "@ourworldindata/utils" import { Head } from "./Head.js" import { SiteHeader } from "./SiteHeader.js" import { SiteFooter } from "./SiteFooter.js" import { TableOfContents } from "../site/TableOfContents.js" -type Entry = Pick +type Entry = Pick export const EntriesByYearPage = (props: { entries: Entry[] diff --git a/site/GrapherPage.jsdom.test.tsx b/site/GrapherPage.jsdom.test.tsx index 136d5d6157d..0723ce73115 100755 --- a/site/GrapherPage.jsdom.test.tsx +++ b/site/GrapherPage.jsdom.test.tsx @@ -4,7 +4,7 @@ import { GrapherInterface } from "@ourworldindata/grapher" import { DimensionProperty, KeyChartLevel, - PostRow, + PostRowEnriched, RelatedChart, } from "@ourworldindata/utils" import React from "react" @@ -30,7 +30,7 @@ const mockGrapher: GrapherInterface = { } let grapher: GrapherInterface -let post: PostRow +let post: PostRowEnriched let relatedCharts: RelatedChart[] beforeAll(() => { diff --git a/site/GrapherPage.tsx b/site/GrapherPage.tsx index dbfaf1fec23..fa49c402176 100644 --- a/site/GrapherPage.tsx +++ b/site/GrapherPage.tsx @@ -9,7 +9,7 @@ import { import { flatten, PostReference, - PostRow, + PostRowEnriched, RelatedChart, serializeJSONForHTML, uniq, @@ -34,7 +34,7 @@ import { SiteHeader } from "./SiteHeader.js" export const GrapherPage = (props: { grapher: GrapherInterface - post?: PostRow + post?: PostRowEnriched relatedCharts?: RelatedChart[] relatedArticles?: PostReference[] baseUrl: string