diff --git a/.github/workflows/callable.build.yml b/.github/workflows/callable.build.yml
new file mode 100644
index 0000000..08c92a2
--- /dev/null
+++ b/.github/workflows/callable.build.yml
@@ -0,0 +1,33 @@
+# Reusable workflow: builds the project with the Gradle wrapper.
+# It declares no trigger of its own and runs only when another workflow calls it.
+name: Build & Test
+
+on:
+  workflow_call: {}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    name: gradle build
+    steps:
+      - name: Checkout project sources
+        uses: actions/checkout@v4
+
+      # JDK that Gradle runs on.
+      - uses: actions/setup-java@v4
+        with:
+          distribution: corretto
+          java-version: "21"
+
+      # Checks the committed Gradle wrapper before ./gradlew is executed below.
+      - uses: gradle/actions/wrapper-validation@v3
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@v3.3.2
+        with:
+          # Per the setup-gradle docs: cache entries are saved at the end of the
+          # job but not restored at its start.
+          cache-write-only: true
+
+      - name: Build with Gradle
+        run: ./gradlew build --no-daemon
\ No newline at end of file
diff --git a/.github/workflows/callable.gradle-release.yml b/.github/workflows/callable.gradle-release.yml
new file mode 100644
index 0000000..55bbb9c
--- /dev/null
+++ b/.github/workflows/callable.gradle-release.yml
@@ -0,0 +1,113 @@
+# Reusable workflow: performs a release through the Gradle 'release' task.
+# The caller selects the version bump with the 'type' input (major, minor or patch).
+name: Gradle Release
+
+on:
+  workflow_call:
+    inputs:
+      type:
+        description: 'Release type'
+        required: true
+        type: string
+
+jobs:
+  release:
+    name: gradle release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Validate 'Release Type' param
+        env:
+          TYPE: ${{ inputs.type }}
+        run: |
+          # Exact match only: the earlier regex test against the joined list also
+          # accepted any substring of "major minor patch" (e.g. '', 'min', 'r m').
+          case "$TYPE" in
+            major | minor | patch) ;;
+            *)
+              echo "Unknown release type: $TYPE"
+              exit 1
+              ;;
+          esac
+
+      # Always releases from 'main', whatever ref the calling workflow runs on.
+      - name: Checkout project sources ('main' branch)
+        uses: actions/checkout@v4
+        with:
+          ref: main
+          token: ${{ secrets.CI_GITHUB_TOKEN }}
+
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'corretto'
+          java-version: '21'
+
+      - uses: gradle/actions/wrapper-validation@v3
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@v3.3.2
+        with:
+          cache-read-only: true
+
+      - name: Get current version
+        run: |
+          # Extract the 'version' property instead of sourcing the file:
+          # gradle.properties is not shell syntax, and any line that is not a valid
+          # shell assignment (dotted keys, spaces around '=') would be run as a command.
+          version="$(sed -n 's/^[[:space:]]*version[[:space:]]*=[[:space:]]*\(.*[^[:space:]]\)[[:space:]]*$/\1/p' gradle.properties | tail -n 1)"
+          if [[ -z "$version" ]]; then
+            echo "No 'version' property found in gradle.properties"
+            exit 1
+          fi
+          echo "current_version=${version}" >> "$GITHUB_ENV"
+
+      - name: Determine version type
+        env:
+          TYPE: ${{ inputs.type }}
+          VERSION: ${{ env.current_version }}
+        run: |
+          # Split MAJOR.MINOR.PATCH; anything after a '-' in the third field
+          # (e.g. -SNAPSHOT) is dropped.
+          major=$(echo "${VERSION}" | cut -d. -f1)
+          minor=$(echo "${VERSION}" | cut -d. -f2)
+          patch=$(echo "${VERSION}" | cut -d. -f3 | cut -d- -f1)
+          echo "resolved: ${major}.${minor}.${patch}"
+
+          # new_version is passed to Gradle as the release version,
+          # new_snapshot_version as the version that follows it.
+          case "$TYPE" in
+            major)
+              new_version="$((major + 1)).0.0"
+              new_snapshot_version="$((major + 1)).0.1-SNAPSHOT"
+              ;;
+            minor)
+              new_version="${major}.$((minor + 1)).0"
+              new_snapshot_version="${major}.$((minor + 1)).1-SNAPSHOT"
+              ;;
+            *)
+              # patch: release the current version number, then bump the patch.
+              new_version="${major}.${minor}.${patch}"
+              new_snapshot_version="${major}.${minor}.$((patch + 1))-SNAPSHOT"
+              ;;
+          esac
+          echo "new_version=${new_version}" >> "$GITHUB_ENV"
+          echo "new_snapshot_version=${new_snapshot_version}" >> "$GITHUB_ENV"
+
+      - name: Set git config 'user.name' and 'user.email'
+        run: |
+          git config --local user.email "action@github.com"
+          git config --local user.name "github-actions[bot]"
+
+      - name: Run 'gradle release'
+        env:
+          TYPE: ${{ inputs.type }}
+        run: |
+          # current_version, new_version and new_snapshot_version were written to
+          # GITHUB_ENV by earlier steps, so they are ordinary environment variables here.
+          echo "Type: $TYPE"
+          echo "Current version: $current_version"
+          echo "New version: $new_version"
+          echo "New snapshot version: $new_snapshot_version"
+          echo "./gradlew release -Prelease.useAutomaticVersion=true -Prelease.releaseVersion=$new_version -Prelease.newVersion=$new_snapshot_version"
+          # Run the wrapper, as the logged command says; a bare 'gradle' would use
+          # whichever Gradle is on the runner's PATH instead of the project's wrapper.
+          ./gradlew release -Prelease.useAutomaticVersion=true -Prelease.releaseVersion="$new_version" -Prelease.newVersion="$new_snapshot_version"
\ No newline at end of file
diff --git a/.github/workflows/callable.publish-javadoc.yml b/.github/workflows/callable.publish-javadoc.yml
new file mode 100644
index 0000000..0c53375
--- /dev/null
+++ b/.github/workflows/callable.publish-javadoc.yml
@@ -0,0 +1,152 @@
+# Generates the project's javadoc and deploys it to GitHub Pages.
+name: Publish javadoc (GitHub Pages)
+
+on:
+  # Manual runs.
+  workflow_dispatch: {}
+  # Calls from other workflows.
+  workflow_call: {}
+
+jobs:
+  # Builds the javadoc with Gradle and hands it to later jobs as the
+  # 'javadoc.zip' artifact.
+  build_package_javadoc:
+    name: Generate Javadoc
+    runs-on: ubuntu-latest
+    permissions:
+      # No step in this job pushes to the repository; read access covers the checkout.
+      contents: read
+    steps:
+      - name: Checkout project sources
+        uses: actions/checkout@v4
+
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'corretto'
+          java-version: '21'
+
+      - uses: gradle/actions/wrapper-validation@v3
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@v3.3.2
+        with:
+          cache-read-only: true
+
+      - name: Generate javadoc (gradle)
+        run: ./gradlew javadoc
+
+      # NOTE(review): no later step in this job reads PUBLISH_VERSION, and values
+      # written to GITHUB_ENV are not visible to other jobs (deploy_javadoc computes
+      # its own copy). This step looks removable; confirm before deleting.
+      - name: Conclude javadoc version and set env
+        run: |
+          if [[ "$GITHUB_REF" == "refs/heads/main" || "$GITHUB_REF" == "refs/heads/master" ]]; then
+            echo "PUBLISH_VERSION=current" >> "$GITHUB_ENV"
+          else
+            echo "PUBLISH_VERSION=${GITHUB_REF#refs/*/}" >> "$GITHUB_ENV"
+          fi
+
+      - name: zip javadoc folder
+        run: |
+          # Archive the contents of the javadoc directory (not the directory itself)
+          # into javadoc.zip at the workspace root, where the upload step looks for it.
+          cd "lib/build/docs/javadoc"
+          zip -r "$GITHUB_WORKSPACE/javadoc.zip" .
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: javadoc.zip
+          path: javadoc.zip
+
+ deploy_javadoc:
+ name: Deploy (GH Pages)
+ runs-on: ubuntu-latest
+ needs: build_package_javadoc
+ permissions:
+ contents: write
+ steps:
+ - name: Checkout project sources
+ uses: actions/checkout@v4
+ with:
+ ref: main
+ token: ${{ secrets.CI_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+
+ - name: Checkout or create empty branch 'gh-pages'
+ run: |
+ git fetch origin gh-pages || true
+ git checkout gh-pages || git switch --orphan gh-pages
+
+ - name: Conclude javadoc version and set env
+ run: |
+ if [[ "$GITHUB_REF" == "refs/heads/main" || "$GITHUB_REF" == "refs/heads/master" ]]; then
+ echo "PUBLISH_VERSION=current" >> $GITHUB_ENV
+ else
+ echo "PUBLISH_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+ fi
+
+ - name: Create root index redirect
+ env:
+ GITHUB_REPOSITORY_NAME: ${{ github.event.repository.name }}
+ run: |
+ echo "
+ * This interface provides methods to access the transcript content and to perform translations into different languages. + * Individual transcripts can be obtained through the {@link TranscriptList} class. + *
+ */ +public interface Transcript { + + /** + * Retrieves the content of the transcript. + * + * @return The content of the transcript as a {@link TranscriptContent} object. + * @throws TranscriptRetrievalException If the transcript content cannot be retrieved. + */ + TranscriptContent fetch() throws TranscriptRetrievalException; + + /** + * Gets the video id of the transcript. + * + * @return The video id as a {@link String}. + */ + String getVideoId(); + + /** + * Gets the language of the transcript. + * + * @return The language as a {@link String}. + */ + String getLanguage(); + + /** + * Gets the language code of the transcript. + * + * @return The language code as a {@link String}. + */ + String getLanguageCode(); + + /** + * Returns API URL which needs to be called to fetch transcript content. + * + * @return {@link String} API URL to fetch transcript content + */ + String getApiUrl(); + + /** + * Determines if the transcript was automatically generated by YouTube. + * + * @return {@code true} if the transcript was automatically generated; {@code false} otherwise. + */ + boolean isGenerated(); + + /** + * Lists all available translation languages for the transcript. + * + * @return A set of language codes representing available translation languages. + */ + Set+ * When the transcript content is fetched from YouTube, it is provided in the form of XML containing multiple transcript fragments. + *
+ * For example: + *
+ *{@code + *+ * This interface encapsulates the transcript content as a {@code List+ * + *Text + *0.0 + *1.54 + *+ * + * }Another text + *1.54 + *4.16 + *
+ * Available formatters are: + *
+ *+ * See WebVTT specification for more information. + *
+ * + * @return A {@link TranscriptFormatter} for WebVTT format. + */ + public static TranscriptFormatter webVTTFormatter() { + return transcriptContent -> "WEBVTT\n\n" + formatAsSubtitles( + transcriptContent, + fragment -> String.format("%s%n%s", fragmentToTimeStamp(fragment), fragment.getText()) + ); + } + + private static String formatAsSubtitles(TranscriptContent transcriptContent, Function+ * See SRT file format for more information. + *
+ * + * @return A {@link TranscriptFormatter} for SRT format. + */ + public static TranscriptFormatter srtFormatter() { + return transcriptContent -> { + AtomicInteger i = new AtomicInteger(1); + return formatAsSubtitles( + transcriptContent, + fragment -> String.format("%d%n%s%n%s", i.getAndIncrement(), fragmentToTimeStamp(fragment), fragment.getText()) + ); + }; + } +} diff --git a/lib/src/main/java/io/github/thoroldvix/api/TranscriptList.java b/lib/src/main/java/io/github/thoroldvix/api/TranscriptList.java new file mode 100644 index 0000000..e14205d --- /dev/null +++ b/lib/src/main/java/io/github/thoroldvix/api/TranscriptList.java @@ -0,0 +1,63 @@ +package io.github.thoroldvix.api; + +import java.util.function.Consumer; + +/** + * Represents a list of all available transcripts for a YouTube video. + *+ * This interface provides methods to iterate over all available transcripts for a given YouTube video, and to find either generated or manual transcripts for a specific language. + * Individual transcripts are represented by {@link Transcript} objects. + * Instances of {@link TranscriptList} can be obtained through the {@link YoutubeTranscriptApi} class. + *
+ */ +public interface TranscriptList extends Iterable+ * For example: + *
+ * If this is set to {@code ("de", "en")}, it will first attempt to fetch the German transcript ("de"), and then fetch the English + * transcript ("en") if the former fails. If no language code is provided, it uses English as the default language. + * @return The found {@link Transcript}. + * @throws TranscriptRetrievalException If no transcript could be found for the given language codes. + */ + Transcript findTranscript(String... languageCodes) throws TranscriptRetrievalException; + + /** + * Searches for an automatically generated transcript using the provided language codes. + * + * @param languageCodes A varargs list of language codes in descending priority. + *+ * For example: + *
+ * If this is set to {@code ("de", "en")}, it will first attempt to fetch the German transcript ("de"), and then fetch the English + * transcript ("en") if the former fails. If no language code is provided, it uses English as the default language. + * @return The found {@link Transcript}. + * @throws TranscriptRetrievalException If no transcript could be found for the given language codes. + */ + Transcript findGeneratedTranscript(String... languageCodes) throws TranscriptRetrievalException; + + /** + * Searches for a manually created transcript using the provided language codes. + * + * @param languageCodes A varargs list of language codes in descending priority. + *+ * For example: + *
+ * If this is set to {@code ("de", "en")}, it will first attempt to fetch the German transcript ("de"), and then fetch the English + * transcript ("en") if the former fails. If no language code is provided, it uses English as the default language. + * @return The found {@link Transcript}. + * @throws TranscriptRetrievalException If no transcript could be found for the given language codes. + */ + Transcript findManualTranscript(String... languageCodes) throws TranscriptRetrievalException; + + @Override + default void forEach(Consumer super Transcript> action) { + Iterable.super.forEach(action); + } +} diff --git a/lib/src/main/java/io/github/thoroldvix/api/TranscriptRetrievalException.java b/lib/src/main/java/io/github/thoroldvix/api/TranscriptRetrievalException.java new file mode 100644 index 0000000..02e62d4 --- /dev/null +++ b/lib/src/main/java/io/github/thoroldvix/api/TranscriptRetrievalException.java @@ -0,0 +1,47 @@ +package io.github.thoroldvix.api; + +/** + * Exception thrown when a transcript cannot be retrieved for a specified video. + *+ * This exception encapsulates the details of the error encountered during the retrieval of a YouTube video transcript. + *
+ */ +public class TranscriptRetrievalException extends Exception { + + private static final String ERROR_MESSAGE = "Could not retrieve transcript for the video: %s.\nReason: %s"; + private static final String YOUTUBE_WATCH_URL = "https://www.youtube.com/watch?v="; + + /** + * Constructs a new exception with the specified detail message and cause. + * + * @param videoId The ID of the video for which the transcript retrieval failed. + * @param message The detail message explaining the reason for the failure. + * @param cause The cause of the failure (which is saved for later retrieval by the {@link Throwable#getCause()} method). + */ + public TranscriptRetrievalException(String videoId, String message, Throwable cause) { + super(buildErrorMessage(videoId, message), cause); + } + + /** + * Constructs a new exception with the specified detail message. + * + * @param videoId The ID of the video for which the transcript retrieval failed. + * @param message The detail message explaining the reason for the failure. + */ + public TranscriptRetrievalException(String videoId, String message) { + super(buildErrorMessage(videoId, message)); + } + + /** + * Builds the error message to include the video URL and the specific cause of the error. + * + * @param videoId The ID of the video for which the transcript retrieval failed. + * @param message The detail message explaining the reason for the failure. + * @return The formatted error message. 
+ */ + private static String buildErrorMessage(String videoId, String message) { + String videoUrl = YOUTUBE_WATCH_URL + videoId; + return String.format(ERROR_MESSAGE, videoUrl, message); + } +} + diff --git a/lib/src/main/java/io/github/thoroldvix/api/YoutubeClient.java b/lib/src/main/java/io/github/thoroldvix/api/YoutubeClient.java new file mode 100644 index 0000000..23893a7 --- /dev/null +++ b/lib/src/main/java/io/github/thoroldvix/api/YoutubeClient.java @@ -0,0 +1,22 @@ +package io.github.thoroldvix.api; + + +import java.util.Map; + +/** + * Responsible for sending GET requests to YouTube. + */ +@FunctionalInterface +public interface YoutubeClient { + + /** + * Sends a GET request to the specified URL and returns the response body. + * + * @param url The URL to which the GET request is made. + * @param headers A map of additional headers to include in the request. + * @return The body of the response as a {@link String}. + * @throws TranscriptRetrievalException If the request to YouTube fails. + */ + String get(String url, Map+ * It provides functionality for retrieving all available transcripts or retrieving actual transcript content from YouTube. + *
+ *+ * To instantiate this API, you should use {@link TranscriptApiFactory}. + *
+ */ +public interface YoutubeTranscriptApi { + + /** + * Retrieves a list of available transcripts for a given video using cookies from a specified file path. + *+ * Used when you want to list transcripts for a video that is age-restricted. + * It tries to bypass the age-restriction by using the provided authentication cookies. + *
+ *+ * Note: For more information on how to obtain the authentication cookies, + * see the GitHub page. + *
+ * + * @param videoId The ID of the video + * @param cookiesPath The file path to the text file containing the authentication cookies + * @return {@link TranscriptList} A list of all available transcripts for the given video + * @throws TranscriptRetrievalException If the retrieval of the transcript list fails + * @throws IllegalArgumentException If the video ID is invalid + */ + TranscriptList listTranscriptsWithCookies(String videoId, String cookiesPath) throws TranscriptRetrievalException; + + /** + * Retrieves a list of available transcripts for a given video. + * + * @param videoId The ID of the video + * @return {@link TranscriptList} A list of all available transcripts for the given video + * @throws TranscriptRetrievalException If the retrieval of the transcript list fails + * @throws IllegalArgumentException If the video ID is invalid + */ + TranscriptList listTranscripts(String videoId) throws TranscriptRetrievalException; + + /** + * Retrieves transcript content for a given video using cookies from a specified file path. + *+ * Used when you want to retrieve transcript content for a video that is age-restricted. + * It tries to bypass the age-restriction by using the provided authentication cookies. + *
+ *+ * Note: For more information on how to obtain the authentication cookies, + * see the GitHub page. + *
+ *+ * This is a shortcut for calling: + *
+ *+ * {@code listTranscriptsWithCookies(videoId).findTranscript(languageCodes).fetch();} + *
+ * + * @param videoId The ID of the video + * @param languageCodes A varargs list of language codes in descending priority. + *+ * For example: + *
+ * If this is set to {@code ("de", "en")}, it will first attempt to fetch the German transcript ("de"), and then fetch the English + * transcript ("en") if the former fails. If no language code is provided, it uses English as the default language. + * @param cookiesPath The file path to the text file containing the authentication cookies + * @return {@link TranscriptContent} The transcript content + * @throws TranscriptRetrievalException If the retrieval of the transcript fails + * @throws IllegalArgumentException If the video ID is invalid + */ + TranscriptContent getTranscriptWithCookies(String videoId, String cookiesPath, String... languageCodes) throws TranscriptRetrievalException; + + /** + * Retrieves transcript content for a single video. + *+ * This is a shortcut for calling: + *
+ *+ * {@code listTranscripts(videoId).findTranscript(languageCodes).fetch();} + *
+ * + * @param videoId The ID of the video + * @param languageCodes A varargs list of language codes in descending priority. + *+ * For example: + *
+ * If this is set to {@code ("de", "en")}, it will first attempt to fetch the German transcript ("de"), and then fetch the English + * transcript ("en") if the former fails. If no language code is provided, it uses English as the default language. + * @return {@link TranscriptContent} The transcript content + * @throws TranscriptRetrievalException If the retrieval of the transcript fails + * @throws IllegalArgumentException If the video ID is invalid + */ + TranscriptContent getTranscript(String videoId, String... languageCodes) throws TranscriptRetrievalException; +} diff --git a/lib/src/main/java/io/github/thoroldvix/internal/DefaultTranscript.java b/lib/src/main/java/io/github/thoroldvix/internal/DefaultTranscript.java new file mode 100644 index 0000000..31f2f7d --- /dev/null +++ b/lib/src/main/java/io/github/thoroldvix/internal/DefaultTranscript.java @@ -0,0 +1,144 @@ +package io.github.thoroldvix.internal; + + +import io.github.thoroldvix.api.Transcript; +import io.github.thoroldvix.api.TranscriptContent; +import io.github.thoroldvix.api.TranscriptRetrievalException; +import io.github.thoroldvix.api.YoutubeClient; + +import java.util.*; + +/** + * Default implementation of {@link Transcript}. + */ +final class DefaultTranscript implements Transcript { + + private final YoutubeClient client; + private final String videoId; + private final String apiUrl; + private final String language; + private final String languageCode; + private final boolean isGenerated; + private final Map+ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + +Google verwendet Cookies und Daten, um Dienste und Werbung zur Verfügung zu stellen, zu verwalten und zu verbessern. Wenn Sie zustimmen, nutzen wir Cookies für diese Zwecke und dazu, Inhalte und Werbung für Sie zu personalisieren, damit Sie z. B. relevantere Google-Suchergebnisse und relevantere Werbung bei YouTube erhalten. Die Personalisierung erfolgt auf Grundlage Ihrer Aktivitäten, beispielsweise Ihrer Google-Suchanfragen und der Videos, die Sie sich bei YouTube ansehen. Wir verwenden diese Daten auch für Analysen und Messungen. Klicken Sie auf „Anpassen“, um sich weitere Optionen anzusehen, oder besuchen Sie g.co/privacytools. Darüber hinaus haben Sie die Möglichkeit, Ihre Browsereinstellungen so zu konfigurieren, dass einige oder alle Cookies blockiert werden.
Google verwendet Cookies + und Daten, um Dienste und Werbung zur Verfügung zu stellen, zu verwalten und zu verbessern. Wenn Sie zustimmen, + nutzen wir Cookies für diese Zwecke und dazu, Inhalte und Werbung für Sie zu personalisieren, damit Sie z. B. + relevantere Google-Suchergebnisse und relevantere Werbung bei YouTube erhalten. Die Personalisierung erfolgt auf + Grundlage Ihrer Aktivitäten, beispielsweise Ihrer Google-Suchanfragen und der Videos, die Sie sich bei YouTube + ansehen. Wir verwenden diese Daten auch für Analysen und Messungen. Klicken Sie auf „Anpassen“, um sich weitere + Optionen anzusehen, oder besuchen Sie g.co/privacytools. Darüber hinaus haben Sie die Möglichkeit, Ihre + Browsereinstellungen so zu konfigurieren, dass einige oder alle Cookies blockiert werden.
++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ Perdón por la interrupción. Hemos recibido un gran número de + solicitudes de tu red. +
++ Para seguir disfrutando de YouTube, rellena el siguiente formulario. +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + ++ + +
+ + +