diff --git a/.github/workflows/post.yml b/.github/workflows/post.yml index e14493ac..50795c83 100644 --- a/.github/workflows/post.yml +++ b/.github/workflows/post.yml @@ -29,7 +29,7 @@ jobs: - name: Install dependencies run: | - pip install --no-cache-dir -r code/telegram/requirements.txt + python -m pip install --no-cache-dir -r code/requirements.txt - name: Export WordPress posts env: diff --git a/.github/workflows/sync-idf-telegram.yml b/.github/workflows/sync-idf-telegram.yml index a85ca1c1..94f552d8 100644 --- a/.github/workflows/sync-idf-telegram.yml +++ b/.github/workflows/sync-idf-telegram.yml @@ -24,11 +24,12 @@ jobs: exit 1 fi echo "Session file found at /home/ims/session-file!" + shell: bash - name: Copy session file to working directory run: | - # Copy the local session file to the expected code directory - cp /home/ims/session-file/session_telegram_scraper.session code/telegram/session_telegram_scraper.session + # Copy the local session file to the workflow directory + cp /home/ims/session-file/session_telegram_scraper.session $GITHUB_WORKSPACE/code/telegram/session_telegram_scraper.session - name: Set up Python uses: actions/setup-python@v5 diff --git a/.github/workflows/sync-tzevaadomm-telegram.yml b/.github/workflows/sync-tzevaadomm-telegram.yml index b4d65072..9df53a19 100644 --- a/.github/workflows/sync-tzevaadomm-telegram.yml +++ b/.github/workflows/sync-tzevaadomm-telegram.yml @@ -24,11 +24,12 @@ jobs: exit 1 fi echo "Session file found at /home/ims/session-file!" + shell: bash - name: Copy session file to working directory run: | - # Copy the local session file to the expected code directory - cp /home/ims/session-file/session_telegram_scraper.session code/telegram/session_telegram_scraper.session + # Copy the local session file to the workflow directory + cp /home/ims/session-file/session_telegram_scraper.session $GITHUB_WORKSPACE/code/telegram/session_telegram_scraper.session - name: Set up Python uses: actions/setup-python@v5 diff --git a/code/export_posts.py b/code/export_posts.py index 80fbd49f..1b19e670 100644 --- a/code/export_posts.py +++ b/code/export_posts.py @@ -10,7 +10,7 @@ API_URL = os.getenv("API_URL") # Base URL for posts USERNAME = os.getenv("USERNAME") APP_PASSWORD = os.getenv("APP_PASSWORD") -GITHUB_REPO_PATH = "./Website" +GITHUB_REPO_PATH = "../Website" # Authentication auth_string = f"{USERNAME}:{APP_PASSWORD}" diff --git a/code/telegram/export_idf_telegram.py b/code/telegram/export_idf_telegram.py index 9b12096a..ccb91d4c 100644 --- a/code/telegram/export_idf_telegram.py +++ b/code/telegram/export_idf_telegram.py @@ -19,8 +19,8 @@ API_ID = int(os.getenv("TELEGRAM_API_ID")) API_HASH = os.getenv("TELEGRAM_API_HASH") CHANNEL = "@idf_telegram" -SAVE_PATH = "../../IDFspokesman" -STATE_FILE = "last_message_state.json" +SAVE_PATH = os.path.join(os.path.dirname(__file__), "../../IDFspokesman") +STATE_FILE = os.path.join(os.path.dirname(__file__), "last_message_state.json") START_DATE = datetime(2023, 10, 7, tzinfo=timezone.utc) # Start scraping from this date (UTC) FILE_SIZE_THRESHOLD_MB = 25 PUBLIC_URL_PREFIX = "https://data.iron-swords.co.il/" @@ -34,7 +34,8 @@ ) # Initialize the Telegram client -client = TelegramClient('session_telegram_scraper', API_ID, API_HASH) +session_file_path = os.path.join(os.path.dirname(__file__), 'session_telegram_scraper.session') +client = TelegramClient(session_file_path, API_ID, API_HASH) # Ensure consistent paths for S3 and URLs def normalize_file_key(file_path): @@ -148,15 +149,20 @@ def scan_and_upload_existing(): # Function to load the last message ID from a state file def load_last_message_id(): + print(f"Loading last message ID from {STATE_FILE}") if os.path.exists(STATE_FILE): with open(STATE_FILE, "r", encoding="utf-8") as f: - return json.load(f).get("last_message_id", 0) + last_message_id = json.load(f).get("last_message_id", 0) + print(f"Loaded last message ID: {last_message_id}") + return last_message_id + print("State file not found, starting from message ID 0") return 0 # Function to save the last message ID to a state file def save_last_message_id(last_message_id): with open(STATE_FILE, "w", encoding="utf-8") as f: json.dump({"last_message_id": last_message_id}, f) + print(f"Saved last message ID: {last_message_id}") # Function to process messages def process_messages():