Skip to content

Commit

Permalink
tidy and docstrings, get rid of stats error
Browse files Browse the repository at this point in the history
  • Loading branch information
bitplane committed Sep 25, 2023
1 parent 8cb62c7 commit c5e917e
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 37 deletions.
2 changes: 1 addition & 1 deletion ienv/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "ienv"
description = "Cut down your venv sizes by symlinking all the things"
version = "0.0.1"
version = "0.0.2"
authors = [
{ name = "Gareth Davidson", email = "gaz@bitplane.net" }
]
Expand Down
17 changes: 17 additions & 0 deletions ienv/src/ienv/cache.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
"""
The cache dir, which lives at ~/.cache/ienv/files
It's where all your files have been moved to.
"""
import os
from pathlib import Path

CACHE_FILE = "venvs.txt"


def get_cache_dir(prefix="~"):
    """
    Work out where the file cache lives under *prefix*, make sure that
    directory is present on disk, and hand back its Path.

    A leading "~" in the prefix is expanded to the user's home directory.
    """
    location = Path(f"{prefix}/.cache/ienv/files")
    location = location.expanduser()

    # parents/exist_ok make repeated calls harmless.
    location.mkdir(parents=True, exist_ok=True)
    return location


def load_venv_list(file_path):
    """
    Load the list of venvs that have been squished.

    Returns a set holding one entry per non-blank line of file_path, with
    surrounding whitespace stripped. A missing file yields an empty set.
    """
    # Open first and handle the miss, rather than checking for existence
    # beforehand; the file could vanish between the check and the open.
    try:
        with open(file_path, "r") as f:
            stripped = (line.strip() for line in f)
            # Blank lines would otherwise show up as an empty-string "venv".
            return {entry for entry in stripped if entry}
    except FileNotFoundError:
        return set()


def save_venv_list(file_path, venvs):
    """
    Write the squished venvs to file_path, one per line, replacing
    whatever the file held before.
    """
    with open(file_path, "w") as out:
        out.writelines(f"{entry}\n" for entry in venvs)
27 changes: 7 additions & 20 deletions ienv/src/ienv/main.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,13 @@
"""
Entrypoint to the module.
"""

import argparse
import glob
import os
import sys
from pathlib import Path

from ienv.squish import process_venv


def venv_dir(directory):
    """
    argparse type callback: accept *directory* only if it exists, is a
    directory, and has a lib/*/site-packages entry inside it. The value
    is returned unchanged on success.
    """
    if not os.path.exists(directory):
        raise argparse.ArgumentTypeError(f"Directory '{directory}' does not exist.")
    if not os.path.isdir(directory):
        raise argparse.ArgumentTypeError(f"Path '{directory}' is not a directory.")

    pattern = str(Path(directory).joinpath("lib/*/site-packages/"))
    matches = glob.glob(pattern)
    if not matches:
        raise argparse.ArgumentTypeError(f"Directory '{directory}' is not a venv.")

    return directory
from ienv.stats import print_stats
from ienv.venv import venv_dir


def parse_args(args=sys.argv[1:]):
Expand All @@ -41,7 +28,7 @@ def main():
args = parse_args()

if args.stats:
raise NotImplementedError("Stats not implemented yet.")
print_stats()
else:
process_venv(args.venv_dir)

Expand Down
34 changes: 31 additions & 3 deletions ienv/src/ienv/squish.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
"""
This bit moves your files around.
"""

import hashlib
import os
import random
import shutil
from pathlib import Path

from .cache import get_cache_dir, load_venv_list, save_venv_list
from .cache import CACHE_FILE, get_cache_dir, load_venv_list, save_venv_list
from .venv import get_large_package_files

BUFFER_SIZE = 1024 * 1024 * 10 # 10MB chunks


def hash_and_copy(source, dest=None):
"""
If we can't use a hardlink to do a quick copy then we'll have to do a full
copy. This function will hash the file as it copies it, if there's a dest.
"""
sha1 = hashlib.sha1()
with open(source, "rb") as src_file:
if dest:
Expand All @@ -24,6 +32,13 @@ def hash_and_copy(source, dest=None):


def backup_file(source, dest_dir):
"""
You can bet the user will get bored and CTRL+C out and end up with
missing files, which we don't really want. This function tries to
hardlink the file first, and if that fails it'll do a full copy
while hashing its contents. It's not perfect, but it'll make it
pretty hard to lose data.
"""
dest_file = Path(dest_dir) / hex(random.getrandbits(128))
linked = False
try:
Expand All @@ -45,6 +60,13 @@ def backup_file(source, dest_dir):


def replace_with_symlink(source, dest):
"""
Replace the source file with a symlink to the dest file, after the
backup has been made. This is atomic on Linux, but not on Windows.
But either way, if you lose a file because you were interrupted while
renaming one file over the top of another then you're really unlucky,
as well as having a crap OS.
"""
source = Path(source).absolute()
dest = Path(dest).absolute()
symlink_path = source.with_suffix(".ienv.lnk")
Expand All @@ -65,15 +87,21 @@ def replace_with_symlink(source, dest):


def process_venv(venv_dir):
"""
Actually iterate over all the files.
Make backups into the cache dir, make a link to the backup, copy its attribs
and then move the link over the source.
"""
venv_dir = Path(venv_dir).resolve() # Making sure it's an absolute path

# Ensure cache directory exists and load venv list
cache_dir = get_cache_dir()
venv_list = load_venv_list(cache_dir / "venvs.txt")
venv_list = load_venv_list(cache_dir / CACHE_FILE)

# Add the venv to the list and save it
venv_list.add(str(venv_dir))
save_venv_list(cache_dir / "venvs.txt", venv_list)
save_venv_list(cache_dir / CACHE_FILE, venv_list)

# Process each package file in the venv
for file_path in get_large_package_files(venv_dir):
Expand Down
15 changes: 15 additions & 0 deletions ienv/src/ienv/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Stats will go here eventually
"""
from ienv.cache import CACHE_FILE, get_cache_dir, load_venv_list


def print_stats():
    """
    Print how many venvs have been squished, followed by their paths.

    The venv list is read from CACHE_FILE inside the cache dir. Paths are
    printed in sorted order so the listing is the same from run to run.
    """
    cache_dir = get_cache_dir()
    venvs = load_venv_list(cache_dir / CACHE_FILE)
    print(f"I've mangled {len(venvs)} venvs!")
    # Sort before printing: the loaded collection has no stable order.
    for venv in sorted(venvs):
        print(" ", venv)
28 changes: 28 additions & 0 deletions ienv/src/ienv/venv.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
"""
Deals with virtual environments.
"""

import argparse
import glob
import os
from pathlib import Path

MIN_FILE_SIZE = 4096 # 4k, size of a single block on most filesystems


def venv_dir(directory):
    """
    Custom argparse type for a venv path.

    Rejects the value with an ArgumentTypeError when the path is missing,
    is not a directory, or has nothing matching lib/*/site-packages
    beneath it. Otherwise the value is passed through untouched.
    """
    if not os.path.exists(directory):
        raise argparse.ArgumentTypeError(f"Directory '{directory}' does not exist.")

    if not os.path.isdir(directory):
        raise argparse.ArgumentTypeError(f"Path '{directory}' is not a directory.")

    site_packages_pattern = Path(directory) / "lib/*/site-packages/"
    found = glob.glob(str(site_packages_pattern))
    if found:
        return directory

    raise argparse.ArgumentTypeError(f"Directory '{directory}' is not a venv.")


def get_package_files(venv_dir):
"""
Return all the files under site-packages in the given venv.
"""
lib_dir = Path(venv_dir) / "lib"
for python_dir in glob.glob(f"{lib_dir}/python*"):
site_packages_dir = Path(python_dir) / "site-packages"
Expand All @@ -16,6 +41,9 @@ def get_package_files(venv_dir):


def get_large_package_files(venv_dir):
    """
    Yield every package file in the venv whose size is at least
    MIN_FILE_SIZE bytes.
    """
    candidates = get_package_files(venv_dir)
    yield from (
        candidate
        for candidate in candidates
        if candidate.stat().st_size >= MIN_FILE_SIZE
    )
14 changes: 1 addition & 13 deletions ienv/tests/test_paths.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import argparse
import tempfile
from pathlib import Path

import pytest

from ienv.cache import get_cache_dir, load_venv_list, save_venv_list
from ienv.main import venv_dir
from ienv.venv import get_package_files
from ienv.squish import get_cache_dir, load_venv_list, save_venv_list


@pytest.fixture
Expand Down Expand Up @@ -70,13 +68,3 @@ def test_save_venv_list():
save_venv_list(temp_file.name, {"venv1", "venv2"})
saved_list = load_venv_list(temp_file.name)
assert saved_list == {"venv1", "venv2"}


def test_get_site_package_files():
    """get_package_files should report both files placed in site-packages."""
    with tempfile.TemporaryDirectory() as root:
        site_packages = Path(root) / "lib/python3.8/site-packages"
        site_packages.mkdir(parents=True)
        for name in ("file1.txt", "file2.txt"):
            (site_packages / name).touch()

        found = list(get_package_files(root))
        assert len(found) == 2

0 comments on commit c5e917e

Please sign in to comment.