Skip to content

Commit

Permalink
feat: Improved google drive precheck (#376)
Browse files Browse the repository at this point in the history
* Improved google drive precheck
Added test for new precheck
Added new fixtures
  • Loading branch information
mateuszkuprowski authored Feb 12, 2025
1 parent 56e69a4 commit 9589843
Show file tree
Hide file tree
Showing 9 changed files with 745 additions and 4 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## 0.5.2-dev1

### Enhancements

* **Improved Google Drive precheck mechanism**
* **Added integration tests for Google Drive precheck and connector**

## 0.5.2-dev0

### Enhancements
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"directory_structure": [
"fake.docx",
"nested/fake.docx",
"recalibrating-risk-report.pdf",
"test-drive-doc.docx"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"identifier": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8",
"connector_type": "google_drive",
"source_identifiers": {
"filename": "test-drive-doc.docx",
"fullpath": "utic-test-ingest-fixtures/test-drive-doc.docx",
"rel_path": "test-drive-doc.docx"
},
"metadata": {
"url": "https://drive.google.com/uc?id=117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8&export=download",
"version": "32",
"record_locator": {
"file_id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8"
},
"date_created": "1686809758.931",
"date_modified": "1686809744.0",
"date_processed": null,
"permissions_data": [
{
"id": "anyoneWithLink",
"type": "anyone",
"kind": "drive#permission",
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a/ACg8ocJok2KRwwYvrEDkeZVCYosHOMoa52GZa2qIIC1jScCRoFLHaQ=s64",
"emailAddress": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "04774006893477068632",
"displayName": "ryan",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjXeWpu7QcZuYqIl3p1mwqzS8XGFJ4RqA3Xjljfkm1DcFZ9M7A=s64",
"emailAddress": "ryan@unstructured.io",
"role": "owner",
"deleted": false,
"pendingOwner": false
},
{
"id": "09147371668407854156",
"displayName": "roman",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWoGrFCgXcF6CtiBIBLnAfM68qUnQaJOcgvg3qzfQ3W8Ch6dA=s64",
"emailAddress": "roman@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
}
],
"filesize_bytes": null
},
"additional_metadata": {
"fileExtension": "docx",
"md5Checksum": "b6bd26fa317493cf447882754dac5e9d",
"mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"headRevisionId": "0B5wM1A2PIvPWOVFKa0U4MHQ1T2E0b0wzSWtGSkRZR1hwRUpzPQ",
"webViewLink": "https://docs.google.com/document/d/117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8/edit?usp=drivesdk&ouid=116756094827511368604&rtpof=true&sd=true",
"id": "117qrVqiCoR5EjYMsDHGdy3UMkEtKr9Q8",
"originalFilename": "test-drive-doc.docx",
"capabilities": {
"canChangeViewersCanCopyContent": false,
"canEdit": true,
"canCopy": true,
"canComment": true,
"canAddChildren": false,
"canDelete": false,
"canDownload": true,
"canListChildren": false,
"canRemoveChildren": false,
"canRename": true,
"canTrash": false,
"canReadRevisions": true,
"canChangeCopyRequiresWriterPermission": false,
"canMoveItemIntoTeamDrive": false,
"canUntrash": false,
"canModifyContent": true,
"canMoveItemOutOfDrive": false,
"canAddMyDriveParent": false,
"canRemoveMyDriveParent": true,
"canMoveItemWithinDrive": true,
"canShare": true,
"canMoveChildrenWithinDrive": false,
"canModifyContentRestriction": true,
"canChangeSecurityUpdateEnabled": false,
"canAcceptOwnership": false,
"canReadLabels": false,
"canModifyLabels": false,
"canModifyEditorContentRestriction": true,
"canModifyOwnerContentRestriction": false,
"canRemoveContentRestriction": false
},
"permissionIds": [
"anyoneWithLink",
"18298851591250030956",
"04774006893477068632",
"09147371668407854156"
],
"sha1Checksum": "70daaa25dd03bc56192ccc18fc25f53aa06b7b36",
"sha256Checksum": "9dd205eea9d2fa9f4e7a993697c8d5b6c4ec7a92ccd12fff772b1dff084deca8"
},
"reprocess": false,
"local_download_path": "/tmp/tmp9k8p4569/test-drive-doc.docx",
"display_name": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"identifier": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o",
"connector_type": "google_drive",
"source_identifiers": {
"filename": "fake.docx",
"fullpath": "utic-test-ingest-fixtures/fake.docx",
"rel_path": "fake.docx"
},
"metadata": {
"url": "https://drive.google.com/uc?id=1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o&export=download",
"version": "36",
"record_locator": {
"file_id": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o"
},
"date_created": "1686809759.687",
"date_modified": "1686809743.0",
"date_processed": null,
"permissions_data": [
{
"id": "anyoneWithLink",
"type": "anyone",
"kind": "drive#permission",
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a/ACg8ocJok2KRwwYvrEDkeZVCYosHOMoa52GZa2qIIC1jScCRoFLHaQ=s64",
"emailAddress": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "04774006893477068632",
"displayName": "ryan",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjXeWpu7QcZuYqIl3p1mwqzS8XGFJ4RqA3Xjljfkm1DcFZ9M7A=s64",
"emailAddress": "ryan@unstructured.io",
"role": "owner",
"deleted": false,
"pendingOwner": false
},
{
"id": "09147371668407854156",
"displayName": "roman",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWoGrFCgXcF6CtiBIBLnAfM68qUnQaJOcgvg3qzfQ3W8Ch6dA=s64",
"emailAddress": "roman@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
}
],
"filesize_bytes": null
},
"additional_metadata": {
"fileExtension": "docx",
"md5Checksum": "5ddd25c0e5d5f6996a93c154830dc7aa",
"mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"headRevisionId": "0B5wM1A2PIvPWOEoxZ2FjcDRsRUJsb2sveEQzTmJZZ1luY3JVPQ",
"webViewLink": "https://docs.google.com/document/d/1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o/edit?usp=drivesdk&ouid=116756094827511368604&rtpof=true&sd=true",
"id": "1SpQuE7jHz9nMt5hfQXsiok1SgIdRYX5o",
"originalFilename": "fake.docx",
"capabilities": {
"canChangeViewersCanCopyContent": false,
"canEdit": true,
"canCopy": true,
"canComment": true,
"canAddChildren": false,
"canDelete": false,
"canDownload": true,
"canListChildren": false,
"canRemoveChildren": false,
"canRename": true,
"canTrash": false,
"canReadRevisions": true,
"canChangeCopyRequiresWriterPermission": false,
"canMoveItemIntoTeamDrive": false,
"canUntrash": false,
"canModifyContent": true,
"canMoveItemOutOfDrive": false,
"canAddMyDriveParent": false,
"canRemoveMyDriveParent": true,
"canMoveItemWithinDrive": true,
"canShare": true,
"canMoveChildrenWithinDrive": false,
"canModifyContentRestriction": true,
"canChangeSecurityUpdateEnabled": false,
"canAcceptOwnership": false,
"canReadLabels": false,
"canModifyLabels": false,
"canModifyEditorContentRestriction": true,
"canModifyOwnerContentRestriction": false,
"canRemoveContentRestriction": false
},
"permissionIds": [
"anyoneWithLink",
"18298851591250030956",
"04774006893477068632",
"09147371668407854156"
],
"sha1Checksum": "07efd2a6bd828801d8d1d4bd77fcb15120bbad94",
"sha256Checksum": "690b185fb657a76dfe5d304911e720f8998b328edb6677cbf2c846152284dfb3"
},
"reprocess": false,
"local_download_path": "/tmp/tmp9k8p4569/fake.docx",
"display_name": null
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"identifier": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV",
"connector_type": "google_drive",
"source_identifiers": {
"filename": "recalibrating-risk-report.pdf",
"fullpath": "utic-test-ingest-fixtures/recalibrating-risk-report.pdf",
"rel_path": "recalibrating-risk-report.pdf"
},
"metadata": {
"url": "https://drive.google.com/uc?id=1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV&export=download",
"version": "7",
"record_locator": {
"file_id": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV"
},
"date_created": "1718723636.34",
"date_modified": "1676196572.0",
"date_processed": null,
"permissions_data": [
{
"id": "anyoneWithLink",
"type": "anyone",
"kind": "drive#permission",
"role": "reader",
"allowFileDiscovery": false
},
{
"id": "18298851591250030956",
"displayName": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a/ACg8ocJok2KRwwYvrEDkeZVCYosHOMoa52GZa2qIIC1jScCRoFLHaQ=s64",
"emailAddress": "ingest@unstructured-ingest-test.iam.gserviceaccount.com",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "04774006893477068632",
"displayName": "ryan",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjXeWpu7QcZuYqIl3p1mwqzS8XGFJ4RqA3Xjljfkm1DcFZ9M7A=s64",
"emailAddress": "ryan@unstructured.io",
"role": "writer",
"deleted": false,
"pendingOwner": false
},
{
"id": "09147371668407854156",
"displayName": "roman",
"type": "user",
"kind": "drive#permission",
"photoLink": "https://lh3.googleusercontent.com/a-/ALV-UjWoGrFCgXcF6CtiBIBLnAfM68qUnQaJOcgvg3qzfQ3W8Ch6dA=s64",
"emailAddress": "roman@unstructured.io",
"role": "owner",
"deleted": false,
"pendingOwner": false
}
],
"filesize_bytes": null
},
"additional_metadata": {
"fileExtension": "pdf",
"md5Checksum": "e690f37ef36368a509d150f373a0bbe0",
"mimeType": "application/pdf",
"headRevisionId": "0B8fLPtk3k4KOdlRMRDkvM0JCQkdtTU1NdjdpK0ZlSTJadXNjPQ",
"webViewLink": "https://drive.google.com/file/d/1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV/view?usp=drivesdk",
"id": "1m1TUgyLv0hHdlsuL7DOWBAKQtvrhWNiV",
"originalFilename": "recalibrating-risk-report.pdf",
"capabilities": {
"canChangeViewersCanCopyContent": false,
"canEdit": true,
"canCopy": true,
"canComment": true,
"canAddChildren": false,
"canDelete": false,
"canDownload": true,
"canListChildren": false,
"canRemoveChildren": false,
"canRename": true,
"canTrash": false,
"canReadRevisions": true,
"canChangeCopyRequiresWriterPermission": false,
"canMoveItemIntoTeamDrive": false,
"canUntrash": false,
"canModifyContent": true,
"canMoveItemOutOfDrive": false,
"canAddMyDriveParent": false,
"canRemoveMyDriveParent": true,
"canMoveItemWithinDrive": true,
"canShare": true,
"canMoveChildrenWithinDrive": false,
"canModifyContentRestriction": true,
"canChangeSecurityUpdateEnabled": false,
"canAcceptOwnership": false,
"canReadLabels": false,
"canModifyLabels": false,
"canModifyEditorContentRestriction": true,
"canModifyOwnerContentRestriction": false,
"canRemoveContentRestriction": false
},
"permissionIds": [
"anyoneWithLink",
"18298851591250030956",
"04774006893477068632",
"09147371668407854156"
],
"sha1Checksum": "57e64b7be1e099ecf9dcd9832bc09c81ffc5fc44",
"sha256Checksum": "076d8d243312e0b03a0fe2191e924c37fe159dd01d6b6ce3a3b2b7c642c36245"
},
"reprocess": false,
"local_download_path": "/tmp/tmp9k8p4569/recalibrating-risk-report.pdf",
"display_name": null
}
Loading

0 comments on commit 9589843

Please sign in to comment.