Skip to content

Commit c66d909

Browse files
authored
Limit ORM usage for ImapUids objects (#969)
* imapuids_for_message_query
* message_imapuids_exists
* Remove uid_accessor
* Cleanup parameter
* Use more descriptive variable name
1 parent 6c67bc5 commit c66d909

File tree

4 files changed

+48
-29
lines changed

4 files changed

+48
-29
lines changed

inbox/mailsync/backends/imap/common.py

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
from typing import List, Set
1717

1818
from sqlalchemy import bindparam, desc
19-
from sqlalchemy.orm import Session
19+
from sqlalchemy.orm import Query, Session
2020
from sqlalchemy.orm.exc import NoResultFound
2121
from sqlalchemy.sql.expression import func
2222

@@ -73,22 +73,33 @@ def lastseenuid(account_id, session, folder_id):
7373
return res or 0
7474

7575

76+
IMAPUID_PER_MESSAGE_SANITY_LIMIT = 100
77+
78+
7679
def update_message_metadata(
7780
session: Session, account: Account, message: Message, is_draft: bool
7881
) -> None:
7982
"""Update the message's metadata"""
80-
# Sort imapuids in a way that the ones that were added later come last
81-
now = datetime.utcnow()
82-
sorted_imapuids: List[ImapUid] = sorted(
83-
message.imapuids, key=lambda imapuid: imapuid.updated_at or now
83+
# Sort imapuids in a way that the ones that were added later come first.
84+
# There are non-conforming IMAP servers that can list the same message thousands of times
85+
# in the same folder. This is a workaround to limit the memory pressure caused by such
86+
# servers. The metadata is meaningless for such messages anyway.
87+
latest_imapuids = (
88+
imapuids_for_message_query(
89+
account_id=account.id,
90+
message_id=message.id,
91+
only_latest=IMAPUID_PER_MESSAGE_SANITY_LIMIT,
92+
)
93+
.with_session(session)
94+
.all()
8495
)
8596

86-
message.is_read = any(imapuid.is_seen for imapuid in sorted_imapuids)
87-
message.is_starred = any(imapuid.is_flagged for imapuid in sorted_imapuids)
97+
message.is_read = any(imapuid.is_seen for imapuid in latest_imapuids)
98+
message.is_starred = any(imapuid.is_flagged for imapuid in latest_imapuids)
8899
message.is_draft = is_draft
89100

90-
sorted_categories: List[Category] = [
91-
category for imapuid in sorted_imapuids for category in imapuid.categories
101+
latest_categories: List[Category] = [
102+
category for imapuid in latest_imapuids for category in imapuid.categories
92103
]
93104

94105
categories: Set[Category]
@@ -101,9 +112,9 @@ def update_message_metadata(
101112
# (and in turn one category) depending on the order they were returned
102113
# from the database. This makes it deterministic and more-correct because a message
103114
# is likely in a folder (and category) it was added to last.
104-
categories = {sorted_categories[-1]} if sorted_categories else set()
115+
categories = {latest_categories[0]} if latest_categories else set()
105116
elif account.category_type == "label":
106-
categories = set(sorted_categories)
117+
categories = set(latest_categories)
107118
else:
108119
raise AssertionError("Unreachable")
109120

@@ -198,6 +209,18 @@ def update_metadata(account_id, folder_id, folder_role, new_flags, session):
198209
log.info("Updated UID metadata", changed=change_count, out_of=len(new_flags))
199210

200211

212+
def imapuids_for_message_query(
213+
*, account_id: int, message_id: int, only_latest: int | None = None
214+
) -> Query:
215+
query = Query([ImapUid]).filter(
216+
ImapUid.account_id == account_id, ImapUid.message_id == message_id
217+
)
218+
if only_latest is not None:
219+
query = query.order_by(ImapUid.updated_at.desc()).limit(only_latest)
220+
221+
return query
222+
223+
201224
def remove_deleted_uids(account_id, folder_id, uids):
202225
"""
203226
Make sure you're holding a db write lock on the account. (We don't try
@@ -238,7 +261,13 @@ def remove_deleted_uids(account_id, folder_id, uids):
238261
db_session.delete(imapuid)
239262

240263
if message is not None:
241-
if not message.imapuids and message.is_draft:
264+
message_imapuids_exist = db_session.query(
265+
imapuids_for_message_query(
266+
account_id=account_id, message_id=message.id
267+
).exists()
268+
).scalar()
269+
270+
if not message_imapuids_exist and message.is_draft:
242271
# Synchronously delete drafts.
243272
thread = message.thread
244273
if thread is not None:
@@ -257,7 +286,7 @@ def remove_deleted_uids(account_id, folder_id, uids):
257286
update_message_metadata(
258287
db_session, account, message, message.is_draft
259288
)
260-
if not message.imapuids:
289+
if not message_imapuids_exist:
261290
# But don't outright delete messages. Just mark them as
262291
# 'deleted' and wait for the asynchronous
263292
# dangling-message-collector to delete them.

inbox/mailsync/backends/imap/monitor.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,6 @@ def start_delete_handler(self):
165165
account_id=self.account_id,
166166
namespace_id=self.namespace_id,
167167
provider_name=self.provider_name,
168-
uid_accessor=lambda m: m.imapuids,
169168
)
170169
self.delete_handler.start()
171170

inbox/mailsync/gc.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,6 @@ class DeleteHandler(InterruptibleThread):
4444
----------
4545
account_id, namespace_id: int
4646
IDs for the namespace to check.
47-
uid_accessor: function
48-
Function that takes a message and returns a list of associated uid
49-
objects. For IMAP sync, this would just be
50-
`uid_accessor=lambda m: m.imapuids`
5147
message_ttl: int
5248
Number of seconds to wait after a message is marked for deletion before
5349
deleting it for good.
@@ -59,15 +55,13 @@ def __init__(
5955
account_id,
6056
namespace_id,
6157
provider_name,
62-
uid_accessor,
6358
message_ttl=DEFAULT_MESSAGE_TTL,
6459
thread_ttl=DEFAULT_THREAD_TTL,
6560
):
6661
bind_context(self, "deletehandler", account_id)
6762
self.account_id = account_id
6863
self.namespace_id = namespace_id
6964
self.provider_name = provider_name
70-
self.uids_for_message = uid_accessor
7165
self.log = log.new(account_id=account_id)
7266
self.message_ttl = datetime.timedelta(seconds=message_ttl)
7367
self.thread_ttl = datetime.timedelta(seconds=thread_ttl)
@@ -106,14 +100,18 @@ def check(self, current_time):
106100
# If the message isn't *actually* dangling (i.e., it has
107101
# imapuids associated with it), undelete it.
108102
try:
109-
uids_for_message = self.uids_for_message(message)
103+
message_imapuids_exist = db_session.query(
104+
common.imapuids_for_message_query(
105+
account_id=self.account_id, message_id=message.id
106+
).exists()
107+
).scalar()
110108
except ObjectDeletedError:
111109
# It looks like we are expiring the session potentially when one message is deleted,
112110
# and then when accessing the IMAP uids, there is a lazy load trying to get the data.
113111
# If that object has also been deleted (how?) it raises this exception.
114112
continue
115113

116-
if uids_for_message:
114+
if message_imapuids_exist:
117115
message.deleted_at = None
118116
continue
119117

tests/imap/test_delete_handling.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,6 @@ def test_deletion_with_short_ttl(
8989
account_id=default_account.id,
9090
namespace_id=default_namespace.id,
9191
provider_name=default_account.provider,
92-
uid_accessor=lambda m: m.imapuids,
9392
message_ttl=0,
9493
thread_ttl=0,
9594
)
@@ -110,7 +109,6 @@ def test_thread_deletion_with_short_ttl(
110109
account_id=default_account.id,
111110
namespace_id=default_namespace.id,
112111
provider_name=default_account.provider,
113-
uid_accessor=lambda m: m.imapuids,
114112
message_ttl=0,
115113
thread_ttl=120,
116114
)
@@ -148,7 +146,6 @@ def test_non_orphaned_messages_get_unmarked(
148146
account_id=default_account.id,
149147
namespace_id=default_namespace.id,
150148
provider_name=default_account.provider,
151-
uid_accessor=lambda m: m.imapuids,
152149
message_ttl=0,
153150
)
154151
handler.check(marked_deleted_message.deleted_at + timedelta(seconds=1))
@@ -165,7 +162,6 @@ def test_threads_only_deleted_when_no_messages_left(
165162
account_id=default_account.id,
166163
namespace_id=default_namespace.id,
167164
provider_name=default_account.provider,
168-
uid_accessor=lambda m: m.imapuids,
169165
message_ttl=0,
170166
)
171167
# Add another message onto the thread
@@ -187,7 +183,6 @@ def test_deletion_deferred_with_longer_ttl(
187183
account_id=default_account.id,
188184
namespace_id=default_namespace.id,
189185
provider_name=default_account.provider,
190-
uid_accessor=lambda m: m.imapuids,
191186
message_ttl=5,
192187
)
193188
db.session.commit()
@@ -207,7 +202,6 @@ def test_deletion_creates_revision(
207202
account_id=default_account.id,
208203
namespace_id=default_namespace.id,
209204
provider_name=default_account.provider,
210-
uid_accessor=lambda m: m.imapuids,
211205
message_ttl=0,
212206
)
213207
handler.check(marked_deleted_message.deleted_at + timedelta(seconds=1))
@@ -270,7 +264,6 @@ def test_deleted_labels_get_gced(
270264
account_id=default_account.id,
271265
namespace_id=default_namespace.id,
272266
provider_name=default_account.provider,
273-
uid_accessor=lambda m: m.imapuids,
274267
message_ttl=0,
275268
)
276269
handler.gc_deleted_categories()

0 commit comments

Comments
 (0)