Skip to content

Commit

Permalink
WIP analytics:
Browse files Browse the repository at this point in the history
* Event.timestamp is no longer auto_now_add
  b/c it interferes with the initial migration
* migration 0027 now consolidates sessions that fall
  within SESSION_IDLE_TIMEOUT of each other and moves
  their events into the consolidated session
  • Loading branch information
evgenyfadeev committed Jun 3, 2024
1 parent 78b6338 commit 1a12ac8
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Migration(migrations.Migration):
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('event_type', models.CharField(choices=[(1, 'registered'), (2, 'logged in'), (3, 'logged out'), (4, 'question viewed'), (5, 'answer viewed'), (6, 'upvoted'), (7, 'downvoted'), (8, 'canceled vote'), (9, 'asked'), (10, 'answered'), (11, 'commented question'), (12, 'commented answer'), (13, 'retagged question'), (14, 'searched')], max_length=64)),
('timestamp', models.DateTimeField(auto_now_add=True)),
('timestamp', models.DateTimeField()),
('object_id', models.PositiveIntegerField(db_index=True)),
('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.contenttype')),
('session', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='askbot.session')),
Expand Down
68 changes: 55 additions & 13 deletions askbot/migrations/0027_populate_analytics_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# pylint: disable=missing-docstring, invalid-name
from datetime import timedelta
from django.db import migrations
from django.db.models import Q
from django.utils.translation import gettext_lazy as _
from askbot.utils.console import ProgressBar

Expand All @@ -28,17 +29,58 @@ def delete_analytics_objects(apps, schema_editor): #pylint: disable=missing-docs
Session.objects.all().delete()


def get_user_session(user_id, timestamp, apps): #pylint: disable=missing-docstring
def consolidate_sessions(sessions):
    """
    Merge the given sessions into a single one and return it.

    The session with the smallest ``created_at`` is kept. Its ``updated_at``
    is set to the largest ``updated_at`` found among all given sessions.
    Events of every other session are re-pointed to the kept session and
    those other sessions are then deleted.

    ``sessions`` is iterated several times, so it must be re-iterable.

    NOTE: the kept session's new ``updated_at`` is only assigned on the
    object; this function does not call ``save()`` on it.
    """
    keeper = min(sessions, key=lambda item: item.created_at)
    latest = max(sessions, key=lambda item: item.updated_at)
    keeper.updated_at = latest.updated_at

    for duplicate in sessions:
        if keeper != duplicate:
            # Move the events first so that they are no longer attached
            # to the session that is about to be deleted.
            duplicate.event_set.all().update(session=keeper)
            duplicate.delete()

    return keeper


def update_session_timestamps(session, timestamp):
    """
    Stretch the session's time span so that it includes ``timestamp``.

    ``updated_at`` becomes the later of its current value and ``timestamp``;
    ``created_at`` becomes the earlier of its current value and ``timestamp``.
    The session is saved afterwards.
    """
    current_end = session.updated_at
    session.updated_at = current_end if current_end > timestamp else timestamp

    current_start = session.created_at
    session.created_at = current_start if current_start < timestamp else timestamp

    session.save()


def get_consolidated_user_session(user_id, timestamp, apps):
    """Return the session of the given user that should hold an event
    that happened at ``timestamp``, creating the session if necessary.

    A session matches when its ``[created_at, updated_at]`` span lies within
    SESSION_IDLE_TIMEOUT of ``timestamp``, i.e. the span overlaps the window
    ``[timestamp - SESSION_IDLE_TIMEOUT, timestamp + SESSION_IDLE_TIMEOUT]``.
    This also matches a long session that contains ``timestamp`` while both
    of its endpoints are further away than the timeout.

    * several matches - they are glued together with ``consolidate_sessions``
      and the result is extended to include ``timestamp``;
    * one match - it is extended to include ``timestamp``;
    * no match - a new session is created with
      ``created_at == updated_at == timestamp``.

    ``apps`` is the app registry used to look up the ``Session`` model.

    NOTE(review): the overlap test assumes ``created_at <= updated_at`` for
    every stored session - confirm no rows violate this.
    """
    Session = apps.get_model('askbot', 'Session')
    window_start = timestamp - SESSION_IDLE_TIMEOUT
    window_end = timestamp + SESSION_IDLE_TIMEOUT

    # Interval-overlap test: the session starts before the window ends
    # and ends after the window starts. Evaluated once into a list, so the
    # same objects are counted, consolidated and returned.
    sessions = list(
        Session.objects.filter(
            user_id=user_id,
            created_at__lte=window_end,
            updated_at__gte=window_start,
        )
    )

    if not sessions:
        return Session.objects.create(user_id=user_id,
                                      created_at=timestamp,
                                      updated_at=timestamp)

    if len(sessions) > 1:
        session = consolidate_sessions(sessions)
    else:
        session = sessions[0]

    # Saves the session, which also persists the updated_at value
    # assigned during consolidation.
    update_session_timestamps(session, timestamp)
    return session


def populate_user_registered_events(apps): #pylint: disable=missing-docstring
Expand All @@ -55,7 +97,7 @@ def populate_user_registered_events(apps): #pylint: disable=missing-docstring
message = 'Populating user registered events'
for user in ProgressBar(users.iterator(), count, message):
timestamp = user.date_joined
session = get_user_session(user.pk, timestamp, apps)
session = get_consolidated_user_session(user.pk, timestamp, apps)
Event.objects.create(
session=session,
event_type=1,
Expand All @@ -79,7 +121,7 @@ def populate_question_viewed_events(apps): #pylint: disable=missing-docstring
message = 'Populating Question View events'
for view in ProgressBar(views.iterator(), count, message):
timestamp = view.when
session = get_user_session(view.who_id, timestamp, apps)
session = get_consolidated_user_session(view.who_id, timestamp, apps)
Event.objects.create(
session=session,
event_type=4,
Expand All @@ -102,7 +144,7 @@ def populate_voted_events(apps, activity_type=None, event_type=None, message=Non
acts = Activity.objects.filter(activity_type=activity_type).only(*activity_fields)
count = acts.count()
for act in ProgressBar(acts.iterator(), count, message):
session = get_user_session(act.user_id, act.active_at, apps)
session = get_consolidated_user_session(act.user_id, act.active_at, apps)

if act.content_type.model != 'vote':
continue
Expand Down Expand Up @@ -134,7 +176,7 @@ def populate_posted_events(apps, post_type=None, event_type=None, message=None):
count = posts.count()
for post in ProgressBar(posts.iterator(), count, message):
timestamp = post.added_at
session = get_user_session(post.author_id, timestamp, apps)
session = get_consolidated_user_session(post.author_id, timestamp, apps)
Event.objects.create(
session=session,
event_type=event_type,
Expand All @@ -157,7 +199,7 @@ def populate_commented_events(apps, parent_post_type=None, event_type=None, mess
count = comments.count()
for question in ProgressBar(comments.iterator(), count, message):
timestamp = question.added_at
session = get_user_session(question.author_id, timestamp, apps)
session = get_consolidated_user_session(question.author_id, timestamp, apps)
Event.objects.create(
session=session,
event_type=event_type,
Expand All @@ -181,7 +223,7 @@ def populate_retagged_question_events(apps): #pylint: disable=missing-docstring
if act.content_type_id != post_content_type_id:
continue

session = get_user_session(act.user_id, act.active_at, apps)
session = get_consolidated_user_session(act.user_id, act.active_at, apps)
Event.objects.create(
session=session,
event_type=13,
Expand Down
4 changes: 2 additions & 2 deletions askbot/models/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Session(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True)
ip_address = models.GenericIPAddressField(null=True, blank=True)
user_agent = models.CharField(max_length=512, null=True, blank=True)
created_at = models.DateTimeField() # no auto_now_ad or auto_now for created_at and updated_at
created_at = models.DateTimeField() # no auto_now_add or auto_now for created_at and updated_at
updated_at = models.DateTimeField() # b/c we want to set it manually for the testing purposes

def __str__(self):
Expand All @@ -122,7 +122,7 @@ class Event(models.Model):
"""Analytics event"""
session = models.ForeignKey(Session, on_delete=models.CASCADE)
event_type = models.CharField(max_length=64, choices=EVENT_TYPES)
timestamp = models.DateTimeField(auto_now_add=True)
timestamp = models.DateTimeField() # no auto_now_add b/c it interferes with the initial migration
content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
object_id = models.PositiveIntegerField(db_index=True)
content_object = GenericForeignKey('content_type', 'object_id')
Expand Down

0 comments on commit 1a12ac8

Please sign in to comment.