diff --git a/Dockerfile b/Dockerfile index 0a50a5eaa3e..2d3e3f58d51 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,8 @@ RUN apt-get update # Install general dependencies. RUN apt-get install -y $PACKAGES -RUN apt-get update -RUN apt-get install -y $BUILD_DEPS +RUN apt-get update --fix-missing +RUN apt-get install -y $BUILD_DEPS --fix-missing # Install google chrome for cypress testing WORKDIR /usr/src diff --git a/app/app/settings.py b/app/app/settings.py index e39f5a2400b..64e3630d782 100644 --- a/app/app/settings.py +++ b/app/app/settings.py @@ -772,6 +772,8 @@ def callback(request): S3_REPORT_BUCKET = env('S3_REPORT_BUCKET', default='') # TODO S3_REPORT_PREFIX = env('S3_REPORT_PREFIX', default='') # TODO +S3_BSCI_SYBIL_BUCKET = env('S3_BSCI_SYBIL_BUCKET', default='') # TODO + INSTALLED_APPS += env.list('DEBUG_APPS', default=[]) diff --git a/app/grants/tasks.py b/app/grants/tasks.py index fd147013e26..39568bcf221 100644 --- a/app/grants/tasks.py +++ b/app/grants/tasks.py @@ -2,21 +2,24 @@ import math import time from decimal import Decimal +from io import StringIO from django.conf import settings from django.utils import timezone from django.utils.text import slugify +import boto3 from app.services import RedisService from celery import app from celery.utils.log import get_task_logger from dashboard.models import Profile from grants.models import Grant, GrantCLR, GrantCollection, Subscription -from grants.utils import get_clr_rounds_metadata, save_grant_to_notion +from grants.utils import bsci_script, get_clr_rounds_metadata, save_grant_to_notion from marketing.mails import ( new_contributions, new_grant, new_grant_admin, notion_failure_email, thank_you_for_supporting, ) -from townsquare.models import Comment +from perftools.models import StaticJsonEnv +from townsquare.models import Comment, SquelchProfile from unidecode import unidecode logger = get_task_logger(__name__) @@ -421,3 +424,23 @@ def generate_collection_cache(self, collection_id): 
@app.shared_task(bind=True, max_retries=3)
def process_bsci_sybil_csv(self, file_name, csv):
    """Load the BSCI sybil CSV and hand it to ``bsci_script``.

    Args:
        file_name: S3 object key of the CSV. When falsy (and no ``csv`` payload
            is given) the key stored under ``csv_url`` in the
            ``BSCI_SYBIL_TOKEN`` StaticJsonEnv entry is used instead.
        csv: Optional CSV payload. May be text, bytes, or any object exposing
            ``read()``. When falsy the CSV is downloaded from
            ``settings.S3_BSCI_SYBIL_BUCKET``.
    """
    if not csv:
        # The stored key is only needed when we actually have to hit S3.
        if not file_name:
            bsci_json = StaticJsonEnv.objects.get(key='BSCI_SYBIL_TOKEN')
            file_name = bsci_json.data['csv_url']

        client = boto3.client(
            's3',
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY
        )
        csv_object = client.get_object(Bucket=settings.S3_BSCI_SYBIL_BUCKET, Key=file_name)
        csv = csv_object['Body']

    # Normalise whatever we were given (S3 body, file object, bytes or text)
    # into text. Task arguments travel through the broker, so a caller-provided
    # payload is normally a plain string and has neither read() nor decode().
    if hasattr(csv, 'read'):
        csv = csv.read()
    if isinstance(csv, bytes):
        csv = csv.decode('utf-8')

    # run bsci script
    bsci_script(StringIO(csv))
def _find_profile(profile_model, user, prefer):
    """Return the profile referenced by ``user`` via ``id`` or ``handle``, or None.

    ``prefer`` names the key ('id' or 'handle') that is tried first; the other
    key is only used when the preferred one is missing, empty or NaN.
    """
    lookups = {'id': 'pk', 'handle': 'handle'}
    ordered_keys = [prefer] + [key for key in lookups if key != prefer]
    for key in ordered_keys:
        value = user.get(key)
        if value is None or value == '' or isNaN(value):
            continue
        return profile_model.objects.filter(**{lookups[key]: value}).first()
    return None


def toggle_user_sybil(sybil_users, non_sybil_users):
    """Mark profiles as sybil (squelched) or remove their sybil mark.

    Args:
        sybil_users: list of dicts for users to squelch. Each dict identifies
            the profile by ``handle`` (falling back to ``id``) and must carry a
            ``label`` and a ``comment``; a NaN comment is replaced by
            ``'added by bsci'``. Profiles that already have a SquelchProfile
            are left untouched.
        non_sybil_users: list of dicts for users whose SquelchProfile entries
            are deleted. Each dict identifies the profile by ``id`` (falling
            back to ``handle``, which is all the BSCI CSV provides).

    Failures for individual users are logged and do not stop the run.
    """
    if not sybil_users and not non_sybil_users:
        return

    # Imported inside the function, as the original implementation did.
    from dashboard.models import Profile

    # iterate through users which need to be marked as sybil
    for user in sybil_users or []:
        try:
            profile = _find_profile(Profile, user, prefer='handle')
            if not profile:
                logger.error("error: profile not found for %s as sybil.", user.get('handle'))
                continue

            label = user.get('label')
            comment = user.get('comment')

            # An empty CSV cell arrives as float NaN, which is truthy.
            if comment and isNaN(comment):
                comment = 'added by bsci'

            if not (label and comment):
                continue

            # only create an entry if the user is not squelched already
            if not SquelchProfile.objects.filter(profile=profile).exists():
                SquelchProfile.objects.create(
                    profile=profile,
                    label=label,
                    comments=comment
                )
        except Exception as e:
            logger.error("error: unable to mark user %s as sybil. %s", user, e)

    # iterate and remove the sybil mark from users
    for user in non_sybil_users or []:
        try:
            profile = _find_profile(Profile, user, prefer='id')
            if not profile:
                logger.error("error: profile not found for %s as non sybil.", user)
                continue
            SquelchProfile.objects.filter(profile=profile).delete()
        except Exception as e:
            logger.error("error: unable to mark %s as non sybil. %s", user, e)


def _take_bsci_rows(endpoint_df, mask, label_column):
    """Split the rows selected by ``mask`` off ``endpoint_df``.

    Returns ``(picked, remaining)``. ``picked`` is reduced to the columns
    ``handle``/``label``/``comment`` (``label`` taken from ``label_column``,
    ``comment`` from ``notes``). ``remaining`` drops every row whose handle was
    picked, so a handle can only ever land in one bucket.
    """
    picked = endpoint_df[mask]
    remaining = endpoint_df[~endpoint_df.handle.isin(picked.handle)]
    # rename() without inplace returns the renamed frame; with inplace=True it
    # returns None, which is what silently emptied most buckets before.
    picked = picked[['handle', label_column, 'notes']].rename(
        columns={label_column: 'label', 'notes': 'comment'}
    )
    return picked, remaining


def split_bsci_sybil_users(endpoint_df, ml_threshold=0.9):
    """Classify the rows of a BSCI export into sybil and non-sybil records.

    Buckets are evaluated in order and each handle is consumed by the first
    bucket that matches it:

    1. human labelled sybils (reviewer certain and is_sybil both >= 0.99)
    2. heuristic sybils (ml_score >= 0.99)
    3. ML predicted sybils (ml_score >= ``ml_threshold``)
    4. remaining human labelled rows with a reviewer certainty -> non sybil
    5. heuristic non sybils (ml_score <= 0.01)
    6. everything left over -> non sybil

    Args:
        endpoint_df: DataFrame with the columns ``handle``, ``flag_type_x``,
            ``flag_type_y``, ``reviewer_is_certain (0/1)_y``, ``is_sybil_y``,
            ``ml_score`` and ``notes``.
        ml_threshold: minimum ``ml_score`` for a 'Prediction' row to count as
            sybil. A higher value means fewer appeals but more sybils slipping
            through; a lower value means the opposite.

    Returns:
        ``(sybil_users, non_sybil_users)``: two lists of dicts with the keys
        ``handle``, ``label`` and ``comment``.
    """
    certain_column = 'reviewer_is_certain (0/1)_y'
    rules = (
        ('sybil', 'flag_type_y', lambda df: (
            (df['flag_type_y'] == 'Human') & (df[certain_column] >= 0.99) & (df['is_sybil_y'] >= 0.99)
        )),
        ('sybil', 'flag_type_x', lambda df: (
            (df['flag_type_x'] == 'Heuristic') & (df['ml_score'] >= 0.99)
        )),
        ('sybil', 'flag_type_x', lambda df: (
            (df['flag_type_x'] == 'Prediction') & (df['ml_score'] >= ml_threshold)
        )),
        # `!= np.nan` is always True; notna() is the actual "has a value" test.
        ('non_sybil', 'flag_type_y', lambda df: (
            (df['flag_type_y'] == 'Human') & df[certain_column].notna()
        )),
        ('non_sybil', 'flag_type_x', lambda df: (
            (df['flag_type_x'] == 'Heuristic') & (df['ml_score'] <= 0.01)
        )),
        # whatever was not claimed by an earlier bucket
        ('non_sybil', 'flag_type_x', lambda df: pd.Series(True, index=df.index)),
    )

    buckets = {'sybil': [], 'non_sybil': []}
    remaining = endpoint_df
    for bucket, label_column, build_mask in rules:
        picked, remaining = _take_bsci_rows(remaining, build_mask(remaining), label_column)
        buckets[bucket].append(picked)

    # DataFrame.append was removed in pandas 2.0; concat works on every version.
    sybil_df = pd.concat(buckets['sybil'], ignore_index=True)
    non_sybil_df = pd.concat(buckets['non_sybil'], ignore_index=True)

    return sybil_df.to_dict('records'), non_sybil_df.to_dict('records')


def bsci_script(csv):
    """Read a BSCI CSV export and apply its sybil / non-sybil verdicts.

    Args:
        csv: path or file-like object accepted by ``pandas.read_csv``.

    Any failure is logged and swallowed so that the calling task does not crash.
    """
    try:
        endpoint_df = pd.read_csv(csv)
        sybil_users, non_sybil_users = split_bsci_sybil_users(endpoint_df)
        toggle_user_sybil(sybil_users, non_sybil_users)
    except Exception as e:
        logger.error(f'error: bsci_sybil_script - {e}')


def isNaN(string):
    """Return True when ``string`` is NaN (the only value not equal to itself)."""
    return string != string
django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_GET, require_POST +import boto3 import dateutil.parser import pytz import requests @@ -60,12 +61,14 @@ TWITTER_CONSUMER_SECRET, ) from app.utils import allow_all_origins, get_profile +from boto3.s3.transfer import S3Transfer from bs4 import BeautifulSoup from cacheops import cached_view from dashboard.brightid_utils import get_brightid_status from dashboard.models import Activity, HackathonProject, Profile, SearchHistory from dashboard.tasks import increment_view_count from dashboard.utils import get_web3 +from dashboard.views import invalid_file_response from economy.models import Token as FTokens from economy.utils import convert_token_to_usdt from eth_account.messages import defunct_hash_message @@ -75,11 +78,12 @@ GrantTag, GrantType, MatchPledge, Subscription, ) from grants.tasks import ( - process_grant_creation_admin_email, process_grant_creation_email, process_notion_db_write, update_grant_metadata, + process_bsci_sybil_csv, process_grant_creation_admin_email, process_grant_creation_email, process_notion_db_write, + update_grant_metadata, ) from grants.utils import ( emoji_codes, generate_collection_thumbnail, generate_img_thumbnail_helper, get_clr_rounds_metadata, get_user_code, - is_grant_team_member, sync_payout, + is_grant_team_member, sync_payout, toggle_user_sybil, ) from kudos.models import BulkTransferCoupon, Token from marketing.mails import grant_cancellation, new_grant_flag_admin @@ -87,7 +91,7 @@ from perftools.models import JSONStore, StaticJsonEnv from ratelimit.decorators import ratelimit from retail.helpers import get_ip -from townsquare.models import Announcement, Favorite, PinnedPost, SquelchProfile +from townsquare.models import Announcement, Favorite, PinnedPost from townsquare.utils import can_pin from web3 import HTTPProvider, Web3 @@ -3668,7 +3672,8 @@ def get_trust_bonus(request): return allow_all_origins(JsonResponse(response, 
@csrf_exempt
@require_POST
def upload_sybil_csv(request):
    """Receive the sybil CSV produced by the BSCI team and queue its processing.

    The uploaded file is stored in ``settings.S3_BSCI_SYBIL_BUCKET`` under a
    timestamped key, the key is recorded as ``csv_url`` on the
    ``BSCI_SYBIL_TOKEN`` StaticJsonEnv entry, and ``process_bsci_sybil_csv`` is
    queued to mark users as sybil / not sybil.

    Expects a multipart POST with:
        csv: the CSV file (name must end in ``.csv``)
        token: the shared BSCI secret

    Returns:
        400 on a missing/invalid file or token, 500 when storing or queueing
        fails, otherwise 200 with ``{'success': 'ok'}``.
    """
    import logging
    from hmac import compare_digest

    log = logging.getLogger(__name__)

    uploaded_file = request.FILES.get('csv')

    # validation checks
    if not uploaded_file:
        return HttpResponseBadRequest("error: missing csv file")
    if not uploaded_file.name.endswith('.csv'):
        return HttpResponseBadRequest("error: wrong file type")

    try:
        bsci_json = StaticJsonEnv.objects.get(key='BSCI_SYBIL_TOKEN')
    except StaticJsonEnv.DoesNotExist:
        log.error('upload_sybil_csv: StaticJsonEnv BSCI_SYBIL_TOKEN is not configured')
        return JsonResponse({'success': 'failed'}, status=500)

    # This endpoint is csrf-exempt and triggers bulk squelching, so it must be
    # authenticated like the toggle_user_sybil API.
    # NOTE(review): assumes the secret lives under data['token'] and is posted
    # as the form field `token`, mirroring api_toggle_user_sybil — confirm.
    expected_token = bsci_json.data.get('token')
    token = request.POST.get('token')
    if not expected_token or not token or not compare_digest(
        str(token).encode('utf-8'), str(expected_token).encode('utf-8')
    ):
        return HttpResponseBadRequest("error: invalid token")

    # Include the time of day so that a second upload on the same day does not
    # overwrite the earlier file and the upload history is kept in S3.
    now = datetime.now()
    file_name = f'{now.strftime("%m-%d-%Y_%H-%M-%S")}.csv'

    try:
        # upload to S3
        client = boto3.client(
            's3',
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY
        )
        client.put_object(Bucket=settings.S3_BSCI_SYBIL_BUCKET, Key=file_name, Body=uploaded_file.read())

        # remember the latest upload
        bsci_json.data['csv_url'] = file_name
        bsci_json.save()

        # process squelch data
        process_bsci_sybil_csv.delay(file_name, None)

    except Exception:
        # keep the response generic, but leave a trace for whoever debugs it
        log.exception('upload_sybil_csv: unable to store or queue %s', file_name)
        return JsonResponse({'success': 'failed'}, status=500)

    return JsonResponse({'success': 'ok'}, status=200)
models.ForeignKey('dashboard.Profile', - on_delete=models.CASCADE, related_name='squelches') + LABEL_CHOICES = ( + ('Human', 'Human'), + ('Heuristic', 'Heuristic'), + ('Prediction', 'Prediction') + ) + profile = models.ForeignKey( + 'dashboard.Profile', + on_delete=models.CASCADE, + related_name='squelches' + ) + label = models.CharField( + choices=LABEL_CHOICES, + default='Human', + help_text='means used to mark user as sybil', + max_length=20 + ) comments = models.TextField(default='', blank=True) active = models.BooleanField(help_text='Is squelch applied?', default=True) diff --git a/requirements/base.txt b/requirements/base.txt index 1abda0b8cb4..cb2bd9156bc 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -117,3 +117,4 @@ graphqlclient==0.2.4 docutils==0.17.1 unidecode==1.2.0 drf-flex-fields==0.9.1 +pandas \ No newline at end of file