Edit on GitHub

datasources.truth.search_truth

Import scraped Truth Social data

  1"""
  2Import scraped Truth Social data
  3"""
  4import datetime
  5import re
  6
  7from backend.lib.search import Search
  8
  9
 10class SearchGab(Search):
 11    """
 12    Import scraped truth social data
 13    """
 14    type = "truthsocial-search"  # job ID
 15    category = "Search"  # category
 16    title = "Import scraped Truth Social data"  # title displayed in UI
 17    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
 18    extension = "ndjson"  # extension of result file, used internally and in UI
 19    is_from_zeeschuimer = True
 20    fake = ""
 21
 22    # not available as a processor for existing datasets
 23    accepts = [None]
 24
 25    def get_items(self, query):
 26        """
 27        Run custom search
 28
 29        Not available for Truth Social
 30        """
 31        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")
 32
 33    @staticmethod
 34    def map_item(post):
 35        """
 36        Parse Truth Social post
 37
 38        :param node:  Data as received from Truth Social
 39        :return dict:  Mapped item
 40        """
 41        
 42        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
 43        media_length = 0
 44        if "media_attachments" in post:
 45            media_length = len(post["media_attachments"])
 46
 47        if media_length > 0:
 48            mapped_item = {
 49                "id": post["id"],
 50                "created_at": post["created_at"],
 51                "body": post["content"],
 52                "url": post.get("url", None),
 53                "reblogs_count": post.get("reblogs_count", 0),
 54                "replies_count": post.get("replies_count", 0),
 55                
 56                "account_id": post["account"]["id"],
 57                "account_username": post["account"]["username"],
 58                "account_display_name": post["account"]["display_name"],
 59                "account_avatar": post["account"]["avatar"],
 60                "account_verified": post["account"]["verified"],
 61                "account_followers": post["account"]["followers_count"],
 62                "account_following": post["account"]["following_count"],
 63
 64                "media_id": post["media_attachments"][0].get("id", None),
 65                "media_type": post["media_attachments"][0].get("type", None),
 66                "media_url": post["media_attachments"][0].get("url", None),
 67                "media_preview_url": post["media_attachments"][0].get("preview_url", None),
 68
 69                #"group_id": post["group"].get("id", None),
 70                #"group_display_name": post["group"].get("display_name", None),
 71                #"group_avatar": post["group"].get("avatar", None),
 72                #"group_header": post["group"].get("header", None),
 73                #"group_members_count": post["group"].get("members_count", 0),
 74
 75                "thread_id": post["id"],
 76                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
 77            }        
 78
 79        else:
 80            mapped_item = {
 81                "id": post["id"],
 82                "created_at": post["created_at"],
 83                "body": post["content"],
 84                "url": post.get("url", None),
 85                "reblogs_count": post.get("reblogs_count", 0),
 86                "replies_count": post.get("replies_count", 0),
 87                
 88                "account_id": post["account"]["id"],
 89                "account_username": post["account"]["username"],
 90                "account_display_name": post["account"]["display_name"],
 91                "account_avatar": post["account"]["avatar"],
 92                "account_verified": post["account"]["verified"],
 93                "account_followers": post["account"]["followers_count"],
 94                "account_following": post["account"]["following_count"],
 95
 96                #"group_id": post["group"].get("id", None),
 97                #"group_display_name": post["group"].get("display_name", None),
 98                #"group_avatar": post["group"].get("avatar", None),
 99                #"group_header": post["group"].get("header", None),
100                #"group_members_count": post["group"].get("members_count", 0),
101
102                "thread_id": post["id"],
103                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
104            }       
105    
106        return mapped_item
class SearchGab(backend.lib.search.Search):
 11class SearchGab(Search):
 12    """
 13    Import scraped truth social data
 14    """
 15    type = "truthsocial-search"  # job ID
 16    category = "Search"  # category
 17    title = "Import scraped Truth Social data"  # title displayed in UI
 18    description = "Import Truth Social data collected with an external tool such as Zeeschuimer."  # description displayed in UI
 19    extension = "ndjson"  # extension of result file, used internally and in UI
 20    is_from_zeeschuimer = True
 21    fake = ""
 22
 23    # not available as a processor for existing datasets
 24    accepts = [None]
 25
 26    def get_items(self, query):
 27        """
 28        Run custom search
 29
 30        Not available for Truth Social
 31        """
 32        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")
 33
 34    @staticmethod
 35    def map_item(post):
 36        """
 37        Parse Truth Social post
 38
 39        :param node:  Data as received from Truth Social
 40        :return dict:  Mapped item
 41        """
 42        
 43        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
 44        media_length = 0
 45        if "media_attachments" in post:
 46            media_length = len(post["media_attachments"])
 47
 48        if media_length > 0:
 49            mapped_item = {
 50                "id": post["id"],
 51                "created_at": post["created_at"],
 52                "body": post["content"],
 53                "url": post.get("url", None),
 54                "reblogs_count": post.get("reblogs_count", 0),
 55                "replies_count": post.get("replies_count", 0),
 56                
 57                "account_id": post["account"]["id"],
 58                "account_username": post["account"]["username"],
 59                "account_display_name": post["account"]["display_name"],
 60                "account_avatar": post["account"]["avatar"],
 61                "account_verified": post["account"]["verified"],
 62                "account_followers": post["account"]["followers_count"],
 63                "account_following": post["account"]["following_count"],
 64
 65                "media_id": post["media_attachments"][0].get("id", None),
 66                "media_type": post["media_attachments"][0].get("type", None),
 67                "media_url": post["media_attachments"][0].get("url", None),
 68                "media_preview_url": post["media_attachments"][0].get("preview_url", None),
 69
 70                #"group_id": post["group"].get("id", None),
 71                #"group_display_name": post["group"].get("display_name", None),
 72                #"group_avatar": post["group"].get("avatar", None),
 73                #"group_header": post["group"].get("header", None),
 74                #"group_members_count": post["group"].get("members_count", 0),
 75
 76                "thread_id": post["id"],
 77                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
 78            }        
 79
 80        else:
 81            mapped_item = {
 82                "id": post["id"],
 83                "created_at": post["created_at"],
 84                "body": post["content"],
 85                "url": post.get("url", None),
 86                "reblogs_count": post.get("reblogs_count", 0),
 87                "replies_count": post.get("replies_count", 0),
 88                
 89                "account_id": post["account"]["id"],
 90                "account_username": post["account"]["username"],
 91                "account_display_name": post["account"]["display_name"],
 92                "account_avatar": post["account"]["avatar"],
 93                "account_verified": post["account"]["verified"],
 94                "account_followers": post["account"]["followers_count"],
 95                "account_following": post["account"]["following_count"],
 96
 97                #"group_id": post["group"].get("id", None),
 98                #"group_display_name": post["group"].get("display_name", None),
 99                #"group_avatar": post["group"].get("avatar", None),
100                #"group_header": post["group"].get("header", None),
101                #"group_members_count": post["group"].get("members_count", 0),
102
103                "thread_id": post["id"],
104                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
105            }       
106    
107        return mapped_item

Import scraped Truth Social data

type = 'truthsocial-search'
category = 'Search'
title = 'Import scraped Truth Social data'
description = 'Import Truth Social data collected with an external tool such as Zeeschuimer.'
extension = 'ndjson'
is_from_zeeschuimer = True
fake = ''
accepts = [None]
def get_items(self, query):
26    def get_items(self, query):
27        """
28        Run custom search
29
30        Not available for Truth Social
31        """
32        raise NotImplementedError("Truth Social datasets can only be created by importing data from elsewhere")

Run custom search

Not available for Truth Social

@staticmethod
def map_item(post):
 34    @staticmethod
 35    def map_item(post):
 36        """
 37        Parse Truth Social post
 38
 39        :param node:  Data as received from Truth Social
 40        :return dict:  Mapped item
 41        """
 42        
 43        post_time = datetime.datetime.strptime(post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
 44        media_length = 0
 45        if "media_attachments" in post:
 46            media_length = len(post["media_attachments"])
 47
 48        if media_length > 0:
 49            mapped_item = {
 50                "id": post["id"],
 51                "created_at": post["created_at"],
 52                "body": post["content"],
 53                "url": post.get("url", None),
 54                "reblogs_count": post.get("reblogs_count", 0),
 55                "replies_count": post.get("replies_count", 0),
 56                
 57                "account_id": post["account"]["id"],
 58                "account_username": post["account"]["username"],
 59                "account_display_name": post["account"]["display_name"],
 60                "account_avatar": post["account"]["avatar"],
 61                "account_verified": post["account"]["verified"],
 62                "account_followers": post["account"]["followers_count"],
 63                "account_following": post["account"]["following_count"],
 64
 65                "media_id": post["media_attachments"][0].get("id", None),
 66                "media_type": post["media_attachments"][0].get("type", None),
 67                "media_url": post["media_attachments"][0].get("url", None),
 68                "media_preview_url": post["media_attachments"][0].get("preview_url", None),
 69
 70                #"group_id": post["group"].get("id", None),
 71                #"group_display_name": post["group"].get("display_name", None),
 72                #"group_avatar": post["group"].get("avatar", None),
 73                #"group_header": post["group"].get("header", None),
 74                #"group_members_count": post["group"].get("members_count", 0),
 75
 76                "thread_id": post["id"],
 77                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
 78            }        
 79
 80        else:
 81            mapped_item = {
 82                "id": post["id"],
 83                "created_at": post["created_at"],
 84                "body": post["content"],
 85                "url": post.get("url", None),
 86                "reblogs_count": post.get("reblogs_count", 0),
 87                "replies_count": post.get("replies_count", 0),
 88                
 89                "account_id": post["account"]["id"],
 90                "account_username": post["account"]["username"],
 91                "account_display_name": post["account"]["display_name"],
 92                "account_avatar": post["account"]["avatar"],
 93                "account_verified": post["account"]["verified"],
 94                "account_followers": post["account"]["followers_count"],
 95                "account_following": post["account"]["following_count"],
 96
 97                #"group_id": post["group"].get("id", None),
 98                #"group_display_name": post["group"].get("display_name", None),
 99                #"group_avatar": post["group"].get("avatar", None),
100                #"group_header": post["group"].get("header", None),
101                #"group_members_count": post["group"].get("members_count", 0),
102
103                "thread_id": post["id"],
104                "timestamp": post_time.strftime("%Y-%m-%d %H:%M:%S")
105            }       
106    
107        return mapped_item

Parse Truth Social post

Parameters
  • post: Data as received from Truth Social
Returns

Mapped item