# %%
"""Import tagged Tumblr posts into an Are.na channel.

Fetches every post from a Tumblr blog matching a tag, converts each
supported post type (photo, link, quote, text) into an Are.na block
payload, then POSTs the whole queue to an Are.na channel.
"""
from dotenv import load_dotenv

# Load credentials from .env before anything reads os.environ.
load_dotenv()

import datetime
import os

import requests
from bs4 import BeautifulSoup, NavigableString, Tag
import pytumblr
from markdownify import markdownify

# %%
# Log in to Tumblr (credentials come from the environment / .env file).
tumblr = pytumblr.TumblrRestClient(
    os.environ['TUMBLR_CONS_KEY'],
    os.environ['TUMBLR_CONS_SECRET'],
    os.environ['TUMBLR_AUTH_TOKEN'],
    os.environ['TUMBLR_AUTH_SECRET']
)

# %%
now = datetime.datetime.now()
arena_queue = []
timestamp = now.strftime('%Y.%m.%d-%H:%M')


# %%
def get_all_posts(client, blog, tag):
    '''
    Yield every post from *blog* matching *tag*, paging 20 at a time.

    Found here:
    https://stackoverflow.com/questions/47311845/print-more-than-20-posts-from-tumblr-api
    '''
    offset = 0
    while True:
        # BUG FIX: the original ignored the `client` parameter and called
        # the module-level `tumblr` object directly.
        response = client.posts(blog, limit=20, offset=offset, tag=tag,
                                reblog_info=False, notes_info=False)
        # Get the 'posts' field of the response
        posts = response['posts']
        if not posts:
            return
        for post in posts:
            yield post
        # move to the next offset
        offset += 20


# %%
def import_metadata(post):
    """Return a provenance footer describing the original Tumblr post."""
    date = post['date']
    link = post['post_url']
    t = post['type']
    import_info = f'\n------------\n' \
                  f'Imported from Tumblr on: {timestamp} \n' \
                  f'Originally posted as a {t} on {date} \n' \
                  f'{link}'
    return import_info


# %%
for post in get_all_posts(tumblr, 'the-air-pump', 'Internet Mythology'):
    if post['type'] == 'photo':
        # One Are.na image block per photo in the post.
        for photo in post['photos']:
            arena_post = {
                'source': photo['original_size']['url'],
                'description': markdownify(post['caption']),
                'title': post['summary']
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)
    elif post['type'] == 'link':
        arena_post = {
            'source': post['url'],
            'description': '',
            'title': post['title']
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    elif post['type'] == 'quote':
        text = markdownify(post['text'])
        source = markdownify(post['source'])
        arena_post = {
            'content': text + '\n - ' + 'Source: ' + source,
            'title': post['source'],
            'description': 'Source: ' + source
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    elif post['type'] == 'text':
        post_body = post['body']
        soup = BeautifulSoup(post_body, 'lxml')
        # BUG FIX: collect the images *before* extracting <figure> tags.
        # The original did `text_soup = soup` (an alias, not a copy), so
        # removing figures also removed any figure-wrapped <img> from the
        # soup that `find_all('img')` later searched, silently dropping
        # those images. The extracted Tag objects keep their attrs.
        images = soup.find_all('img')
        # extract text without images
        text_content = ''
        for figure in soup.select('figure'):
            figure.extract()
        for p in soup.find_all('p'):
            for c in p.contents:
                # BUG FIX: the original compared type(c) against the
                # *strings* 'NavigableString' / 'Tag', which is always
                # False, so text posts always produced empty text_content.
                # (NavigableString is not a Tag, so branch order is safe.)
                if isinstance(c, NavigableString):
                    text_content += str(c) + '\n'
                elif isinstance(c, Tag):
                    if c.name == "a":
                        text_content += c.attrs['href']
        text_content = markdownify(text_content)
        # Make each extracted image into its own Are.na block.
        for image in images:
            arena_post = {
                'source': image.attrs['src'],
                'title': post['title'],
                'description': 'Image extracted from Tumblr text post:\n'
                               + text_content
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)
        # Post with only text content
        arena_post = {
            'title': post['title'],
            'content': text_content,
            'description': ''
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    # XXX TODO: handle sound, video posts and others that are skipped
    else:
        print('----')
        print('Unsupported type: ' + post['type'])
        print(post['post_url'])

# %%
# Are.na posting
url = "https://api.are.na/v2/channels/automation_inbox/blocks"
# Reverse the queue — presumably so the oldest post lands first and the
# channel mirrors the blog's chronology; confirm against the API's order.
arena_queue.reverse()
print('Posting ' + str(len(arena_queue)) + ' blocks')
for block in arena_queue:
    block['access_token'] = os.environ['ARENA_TOKEN']
    response = requests.post(url, data=block)
    # BUG FIX: the original discarded the response, silently ignoring
    # failed uploads. Report them so nothing vanishes unnoticed.
    if not response.ok:
        print(f'Failed ({response.status_code}): ' + str(block.get('title')))
print('Done')