# %%
#from arena import Arena
from dotenv import load_dotenv
load_dotenv()
import os
import datetime
import requests
from bs4 import BeautifulSoup, NavigableString, Tag
import pytumblr
from markdownify import markdownify
#%%
# Log in to Tumblr
tumblr = pytumblr.TumblrRestClient(
    os.environ['TUMBLR_CONS_KEY'],
    os.environ['TUMBLR_CONS_SECRET'],
    os.environ['TUMBLR_AUTH_TOKEN'],
    os.environ['TUMBLR_AUTH_SECRET']
)
# tumblr.info()
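# A minimal sanity check, added as a sketch (not in the original script):
# fail fast if any credential this script needs was not loaded from .env
for key in ('TUMBLR_CONS_KEY', 'TUMBLR_CONS_SECRET', 'TUMBLR_AUTH_TOKEN',
            'TUMBLR_AUTH_SECRET', 'ARENA_TOKEN'):
    assert os.environ.get(key), f'Missing environment variable: {key}'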
#%%
now = datetime.datetime.now()
arena_queue = []
timestamp = now.strftime('%Y.%m.%d-%H:%M')
#%%
# Get posts from Tumblr
#tumblr.posts('the-air-pump', tag='CFS')
def get_all_posts(client, blog, tag):
    '''
    Yield every post from a blog with the given tag, paging through the
    Tumblr API 20 posts at a time.
    Found here: https://stackoverflow.com/questions/47311845/print-more-than-20-posts-from-tumblr-api
    '''
    offset = 0
    while True:
        response = client.posts(blog, limit=20, offset=offset, tag=tag, reblog_info=False, notes_info=False)
        # Get the 'posts' field of the response
        posts = response['posts']
        if not posts:
            return
        for post in posts:
            yield post
        # Move to the next offset
        offset += 20
#%%
def import_metadata(post):
    '''Build a footer recording when and where the post was imported from.'''
    date = post['date']
    link = post['post_url']
    t = post['type']
    import_info = f'\n------------\n' \
                  f'Imported from Tumblr on: {timestamp} \n' \
                  f'Originally posted as a {t} on {date} \n' \
                  f'{link}'
    return import_info
# %%
for post in get_all_posts(tumblr, 'the-air-pump', 'Internet Mythology'):
    if post['type'] == 'photo':
        for photo in post['photos']:
            arena_post = {
                'source': photo['original_size']['url'],
                'description': markdownify(post['caption']),
                'title': post['summary']
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)
    elif post['type'] == 'link':
        arena_post = {
            'source': post['url'],
            'description': '',
            'title': post['title']
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    elif post['type'] == 'quote':
        text = markdownify(post['text'])
        source = markdownify(post['source'])
        arena_post = {
            'content': text + '\n - ' + 'Source: ' + source,
            'title': post['source'],
            'description': 'Source: ' + source
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    elif post['type'] == 'text':
        post_body = post['body']
        soup = BeautifulSoup(post_body, 'lxml')
        # Collect the images first: extracting the figures below removes
        # them from the tree, so find_all('img') would come up empty later
        images = soup.find_all('img')
        # Extract text without images
        for figure in soup.select('figure'):
            figure.extract()
        text_content = ''
        for p in soup.find_all('p'):
            for c in p.contents:
                if isinstance(c, NavigableString):
                    text_content += str(c) + '\n'
                elif isinstance(c, Tag):
                    if c.name == 'a':
                        text_content += c.attrs['href']
        text_content = markdownify(text_content)
        # Make each extracted image into its own post
        for image in images:
            arena_post = {
                'source': image.attrs['src'],
                'title': post['title'],
                'description': 'Image extracted from Tumblr text post:\n' + text_content
            }
            arena_post['description'] += import_metadata(post)
            arena_queue.append(arena_post)
        # Post with only the text content
        arena_post = {
            'title': post['title'],
            'content': text_content,
            'description': ''
        }
        arena_post['description'] += import_metadata(post)
        arena_queue.append(arena_post)
    # XXX TODO: handle sound, video and other post types that are skipped
    else:
        print('----')
        print('Unsupported type: ' + post['type'])
        print(post['post_url'])
#print(arena_queue)
#%%
# Are.na posting
url = "https://api.are.na/v2/channels/automation_inbox/blocks"
arena_queue.reverse()
print('Posting ' + str(len(arena_queue)) + ' blocks')
for block in arena_queue:
    block['access_token'] = os.environ['ARENA_TOKEN']
    x = requests.post(url, data=block)
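    # A minimal status check, added as a sketch (assumption, not in the
    # original: the Are.na v2 blocks endpoint returns 200/201 on success)
    if x.status_code not in (200, 201):
        print(f'Failed to post {block.get("title", "(untitled)")!r}: HTTP {x.status_code}')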
print('Done')