Skip to content

Update files to chunk #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 94 additions & 66 deletions adafruit_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@

from adafruit_connection_manager import get_connection_manager

SEEK_END = 2
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is os.SEEK_END in CPython


if not sys.implementation.name == "circuitpython":
from types import TracebackType
from typing import Any, Dict, Optional, Type
Expand Down Expand Up @@ -344,14 +346,6 @@ def iter_content(self, chunk_size: int = 1, decode_unicode: bool = False) -> byt
self.close()


def _generate_boundary_str():
hex_characters = "0123456789abcdef"
_boundary = ""
for _ in range(32):
_boundary += random.choice(hex_characters)
return _boundary


class Session:
"""HTTP session that shares sockets and ssl context."""

Expand All @@ -366,6 +360,60 @@ def __init__(
self._session_id = session_id
self._last_response = None

def _build_boundary_data(self, files: dict):
boundary_string = self._build_boundary_string()
content_length = 0
boundary_objects = []

for field_name, field_values in files.items():
file_name = field_values[0]
file_data = field_values[1]
Copy link
Owner

@FoamyGuy FoamyGuy Apr 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the name file_data makes me think that it's going to contain the actual data from the file rather than the FileIO stream object thing (not certain of the proper name) that allows you to read() the data, but isn't the actual data itself.

In the original code I think I was using file_handle for this concept which fits closer in my mind to what the variable is, but there may be something better than that as well.

In any case, I'm in favor of something other than _data suffix for this

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, it's either a string or a handle... I hate naming. If everything else is good, I'll thing through this as I add tests.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that's true. I was thinking only about the file case, not the "simple value" case with just a string.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only other bit I had a question about was the return that was added to _send() I'm not opposed to it, but want to understand more about it. Everything else here is looking good to me.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll start adding some tests!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay CPython Requests, uses fh for file_handle. I'll set it to that


boundary_data = f"--{boundary_string}\r\n"
boundary_data += f'Content-Disposition: form-data; name="{field_name}"; '
if file_name is not None:
boundary_data += f'filename="{file_name}"'
boundary_data += "\r\n"
if len(field_values) >= 3:
file_content_type = field_values[2]
boundary_data += f"Content-Type: {file_content_type}\r\n"
if len(field_values) >= 4:
file_headers = field_values[3]
for file_header_key, file_header_value in file_headers.items():
boundary_data += f"{file_header_key}: {file_header_value}\r\n"
boundary_data += "\r\n"

content_length += len(boundary_data)
boundary_objects.append(boundary_data)

if file_name is not None:
file_data.seek(0, SEEK_END)
content_length += file_data.tell()
file_data.seek(0)
boundary_objects.append(file_data)
boundary_data = ""
else:
boundary_data = file_data

boundary_data += "\r\n"
content_length += len(boundary_data)
boundary_objects.append(boundary_data)

boundary_data = f"--{boundary_string}--"

content_length += len(boundary_data)
boundary_objects.append(boundary_data)

return boundary_string, content_length, boundary_objects

@staticmethod
def _build_boundary_string():
hex_characters = "0123456789abcdef"
_boundary = ""
for _ in range(32):
_boundary += random.choice(hex_characters)
return _boundary

@staticmethod
def _check_headers(headers: Dict[str, str]):
if not isinstance(headers, dict):
Expand Down Expand Up @@ -399,10 +447,31 @@ def _send(socket: SocketType, data: bytes):
# Not EAGAIN; that was already handled.
raise OSError(errno.EIO)
total_sent += sent
return total_sent
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was missing

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it related to the rest of these changes or just something you noticed while working on this?

Is there some reference somewhere that documents it's need to return here? I only had a quick look but I didn't see any spots where self._send() is called that do anything with a return value from it so I'm guessing it won't make any actual difference as it is used now?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's won't. When I was building this out, I had a flow I needed to know how much was sent. I figured out another way, but this should still be there.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reference to somewhere that shows it should be there? like a link to the same thing inside of cpython requests, or any documentation somewhere? I'm not really doubting you, but I don't have any real insight into it so I have no way to really know whether it should or shouldn't.

In any case if it is added, the type annotations should be updated to show the return type ideally I think.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will remove for now


def _send_as_bytes(self, socket: SocketType, data: str):
return self._send(socket, bytes(data, "utf-8"))

def _send_boundary_objects(self, socket: SocketType, boundary_objects: Any):
for boundary_object in boundary_objects:
if isinstance(boundary_object, str):
self._send_as_bytes(socket, boundary_object)
else:
chunk_size = 32
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section is assuming the file is opened in binary mode, should probably add something to allow both

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I assumed that reading in bytes mode was a requirement because all of the examples I found and referenced used that.

Looking further into it now I found that the requests docs here https://requests.readthedocs.io/en/latest/user/quickstart/#post-a-multipart-encoded-file contain a red warning that binary mode is strongly recommended.
image

I checked a few of the top results on google for "requests upload file" and all of the ones I saw used rb mode even a few that were .txt and .csv which could have supported text mode.

While it does seem that CPython can work with a simple example of a .txt file using mode "r", I think I'm okay just not supporting that in the circuitpython version. It'll keep the code a little leaner and less complex, plus will force the adoption of the "strong recommendation" that requests documentation makes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me. I was going to look into this one you reviewed... We should still see if we can detect and error if it's not binary

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it would result in some kind of encoding error, but I'm certain and don't know where it might raise from off the top of my head.

It should also be possible to detect the difference with type() (or isinstance())

>>> f = open("something.txt", "rb")
>>> type(f)
<class 'FileIO'>
>>> f.close()
>>> f = open("something.txt", "r")
>>> type(f)
<class 'TextIOWrapper'>
>>> f.close()

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read the first byte. Seemed the safest way, incase someone built a different file handler class

if hasattr(boundary_object, "readinto"):
b = bytearray(chunk_size)
while True:
size = boundary_object.readinto(b)
if size == 0:
break
self._send(socket, b[:size])
else:
while True:
b = boundary_object.read(chunk_size)
if len(b) == 0:
break
self._send(socket, b)

def _send_header(self, socket, header, value):
if value is None:
return
Expand Down Expand Up @@ -440,6 +509,7 @@ def _send_request( # pylint: disable=too-many-arguments

# If data is sent and it's a dict, set content type header and convert to string
if data and isinstance(data, dict):
assert files is None
content_type_header = "application/x-www-form-urlencoded"
_post_data = ""
for k in data:
Expand All @@ -451,8 +521,18 @@ def _send_request( # pylint: disable=too-many-arguments
if data and isinstance(data, str):
data = bytes(data, "utf-8")

if data is None:
data = b""
# If files are send, build data to send and calculate length
content_length = 0
boundary_objects = None
if files and isinstance(files, dict):
boundary_string, content_length, boundary_objects = (
self._build_boundary_data(files)
)
content_type_header = f"multipart/form-data; boundary={boundary_string}"
else:
if data is None:
data = b""
content_length = len(data)

self._send_as_bytes(socket, method)
self._send(socket, b" /")
Expand All @@ -461,60 +541,6 @@ def _send_request( # pylint: disable=too-many-arguments

# create lower-case supplied header list
supplied_headers = {header.lower() for header in headers}
boundary_str = None

# pylint: disable=too-many-nested-blocks
if files is not None and isinstance(files, dict):
boundary_str = _generate_boundary_str()
content_type_header = f"multipart/form-data; boundary={boundary_str}"

for fieldname in files.keys():
if not fieldname.endswith("-name"):
if files[fieldname][0] is not None:
file_content = files[fieldname][1].read()

data += b"--" + boundary_str.encode() + b"\r\n"
data += (
b'Content-Disposition: form-data; name="'
+ fieldname.encode()
+ b'"; filename="'
+ files[fieldname][0].encode()
+ b'"\r\n'
)
if len(files[fieldname]) >= 3:
data += (
b"Content-Type: "
+ files[fieldname][2].encode()
+ b"\r\n"
)
if len(files[fieldname]) >= 4:
for custom_header_key in files[fieldname][3].keys():
data += (
custom_header_key.encode()
+ b": "
+ files[fieldname][3][custom_header_key].encode()
+ b"\r\n"
)
data += b"\r\n"
data += file_content + b"\r\n"
else:
# filename is None
data += b"--" + boundary_str.encode() + b"\r\n"
data += (
b'Content-Disposition: form-data; name="'
+ fieldname.encode()
+ b'"; \r\n'
)
if len(files[fieldname]) >= 3:
data += (
b"Content-Type: "
+ files[fieldname][2].encode()
+ b"\r\n"
)
data += b"\r\n"
data += files[fieldname][1].encode() + b"\r\n"

data += b"--" + boundary_str.encode() + b"--"

# Send headers
if not "host" in supplied_headers:
Expand All @@ -523,8 +549,8 @@ def _send_request( # pylint: disable=too-many-arguments
self._send_header(socket, "User-Agent", "Adafruit CircuitPython")
if content_type_header and not "content-type" in supplied_headers:
self._send_header(socket, "Content-Type", content_type_header)
if data and not "content-length" in supplied_headers:
self._send_header(socket, "Content-Length", str(len(data)))
if (data or files) and not "content-length" in supplied_headers:
self._send_header(socket, "Content-Length", str(content_length))
# Iterate over keys to avoid tuple alloc
for header in headers:
self._send_header(socket, header, headers[header])
Expand All @@ -533,6 +559,8 @@ def _send_request( # pylint: disable=too-many-arguments
# Send data
if data:
self._send(socket, bytes(data))
elif boundary_objects:
self._send_boundary_objects(socket, boundary_objects)

# pylint: disable=too-many-branches, too-many-statements, unused-argument, too-many-arguments, too-many-locals
def request(
Expand Down
4 changes: 2 additions & 2 deletions examples/wifi/expanded/requests_wifi_file_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
ssl_context = adafruit_connection_manager.get_radio_ssl_context(wifi.radio)
requests = adafruit_requests.Session(pool, ssl_context)

with open("raspi_snip.png", "rb") as file_handle:
with open("requests_wifi_file_upload_image.png", "rb") as file_handle:
files = {
"file": (
"raspi_snip.png",
"requests_wifi_file_upload_image.png",
file_handle,
"image/png",
{"CustomHeader": "BlinkaRocks"},
Expand Down