EntityScript™: Cognitive Operations Resource Enclave™

core_url


# -*- coding: utf-8 -*-
"""
COPYRIGHT (C) 2020-2023 NEW ENTITY OPERATIONS INC. ALL RIGHTS RESERVED
INSTANCE: core_url
MODIFIED: 2023/05/28
OVERVIEW:

core_url holds retrieval methods for URL and other network based operations.

You can also modify the system language to implement various URL and
other network-based requests.

"""
__version__ = "0.0.8"
__author__ = "Ryan McKenna"
__copyright__ = "Copyright (C) 2020-2023 New Entity Operations Inc."
__credits__ = [
 "Ryan McKenna",
 "New Entity Operations Inc.", "New Entity Operations, LLC"]
__email__ = "Operator@NewEntityOperations.com"
__license__ = "New Entity License"
__maintainer__ = "Ryan McKenna"
__status__ = "Production"

## MODE-> facilities
from MODE.facilities import (bin_backward, bin_forward, BUCKET_FLUSH_URL,
 BUCKET_URL, build_opener, CERT_NONE, CReal, Cure, gaierror,
 install_opener, links, MAX_RETRIES_BEFORE_TIMEOUT, MAX_TIMEOUT_SECONDS,
 oFo, PATH_INSTANCE, ProxyHandler, reached, Request, socket, SUI, terms_url,
 TimeoutError, url_list, URLError, urlencode, urlopen, urlparse, VUI, WEB_BIN,
 _create_unverified_context)

## MODE.debug_steps
from MODE.debug_steps import DEBUG_STEPS

## MODE.debug_url
from MODE.debug_url import DEBUG_URL

## * imports don't import _ modules, so _ modules must be imported
##  from settings directly, or explicitly in facilities. Be careful
##  not to introduce a circular import error...
#from MODE.settings import _under_score_module_to_import

## Custom Imports:
from core_middlelayer import (DIRDATA,
 SLUG_DEFAULT_NETWORK, SLUG_BOOKMARKS_ONLY, SLUG_OUTPUT_URL,
 OUTPUT_FLUSH, YOUR_SET_LOCATION, YOUR_USER_AGENT,
 PROXY_GATE_KEY, PROXY_GATE_URL, PROXY_GATE_USER,
 PROXY_GATE_PORT, PROXY_GATE_PROTOCOL, PROXY_GATE_SUFFIX)

## Join the bookmark slug and the url slug into the _OUTPUT connector
SLUG_OUTPUT = SLUG_BOOKMARKS_ONLY + SLUG_OUTPUT_URL

## SOURCED_CONTENT_*: the value recorded in place of content when a request
## fails
SOURCED_CONTENT_REQUEST_ERROR = terms_url.REQUEST_ERROR

DEBUG_STEPS.step_initialize_network(MODE="START")

## Map PROXY_GATE_PROTOCOL onto the proxy gate target, assembled in order as
## SUFFIX, USER, seperate, KEY, at, URL, seperate, PORT
proxy_handler = ProxyHandler({
 PROXY_GATE_PROTOCOL: (
  PROXY_GATE_SUFFIX
  + PROXY_GATE_USER + Cure.seperate
  + PROXY_GATE_KEY + Cure.at
  + PROXY_GATE_URL + Cure.seperate
  + PROXY_GATE_PORT),
})

## Build the web dock around the proxy handler, give it your user agent
## header and install it as the opener
WEB_DOCK = build_opener(proxy_handler)
WEB_DOCK.addheaders = [(terms_url.USER_AGENT, YOUR_USER_AGENT)]
install_opener(WEB_DOCK)

## Disabled alternative: a default context with verification switched off
#SET_CONTEXT = create_default_context()
#SET_CONTEXT.check_hostname = False
#SET_CONTEXT.verify_mode = CERT_NONE

## Backend Network Request functions
class BackendFacilitator:
 """
 Linker object to the Backend interface for accessing the bookmark subsystem

 Both methods work on the file at PATH_INSTANCE+SLUG_OUTPUT and report what
 they do through DEBUG_URL. Neither uses instance state, so they are called
 on the class: BackendFacilitator.check_bookmark_bin()
 """
 @staticmethod
 def check_bookmark_bin():
  """
  Total the numeric records held in the bookmark_bin

  Every line is parsed as an integer and added to a running total. A line
  that isn't a number is reported through DEBUG_URL.value_not_number and
  left out of the total. The total is reported through
  DEBUG_URL.total_records; nothing is returned.
  """
  total = 0
  with open(PATH_INSTANCE+SLUG_OUTPUT, oFo.read) as file:
   for line in file:
    try:
     ## Parse before adding: adding the raw line to an int raises a
     ## TypeError, which the ValueError handler below does not catch.
     ## int() ignores surrounding whitespace such as the line ending
     total += int(line)
     DEBUG_URL.check_bookmark_bin(STATUS="FOUND")
    except ValueError:
     DEBUG_URL.value_not_number(
      generated=terms_url.VALUE_NOT_NUMBER.format(line))
     DEBUG_URL.check_bookmark_bin(STATUS="EXHAUSED")
   DEBUG_URL.total_records(AMOUNT=terms_url.RECORDS_IN_ENTITY.format(total))
  ## The with block has already closed the file at this point

 @staticmethod
 def wipe_bookmark_bin():
  """
  Wipe the bookmark_bin from the temp storage basket

  Overwrites the file with Cure.muted and reports the wipe through
  DEBUG_URL.wipe_records. Nothing is returned.
  """
  with open(PATH_INSTANCE+SLUG_OUTPUT, oFo.write) as file:
   file.write(Cure.muted)
   DEBUG_URL.wipe_records(generated=terms_url.WIPED_RECORDS)
  ## NOTE(review): called without STATUS here, unlike the STATUS= calls in
  ## check_bookmark_bin - confirm DEBUG_URL.check_bookmark_bin has a default
  DEBUG_URL.check_bookmark_bin()

class get_DEFAULT_RESOURCE:
 """
 Access your default resource target
 """
 @staticmethod
 def GO(address=None):
  """
  Report the network location through DEBUG_URL.network_location and
  return it

  address: optional location. When left out (or None) SLUG_DEFAULT_NETWORK
   is used. Accepting the keyword keeps calls written as GO(address=...)
   working, the same call shape get_WEB_RESOURCE.GO takes, instead of
   failing with a TypeError.
  """
  if address is None:
   address = SLUG_DEFAULT_NETWORK
  DEBUG_URL.network_location(LOCATION=address)
  return address

class get_WEB_RESOURCE:
 """
 Hand back a caller supplied WEB_RESOURCE target once it has been reported
 """
 def GO(address):
  """
  Report address through DEBUG_URL.network_location, then return it as given
  """
  DEBUG_URL.network_location(LOCATION=address)
  return address

class get_WEB_RESOURCE_FROM_CLI:
 """
 Ask for a WEB_RESOURCE target on the command line
 """
 def GO():
  """
  Prompt with terms_url.WHAT_NETWORK_LOCATION, report what was typed through
  DEBUG_URL.network_location and return it
  """
  location = input(terms_url.WHAT_NETWORK_LOCATION)
  DEBUG_URL.network_location(LOCATION=location)
  return location

class urlConstructor:
 """
 A reusable url record holding an id, a url, options and a linker

 url = core_url.urlConstructor(
  id="*VCNKEY_ID*", url="https://www.ryanmckenna.com",
  options="*OPTIONS_HERE*", linker="*LINKER_ADDRESS*")

 str() and repr() both render the record by formatting SUI with self=self
 """
 def __init__(self, id, url, options, linker):
  ## Store the four fields exactly as they were given
  self.id, self.url = id, url
  self.options, self.linker = options, linker

 def __str__(self):
  return SUI.format(self=self)

 ## repr() shares the str() rendering
 __repr__ = __str__

class Move:
 """
 Provide a move instance that allows you to traverse backwards and forwards
 through link instances

 Both methods work on the shared bin_backward / bin_forward lists and use no
 instance state, so they are called on the class: Move.left(), Move.right()
 """
 @staticmethod
 def left():
  """
  Move backwards through a link list

  Takes bin_backward without its last entry, makes that the only entry of
  bin_forward, reports it through DEBUG_URL.move_backwards and returns it.
  """
  ## NOTE(review): [:-1] is a slice, so this is a list holding every entry
  ## except the last one, not a single entry - confirm that is what callers
  ## expect before changing it
  SLUG_URL_BACK = bin_backward[:-1]
  bin_forward.clear()
  bin_forward.append(SLUG_URL_BACK)
  DEBUG_URL.move_backwards(LOCATION=SLUG_URL_BACK)
  return SLUG_URL_BACK

 @staticmethod
 def right():
  """
  Move forwards through a link list

  Returns the first entry of bin_forward, or None when bin_forward is empty.
  """
  try:
   return bin_forward[0]
  except IndexError:
   ## An empty bin_forward raises IndexError and there is no entry to
   ## report or return, so reading bin_forward[0] again here would only
   ## fail a second time.
   ## TODO: the best way would be to search for the last known occurrence
   ## of the current address and then insert from there
   return None

class URLGrab:
 """
 Provide an instance for populating a temporary URL instance with slugs from
 the links list (links = [])

 The methods use no instance state and are called on the class:
 URLGrab.generate_URL(), URLGrab.flush(), URLGrab.update_bookmark_bin()
 """
 @staticmethod
 def generate_URL():
  """
  Add the second field of every entry in links to BUCKET_FLUSH_URL
  """
  for i in links:
   zX = i[1]
   # Add each value to the flush bucket
   DEBUG_URL.add_to_bucket_flush_url(VALUE=zX)
   BUCKET_FLUSH_URL.append(zX)

 @staticmethod
 def _write_bucket(path):
  """
  Write every BUCKET_FLUSH_URL entry to path, one formatted record each
  """
  ## % binds tighter than +, so 'Cure.replace+Cure.terminate_line % i'
  ## applied the value to Cure.terminate_line alone. Build the whole format
  ## first, then apply the value to it.
  ## NOTE(review): assumes the placeholder is meant to be filled in the
  ## combined Cure.replace+Cure.terminate_line string - confirm against Cure
  line_format = Cure.replace+Cure.terminate_line
  with open(path, oFo.write) as file:
   for i in BUCKET_FLUSH_URL:
    ## The 1-tuple keeps a tuple valued entry from being unpacked
    file.write(line_format % (i,))

 @staticmethod
 def flush():
  """
  Write the temporary instance to OUTPUT_FLUSH

  A missing location is reported through DEBUG_URL.file_not_found instead
  of being raised.
  """
  try:
   URLGrab._write_bucket(PATH_INSTANCE+DIRDATA+OUTPUT_FLUSH)
   DEBUG_URL.flushed_output_flush()
  except FileNotFoundError:
   DEBUG_URL.file_not_found(FILE=OUTPUT_FLUSH)

 @staticmethod
 def update_bookmark_bin():
  """
  Provide the SLUG_OUTPUT instance a list of temporary urls

  A missing location is reported through DEBUG_URL.file_not_found instead
  of being raised.
  """
  try:
   URLGrab._write_bucket(PATH_INSTANCE+SLUG_OUTPUT)
   DEBUG_URL.updated_bookmark_bin()
  except FileNotFoundError:
   DEBUG_URL.file_not_found(FILE=SLUG_OUTPUT)

## URL modifying routines
class URLsancuary:
 """
 Runtime holding area for temporary URL instances
 """
 ## Class level list shared by every user of URLsancuary
 sancuary_object_list = []

 def DisplayBin():
  """
  Report the whole WEB_BIN through DEBUG_URL.provide_web_bin_full
  """
  DEBUG_URL.provide_web_bin_full(BIN=WEB_BIN)

 def DisplayBinLast():
  """
  Report and return the newest WEB_BIN object

  When WEB_BIN is empty "EMPTY VALUE" is reported instead and nothing is
  returned.
  """
  try:
   newest = WEB_BIN[-1]
   DEBUG_URL.last_web_bin_item(item=newest)
   DEBUG_URL.provide_web_bin_item(BIN=newest)
  except IndexError:
   DEBUG_URL.provide_web_bin_item(BIN="EMPTY VALUE")
   return None
  return newest

class url_slug:
 """
 If the object is the default network, or your provided local network
 the machine can generate alt-values
 core_url.url_slug.GO(object="https://www.RyanMcKenna.com")
 headless is set to 0 by default, which will provide a return object to be
 handled by another instance. If headless is set to 1, you can populate
 url instances without returning them, which may be desired while crawling

 NOTE(review): every request is made with _create_unverified_context(), so
 certificates are presumably not verified - confirm that is intended.
 NOTE(review): if urlopen is urllib.request.urlopen, passing context= makes
 it build its own opener, so the opener installed at module level may not be
 the one used for these requests - confirm.
 """
 @staticmethod
 def _remember(document, backward=True):
  """
  Record document in BUCKET_URL and make it the only bin_forward entry.
  It is also appended to bin_backward unless backward is False.
  """
  BUCKET_URL.append(document)
  if backward:
   bin_backward.append(document)
  bin_forward.clear()
  bin_forward.append(document)

 @staticmethod
 def GO(headless=0, object=SLUG_DEFAULT_NETWORK):
  """
  Travel directly to a provided location

  headless: 1 to collect without returning anything, any other value to
   return what was collected.
  object: the location to read. SLUG_DEFAULT_NETWORK is opened directly,
   anything else is wrapped in a Request carrying YOUR_USER_AGENT.

  Returns the bytes read, or SOURCED_CONTENT_REQUEST_ERROR when the request
  fails with gaierror, TimeoutError or URLError. Returns None in headless
  mode.
  """
  is_default = object == SLUG_DEFAULT_NETWORK
  if is_default:
   ## GO() already answers with SLUG_DEFAULT_NETWORK, which is what object
   ## holds on this branch, so nothing needs to be passed along
   ADDRESS = get_DEFAULT_RESOURCE.GO()
   TARGET = ADDRESS
  else:
   ADDRESS = get_WEB_RESOURCE.GO(address=object)
   TARGET = Request(ADDRESS)
   TARGET.add_header(terms_url.USER_AGENT, YOUR_USER_AGENT)
  try:
   ## The with block closes the response even when read() fails
   with urlopen(TARGET,
    context=_create_unverified_context()) as code_to_render:
    SOURCED_CONTENT = code_to_render.read()
   DEBUG_URL.sourced_location_content(SOURCE=SOURCED_CONTENT)
   url_slug._remember(SOURCED_CONTENT)
   reached.append(ADDRESS)
   RESULT = SOURCED_CONTENT
  except (gaierror, TimeoutError, URLError) as e:
   DEBUG_URL.error_url_location(ERROR=e, TYPE=SOURCED_CONTENT_REQUEST_ERROR)
   reached.append(SOURCED_CONTENT_REQUEST_ERROR)
   ## Record the request error itself. Nothing was read when the request
   ## failed, so there is no sourced content to fall back on here.
   ## NOTE(review): only the default network branch reports the error
   ## through sourced_location_content and adds it to bin_backward. That
   ## difference is kept as it was - confirm whether it is intended
   if is_default:
    DEBUG_URL.sourced_location_content(SOURCE=SOURCED_CONTENT_REQUEST_ERROR)
   url_slug._remember(SOURCED_CONTENT_REQUEST_ERROR, backward=is_default)
   RESULT = SOURCED_CONTENT_REQUEST_ERROR
  ## If headless mode is enabled, don't provide a return object
  if headless == 1:
   return None
  return RESULT

## URL_SYSTEM
class URL_SYSTEM:
 """
 Provide a standard dynamic URL retrieval system that formulates a return
 object. Headless mode is set to 0 by default, and object can be overwritten
 The WEB_BIN will populate on each interaction, which will provide a render
 ready object. You may wish to provide a backup copied version of a Nth
 list of WEB_BIN objects that can be retrieved as a caching technique
 core_url.URL_SYSTEM.GO(object="https://www.NewEntityOperations.com")
 core_url.WEB_BIN[0]

 To get the return-ready object, generate the decoded value:
 str(core_url.WEB_BIN[0].decode())
 """
 def GO(headless=0, object=SLUG_DEFAULT_NETWORK):
  """
  Fetch object through url_slug.GO and make the result the only WEB_BIN
  entry

  The fetch always runs with headless=0, whatever headless was passed in.
  A TimeoutError is reported through DEBUG_URL.error_url_location and
  WEB_BIN is left as it was.
  """
  try:
   webframe = url_slug.GO(headless=0, object=object)
   ## Drop the previous webframe, then store the new one: in binary
   WEB_BIN.clear()
   WEB_BIN.append(webframe)
  except TimeoutError:
   DEBUG_URL.error_url_location(
    ERROR="TimeoutError",
    TYPE="The connection timed out. Check your network connection.")

## Z_Navigation
class Z_Navigation:
 """
 Provide a Z_Navigation routine that is meant to collect web-data without
 returning it to a web-frame: Similar to a traditional headless mode.

 The methods use no instance state and are called on the class:
 Z_Navigation.GO(object=...), Z_Navigation.update_slug()
 """
 @staticmethod
 def GO(headless=1, object=SLUG_DEFAULT_NETWORK):
  """
  Collect object through url_slug.GO without returning anything

  headless: accepted for signature parity. The fetch always runs with
   headless=1 because nothing is returned from here.
  object: the location to collect. It is now passed on to url_slug.GO, where
   SLUG_DEFAULT_NETWORK used to be fetched no matter what was given.
  """
  try:
   url_slug.GO(headless=1, object=object)
  except TimeoutError:
   DEBUG_URL.error_url_location(ERROR="TimeoutError",
    TYPE="The connection timed out. Check your network connection.")

 @staticmethod
 def update_slug():
  """
  Write the repr of every BUCKET_URL entry to the slug output file, then
  empty BUCKET_URL and report through DEBUG_URL.updated_slugs
  """
  ## % binds tighter than +, so 'Cure.replace+Cure.seperate % value' applied
  ## the value to Cure.seperate alone. Build the whole format first.
  ## NOTE(review): assumes the placeholder is meant to be filled in the
  ## combined Cure.replace+Cure.seperate string - confirm against Cure
  slug_format = Cure.replace+Cure.seperate
  ## NOTE(review): OUTPUT_SLUG is not among the names this module imports,
  ## so this line raises NameError unless it is provided some other way -
  ## confirm which constant is meant
  with open(PATH_INSTANCE+DIRDATA+OUTPUT_SLUG, oFo.write) as file:
   for i in BUCKET_URL:
    file.write(slug_format % (repr(i),))
   BUCKET_URL.clear()
  DEBUG_URL.updated_slugs()
  ## The with block has already closed the file at this point

# Build the module level url instance from the VUI values and announce it,
# which prints out the header to orient the reader
ConstructedClass = urlConstructor(
 id=VUI['URL_ID'], url=VUI['URL_URL'],
 options=VUI['URL_OPTIONS'], linker=VUI['URL_LINKER'])

DEBUG_URL.constructed_class_available(is_class=ConstructedClass, type="url")

# Close the step opened by step_initialize_network(MODE="START")
DEBUG_STEPS.step_initialize_network(MODE="STOP")
Return HOME