Source code for dpest.wheat.overview
import yaml
from dpest.functions import *
[docs]
def overview(
    treatment = None,
    overview_file_path = None,
    output_path = None,
    variable_classifications = None,
    overview_ins_first_line = None,
    mrk = '~',
    smk = '!',
):
    """
    Create a PEST instruction (.ins) file from an OVERVIEW.OUT file based on specified filters.

    Args:
        treatment (str): The treatment to filter the data. (Required)
        overview_file_path (str): Path to the OVERVIEW.OUT file to read. (Required)
        output_path (str, optional): Directory where the generated .ins file will be saved.
            Defaults to the current working directory if not provided.
        variable_classifications (dict, optional): Mapping of variable names to their respective categories.
            Defaults to values from the YAML configuration if not provided.
        overview_ins_first_line (str, optional): The first line of the .ins file. Defaults to the value in the YAML configuration.
        mrk (str, optional): Primary marker delimiter character for the instruction file. Defaults to '~'.
        smk (str, optional): Secondary marker delimiter character for the instruction file. Defaults to '!'.

    Returns:
        pandas.DataFrame: A filtered DataFrame used to generate the .ins file
            (columns: 'variable_name', 'value_measured', 'group').
        str: The full path to the generated .ins file (output_new_file_path).
        On ValueError the error is printed and None is returned instead.

    Raises:
        FileNotFoundError: If the arguments.yml configuration file cannot be found
            (and, via validate_file, if the OVERVIEW.OUT file cannot be found).
    """
    # Keys used to look up default values in arguments.yml
    yml_file_block = 'OVERVIEW_FILE'
    yaml_file_variables = 'INS_FILE_VARIABLES'
    yaml_variable_classifications = 'VARIABLE_CLASSIFICATIONS'
    try:
        ## Get the yaml_data
        # arguments.yml lives alongside this module
        current_dir = os.path.dirname(os.path.abspath(__file__))
        arguments_file = os.path.join(current_dir, 'arguments.yml')
        # Ensure the YAML file exists
        if not os.path.isfile(arguments_file):
            raise FileNotFoundError(f"YAML file not found: {arguments_file}")
        # Load YAML configuration (safe_load: never executes arbitrary tags)
        with open(arguments_file, 'r') as yml_file:
            yaml_data = yaml.safe_load(yml_file)
        # Validate treatment
        if treatment is None:
            raise ValueError("The 'treatment' argument is required and must be specified by the user.")
        # Validate overview_file_path; called for its validation side effect
        # (raises on a missing or non-.OUT path), the original path is used below.
        validate_file(overview_file_path, '.OUT')
        # Validate marker delimiters using the validate_marker() function
        mrk = validate_marker(mrk, "mrk")
        smk = validate_marker(smk, "smk")
        # Ensure mrk and smk are different (PEST requires distinct delimiters)
        if mrk == smk:
            raise ValueError("mrk and smk must be different characters.")
        # Load default arguments from the YAML file if not provided
        if overview_ins_first_line is None:
            function_arguments = yaml_data[yml_file_block][yaml_file_variables]
            overview_ins_first_line = function_arguments['first_line']
        if variable_classifications is None:
            variable_classifications = yaml_data[yml_file_block][yaml_variable_classifications]
        # Read and parse the overview file
        overview_df, header_line = extract_simulation_data(overview_file_path)
        # Filter the DataFrame for the specified treatment
        filtered_df = overview_df.loc[
            (overview_df['treatment'] == treatment)
        ].copy()
        # Fail early if the treatment yielded no rows
        if filtered_df.empty:
            raise ValueError(
                f"No data found for treatment '{treatment}'. Please check if the treatment exists in the OVERVIEW.OUT file.")
        # Map variables to their respective groups
        filtered_df['group'] = filtered_df['variable'].map(variable_classifications)
        # Remove rows where 'value_measured' column contains NaN values
        filtered_df = filtered_df.dropna(subset=['value_measured'])
        # 'position_adjusted' is the line offset relative to the previous kept row,
        # as required by PEST's "l<n>" advance-line instruction
        filtered_df['position_adjusted'] = filtered_df['position'] - filtered_df['position'].shift(1)
        # Ensure the first row retains its original (absolute) position
        filtered_df.loc[filtered_df.index[0], 'position_adjusted'] = filtered_df.loc[filtered_df.index[0], 'position']
        # Transform the variable names from the OVERVIEW file to fit the max 20 characters required by PEST
        filtered_df = process_variable_names(filtered_df)
        # Generate the .ins file content: one line-advance + marker + observation per row
        output_text = ""
        for _, row in filtered_df.iterrows():
            output_text += f"l{row['position_adjusted']} {mrk}{row['variable']}{mrk} {smk}{row['variable_name']}{smk}\n"
        # Combine the content into the full .ins file structure
        ins_file_content = f"{overview_ins_first_line} {mrk}\n{mrk}{treatment}{mrk}\n{mrk}{header_line[1:].strip()}{mrk}\n{output_text}"
        # Validate output_path
        output_path = validate_output_path(output_path)
        # Create the path and file name for the new file
        output_filename = os.path.basename(overview_file_path).replace('.OUT', '.ins')
        output_new_file_path = os.path.join(output_path, output_filename)
        # Write the generated content to the .ins file
        with open(output_new_file_path, 'w') as ins_file:
            ins_file.write(ins_file_content)
        print(f"OVERVIEW.INS file generated and saved to: {output_new_file_path}")
        # Keep only the columns callers need from the exported dataframe
        output_overview_df = filtered_df[['variable_name', 'value_measured', 'group']]
        return output_overview_df, output_new_file_path
    except ValueError as ve:
        # Deliberate best-effort behavior: report the problem and return None
        # rather than propagate; other exceptions (e.g. FileNotFoundError) propagate.
        print(f"ValueError: {ve}")